~sircmpwn/godocs.io

gddo: gddo-server: Clean up module fetching code v1 APPLIED

Adnan Maolood: 1
 gddo-server: Clean up module fetching code

 4 files changed, 98 insertions(+), 84 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/godocs.io/patches/23399/mbox | git am -3
Learn more about email & git
View this thread in the archives

[PATCH gddo] gddo-server: Clean up module fetching code Export this patch

---
 gddo-server/{crawl.go => fetch.go} | 156 ++++++++++++++++-------------
 gddo-server/gemini.go              |   5 +-
 gddo-server/http.go                |   5 +-
 gddo-server/server.go              |  16 ++-
 4 files changed, 98 insertions(+), 84 deletions(-)
 rename gddo-server/{crawl.go => fetch.go} (50%)

diff --git a/gddo-server/crawl.go b/gddo-server/fetch.go
similarity index 50%
rename from gddo-server/crawl.go
rename to gddo-server/fetch.go
index 766ce65..a425d24 100644
--- a/gddo-server/crawl.go
+++ b/gddo-server/fetch.go
@@ -37,89 +37,79 @@ func (v byVersion) Len() int           { return len(v) }
func (v byVersion) Less(i, j int) bool { return semver.Compare(v[i], v[j]) > 0 }
func (v byVersion) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

// crawl fetches package documentation and updates the database.
func (s *Server) crawl(ctx context.Context, modulePath, version string) (database.Module, error) {
// fetch fetches package documentation from the module proxy and updates the database.
func (s *Server) fetch(ctx context.Context, modulePath, version string) error {
	start := time.Now().UTC()

	if blocked, err := s.db.IsBlocked(ctx, modulePath); err != nil {
		return database.Module{}, err
	} else if blocked {
		return database.Module{}, ErrBlocked
	// Check if the module is blocked
	blocked, err := s.db.IsBlocked(ctx, modulePath)
	if err != nil {
		return err
	}
	if blocked {
		return ErrBlocked
	}

	// Get latest version
	var latest string
	if modulePath == stdlib.ModulePath {
		var err error
		latest, err = stdlib.ZipInfo(proxy.LatestVersion)
		if err != nil {
			return database.Module{}, err
		}
	} else {
		info, err := s.proxyClient.GetInfo(ctx, modulePath, proxy.LatestVersion)
		if err != nil {
			return database.Module{}, err
		}
		latest = info.Version
	latest, err := s.latestVersion(ctx, modulePath)
	if err != nil {
		return err
	}

	seriesPath, _, _ := module.SplitPathVersion(modulePath)

	mod, ok, err := s.db.GetModule(ctx, modulePath)
	if err != nil {
		return database.Module{}, err
	}
	if ok && version == "latest" && mod.Version == latest {
		// Update last crawl time
		mod.Updated = start
		if err := s.db.PutModule(ctx, mod); err != nil {
			return database.Module{}, err
	// Only update module information if the latest version was requested.
	if version == proxy.LatestVersion {
		// Retrieve the module from the database
		mod, ok, err := s.db.GetModule(ctx, modulePath)
		if err != nil {
			return err
		}

		if ok && mod.Version == latest {
			// Module is up-to-date. Update last crawl time only.
			mod.Updated = start
			if err := s.db.PutModule(ctx, mod); err != nil {
				return err
			}
			return nil
		}
		return mod, nil
	}

	if version == "latest" {
		version = latest
		// Retrieve the list of versions
		versions, err := s.moduleVersions(ctx, modulePath)
		if err != nil {
			return err
		}
		sort.Sort(byVersion(versions))

		// Update the module
		mod = database.Module{
			ModulePath: modulePath,
			SeriesPath: seriesPath,
			Version:    latest,
			Versions:   versions,
			Updated:    start,
		}
		if err := s.db.PutModule(ctx, mod); err != nil {
			return err
		}
	}

	// Add packages to the database
	// Retrieve module source code.
	src, err := source.Get(ctx, s.proxyClient, modulePath, version)
	if err != nil {
		return database.Module{}, err
		return err
	}
	if src.Path != modulePath {
		// The import paths don't match
		return database.Module{}, ErrMismatch
		return ErrMismatch
	}
	if len(src.Packages) == 0 {
		// The module has no packages
		return database.Module{}, ErrNoPackages
	}

	// Retrieve the list of versions
	var versions []string
	if modulePath == stdlib.ModulePath {
		versions, err = stdlib.Versions()
	} else {
		versions, err = s.proxyClient.ListVersions(ctx, modulePath)
	}
	if err != nil {
		return database.Module{}, err
	}
	sort.Sort(byVersion(versions))

	// Update the module
	mod = database.Module{
		ModulePath: modulePath,
		SeriesPath: seriesPath,
		Version:    latest,
		Versions:   versions,
		Updated:    start,
	}
	if err := s.db.PutModule(ctx, mod); err != nil {
		return database.Module{}, err
		return ErrNoPackages
	}

	// Add packages to the database
	for _, pkg := range src.Packages {
		// TODO: Allow configuring the default GOOS,
		// and optionally let the user specify their own
@@ -140,17 +130,47 @@ func (s *Server) crawl(ctx context.Context, modulePath, version string) (databas
		}
	}

	// Fetch meta
	// Update meta
	if err := s.updateMeta(ctx, modulePath); err != nil {
		// A meta update failure is logged but does not fail the fetch.
		// The format string has two verbs, so both the module path and
		// the error must be supplied.
		log.Printf("Error fetching source meta for %s: %s", modulePath, err)
	}

	return nil
}

// latestVersion reports the newest version available for modulePath.
//
// The standard library is resolved via stdlib.ZipInfo; every other module is
// resolved by asking the module proxy client for its proxy.LatestVersion info.
// On failure the returned version is empty and the error is passed through.
func (s *Server) latestVersion(ctx context.Context, modulePath string) (string, error) {
	if modulePath != stdlib.ModulePath {
		info, err := s.proxyClient.GetInfo(ctx, modulePath, proxy.LatestVersion)
		if err != nil {
			return "", err
		}
		return info.Version, nil
	}

	// The standard library has its own version lookup.
	return stdlib.ZipInfo(proxy.LatestVersion)
}

// moduleVersions returns the list of versions known for modulePath.
//
// The standard library's versions come from stdlib.Versions; all other
// modules are listed through the module proxy client. The result is returned
// exactly as the underlying call produced it (no sorting is done here).
func (s *Server) moduleVersions(ctx context.Context, modulePath string) ([]string, error) {
	switch modulePath {
	case stdlib.ModulePath:
		return stdlib.Versions()
	default:
		return s.proxyClient.ListVersions(ctx, modulePath)
	}
}

// updateMeta updates the module's go-source meta tag information.
func (s *Server) updateMeta(ctx context.Context, modulePath string) error {
	meta, err := source.FetchMeta(ctx, s.httpClient, modulePath)
	if err != nil {
		if !errors.Is(err, source.ErrMetaNotFound) {
			log.Printf("Error fetching source meta for %s: %s", modulePath, err)
		}
	} else {
		if err := s.db.PutMeta(ctx, *meta); err != nil {
			return database.Module{}, err
		if errors.Is(err, source.ErrMetaNotFound) {
			return nil
		}
		return err
	}

	return mod, nil
	if err := s.db.PutMeta(ctx, *meta); err != nil {
		return err
	}
	return nil
}
diff --git a/gddo-server/gemini.go b/gddo-server/gemini.go
index 8c1b236..d4e2b6a 100644
--- a/gddo-server/gemini.go
+++ b/gddo-server/gemini.go
@@ -175,8 +175,7 @@ func (s *Server) serveGeminiRefresh(ctx context.Context, w gemini.ResponseWriter

	ch := make(chan error, 1)
	go func() {
		_, err := s.crawl(ctx, pkg.ModulePath, "latest")
		ch <- err
		ch <- s.fetch(ctx, pkg.ModulePath, proxy.LatestVersion)
	}()
	select {
	case err = <-ch:
@@ -195,7 +194,7 @@ func (s *Server) serveGeminiStdlib(ctx context.Context, w gemini.ResponseWriter,
	if err != nil {
		return err
	} else if !ok {
		_, err = s.crawl(ctx, stdlib.ModulePath, "latest")
		err = s.fetch(ctx, stdlib.ModulePath, proxy.LatestVersion)
		if err != nil {
			return err
		}
diff --git a/gddo-server/http.go b/gddo-server/http.go
index def19e2..ce69370 100644
--- a/gddo-server/http.go
+++ b/gddo-server/http.go
@@ -257,8 +257,7 @@ func (s *Server) serveRefresh(resp http.ResponseWriter, req *http.Request) error

	ch := make(chan error, 1)
	go func() {
		_, err := s.crawl(ctx, pkg.ModulePath, "latest")
		ch <- err
		ch <- s.fetch(ctx, pkg.ModulePath, proxy.LatestVersion)
	}()
	select {
	case err = <-ch:
@@ -287,7 +286,7 @@ func (s *Server) serveStdlib(resp http.ResponseWriter, req *http.Request) error
	if err != nil {
		return err
	} else if !ok {
		_, err = s.crawl(req.Context(), stdlib.ModulePath, "latest")
		err = s.fetch(req.Context(), stdlib.ModulePath, proxy.LatestVersion)
		if err != nil {
			return err
		}
diff --git a/gddo-server/server.go b/gddo-server/server.go
index 17532a5..a7f5ea2 100644
--- a/gddo-server/server.go
+++ b/gddo-server/server.go
@@ -84,10 +84,8 @@ func (s *Server) GetDoc(ctx context.Context, importPath, version string) (*datab
			ch <- result{nil, nil, nil, err}
			return
		}
		var mod database.Module
		if !ok {
			var err error
			mod, err = s.crawl(ctx, importPath, version)
			err := s.fetch(ctx, importPath, version)
			if err != nil {
				ch <- result{nil, nil, nil, err}
				return
@@ -97,13 +95,11 @@ func (s *Server) GetDoc(ctx context.Context, importPath, version string) (*datab
				ch <- result{nil, nil, nil, err}
				return
			}
		} else {
			var err error
			mod, _, err = s.db.GetModule(ctx, pkg.ModulePath)
			if err != nil {
				ch <- result{nil, nil, nil, err}
				return
			}
		}
		mod, _, err := s.db.GetModule(ctx, pkg.ModulePath)
		if err != nil {
			ch <- result{nil, nil, nil, err}
			return
		}
		// TODO: Allow the user to configure the GOOS and GOARCH
		pdoc, ok, err := s.db.GetDoc(ctx, importPath, pkg.Version, "linux", "amd64")
-- 
2.32.0
Thanks!

To git@git.sr.ht:~sircmpwn/gddo
   92a10f1..df74f2d  master -> master