~sircmpwn/godocs.io

gddo: gddo-server: Fetch modules in the background v1 APPLIED

Adnan Maolood: 1
 gddo-server: Fetch modules in the background

 3 files changed, 50 insertions(+), 1 deletion(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/godocs.io/patches/23410/mbox | git am -3
Learn more about email & git
View this thread in the archives

[PATCH gddo] gddo-server: Fetch modules in the background Export this patch

In a separate goroutine, continuously fetch the oldest module in the
database from the module proxy and update it if necessary. Note that the
--crawl-interval flag must be specified to enable background fetching.

---
A few remaining questions:

- Should we crawl the imports of packages (to discover new packages not
  in the database)? If we do, perhaps this should be opt-in as this
  could cause the database to grow exponentially, which wouldn't be
  desirable for small installations.
- Is PostgreSQL's ORDER BY fast enough for large databases? Or is speed
  not really a concern?
- Should we log FETCH messages, or should we keep silent except when
  errors occur?
 gddo-server/fetch.go          | 18 ++++++++++++++++++
 gddo-server/main.go           |  8 +++++++-
 internal/database/database.go | 25 +++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/gddo-server/fetch.go b/gddo-server/fetch.go
index a425d24..ba795b7 100644
--- a/gddo-server/fetch.go
+++ b/gddo-server/fetch.go
@@ -174,3 +174,21 @@ func (s *Server) updateMeta(ctx context.Context, modulePath string) error {
	}
	return nil
}

// fetchOldest updates the oldest module in the database if necessary.
func (s *Server) fetchOldest(ctx context.Context) {
	modulePath, err := s.db.Oldest(ctx)
	if err != nil {
		log.Printf("Error retrieving oldest module: %v", err)
		return
	}
	if modulePath == "" {
		// No modules in the database yet
		return
	}
	log.Println("FETCH", modulePath)
	if err := s.fetch(ctx, modulePath, proxy.LatestVersion); err != nil {
		log.Printf("Error fetching %s: %v", modulePath, err)
		return
	}
}
diff --git a/gddo-server/main.go b/gddo-server/main.go
index b5f8300..4b6bf93 100644
--- a/gddo-server/main.go
+++ b/gddo-server/main.go
@@ -32,7 +32,13 @@ func main() {
	if err != nil {
		log.Fatal("error creating server:", err)
	}
	// TODO: Crawl old modules in the background.

	// Update modules in the background
	go func() {
		for range time.Tick(s.cfg.CrawlInterval) {
			s.fetchOldest(ctx)
		}
	}()

	var wg sync.WaitGroup
	defer wg.Wait()
diff --git a/internal/database/database.go b/internal/database/database.go
index 4eac497..452befe 100644
--- a/internal/database/database.go
+++ b/internal/database/database.go
@@ -675,3 +675,28 @@ func (db *Database) PutMeta(ctx context.Context, meta source.Meta) error {
		return nil
	})
}

// Oldest returns the module path of the oldest module in the database
// (i.e., the module with the smallest updated timestamp).
func (db *Database) Oldest(ctx context.Context) (string, error) {
	var modulePath string
	err := db.withTx(ctx, nil, func(tx *sql.Tx) error {
		rows, err := tx.QueryContext(ctx,
			`SELECT module_path FROM modules ORDER BY updated LIMIT 1;`)
		if err != nil {
			return err
		}
		defer rows.Close()

		if rows.Next() {
			if err := rows.Scan(&modulePath); err != nil {
				return err
			}
		}
		return rows.Err()
	})
	if err != nil {
		return "", err
	}
	return modulePath, nil
}
-- 
2.32.0
Thanks!

To git@git.sr.ht:~sircmpwn/gddo
   22cd4e8..b136038  master -> master