~sircmpwn/sr.ht-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
3 3

[PATCH pages.sr.ht] contrib: add tool to purge old sites from storage

Details
Message ID
<20240208151421.1687069-1-ch@bitfehler.net>
DKIM signature
pass
Download raw message
Patch: +166 -0
---
Potentially dangerous, for obvious reasons. Please scrutinize.

 contrib/cleanup-old-versions/README.md |   7 ++
 contrib/cleanup-old-versions/main.go   | 159 +++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 contrib/cleanup-old-versions/README.md
 create mode 100644 contrib/cleanup-old-versions/main.go

diff --git a/contrib/cleanup-old-versions/README.md b/contrib/cleanup-old-versions/README.md
new file mode 100644
index 0000000..beeef05
--- /dev/null
+++ b/contrib/cleanup-old-versions/README.md
@@ -0,0 +1,7 @@
# Delete obsolete site versions

Currently, old versions of published sites are left to accumulate in storage.
This tool finds these old, unreachable site versions and deletes them. It
should be safe to re-run this at any time (or re-start it in case of errors).

Build with `go build` and run the resulting executable on pages.sr.ht.
diff --git a/contrib/cleanup-old-versions/main.go b/contrib/cleanup-old-versions/main.go
new file mode 100644
index 0000000..2fe4972
--- /dev/null
+++ b/contrib/cleanup-old-versions/main.go
@@ -0,0 +1,159 @@
package main

import (
	"context"
	"database/sql"
	"log"
	"path"
	"strings"
	"sync"
	"time"

	"git.sr.ht/~sircmpwn/core-go/config"
	"git.sr.ht/~sircmpwn/core-go/s3"
	_ "github.com/lib/pq"
	"github.com/minio/minio-go/v7"
)

// Site identifies one published site by its location in object storage.
type Site struct {
	// domain is the last path element of key (see NewSite); it is the value
	// matched against the "domain" column when looking up current versions.
	domain string
	// key is the storage key of the site exactly as returned by the bucket
	// listing; it is used as the prefix when listing the site's versions.
	key string
}

// Version identifies one stored version of a site in object storage.
type Version struct {
	// id is the last path element of key (see NewVersion); it is compared
	// against the version values read from the database.
	id string
	// key is the storage key of the version exactly as returned by the
	// bucket listing; it is used as the prefix when deleting the version.
	key string
}

// NewSite builds a Site from a storage key. The key is stored unmodified,
// while the domain is derived from it by cleaning the key and taking its
// final path element (so a trailing slash on the key is ignored).
func NewSite(key string) Site {
	var site Site
	site.key = key
	cleaned := path.Clean(key)
	site.domain = path.Base(cleaned)
	return site
}

// NewVersion builds a Version from a storage key. The version id is the
// final path element of the cleaned key; the key itself is kept as given.
func NewVersion(key string) Version {
	id := path.Base(path.Clean(key))
	return Version{key: key, id: id}
}

// contains reports whether item is equal to at least one element of list.
// A nil or empty list never contains anything.
func contains(list []string, item string) bool {
	for idx := 0; idx < len(list); idx++ {
		if list[idx] == item {
			return true
		}
	}
	return false
}

// getCurrentVersions returns the "version" value of every row in the "sites"
// table whose domain equals the given domain.
//
// If the query, a row scan, or the iteration over the result set fails, it
// returns a nil slice together with the error. Callers treat every version
// that is NOT in the returned list as obsolete, so a partial result must
// never be reported as success.
func getCurrentVersions(db *sql.DB, domain string) ([]string, error) {
	q := `SELECT version FROM "sites" WHERE domain = $1`
	rows, err := db.Query(q, domain)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var currentVersions []string

	for rows.Next() {
		var current string
		if err := rows.Scan(&current); err != nil {
			return nil, err
		}
		currentVersions = append(currentVersions, current)
	}
	// rows.Next returns false both at the end of the result set and when
	// iteration fails part-way (e.g. the connection drops). Only rows.Err
	// distinguishes the two; without this check a truncated list would be
	// returned as if it were complete.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return currentVersions, nil
}

// main deletes site versions from object storage that the database no longer
// references.
//
// It loads the sr.ht configuration, opens the pages.sr.ht database and an S3
// client, and lists everything under "<s3-prefix>sites/" in the configured
// bucket. Each listed entry is treated as a site and handled in its own
// goroutine (started about one second apart): the site's versions are listed,
// the goroutine waits for in-flight uploads to settle, the current versions
// are read from the database, and every listed version that is not current is
// removed recursively.
//
// Listing failures are reported by the S3 client in-band, as an entry whose
// Err field is set. Such entries must never be processed as real keys: an
// error entry has no meaningful key, and an empty key used as a listing or
// deletion prefix would match far more than a single site or version.
func main() {
	log.Println("Starting...")

	conf := config.LoadConfig(":5112")

	pgcs, ok := conf.Get("pages.sr.ht", "connection-string")
	if !ok {
		log.Fatalf("No connection string provided in config.ini")
	}

	db, err := sql.Open("postgres", pgcs)
	if err != nil {
		log.Fatalf("Failed to open a database connection: %v", err)
	}

	mc, err := s3.NewClient(conf)
	if err != nil {
		log.Fatal(err)
	}

	bucket, _ := conf.Get("pages.sr.ht", "s3-bucket")
	prefix, _ := conf.Get("pages.sr.ht", "s3-prefix")

	// Normalize the prefix so that it can be concatenated with "sites/".
	if prefix != "" && !strings.HasSuffix(prefix, "/") {
		prefix = prefix + "/"
	}

	ctx := context.Background()
	listOpts := minio.ListObjectsOptions{
		Prefix: prefix + "sites/",
	}

	var wg sync.WaitGroup
	// listErr records a failure of the top-level site listing so that it can
	// be reported after the already running goroutines have finished.
	var listErr error

	for entry := range mc.ListObjects(ctx, bucket, listOpts) {
		// A failed listing arrives as an entry with Err set. Stop here
		// instead of treating it as a site.
		if entry.Err != nil {
			listErr = entry.Err
			break
		}

		// Space goroutines a bit apart...
		time.Sleep(1 * time.Second)

		wg.Add(1)
		go func(site Site) {
			defer wg.Done()
			log.Printf("%s: processing...\n", site.domain)

			var versions []Version
			lo := minio.ListObjectsOptions{
				Prefix: site.key,
			}

			for v := range mc.ListObjects(ctx, bucket, lo) {
				// Same in-band error reporting as above. Without a
				// complete, trustworthy version listing nothing may be
				// deleted, so skip this site entirely.
				if v.Err != nil {
					log.Printf("%s: failed to list versions, skipping site: %s", site.domain, v.Err.Error())
					return
				}
				versions = append(versions, NewVersion(v.Key))
			}

			// The listed versions can potentially contain one
			// which is being currently uploaded and is not yet in
			// the database. Make sure to wait for the timeout
			// period of upload requests (plus a little) so that
			// the version shows up in the database.
			// If yet another upload starts during that time, no
			// problem: the version it created is not in our
			// listing and will not be touched.
			log.Printf("%s: waiting for potential uploads to settle...\n", site.domain)
			time.Sleep(35 * time.Second)

			current, err := getCurrentVersions(db, site.domain)
			if err != nil {
				log.Fatalf("%s: failed to get current versions: %s", site.domain, err.Error())
			}

			log.Printf("%s: current versions: %s", site.domain, strings.Join(current, ", "))
			for _, v := range versions {
				if contains(current, v.id) {
					continue
				}
				log.Printf("%s: deleting %s", site.domain, v.key)
				lo := minio.ListObjectsOptions{
					Prefix:    v.key,
					Recursive: true,
				}
				// Stream the recursive listing straight into the bulk
				// delete; the result channel only yields failures.
				objects := mc.ListObjects(ctx, bucket, lo)
				result := mc.RemoveObjects(ctx, bucket, objects, minio.RemoveObjectsOptions{})
				for roe := range result {
					log.Printf("%s: error deleting object %s: %s", site.domain, roe.ObjectName, roe.Err.Error())
				}
			}
		}(NewSite(entry.Key))
	}
	wg.Wait()
	if listErr != nil {
		log.Fatalf("Failed to list sites: %v", listErr)
	}
	log.Println("Done.")
}
-- 
2.43.0

[pages.sr.ht/patches] build failed

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CYZSOGBPQNQV.29TSB70DI1C4S@fra02>
In-Reply-To
<20240208151421.1687069-1-ch@bitfehler.net> (view parent)
DKIM signature
missing
Download raw message
pages.sr.ht/patches: FAILED in 59s

[contrib: add tool to purge old sites from storage][0] from [Conrad Hoffmann][1]

[0]: https://lists.sr.ht/~sircmpwn/sr.ht-dev/patches/49283
[1]: ch@bitfehler.net

✗ #1145975 FAILED  pages.sr.ht/patches/archlinux.yml https://builds.sr.ht/~sircmpwn/job/1145975
✓ #1145974 SUCCESS pages.sr.ht/patches/alpine.yml    https://builds.sr.ht/~sircmpwn/job/1145974
Details
Message ID
<ZciSftGerCXDSPaN@x250>
In-Reply-To
<20240208151421.1687069-1-ch@bitfehler.net> (view parent)
DKIM signature
missing
Download raw message
There's a stray "to" in the README.

On Thu, Feb 08, 2024 at 04:14:21PM +0100, Conrad Hoffmann wrote:
> ---
> Potentially dangerous, for obvious reasons. Please scrutinize.
> 
>  contrib/cleanup-old-versions/README.md |   7 ++
>  contrib/cleanup-old-versions/main.go   | 159 +++++++++++++++++++++++++
>  2 files changed, 166 insertions(+)
>  create mode 100644 contrib/cleanup-old-versions/README.md
>  create mode 100644 contrib/cleanup-old-versions/main.go
> 
> diff --git a/contrib/cleanup-old-versions/README.md b/contrib/cleanup-old-versions/README.md
> new file mode 100644
> index 0000000..beeef05
> --- /dev/null
> +++ b/contrib/cleanup-old-versions/README.md
> @@ -0,0 +1,7 @@
> +# Delete obsolete site versions
> +
> +Currently, old versions of published sites are left to accumulate in storage.
> +This tool to finds these old, unreachable site versions and deletes them. It

s/to //

> +should be safe to re-run this at any time (or re-start it in case of errors).
> +
> +Build with `go build` and run the resulting executable on pages.sr.ht.
Details
Message ID
<CZ4ORFGBNCEL.3HAQGFZ52RWD9@cmpwn.com>
In-Reply-To
<20240208151421.1687069-1-ch@bitfehler.net> (view parent)
DKIM signature
pass
Download raw message
Thanks!

To git@git.sr.ht:~sircmpwn/pages.sr.ht
   6660d3d..ce6df7d  master -> master
Reply to thread Export thread (mbox)