~bouncepaw/betula

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
3 2

[PATCH] Use betula user agent when sending outgoing requests

Details
Message ID
<20240624184350.684221-2-hi@arnes.space>
DKIM signature
pass
Download raw message
Patch: +45 -4
I think it's pretty nice that some fediverse software is so talkative in
their user agents. This allows me to figure out where traffic is coming
from and makes it easier to tell malicious from non-malicious bots.

This patch adds a betula user agent string to all outgoing requests,
automatically encoding either information about the current git tag or
an abbreviated revision hash.

I hope it's useful!

---
 .gitignore             |  3 ++-
 Makefile               |  1 +
 fediverse/bookmark.go  |  2 ++
 fediverse/webfinger.go | 11 ++++++++++-
 jobs/jobs.go           |  2 ++
 readpage/readpage.go   | 10 +++++++++-
 readpage/workers.go    | 11 ++++++++++-
 settings/settings.go   |  9 +++++++++
 8 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6f63816..fcd069f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
betula
*.betula
.idea/
**/.DS_Store
\ No newline at end of file
**/.DS_Store
/settings/.version_string.txt
diff --git a/Makefile b/Makefile
index 8a54c58..18d2122 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ export CGO_ENABLED=1
.PHONY: betula debug-run run-with-port clean test

betula:
	go generate ./...
	go build -o betula ./cmd/betula

debug-run: clean betula
diff --git a/fediverse/bookmark.go b/fediverse/bookmark.go
index 102c7c4..8806ef5 100644
--- a/fediverse/bookmark.go
+++ b/fediverse/bookmark.go
@@ -10,6 +10,7 @@ import (
	"net/http"

	"git.sr.ht/~bouncepaw/betula/fediverse/activities"
	"git.sr.ht/~bouncepaw/betula/settings"
	"git.sr.ht/~bouncepaw/betula/types"
)

@@ -22,6 +23,7 @@ func fetchFedi(uri string) (*types.Bookmark, error) {
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", settings.UserAgent())
	req.Header.Set("Accept", types.OtherActivityType)
	resp, err := client.Do(req)
	if err != nil {
diff --git a/fediverse/webfinger.go b/fediverse/webfinger.go
index 2cd0f8b..661c717 100644
--- a/fediverse/webfinger.go
+++ b/fediverse/webfinger.go
@@ -3,8 +3,11 @@ package fediverse
import (
	"encoding/json"
	"fmt"
	"git.sr.ht/~bouncepaw/betula/settings"
	"git.sr.ht/~bouncepaw/betula/stricks"
	"io"
	"log"
	"net/http"
)

// https://docs.joinmastodon.org/spec/webfinger/
@@ -19,8 +22,14 @@ type webfingerDocument struct {

func requestIdByWebFingerAcct(user, host string) (id string, err error) {
	requestURL := fmt.Sprintf("https://%s/.well-known/webfinger?resource=acct:%s@%s", host, user, host)
	req, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		log.Printf("Failed to construct request from ‘%s’\n", requestURL)
		return "", err
	}

	resp, err := client.Get(requestURL)
	req.Header.Set("User-Agent", settings.UserAgent())
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
diff --git a/jobs/jobs.go b/jobs/jobs.go
index 0bd5827..c314879 100644
--- a/jobs/jobs.go
+++ b/jobs/jobs.go
@@ -10,6 +10,7 @@ import (
	"git.sr.ht/~bouncepaw/betula/db"
	"git.sr.ht/~bouncepaw/betula/fediverse/signing"
	"git.sr.ht/~bouncepaw/betula/jobs/jobtype"
	"git.sr.ht/~bouncepaw/betula/settings"
	"git.sr.ht/~bouncepaw/betula/stricks"
	"git.sr.ht/~bouncepaw/betula/types"
	"log"
@@ -72,6 +73,7 @@ func SendActivityToInbox(activity []byte, inbox string) error {
		return err
	}

	rq.Header.Set("User-Agent", settings.UserAgent())
	rq.Header.Set("Content-Type", types.ActivityType)
	signing.SignRequest(rq, activity)

diff --git a/readpage/readpage.go b/readpage/readpage.go
index 96e6798..27e86dd 100644
--- a/readpage/readpage.go
+++ b/readpage/readpage.go
@@ -5,6 +5,7 @@ import (
	"context"
	"errors"
	"git.sr.ht/~bouncepaw/betula/fediverse/activities"
	"git.sr.ht/~bouncepaw/betula/settings"
	"golang.org/x/net/html"
	"io"
	"log"
@@ -148,7 +149,14 @@ func findData(link string, workers []worker, doc *html.Node) (data FoundData, er

// findDataByLink finds the data you wished for in the document, considering the timeouts.
func findDataByLink(link string, workers []worker) (data FoundData, err error) {
	resp, err := client.Get(link)
	req, err := http.NewRequest(http.MethodGet, link, nil)
	if err != nil {
		log.Printf("Failed to construct request from ‘%s’\n", link)
		return data, err
	}

	req.Header.Set("User-Agent", settings.UserAgent())
	resp, err := client.Do(req)
	if err != nil {
		if err.(*url.Error).Timeout() {
			log.Printf("Request to %s timed out\n", link)
diff --git a/readpage/workers.go b/readpage/workers.go
index 65e3f9a..7df5b11 100644
--- a/readpage/workers.go
+++ b/readpage/workers.go
@@ -1,10 +1,12 @@
package readpage

import (
	"git.sr.ht/~bouncepaw/betula/settings"
	"git.sr.ht/~bouncepaw/betula/stricks"
	"golang.org/x/net/html"
	"io"
	"log"
	"net/http"
	"strings"
)

@@ -154,7 +156,14 @@ func listenForMycomarkup(nodes chan *html.Node, data *FoundData) {

			// We've found a valid <link> to a Mycomarkup document! Let's fetch it.

			resp, err := client.Get(addr.String())
			req, err := http.NewRequest(http.MethodGet, addr.String(), nil)
			if err != nil {
				log.Printf("Failed to construct request from ‘%s’\n", addr.String())
				continue
			}

			req.Header.Set("User-Agent", settings.UserAgent())
			resp, err := client.Do(req)
			if err != nil {
				log.Printf("Failed to fetch Mycomarkup document from ‘%s’\n", addr.String())
			}
diff --git a/settings/settings.go b/settings/settings.go
index 5ea5d8c..0bfbdee 100644
--- a/settings/settings.go
+++ b/settings/settings.go
@@ -3,6 +3,7 @@ package settings

import (
	"database/sql"
	_ "embed"
	"fmt"
	"git.sr.ht/~bouncepaw/betula/stricks"
	"html"
@@ -19,6 +20,10 @@ const defaultHost = "0.0.0.0"
const biggestPort = 65535
const defaultPort = 1738

//go:generate sh -c "{ [ -n \"$(git tag --points-at HEAD)\" ] && git tag --points-at HEAD || git rev-parse --short HEAD; } > .version_string.txt"
//go:embed .version_string.txt
var version string

var cache types.Settings
var adminUsername string

@@ -132,6 +137,10 @@ func SiteDomain() string {
	return stricks.ParseValidURL(SiteURL()).Host
}

// UserAgent returns the value to send in the User-Agent header of outgoing
// HTTP requests. It has the form "Betula <version>; <site domain>; Bot".
func UserAgent() string {
	// SiteDomain has to be called: passing the bare function value makes
	// fmt render "%!s(func() string=0x…)" instead of the host name.
	return fmt.Sprintf("Betula %s; %s; Bot", version, SiteDomain())
}

func SetSettings(settings types.Settings) {
	if settings.SiteName == "" {
		settings.SiteName = "Betula"
-- 
2.45.1
Details
Message ID
<1D63F5EC-77FA-4D3F-A2CC-BA64200CB2F7@ya.ru>
In-Reply-To
<20240624184350.684221-2-hi@arnes.space> (view parent)
DKIM signature
pass
Download raw message
Thanks for the contribution, arne!

Having a User-agent is something we indeed want to have. We have a
ticket [1] for that!

I can't accept the patch in its current form, though. The way the User
agent is implemented is perfectly fine, but I don't like the
version-related stuff. You can split this patch into two parts: one user
agent related, one version related, and maybe just ditch the latter
one.

What I don't like about the version:

1) The file storing the version is called .version_string.txt. I do
   not see why it should be a hidden file, or a file to begin with.

2) What if it's empty or does not exist? Not everyone will call
   go generate.

3) The version is already set manually across the code. With this
   patch, the manual version and the generated version might
   mismatch.

If we go down the way of storing the version as a value, I think it
should be done the following way:

1) The version should be a variable set in code, with all mentions
   of the version depending on this variable:

      var version = "v1.4.0"

2) The linker flags [2] should be used to override it. They can be
   passed to the go build command, so I guess this part should
   happen in Makefile, not on the go generate step.


1: https://todo.sr.ht/~bouncepaw/betula/50
2: https://pkg.go.dev/cmd/link
Details
Message ID
<874j9hwccu.fsf@arnes.space>
In-Reply-To
<1D63F5EC-77FA-4D3F-A2CC-BA64200CB2F7@ya.ru> (view parent)
DKIM signature
pass
Download raw message
Patch: +1 -7
Hi Timur,

glad to see the contribution is appreciated. I have added an additional
patch to be applied on top of the first one that removes the version
retrieval logic, since I don't have any strong opinion here.

As to why it was done the way it was:

> 1) The file storing the version is called .version_string.txt. I do
>    not see why it should be a hidden file, or a file to begin with.

It is a file so it can be referenced via `go:embed`. It does not need to
be a hidden file, but I thought this may be useful in not cluttering up
file listings of the directory.

> 2) What if it's empty or does not exist? Not everyone will call
>    go generate.

`go build` fails if `go:embed` is called with a nonexistent file. It
will not fail if it is called with an outdated file, or a file
containing arbitrary content.

As for the rest, as I said, I don't have any strong opinions and I think
a user agent without version information is useful enough.

Hope this helps!

---
 .gitignore           | 1 -
 Makefile             | 1 -
 settings/settings.go | 6 +-----
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index fcd069f..e64b4a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,3 @@ betula
*.betula
.idea/
**/.DS_Store
/settings/.version_string.txt
diff --git a/Makefile b/Makefile
index 18d2122..8a54c58 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,6 @@ export CGO_ENABLED=1
.PHONY: betula debug-run run-with-port clean test

betula:
	go generate ./...
	go build -o betula ./cmd/betula

debug-run: clean betula
diff --git a/settings/settings.go b/settings/settings.go
index 0bfbdee..5e0894a 100644
--- a/settings/settings.go
+++ b/settings/settings.go
@@ -20,10 +20,6 @@ const defaultHost = "0.0.0.0"
const biggestPort = 65535
const defaultPort = 1738

//go:generate sh -c "{ [ -n \"$(git tag --points-at HEAD)\" ] && git tag --points-at HEAD || git rev-parse --short HEAD; } > .version_string.txt"
//go:embed .version_string.txt
var version string

var cache types.Settings
var adminUsername string

@@ -138,7 +134,7 @@ func SiteDomain() string {
}

// UserAgent returns the value to send in the User-Agent header of outgoing
// HTTP requests, identifying this Betula instance by its site domain.
func UserAgent() string {
	// SiteDomain has to be called: passing the bare function value makes
	// fmt render "%!s(func() string=0x…)" instead of the host name.
	return fmt.Sprintf("Betula %s; %s; Bot", version, SiteDomain())
	return fmt.Sprintf("Betula; %s; Bot", SiteDomain())
}

func SetSettings(settings types.Settings) {
-- 
2.45.1
Details
Message ID
<90807B41-C0B3-47E7-9EF3-E1B98C4A42D4@ya.ru>
In-Reply-To
<874j9hwccu.fsf@arnes.space> (view parent)
DKIM signature
pass
Download raw message
Thank you! Applied.
Reply to thread Export thread (mbox)