~rjarry/aerc-devel

maildir: fix data race in maildir worker v3 APPLIED

wagner riffel: 1
 maildir: fix data race in maildir worker

 1 files changed, 82 insertions(+), 25 deletions(-)
Here are the forgotten attachments. Sorry about it.

Next

Next
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~rjarry/aerc-devel/patches/30038/mbox | git am -3
Learn more about email & git

[PATCH v3] maildir: fix data race in maildir worker Export this patch

Fix a data race due dirInfo pointer being read in the main goroutine
by NewMessageStore and written in the anonymous goroutine launched in
Worker.getDirectoryInfo.

To address the issue raised in https://todo.sr.ht/~rjarry/aerc/16, we
use readdir(3) once, parse and cache its results, this replaces
go-maildir library Dir.Flags based  stat(3) and filepath.Glob
causing the issue when N (emails) is large.

Signed-off-by: wagner riffel <w@104d.net>
---

All previous versions of this patch were broken because it's missing a
seek to reset readdir position to the beginning, I found this nicer
workaround, which is also faster than the previous one, but always
falls back to go-maildir Flags if it can't parse a flag, (in my tests,
it could parse all emails in generated and my personal maildir)
currently the main loop is running 400,000 emails in 165~175ms.
 worker/maildir/worker.go | 107 ++++++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 25 deletions(-)

diff --git a/worker/maildir/worker.go b/worker/maildir/worker.go
index 6ff66b2..df37291 100644
--- a/worker/maildir/worker.go
+++ b/worker/maildir/worker.go
@@ -9,6 +9,8 @@ import (
	"net/url"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/emersion/go-maildir"
	"github.com/fsnotify/fsnotify"
@@ -120,6 +122,36 @@ func (w *Worker) err(msg types.WorkerMessage, err error) {
	}, nil)
}

func splitMaildirFile(name string) (uniq string, flags []maildir.Flag, err error) {
	i := strings.LastIndexByte(name, ':')
	if i < 0 {
		return "", nil, &maildir.MailfileError{Name: name}
	}
	info := name[i+1:]
	uniq = name[:i]
	if len(info) < 2 {
		return "", nil, &maildir.FlagError{Info: info, Experimental: false}
	}
	if info[1] != ',' || info[0] != '2' {
		return "", nil, &maildir.FlagError{Info: info, Experimental: false}
	}
	if info[0] == '1' {
		return "", nil, &maildir.FlagError{Info: info, Experimental: true}
	}
	flags = []maildir.Flag(info[2:])
	sort.Slice(flags, func(i, j int) bool { return info[i] < info[j] })
	return uniq, flags, nil
}

func dirFiles(name string) ([]string, error) {
	dir, err := os.Open(filepath.Join(name, "cur"))
	if err != nil {
		return nil, err
	}
	defer dir.Close()
	return dir.Readdirnames(-1)
}

func (w *Worker) getDirectoryInfo(name string) *models.DirectoryInfo {
	dirInfo := &models.DirectoryInfo{
		Name:     name,
@@ -136,6 +168,21 @@ func (w *Worker) getDirectoryInfo(name string) *models.DirectoryInfo {
	}

	dir := w.c.Dir(name)
	var keyFlags map[string][]maildir.Flag
	files, err := dirFiles(string(dir))
	if err == nil {
		keyFlags = make(map[string][]maildir.Flag, len(files))
		for _, v := range files {
			key, flags, err := splitMaildirFile(v)
			if err != nil {
				w.worker.Logger.Printf("%q: error parsing flags (%q): %v", v, key, err)
				continue
			}
			keyFlags[key] = flags
		}
	} else {
		w.worker.Logger.Printf("disabled flags cache: %q: %v", dir, err)
	}

	uids, err := w.c.UIDs(dir)
	if err != nil {
@@ -144,38 +191,48 @@ func (w *Worker) getDirectoryInfo(name string) *models.DirectoryInfo {
	}

	dirInfo.Exists = len(uids)

	go func() {
		info := dirInfo
		for _, uid := range uids {
			message, err := w.c.Message(dir, uid)
			if err != nil {
				w.worker.Logger.Printf("could not get message: %v", err)
				continue
	for _, uid := range uids {
		message, err := w.c.Message(dir, uid)
		if err != nil {
			w.worker.Logger.Printf("could not get message: %v", err)
			continue
		}
		var flags []maildir.Flag
		if keyFlags != nil {
			ok := false
			flags, ok = keyFlags[message.key]
			if !ok {
				w.worker.Logger.Printf("message (key=%q uid=%d) not found in map cache", message.key, message.uid)
				flags, err = message.Flags()
				if err != nil {
					w.worker.Logger.Printf("could not get flags: %v", err)
					continue
				}
			}
			flags, err := message.Flags()
		} else {
			flags, err = message.Flags()
			if err != nil {
				w.worker.Logger.Printf("could not get flags: %v", err)
				continue
			}
			seen := false
			for _, flag := range flags {
				if flag == maildir.FlagSeen {
					seen = true
				}
			}
			if !seen {
				info.Unseen++
			}
			if w.c.IsRecent(uid) {
				info.Recent++
		}
		seen := false
		for _, flag := range flags {
			if flag == maildir.FlagSeen {
				seen = true
				break
			}
		}
		info.Unseen += info.Recent
		info.Exists += info.Recent
		info.AccurateCounts = true
	}()

		if !seen {
			dirInfo.Unseen++
		}
		if w.c.IsRecent(uid) {
			dirInfo.Recent++
		}
	}
	dirInfo.Unseen += dirInfo.Recent
	dirInfo.Exists += dirInfo.Recent
	dirInfo.AccurateCounts = true
	return dirInfo
}

--
2.34.1
wagner riffel, Mar 06, 2022 at 10:06:
The error flooding has disappeared, thanks!

However, I'm attaching two logs produced after opening my "sent" folder (~20K
messages), one with the Maildir interface and other with notmuch. As you can
see, there is a lag of ~20s. In both cases, the lag occurs after the call to
types.FetchDirectoryContents, which stalls silently and then produce errors
related to the mime structure of some of my mails.

Is this an expected behaviour? I'm not surprised that some of my mails have a
wrong structure, but other softwares do not complain of this and gets to open
my folder much faster (especially notmuch).