~sircmpwn/aerc

push decoding to the workers v1 PROPOSED

We constantly have encoding issues due to the different backends doing different
things regarding the encoding.
This series aims to remedy that by pushing the decoding into the workers.

Reto Brunner (4):
  models: add BodyStructure.PartAtIndex
  FetchBodyParts: decode source in the workers
  imap: decode reader prior to returning them
  maildir/notmuch: don't re-encode readers

 commands/msg/forward.go  |  26 +---------
 commands/msg/pipe.go     |  12 +----
 commands/msg/reply.go    |  55 ++-------------------
 commands/msgview/open.go |   2 +-
 commands/msgview/save.go |   2 +-
 lib/msgstore.go          |  17 +++++--
 models/models.go         |  33 +++++++++++++
 widgets/msgviewer.go     |  26 ++--------
 worker/imap/fetch.go     | 100 +++++++++++++++++++++++++++++++++++----
 worker/lib/parse.go      |  41 ++--------------
 worker/types/messages.go |   6 ++-
 11 files changed, 156 insertions(+), 164 deletions(-)

-- 
2.24.1
Why do you need to re-implement all of this logic? This is already in
go-message.
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~sircmpwn/aerc/patches/9449/mbox | git am -3
Learn more about email & git

[PATCH 1/4] models: add BodyStructure.PartAtIndex Export this patch

---
 models/models.go | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/models/models.go b/models/models.go
index fa3baf2..036a609 100644
--- a/models/models.go
+++ b/models/models.go
@@ -87,6 +87,39 @@ type BodyStructure struct {
	DispositionParams map[string]string
}

//PartAtIndex returns the BodyStructure at the requested index
func (bs *BodyStructure) PartAtIndex(index []int) (*BodyStructure, error) {
	if len(index) == 0 {
		return bs, nil
	}
	cur := index[0]
	rest := index[1:]
	// passed indexes are 1 based, we need to convert back to actual indexes
	curidx := cur - 1
	if curidx < 0 {
		return nil, fmt.Errorf("invalid index, expected 1 based input")
	}

	// no children, base case
	if len(bs.Parts) == 0 {
		if len(rest) != 0 {
			return nil, fmt.Errorf("more index levels given than available")
		}
		if cur == 1 {
			return bs, nil
		} else {
			return nil, fmt.Errorf("invalid index %v for non multipart", cur)
		}
	}

	if cur > len(bs.Parts) {
		return nil, fmt.Errorf("invalid index %v, only have %v children",
			cur, len(bs.Parts))
	}

	return bs.Parts[curidx].PartAtIndex(rest)
}

type Envelope struct {
	Date      time.Time
	Subject   string
-- 
2.24.1

[PATCH 2/4] FetchBodyParts: decode source in the workers Export this patch

Previously the workers returned a mixture of decoded / encoded parts.
This lead to a whole bunch of issues.
This commit changes the msgviewer and the commands to assume parts to already
be decoded
---
 commands/msg/forward.go  | 26 ++-----------------
 commands/msg/pipe.go     | 12 +--------
 commands/msg/reply.go    | 55 +++-------------------------------------
 commands/msgview/open.go |  2 +-
 commands/msgview/save.go |  2 +-
 lib/msgstore.go          | 17 ++++++++++---
 widgets/msgviewer.go     | 26 +++----------------
 worker/types/messages.go |  6 +++--
 8 files changed, 29 insertions(+), 117 deletions(-)

diff --git a/commands/msg/forward.go b/commands/msg/forward.go
index 7570177..35d276e 100644
--- a/commands/msg/forward.go
+++ b/commands/msg/forward.go
@@ -10,9 +10,6 @@ import (
	"path"
	"strings"

	"github.com/emersion/go-message"
	"github.com/emersion/go-message/mail"

	"git.sr.ht/~sircmpwn/aerc/models"
	"git.sr.ht/~sircmpwn/aerc/widgets"
	"git.sr.ht/~sircmpwn/getopt"
@@ -138,28 +135,9 @@ func (forward) Execute(aerc *widgets.Aerc, args []string) error {

		// TODO: something more intelligent than fetching the 1st part
		// TODO: add attachments!
		store.FetchBodyPart(msg.Uid, []int{1}, func(reader io.Reader) {
			header := message.Header{}
			header.SetText(
				"Content-Transfer-Encoding", msg.BodyStructure.Encoding)
			header.SetContentType(
				msg.BodyStructure.MIMEType, msg.BodyStructure.Params)
			header.SetText("Content-Description", msg.BodyStructure.Description)
			entity, err := message.New(header, reader)
			if err != nil {
				// TODO: Do something with the error
				addTab()
				return
			}
			mreader := mail.NewReader(entity)
			part, err := mreader.NextPart()
			if err != nil {
				// TODO: Do something with the error
				addTab()
				return
			}
		store.FetchBodyPart(msg.Uid, msg.BodyStructure, []int{1}, func(reader io.Reader) {
			buf := new(bytes.Buffer)
			buf.ReadFrom(part.Body)
			buf.ReadFrom(reader)
			defaults["Original"] = buf.String()
			addTab()
		})
diff --git a/commands/msg/pipe.go b/commands/msg/pipe.go
index 2faa5de..001577c 100644
--- a/commands/msg/pipe.go
+++ b/commands/msg/pipe.go
@@ -1,13 +1,10 @@
package msg

import (
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"mime/quotedprintable"
	"os/exec"
	"strings"
	"time"

	"git.sr.ht/~sircmpwn/aerc/commands"
@@ -129,14 +126,7 @@ func (Pipe) Execute(aerc *widgets.Aerc, args []string) error {
		})
	} else if pipePart {
		p := provider.SelectedMessagePart()
		p.Store.FetchBodyPart(p.Msg.Uid, p.Index, func(reader io.Reader) {
			// email parts are encoded as 7bit (plaintext), quoted-printable, or base64
			if strings.EqualFold(p.Part.Encoding, "base64") {
				reader = base64.NewDecoder(base64.StdEncoding, reader)
			} else if strings.EqualFold(p.Part.Encoding, "quoted-printable") {
				reader = quotedprintable.NewReader(reader)
			}

		p.Store.FetchBodyPart(p.Msg.Uid, p.Msg.BodyStructure, p.Index, func(reader io.Reader) {
			if background {
				doExec(reader)
			} else {
diff --git a/commands/msg/reply.go b/commands/msg/reply.go
index 359c5dd..a7379d7 100644
--- a/commands/msg/reply.go
+++ b/commands/msg/reply.go
@@ -9,9 +9,6 @@ import (
	"strings"

	"git.sr.ht/~sircmpwn/getopt"
	"github.com/emersion/go-message"
	_ "github.com/emersion/go-message/charset"
	"github.com/emersion/go-message/mail"

	"git.sr.ht/~sircmpwn/aerc/models"
	"git.sr.ht/~sircmpwn/aerc/widgets"
@@ -155,56 +152,9 @@ func (reply) Execute(aerc *widgets.Aerc, args []string) error {
			template = aerc.Config().Templates.QuotedReply
		}

		store.FetchBodyPart(msg.Uid, []int{1}, func(reader io.Reader) {
			header := message.Header{}
			if len(msg.BodyStructure.Parts) > 0 {
				partID := 0 // TODO: will we always choose first msg part?
				header.SetText(
					"Content-Transfer-Encoding", msg.BodyStructure.Parts[partID].Encoding)
				if msg.BodyStructure.Parts[partID].MIMESubType == "" {
					header.SetContentType(
						msg.BodyStructure.Parts[partID].MIMEType,
						msg.BodyStructure.Parts[partID].Params)
				} else {
					// include SubType if defined (text/plain, text/html, ...)
					header.SetContentType(
						fmt.Sprintf("%s/%s", msg.BodyStructure.Parts[partID].MIMEType,
							msg.BodyStructure.Parts[partID].MIMESubType),
						msg.BodyStructure.Parts[partID].Params)
				}
				header.SetText("Content-Description", msg.BodyStructure.Parts[partID].Description)
			} else { // Parts has no headers, so we use global headers info
				header.SetText(
					"Content-Transfer-Encoding", msg.BodyStructure.Encoding)
				if msg.BodyStructure.MIMESubType == "" {
					header.SetContentType(
						msg.BodyStructure.MIMEType,
						msg.BodyStructure.Params)
				} else {
					// include SubType if defined (text/plain, text/html, ...)
					header.SetContentType(
						fmt.Sprintf("%s/%s", msg.BodyStructure.MIMEType,
							msg.BodyStructure.MIMESubType),
						msg.BodyStructure.Params)
				}
				header.SetText("Content-Description", msg.BodyStructure.Description)
			}
			entity, err := message.New(header, reader)
			if err != nil {
				// TODO: Do something with the error
				addTab()
				return
			}
			mreader := mail.NewReader(entity)
			part, err := mreader.NextPart()
			if err != nil {
				// TODO: Do something with the error
				addTab()
				return
			}

		store.FetchBodyPart(msg.Uid, msg.BodyStructure, []int{1}, func(reader io.Reader) {
			buf := new(bytes.Buffer)
			buf.ReadFrom(part.Body)
			buf.ReadFrom(reader)
			defaults["Original"] = buf.String()
			addTab()
		})
@@ -214,6 +164,7 @@ func (reply) Execute(aerc *widgets.Aerc, args []string) error {
	}
}

//TODO (RPB): unused function
func findPlaintext(bs *models.BodyStructure,
	path []int) (*models.BodyStructure, []int) {

diff --git a/commands/msgview/open.go b/commands/msgview/open.go
index ab023a1..6001d28 100644
--- a/commands/msgview/open.go
+++ b/commands/msgview/open.go
@@ -36,7 +36,7 @@ func (Open) Execute(aerc *widgets.Aerc, args []string) error {
	mv := aerc.SelectedTab().(*widgets.MessageViewer)
	p := mv.SelectedMessagePart()

	p.Store.FetchBodyPart(p.Msg.Uid, p.Index, func(reader io.Reader) {
	p.Store.FetchBodyPart(p.Msg.Uid, p.Msg.BodyStructure, p.Index, func(reader io.Reader) {
		// email parts are encoded as 7bit (plaintext), quoted-printable, or base64

		if strings.EqualFold(p.Part.Encoding, "base64") {
diff --git a/commands/msgview/save.go b/commands/msgview/save.go
index 99abe0e..c017e70 100644
--- a/commands/msgview/save.go
+++ b/commands/msgview/save.go
@@ -60,7 +60,7 @@ func (Save) Execute(aerc *widgets.Aerc, args []string) error {
	mv := aerc.SelectedTab().(*widgets.MessageViewer)
	p := mv.SelectedMessagePart()

	p.Store.FetchBodyPart(p.Msg.Uid, p.Index, func(reader io.Reader) {
	p.Store.FetchBodyPart(p.Msg.Uid, p.Msg.BodyStructure, p.Index, func(reader io.Reader) {
		// email parts are encoded as 7bit (plaintext), quoted-printable, or base64

		if strings.EqualFold(p.Part.Encoding, "base64") {
diff --git a/lib/msgstore.go b/lib/msgstore.go
index f67c49f..7209316 100644
--- a/lib/msgstore.go
+++ b/lib/msgstore.go
@@ -127,11 +127,22 @@ func (store *MessageStore) FetchFull(uids []uint32, cb func(io.Reader)) {
}

func (store *MessageStore) FetchBodyPart(
	uid uint32, part []int, cb func(io.Reader)) {
	uid uint32, parent *models.BodyStructure, part []int, cb func(io.Reader)) {
	partbs, err := parent.PartAtIndex(part)
	if err != nil {
		store.worker.Logger.Printf("FetchBodyPart: %v\n", err)
	}
	var charset string
	var ok bool
	if charset, ok = partbs.Params["charset"]; !ok {
		charset = ""
	}

	store.worker.PostAction(&types.FetchMessageBodyPart{
		Uid:  uid,
		Part: part,
		Uid:      uid,
		Part:     part,
		Encoding: partbs.Encoding,
		Charset:  charset,
	}, func(resp types.WorkerMessage) {
		msg, ok := resp.(*types.MessageBodyPart)
		if !ok {
diff --git a/widgets/msgviewer.go b/widgets/msgviewer.go
index 25bebfa..8c9d842 100644
--- a/widgets/msgviewer.go
+++ b/widgets/msgviewer.go
@@ -10,9 +10,6 @@ import (
	"strings"

	"github.com/danwakefield/fnmatch"
	message "github.com/emersion/go-message"
	_ "github.com/emersion/go-message/charset"
	"github.com/emersion/go-message/mail"
	"github.com/gdamore/tcell"
	"github.com/google/shlex"
	"github.com/mattn/go-runewidth"
@@ -549,10 +546,6 @@ func (pv *PartViewer) SetSource(reader io.Reader) {

func (pv *PartViewer) attemptCopy() {
	if pv.source != nil && pv.pager != nil && pv.pager.Process != nil {
		header := message.Header{}
		header.SetText("Content-Transfer-Encoding", pv.part.Encoding)
		header.SetContentType(fmt.Sprintf("%s/%s", pv.part.MIMEType, pv.part.MIMESubType), pv.part.Params)
		header.SetText("Content-Description", pv.part.Description)
		if pv.filter != nil {
			stdout, _ := pv.filter.StdoutPipe()
			stderr, _ := pv.filter.StderrPipe()
@@ -599,28 +592,15 @@ func (pv *PartViewer) attemptCopy() {
				pv.sink.Write([]byte{'\n'})
			}

			entity, err := message.New(header, pv.source)
			if err != nil {
				pv.err = err
				pv.Invalidate()
				return
			}
			reader := mail.NewReader(entity)
			part, err := reader.NextPart()
			if err != nil {
				pv.err = err
				pv.Invalidate()
				return
			}
			if pv.part.MIMEType == "text" {
				scanner := bufio.NewScanner(part.Body)
				scanner := bufio.NewScanner(pv.source)
				for scanner.Scan() {
					text := scanner.Text()
					text = ansi.ReplaceAllString(text, "")
					io.WriteString(pv.sink, text+"\n")
				}
			} else {
				io.Copy(pv.sink, part.Body)
				io.Copy(pv.sink, pv.source)
			}
			pv.sink.Close()
		}()
@@ -644,7 +624,7 @@ func (pv *PartViewer) Draw(ctx *ui.Context) {
		return
	}
	if !pv.fetched {
		pv.store.FetchBodyPart(pv.msg.Uid, pv.index, pv.SetSource)
		pv.store.FetchBodyPart(pv.msg.Uid, pv.msg.BodyStructure, pv.index, pv.SetSource)
		pv.fetched = true
	}
	if pv.err != nil {
diff --git a/worker/types/messages.go b/worker/types/messages.go
index a38ff94..c7d5077 100644
--- a/worker/types/messages.go
+++ b/worker/types/messages.go
@@ -104,8 +104,10 @@ type FetchFullMessages struct {

type FetchMessageBodyPart struct {
	Message
	Uid  uint32
	Part []int
	Uid      uint32
	Part     []int
	Encoding string
	Charset  string
}

type DeleteMessages struct {
-- 
2.24.1

[PATCH 3/4] imap: decode reader prior to returning them Export this patch

---
 worker/imap/fetch.go | 100 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/worker/imap/fetch.go b/worker/imap/fetch.go
index 1745ead..74ac482 100644
--- a/worker/imap/fetch.go
+++ b/worker/imap/fetch.go
@@ -2,9 +2,16 @@ package imap

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"fmt"
	"io"
	"mime/quotedprintable"
	"strings"

	"github.com/emersion/go-imap"
	"github.com/emersion/go-message"
	_ "github.com/emersion/go-message/charset"
	"github.com/emersion/go-message/mail"
	"github.com/emersion/go-message/textproto"

@@ -66,12 +73,12 @@ func (imapw *IMAPWorker) handleFetchMessages(
	section *imap.BodySectionName) {

	messages := make(chan *imap.Message)
	done := make(chan interface{})
	done := make(chan error)

	go func() {
		for _msg := range messages {
			imapw.seqMap[_msg.SeqNum-1] = _msg.Uid
			switch msg.(type) {
			switch msg := msg.(type) {
			case *types.FetchMessageHeaders:
				reader := _msg.GetBody(section)
				textprotoHeader, err := textproto.ReadHeader(bufio.NewReader(reader))
@@ -91,7 +98,17 @@ func (imapw *IMAPWorker) handleFetchMessages(
					},
				}, nil)
			case *types.FetchFullMessages:
				reader := _msg.GetBody(section)
				r := _msg.GetBody(section)
				if r == nil {
					done <- fmt.Errorf("could not get section %#v", section)
					return
				}
				reader, err := fullReader(r)
				if err != nil {
					done <- fmt.Errorf("could not read mail %#v", section)
					return
				}

				imapw.worker.PostMessage(&types.FullMessage{
					Message: types.RespondTo(msg),
					Content: &models.FullMessage{
@@ -108,7 +125,11 @@ func (imapw *IMAPWorker) handleFetchMessages(
					},
				}, nil)
			case *types.FetchMessageBodyPart:
				reader := _msg.GetBody(section)
				reader, err := getDecodedPart(msg, _msg, section)
				if err != nil {
					done <- err
					return
				}
				imapw.worker.PostMessage(&types.MessageBodyPart{
					Message: types.RespondTo(msg),
					Part: &models.MessageBodyPart{
@@ -129,15 +150,74 @@ func (imapw *IMAPWorker) handleFetchMessages(
		done <- nil
	}()

	set := toSeqSet(uids)
	if err := imapw.client.UidFetch(set, items, messages); err != nil {
	emitErr := func(err error) {
		imapw.worker.PostMessage(&types.Error{
			Message: types.RespondTo(msg),
			Error:   err,
		}, nil)
	} else {
		<-done
		imapw.worker.PostMessage(
			&types.Done{types.RespondTo(msg)}, nil)
	}

	set := toSeqSet(uids)
	if err := imapw.client.UidFetch(set, items, messages); err != nil {
		emitErr(err)
		return
	}
	if err := <-done; err != nil {
		emitErr(err)
		return
	}
	imapw.worker.PostMessage(
		&types.Done{types.RespondTo(msg)}, nil)
}

func getDecodedPart(task *types.FetchMessageBodyPart, msg *imap.Message,
	section *imap.BodySectionName) (io.Reader, error) {
	var r io.Reader
	var err error

	r = msg.GetBody(section)

	if r == nil {
		return nil, fmt.Errorf("getDecodedPart: no message body")
	}
	r = encodingReader(task.Encoding, r)
	if task.Charset != "" {
		r, err = message.CharsetReader(task.Charset, r)
	}
	if err != nil {
		return nil, err
	}

	return r, err
}

func fullReader(r io.Reader) (io.Reader, error) {
	// parse the header for the encoding and also return it in the reader
	br := bufio.NewReader(r)
	textprotoHeader, err := textproto.ReadHeader(br)
	if err != nil {
		return nil, err
	}
	header := &mail.Header{message.Header{textprotoHeader}}
	enc := header.Get("Content-Transfer-Encoding")

	var buf bytes.Buffer
	err = textproto.WriteHeader(&buf, textprotoHeader)
	if err != nil {
		return nil, err
	}
	er := encodingReader(enc, br)
	full := io.MultiReader(&buf, er)
	return full, nil
}

func encodingReader(encoding string, r io.Reader) io.Reader {
	reader := r
	// email parts are encoded as 7bit (plaintext), quoted-printable, or base64
	if strings.EqualFold(encoding, "base64") {
		reader = base64.NewDecoder(base64.StdEncoding, r)
	} else if strings.EqualFold(encoding, "quoted-printable") {
		reader = quotedprintable.NewReader(r)
	}
	return reader
}
-- 
2.24.1
Why do you need to re-implement all of this logic? This is already in
go-message.

[PATCH 4/4] maildir/notmuch: don't re-encode readers Export this patch

---
 worker/lib/parse.go | 41 ++++-------------------------------------
 1 file changed, 4 insertions(+), 37 deletions(-)

diff --git a/worker/lib/parse.go b/worker/lib/parse.go
index eed39cb..288dade 100644
--- a/worker/lib/parse.go
+++ b/worker/lib/parse.go
@@ -2,10 +2,8 @@ package lib

import (
	"bytes"
	"encoding/base64"
	"fmt"
	"io"
	"mime/quotedprintable"
	"strings"

	"git.sr.ht/~sircmpwn/aerc/models"
@@ -29,7 +27,7 @@ func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
			if idx == index[0] {
				rest := index[1:]
				if len(rest) < 1 {
					return fetchEntityReader(part)
					return bufReader(part)
				}
				return FetchEntityPartReader(part, index[1:])
			}
@@ -38,46 +36,15 @@ func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
	if index[0] != 1 {
		return nil, fmt.Errorf("cannont return non-first part of non-multipart")
	}
	return fetchEntityReader(e)
	return bufReader(e)
}

// fetchEntityReader makes an io.Reader for the given entity. Since the
// go-message package decodes the body for us, and the UI expects to deal with
// a reader whose bytes are encoded with the part's encoding, we are in the
// interesting position of needing to re-encode the reader before sending it
// off to the UI layer.
//
// TODO: probably change the UI to expect an already-decoded reader and decode
// in the IMAP worker.
func fetchEntityReader(e *message.Entity) (io.Reader, error) {
	enc := e.Header.Get("content-transfer-encoding")
//TODO: the UI doesn't seem to like readers which aren't buffers
func bufReader(e *message.Entity) (io.Reader, error) {
	var buf bytes.Buffer

	// base64
	if strings.EqualFold(enc, "base64") {
		wc := base64.NewEncoder(base64.StdEncoding, &buf)
		defer wc.Close()
		if _, err := io.Copy(wc, e.Body); err != nil {
			return nil, fmt.Errorf("could not base64 encode: %v", err)
		}
		return &buf, nil
	}

	// quoted-printable
	if strings.EqualFold(enc, "quoted-printable") {
		wc := quotedprintable.NewWriter(&buf)
		defer wc.Close()
		if _, err := io.Copy(wc, e.Body); err != nil {
			return nil, fmt.Errorf("could not quoted-printable encode: %v", err)
		}
		return &buf, nil
	}

	// other general encoding
	if _, err := io.Copy(&buf, e.Body); err != nil {
		return nil, err
	}

	return &buf, nil
}

-- 
2.24.1
View this thread in the archives