Hi Robin
thanks for your input. I understand that this is a bit of a
"workaround", but it shows a "possible" approach to deal with malformed
multipart messages. We can also let that patch sit around a bit and try
to come up with a better solution.
A proper fix would probably be to write our own multipart parser, try to
detect errors (missing or wrong boundaries) and correct them?
Anyways, I've tried to answer your remarks below.
If there's a multipart error, CreateTextPlainBody() will create a new
*models.BodyStructure resembling one for a "normal", non-multipart
message. In such a situation, the message structure is messed up anyways
and not really useful.
The message content itself is never touched. You could still do a :pipe
-m less and get the same raw message.
Parse multipart messages on a best-efforts basis. Allow the user to see
as much of the message as possible, but log the errors.
If a charset or encoding error is encountered for a message part of a
multipart message, the error is logged and ignored. In those cases, we
still get a valid message body but the content is just not decoded or
converted. No error will be propagated.
If a multipart message cannot be parsed, ParseEntityStructure will
return a multipart error. This error indicates that the message is
malformed and there is nothing more we can do. The caller is then
advised to use a single text/plain body structure using
CreateTextPlainPart() to provide the entire message content to the user.
Fixes: https://todo.sr.ht/~rjarry/aerc/288
Signed-off-by: Koni Marti <koni.marti@gmail.com>
---
v2->v3:
* fix tests
Or maybe does this mean that all the message buffer will be sent to the
message view? Does this include top level headers, or only the multipart
headers?
}
if idx == index[0] {
rest := index[1:]
@@ -89,6 +111,22 @@ func fixContentType(h message.Header) (string, map[string]string) {
return "text/plain", nil
}
+// ParseEntityStructure will parse the message and create a multipart structure+// for multipart messages. Parsing is done on a best-efforts basis:+//+// If the content-type cannot be parsed, ParseEntityStructure will try to fix+// it; otherwise, it returns a text/plain mime type as a fallback. No error will+// be returned.+//+// If a charset or encoding error is encountered for a message part of a+// multipart message, the error is logged and ignored. In those cases, we still+// get a valid message body but the content is just not decoded or converted. No+// error will be returned.+//+// If reading a multipart message fails, ParseEntityStructure will return a+// multipart error. This error indicates that this message is malformed and+// there is nothing more we can do. The caller is then advised to use a single+// text/plain body structure using CreateTextPlainPart().func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
var body models.BodyStructure
contentType, ctParams, err := e.Header.ContentType()
@@ -116,10 +154,15 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
if mpr := e.MultipartReader(); mpr != nil {
for {
part, err := mpr.NextPart()
- if errors.Is(err, io.EOF) {+ switch {+ case errors.Is(err, io.EOF): return &body, nil
- } else if err != nil {- return nil, err+ case message.IsUnknownCharset(err):+ log.Warnf("ParseEntityStructure: %v", err)+ case message.IsUnknownEncoding(err):+ log.Warnf("ParseEntityStructure: %v", err)+ case err != nil:+ return nil, MultipartError{err} }
ps, err := ParseEntityStructure(part)
if err != nil {
@@ -131,6 +174,16 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
return &body, nil
}
+// CreateTextPlainBody creats a plain-vanilla text/plain body structure.+func CreateTextPlainBody() *models.BodyStructure {+ body := &models.BodyStructure{}+ body.MIMEType = "text"+ body.MIMESubType = "plain"+ body.Params = map[string]string{"charset": "utf-8"}+ body.Parts = []*models.BodyStructure{}+ return body+}+func parseEnvelope(h *mail.Header) *models.Envelope {
subj, err := h.Subject()
if err != nil {
@@ -308,8 +361,9 @@ func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
return nil, fmt.Errorf("could not read message: %w", err)
}
bs, err := ParseEntityStructure(msg)
- if errors.As(err, new(message.UnknownEncodingError)) {- parseErr = err+ if IsMultipartError(err) {+ log.Warnf("multipart error: %v", err)+ bs = CreateTextPlainBody()
how come you don't need to pass msg to CreateTextPlainBody()?
} else if err != nil {
return nil, fmt.Errorf("could not get structure: %w", err)
}
@@ -394,13 +448,18 @@ func NewCRLFReader(r io.Reader) io.Reader {
// ReadMessage is a wrapper for the message.Read function to read a message
// from r. The message's encoding and charset are automatically decoded to
-// UTF-8. If an unknown charset is encountered, the error is logged but a nil-// error is returned since the entity object can still be read.+// UTF-8. If an unknown charset or unknown encoding is encountered, the error is+// logged but a nil error is returned since the entity object can still be read.func ReadMessage(r io.Reader) (*message.Entity, error) {
entity, err := message.Read(r)
- if message.IsUnknownCharset(err) {- log.Warnf("unknown charset encountered")- } else if err != nil {+ switch {+ case message.IsUnknownCharset(err):+ // message body is valid, just not converted, so continue+ log.Warnf("ReadMessage: %v", err)+ case message.IsUnknownEncoding(err):+ // message body is valid, just not decoded, so continue+ log.Warnf("ReadMessage: %v", err)+ case err != nil: return nil, fmt.Errorf("could not read message: %w", err)
}
return entity, nil
diff --git a/lib/rfc822/message_test.go b/lib/rfc822/message_test.go
index ca5ae014..a5812f8b 100644
--- a/lib/rfc822/message_test.go+++ b/lib/rfc822/message_test.go
@@ -38,8 +38,8 @@ func TestMessageInfoParser(t *testing.T) {
}
}
-func TestMessageInfoHandledError(t *testing.T) {- rootDir := "testdata/message/invalid"+func TestMessageInfoMalformed(t *testing.T) {+ rootDir := "testdata/message/malformed" msgFiles, err := os.ReadDir(rootDir)
die(err)
@@ -51,14 +51,10 @@ func TestMessageInfoHandledError(t *testing.T) {
p := fi.Name()
t.Run(p, func(t *testing.T) {
m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
- mi, err := MessageInfo(m)+ _, err := MessageInfo(m) if err != nil {
t.Fatal(err)
}
-- if perr := mi.Error; perr == nil {- t.Fatal("Expected MessageInfo.Error, got none")- } })
}
}
diff --git a/lib/rfc822/testdata/message/invalid/hexa b/lib/rfc822/testdata/message/malformed/hexa
similarity index 100%
rename from lib/rfc822/testdata/message/invalid/hexa
rename to lib/rfc822/testdata/message/malformed/hexa
--
2.47.0