summaryrefslogtreecommitdiff
path: root/libmailutils
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2020-07-23 13:42:23 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2020-07-23 14:07:58 +0300
commit: ff9c0a396c14c219532dd667d49ebc7a3f376f9b (patch)
tree: a0d9a259072018dd97b936042df7d89daa3c8b41 /libmailutils
parent: 78c28187ffb8f6ae8aa0662e6be193d23ae7808b (diff)
download: mailutils-ff9c0a396c14c219532dd667d49ebc7a3f376f9b.tar.gz
mailutils-ff9c0a396c14c219532dd667d49ebc7a3f376f9b.tar.bz2
Relax syntax requirements for MIME structured headers.
* libmailutils/base/ctparse.c (content_type_parse): Rewrite the value splitting on type and subtype. Allow for optional whitespace at both sides of "/". Both type and subtype can contain arbitrary characters (except for "/"). This is not right according to RFC 2045, but reportedly such cases exist in old mails.
* libmailutils/mailbox/bodystruct.c (bodystructure_fill): Fix parsing of the Content-Type header. Fix storing the pointer to an automatic variable in assoc object. Unfold the Content-Disposition value.
* libmailutils/mime/mimehdr.c (_mime_header_parse): Return in pvalue entire prefix part up to the first semicolon, with leading and trailing whitespace removed. Allow for both output parameters to be NULL.
* libmailutils/tests/content-type.at: Add new test case.
* libmailutils/tests/conttype.c: Ignore empty lines in input.
* libmailutils/tests/mimehdr.at: Change mimehdr16 and mimehdr17 tests: this syntax is accepted by the relaxed rules of the modified parser.
Diffstat (limited to 'libmailutils')
-rw-r--r-- libmailutils/base/ctparse.c 52
-rw-r--r-- libmailutils/mailbox/bodystruct.c 98
-rw-r--r-- libmailutils/mime/mimehdr.c 56
-rw-r--r-- libmailutils/tests/content-type.at 9
-rw-r--r-- libmailutils/tests/conttype.c 2
-rw-r--r-- libmailutils/tests/mimehdr.at 11
6 files changed, 134 insertions, 94 deletions
diff --git a/libmailutils/base/ctparse.c b/libmailutils/base/ctparse.c
index a6c93f9f1..b5bed3e86 100644
--- a/libmailutils/base/ctparse.c
+++ b/libmailutils/base/ctparse.c
@@ -29,45 +29,67 @@
#include <mailutils/cctype.h>
#include <mailutils/cstr.h>
-
+/* Parse the content type header value in INPUT. If CHARSET is not
+ NULL, convert textual parameters to this charset.
+
+ Store the result in CT.
+
+ In case of error, CT is left partially constructed. The caller
+ must free it.
+
+ Parsing of the type/subtype value is relaxed: any characters are
+ allowed in either part (except for "/", which can't appear in type).
+ Although RFC 2045 forbids that, mails with such content types reportedly
+ exist (see conversation with Karl Berry on 2020-07-21, particularly
+ <202007220115.06M1FuTh001462@freefriends.org> and my reply
+ <20200722133251.8412@ulysses.gnu.org.ua>).
+
+ Type must not be empty, but empty subtype is allowed.
+*/
static int
content_type_parse (const char *input, const char *charset,
mu_content_type_t ct)
{
int rc;
char *value, *p;
-
+
rc = mu_mime_header_parse (input, charset, &value, &ct->param);
if (rc)
return rc;
+
p = strchr (value, '/');
if (p)
{
size_t len = p - value;
+ while (len > 0 && mu_isspace (value[len-1]))
+ len--;
+ if (len == 0)
+ {
+ rc = MU_ERR_PARSE;
+ goto end;
+ }
+
+ p = mu_str_skip_class (p + 1, MU_CTYPE_SPACE);
+
ct->type = malloc (len + 1);
if (!ct->type)
{
rc = errno;
- free (value);
- return rc;
+ goto end;
}
+
memcpy (ct->type, value, len);
ct->type[len] = 0;
- ct->subtype = strdup (p + 1);
- free (value);
-
+ ct->subtype = strdup (p);
if (!ct->subtype)
- {
- rc = errno;
- return rc;
- }
+ rc = errno;
}
else
- {
- return MU_ERR_PARSE;
- }
- return 0;
+ rc = MU_ERR_PARSE;
+ end:
+ free (value);
+ return rc;
}
int
diff --git a/libmailutils/mailbox/bodystruct.c b/libmailutils/mailbox/bodystruct.c
index cffe503d9..2d48205a9 100644
--- a/libmailutils/mailbox/bodystruct.c
+++ b/libmailutils/mailbox/bodystruct.c
@@ -31,6 +31,7 @@
#include <mailutils/nls.h>
#include <mailutils/cstr.h>
#include <mailutils/body.h>
+#include <mailutils/util.h>
void
mu_list_free_bodystructure (void *item)
@@ -94,59 +95,48 @@ static int
bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs)
{
mu_header_t header = NULL;
- const char *buffer = NULL;
+ char *buffer = NULL;
mu_body_t body = NULL;
- int rc;
int is_multipart = 0;
+ int rc;
rc = mu_message_get_header (msg, &header);
if (rc)
return rc;
- if (mu_header_sget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0)
+ if (mu_header_aget_value_unfold (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0)
{
- char *value;
- char *p;
- size_t len;
-
- rc = mu_mime_header_parse (buffer, "UTF-8", &value, &bs->body_param);
- if (rc)
- return rc;
-
- len = strcspn (value, "/");
+ mu_content_type_t ct;
- if (mu_c_strcasecmp (value, "MESSAGE/RFC822") == 0)
- bs->body_message_type = mu_message_rfc822;
- else if (mu_c_strncasecmp (value, "TEXT", len) == 0)
- bs->body_message_type = mu_message_text;
-
- p = malloc (len + 1);
- if (!p)
- return ENOMEM;
- memcpy (p, value, len);
- p[len] = 0;
-
- bs->body_type = p;
- mu_strupper (bs->body_type);
- if (value[len])
+ rc = mu_content_type_parse (buffer, "UTF-8", &ct);
+ if (rc == 0)
{
- bs->body_subtype = strdup (value + len + 1);
- if (!bs->body_subtype)
- return ENOMEM;
+ if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0 &&
+ mu_c_strcasecmp (ct->subtype, "RFC822") == 0)
+ bs->body_message_type = mu_message_rfc822;
+ else if (mu_c_strcasecmp (ct->type, "TEXT") == 0)
+ bs->body_message_type = mu_message_text;
+
+ bs->body_type = ct->type;
+ ct->type = NULL;
+ mu_strupper (bs->body_type);
+ bs->body_subtype = ct->subtype;
+ ct->subtype = NULL;
mu_strupper (bs->body_subtype);
- }
+ bs->body_param = ct->param;
+ ct->param = NULL;
+ mu_content_type_destroy (&ct);
- /* body parameter parenthesized list: Content-type attributes */
-
- rc = mu_message_is_multipart (msg, &is_multipart);
- if (rc)
- return rc;
- if (is_multipart)
- bs->body_message_type = mu_message_multipart;
+ /* body parameter parenthesized list: Content-type attributes */
+ mu_message_is_multipart (msg, &is_multipart);
+ if (is_multipart)
+ bs->body_message_type = mu_message_multipart;
+ }
+ free (buffer);
}
else
{
- struct mu_mime_param param;
+ struct mu_mime_param *param;
/* Default? If Content-Type is not present consider as text/plain. */
bs->body_type = strdup ("TEXT");
@@ -161,21 +151,22 @@ bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs)
rc = mu_mime_param_assoc_create (&bs->body_param);
if (rc)
return rc;
- memset (&param, 0, sizeof (param));
- param.value = strdup ("US-ASCII");
- if (!param.value)
- {
- free (bs->body_type);
- free (bs->body_subtype);
- return ENOMEM;
- }
- rc = mu_assoc_install (bs->body_param, "CHARSET", &param);
- if (rc)
+ param = calloc (1, sizeof (*param));
+ if (param && (param->value = strdup ("US-ASCII")) != NULL)
{
- free (param.value);
- return rc;
+ rc = mu_assoc_install (bs->body_param, "CHARSET", param);
+ if (rc)
+ {
+ mu_mime_param_free (param);
+ return rc;
+ }
+ bs->body_message_type = mu_message_text;
}
- bs->body_message_type = mu_message_text;
+ else
+ {
+ free (param);
+ return ENOMEM;
+ }
}
if (is_multipart)
@@ -283,12 +274,13 @@ bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs)
return rc;
/* body disposition: Content-Disposition. */
- rc = mu_header_sget_value (header, MU_HEADER_CONTENT_DISPOSITION,
- &buffer);
+ rc = mu_header_aget_value_unfold (header, MU_HEADER_CONTENT_DISPOSITION,
+ &buffer);
if (rc == 0)
{
rc = mu_mime_header_parse (buffer, "UTF-8", &bs->body_disposition,
&bs->body_disp_param);
+ free (buffer);
if (rc)
return rc;
}
diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c
index ec883329b..b057a48a5 100644
--- a/libmailutils/mime/mimehdr.c
+++ b/libmailutils/mime/mimehdr.c
@@ -570,8 +570,14 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
success.
ASSOC - Unless NULL, parameters are stored here.
- Either PVALUE or ASSOC (but not both) can be NULL, meaning that the
- corresponding data are of no interest to the caller.
+ Both output pointers can be NULL, meaning that the corresponding data
+ are of no interest to the caller.
+
+ The value returned in PVALUE is the initial part of TEXT up to the
+ start of parameters (i.e. to the first semicolon) with leading and
+ trailing whitespace removed. No other syntactic checking is done on
+ the value. It is the responsibility of the caller to verify that it
+ complies to the syntax of the particular header.
*/
static int
_mime_header_parse (const char *text, char **pvalue,
@@ -581,7 +587,28 @@ _mime_header_parse (const char *text, char **pvalue,
struct mu_wordsplit ws;
struct param_continuation cont;
size_t i;
+ char *value = NULL;
+ size_t val_len;
+ val_len = strcspn (text, ";");
+ if (pvalue)
+ {
+ value = malloc (val_len + 1);
+ if (!value)
+ return ENOMEM;
+ memcpy (value, text, val_len);
+ value[val_len] = 0;
+ mu_rtrim_class (value, MU_CTYPE_SPACE);
+ mu_ltrim_class (value, MU_CTYPE_SPACE);
+ if (value[0] == 0)
+ {
+ free (value);
+ return MU_ERR_PARSE;
+ }
+ }
+
+ text += val_len;
+
ws.ws_delim = " \t\r\n;";
ws.ws_escape[0] = ws.ws_escape[1] = "\\\\\"\"";
ws.ws_options = 0;
@@ -596,28 +623,20 @@ _mime_header_parse (const char *text, char **pvalue,
mu_debug (MU_DEBCAT_MIME, MU_DEBUG_ERROR,
(_("wordsplit: %s"), mu_wordsplit_strerror (&ws)));
mu_wordsplit_free (&ws);
+ free (value);
return MU_ERR_PARSE;
}
- if (ws.ws_wordc == 0)
- {
- mu_wordsplit_free (&ws);
- return MU_ERR_PARSE;
- }
-
if (!assoc)
{
- if (!pvalue)
- return MU_ERR_OUT_PTR_NULL;
- *pvalue = strdup (ws.ws_wordv[0]);
+ if (pvalue)
+ *pvalue = value;
mu_wordsplit_free (&ws);
- if (!*pvalue)
- return ENOMEM;
return 0;
}
memset (&cont, 0, sizeof (cont));
- for (i = 1; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;)
+ for (i = 0; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;)
;
if (rc == MU_ERR_USER0)
rc = 0;
@@ -627,13 +646,10 @@ _mime_header_parse (const char *text, char **pvalue,
if (rc == 0)
{
if (pvalue)
- {
- *pvalue = strdup (ws.ws_wordv[0]);
- if (!*pvalue)
- rc = ENOMEM;
- }
+ *pvalue = value;
}
-
+ else
+ free (value);
mu_wordsplit_free (&ws);
if (subset)
diff --git a/libmailutils/tests/content-type.at b/libmailutils/tests/content-type.at
index adeeeadeb..7dbda629f 100644
--- a/libmailutils/tests/content-type.at
+++ b/libmailutils/tests/content-type.at
@@ -49,5 +49,14 @@ CTHDR([missing subtype],[ctparse content-type],
[conttype: Parse error
])
+CTHDR([whitespace],[ctparse content-type],
+[ text / plain ; charset = utf-8;p =foo],
+[0],
+[type = text
+subtype = plain
+ 0: charset=utf-8
+ 1: p=foo
+])
+
m4_popdef([CTHDR])
diff --git a/libmailutils/tests/conttype.c b/libmailutils/tests/conttype.c
index 1f3920b13..1cbd695c3 100644
--- a/libmailutils/tests/conttype.c
+++ b/libmailutils/tests/conttype.c
@@ -53,6 +53,8 @@ main (int argc, char **argv)
while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0)
{
mu_rtrim_class (buf, MU_CTYPE_ENDLN);
+ if (buf[0] == 0)
+ continue;
if (parse (buf))
result = 1;
}
diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at
index cef3a6e9b..ea6fab5f4 100644
--- a/libmailutils/tests/mimehdr.at
+++ b/libmailutils/tests/mimehdr.at
@@ -277,18 +277,17 @@ MIMEHDR_FAIL([empty input],[mimehdr15],
[mimehdr: mu_mime_header_parse() failed: Parse error
])
-MIMEHDR_FAIL([missing semicolon after type],[mimehdr16],
+MIMEHDR([missing semicolon after type],[mimehdr16],
[],
[message name="foo"],
-[],
-[mimehdr: mu_mime_header_parse() failed: Parse error
+[message name="foo"
])
-MIMEHDR_FAIL([whitespace in type],[mimehdr17],
+MIMEHDR([whitespace in type],[mimehdr17],
[],
[TeX file/plain; name=foo],
-[],
-[mimehdr: mu_mime_header_parse() failed: Parse error
+[TeX file/plain
+name=foo
])
m4_popdef([MIMEHDR])

Return to:

Send suggestions and report system problems to the System administrator.