diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2020-07-23 13:42:23 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2020-07-23 14:07:58 +0300 |
commit | ff9c0a396c14c219532dd667d49ebc7a3f376f9b (patch) | |
tree | a0d9a259072018dd97b936042df7d89daa3c8b41 /libmailutils | |
parent | 78c28187ffb8f6ae8aa0662e6be193d23ae7808b (diff) | |
download | mailutils-ff9c0a396c14c219532dd667d49ebc7a3f376f9b.tar.gz mailutils-ff9c0a396c14c219532dd667d49ebc7a3f376f9b.tar.bz2 |
Relax syntax requirements for MIME structured headers.
* libmailutils/base/ctparse.c (content_type_parse): Rewrite
the value splitting on type and subtype. Allow for optional
whitespace at both sides of "/". Both type and subtype can contain
arbitrary characters (except for "/"). This is not right according
to RFC 2045, but reportedly such cases exist in old mails.
* libmailutils/mailbox/bodystruct.c (bodystructure_fill): Fix
parsing of the Content-Type header.
Fix storing the pointer to an automatic variable in assoc object.
Unfold the Content-Disposition value.
* libmailutils/mime/mimehdr.c (_mime_header_parse): Return in pvalue
entire prefix part up to the first semicolon, with leading and
trailing whitespace removed. Allow for both output parameters to
be NULL.
* libmailutils/tests/content-type.at: Add new test case.
* libmailutils/tests/conttype.c: Ignore empty lines in input.
* libmailutils/tests/mimehdr.at: Change mimehdr16 and mimehdr17
tests: this syntax is accepted by the relaxed rules of the
modified parser.
Diffstat (limited to 'libmailutils')
-rw-r--r-- | libmailutils/base/ctparse.c | 52 | ||||
-rw-r--r-- | libmailutils/mailbox/bodystruct.c | 98 | ||||
-rw-r--r-- | libmailutils/mime/mimehdr.c | 56 | ||||
-rw-r--r-- | libmailutils/tests/content-type.at | 9 | ||||
-rw-r--r-- | libmailutils/tests/conttype.c | 2 | ||||
-rw-r--r-- | libmailutils/tests/mimehdr.at | 11 |
6 files changed, 134 insertions, 94 deletions
diff --git a/libmailutils/base/ctparse.c b/libmailutils/base/ctparse.c index a6c93f9f1..b5bed3e86 100644 --- a/libmailutils/base/ctparse.c +++ b/libmailutils/base/ctparse.c @@ -29,45 +29,67 @@ #include <mailutils/cctype.h> #include <mailutils/cstr.h> - +/* Parse the content type header value in INPUT. If CHARSET is not + NULL, convert textual parameters to this charset. + + Store the result in CT. + + In case of error, CT is left partially constructed. The caller + must free it. + + Parsing of the type/subtype value is relaxed: any characters are + allowed in either part (except for "/", which can't appear in type). + Although RFC 2045 forbids that, mails with such content types reportedly + exist (see conversation with Karl Berry on 2020-07-21, particularly + <202007220115.06M1FuTh001462@freefriends.org> and my reply + <20200722133251.8412@ulysses.gnu.org.ua>). + + Type must not be empty, but empty subtype is allowed. +*/ static int content_type_parse (const char *input, const char *charset, mu_content_type_t ct) { int rc; char *value, *p; - + rc = mu_mime_header_parse (input, charset, &value, &ct->param); if (rc) return rc; + p = strchr (value, '/'); if (p) { size_t len = p - value; + while (len > 0 && mu_isspace (value[len-1])) + len--; + if (len == 0) + { + rc = MU_ERR_PARSE; + goto end; + } + + p = mu_str_skip_class (p + 1, MU_CTYPE_SPACE); + ct->type = malloc (len + 1); if (!ct->type) { rc = errno; - free (value); - return rc; + goto end; } + memcpy (ct->type, value, len); ct->type[len] = 0; - ct->subtype = strdup (p + 1); - free (value); - + ct->subtype = strdup (p); if (!ct->subtype) - { - rc = errno; - return rc; - } + rc = errno; } else - { - return MU_ERR_PARSE; - } - return 0; + rc = MU_ERR_PARSE; + end: + free (value); + return rc; } int diff --git a/libmailutils/mailbox/bodystruct.c b/libmailutils/mailbox/bodystruct.c index cffe503d9..2d48205a9 100644 --- a/libmailutils/mailbox/bodystruct.c +++ b/libmailutils/mailbox/bodystruct.c @@ -31,6 +31,7 @@ 
#include <mailutils/nls.h> #include <mailutils/cstr.h> #include <mailutils/body.h> +#include <mailutils/util.h> void mu_list_free_bodystructure (void *item) @@ -94,59 +95,48 @@ static int bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs) { mu_header_t header = NULL; - const char *buffer = NULL; + char *buffer = NULL; mu_body_t body = NULL; - int rc; int is_multipart = 0; + int rc; rc = mu_message_get_header (msg, &header); if (rc) return rc; - if (mu_header_sget_value (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0) + if (mu_header_aget_value_unfold (header, MU_HEADER_CONTENT_TYPE, &buffer) == 0) { - char *value; - char *p; - size_t len; - - rc = mu_mime_header_parse (buffer, "UTF-8", &value, &bs->body_param); - if (rc) - return rc; - - len = strcspn (value, "/"); + mu_content_type_t ct; - if (mu_c_strcasecmp (value, "MESSAGE/RFC822") == 0) - bs->body_message_type = mu_message_rfc822; - else if (mu_c_strncasecmp (value, "TEXT", len) == 0) - bs->body_message_type = mu_message_text; - - p = malloc (len + 1); - if (!p) - return ENOMEM; - memcpy (p, value, len); - p[len] = 0; - - bs->body_type = p; - mu_strupper (bs->body_type); - if (value[len]) + rc = mu_content_type_parse (buffer, "UTF-8", &ct); + if (rc == 0) { - bs->body_subtype = strdup (value + len + 1); - if (!bs->body_subtype) - return ENOMEM; + if (mu_c_strcasecmp (ct->type, "MESSAGE") == 0 && + mu_c_strcasecmp (ct->subtype, "RFC822") == 0) + bs->body_message_type = mu_message_rfc822; + else if (mu_c_strcasecmp (ct->type, "TEXT") == 0) + bs->body_message_type = mu_message_text; + + bs->body_type = ct->type; + ct->type = NULL; + mu_strupper (bs->body_type); + bs->body_subtype = ct->subtype; + ct->subtype = NULL; mu_strupper (bs->body_subtype); - } + bs->body_param = ct->param; + ct->param = NULL; + mu_content_type_destroy (&ct); - /* body parameter parenthesized list: Content-type attributes */ - - rc = mu_message_is_multipart (msg, &is_multipart); - if (rc) - return rc; - if (is_multipart) - 
bs->body_message_type = mu_message_multipart; + /* body parameter parenthesized list: Content-type attributes */ + mu_message_is_multipart (msg, &is_multipart); + if (is_multipart) + bs->body_message_type = mu_message_multipart; + } + free (buffer); } else { - struct mu_mime_param param; + struct mu_mime_param *param; /* Default? If Content-Type is not present consider as text/plain. */ bs->body_type = strdup ("TEXT"); @@ -161,21 +151,22 @@ bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs) rc = mu_mime_param_assoc_create (&bs->body_param); if (rc) return rc; - memset (&param, 0, sizeof (param)); - param.value = strdup ("US-ASCII"); - if (!param.value) - { - free (bs->body_type); - free (bs->body_subtype); - return ENOMEM; - } - rc = mu_assoc_install (bs->body_param, "CHARSET", &param); - if (rc) + param = calloc (1, sizeof (*param)); + if (param && (param->value = strdup ("US-ASCII")) != NULL) { - free (param.value); - return rc; + rc = mu_assoc_install (bs->body_param, "CHARSET", param); + if (rc) + { + mu_mime_param_free (param); + return rc; + } + bs->body_message_type = mu_message_text; } - bs->body_message_type = mu_message_text; + else + { + free (param); + return ENOMEM; + } } if (is_multipart) @@ -283,12 +274,13 @@ bodystructure_fill (mu_message_t msg, struct mu_bodystructure *bs) return rc; /* body disposition: Content-Disposition. */ - rc = mu_header_sget_value (header, MU_HEADER_CONTENT_DISPOSITION, - &buffer); + rc = mu_header_aget_value_unfold (header, MU_HEADER_CONTENT_DISPOSITION, + &buffer); if (rc == 0) { rc = mu_mime_header_parse (buffer, "UTF-8", &bs->body_disposition, &bs->body_disp_param); + free (buffer); if (rc) return rc; } diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c index ec883329b..b057a48a5 100644 --- a/libmailutils/mime/mimehdr.c +++ b/libmailutils/mime/mimehdr.c @@ -570,8 +570,14 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, success. 
ASSOC - Unless NULL, parameters are stored here. - Either PVALUE or ASSOC (but not both) can be NULL, meaning that the - corresponding data are of no interest to the caller. + Both output pointers can be NULL, meaning that the corresponding data + are of no interest to the caller. + + The value returned in PVALUE is the initial part of TEXT up to the + start of parameters (i.e. to the first semicolon) with leading and + trailing whitespace removed. No other syntactic checking is done on + the value. It is the responsibility of the caller to verify that it + complies to the syntax of the particular header. */ static int _mime_header_parse (const char *text, char **pvalue, @@ -581,7 +587,28 @@ _mime_header_parse (const char *text, char **pvalue, struct mu_wordsplit ws; struct param_continuation cont; size_t i; + char *value = NULL; + size_t val_len; + val_len = strcspn (text, ";"); + if (pvalue) + { + value = malloc (val_len + 1); + if (!value) + return ENOMEM; + memcpy (value, text, val_len); + value[val_len] = 0; + mu_rtrim_class (value, MU_CTYPE_SPACE); + mu_ltrim_class (value, MU_CTYPE_SPACE); + if (value[0] == 0) + { + free (value); + return MU_ERR_PARSE; + } + } + + text += val_len; + ws.ws_delim = " \t\r\n;"; ws.ws_escape[0] = ws.ws_escape[1] = "\\\\\"\""; ws.ws_options = 0; @@ -596,28 +623,20 @@ _mime_header_parse (const char *text, char **pvalue, mu_debug (MU_DEBCAT_MIME, MU_DEBUG_ERROR, (_("wordsplit: %s"), mu_wordsplit_strerror (&ws))); mu_wordsplit_free (&ws); + free (value); return MU_ERR_PARSE; } - if (ws.ws_wordc == 0) - { - mu_wordsplit_free (&ws); - return MU_ERR_PARSE; - } - if (!assoc) { - if (!pvalue) - return MU_ERR_OUT_PTR_NULL; - *pvalue = strdup (ws.ws_wordv[0]); + if (pvalue) + *pvalue = value; mu_wordsplit_free (&ws); - if (!*pvalue) - return ENOMEM; return 0; } memset (&cont, 0, sizeof (cont)); - for (i = 1; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;) + for (i = 0; (rc = parse_param (&ws, &i, assoc, &cont, 
outcharset, subset)) == 0;) ; if (rc == MU_ERR_USER0) rc = 0; @@ -627,13 +646,10 @@ _mime_header_parse (const char *text, char **pvalue, if (rc == 0) { if (pvalue) - { - *pvalue = strdup (ws.ws_wordv[0]); - if (!*pvalue) - rc = ENOMEM; - } + *pvalue = value; } - + else + free (value); mu_wordsplit_free (&ws); if (subset) diff --git a/libmailutils/tests/content-type.at b/libmailutils/tests/content-type.at index adeeeadeb..7dbda629f 100644 --- a/libmailutils/tests/content-type.at +++ b/libmailutils/tests/content-type.at @@ -49,5 +49,14 @@ CTHDR([missing subtype],[ctparse content-type], [conttype: Parse error ]) +CTHDR([whitespace],[ctparse content-type], +[ text / plain ; charset = utf-8;p =foo], +[0], +[type = text +subtype = plain + 0: charset=utf-8 + 1: p=foo +]) + m4_popdef([CTHDR]) diff --git a/libmailutils/tests/conttype.c b/libmailutils/tests/conttype.c index 1f3920b13..1cbd695c3 100644 --- a/libmailutils/tests/conttype.c +++ b/libmailutils/tests/conttype.c @@ -53,6 +53,8 @@ main (int argc, char **argv) while ((rc = mu_stream_getline (mu_strin, &buf, &size, &n)) == 0 && n > 0) { mu_rtrim_class (buf, MU_CTYPE_ENDLN); + if (buf[0] == 0) + continue; if (parse (buf)) result = 1; } diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at index cef3a6e9b..ea6fab5f4 100644 --- a/libmailutils/tests/mimehdr.at +++ b/libmailutils/tests/mimehdr.at @@ -277,18 +277,17 @@ MIMEHDR_FAIL([empty input],[mimehdr15], [mimehdr: mu_mime_header_parse() failed: Parse error ]) -MIMEHDR_FAIL([missing semicolon after type],[mimehdr16], +MIMEHDR([missing semicolon after type],[mimehdr16], [], [message name="foo"], -[], -[mimehdr: mu_mime_header_parse() failed: Parse error +[message name="foo" ]) -MIMEHDR_FAIL([whitespace in type],[mimehdr17], +MIMEHDR([whitespace in type],[mimehdr17], [], [TeX file/plain; name=foo], -[], -[mimehdr: mu_mime_header_parse() failed: Parse error +[TeX file/plain +name=foo ]) m4_popdef([MIMEHDR]) |