diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2020-07-22 13:08:43 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2020-07-22 13:08:43 +0300 |
commit | 5b3431af0bfd1f51224a9c2b0709184a0cc944a7 (patch) | |
tree | d795fc20ad8a30b7e75bded188724ed0bcbf4591 /libmailutils | |
parent | 2d9c9918fce547f88236598804fe5c4286f8cf53 (diff) | |
download | mailutils-5b3431af0bfd1f51224a9c2b0709184a0cc944a7.tar.gz mailutils-5b3431af0bfd1f51224a9c2b0709184a0cc944a7.tar.bz2 |
Stricter parsing of structured MIME headers
The mu_content_type_parse function allowed for missing
/subtype, which caused grief in case of blatantly invalid
inputs.
* decodemail/decodemail.c (message_decode): Handle
mu_content_type_parse failures gracefully: return a reference to
the input message.
Improve diagnostics.
* lib/mdecode.c (message_body_stream): Improve diagnostics.
* libmailutils/base/assoc.c (merge_sort): Bugfix: accept empty
input list.
* libmailutils/base/ctparse.c (content_type_parse): Require that
both type and subtype be present.
* libmailutils/mime/mimehdr.c (_mime_header_parse): Move parameter
parsing to a separate function. Do better syntax checking.
* libmailutils/tests/Makefile.am: Add new tests.
* libmailutils/tests/testsuite.at: Likewise.
* libmailutils/tests/content-type.at: New tests.
* libmailutils/tests/conttype.c: Return 1 if at least one parse
fails.
* libmailutils/tests/mimehdr.at: Add tests for invalid inputs.
* libmailutils/tests/mimehdr.c: Better diagnostics.
Diffstat (limited to 'libmailutils')
-rw-r--r-- | libmailutils/base/assoc.c | 2 | ||||
-rw-r--r-- | libmailutils/base/ctparse.c | 3 | ||||
-rw-r--r-- | libmailutils/mime/mimehdr.c | 531 | ||||
-rw-r--r-- | libmailutils/tests/Makefile.am | 1 | ||||
-rw-r--r-- | libmailutils/tests/content-type.at | 53 | ||||
-rw-r--r-- | libmailutils/tests/conttype.c | 6 | ||||
-rw-r--r-- | libmailutils/tests/mimehdr.at | 38 | ||||
-rw-r--r-- | libmailutils/tests/mimehdr.c | 8 | ||||
-rw-r--r-- | libmailutils/tests/testsuite.at | 1 | ||||
m--------- | libmailutils/wordsplit | 0 |
10 files changed, 386 insertions, 257 deletions
diff --git a/libmailutils/base/assoc.c b/libmailutils/base/assoc.c index def2c80f3..439cda286 100644 --- a/libmailutils/base/assoc.c +++ b/libmailutils/base/assoc.c @@ -762,7 +762,7 @@ merge_sort (struct _mu_assoc_elem *list, size_t length, size_t left_len, right_len, i; struct _mu_assoc_elem *elt; - if (length == 1) + if (length <= 1) return list; if (length == 2) diff --git a/libmailutils/base/ctparse.c b/libmailutils/base/ctparse.c index 246ad0d5f..a6c93f9f1 100644 --- a/libmailutils/base/ctparse.c +++ b/libmailutils/base/ctparse.c @@ -65,8 +65,7 @@ content_type_parse (const char *input, const char *charset, } else { - ct->type = value; - ct->subtype = NULL; + return MU_ERR_PARSE; } return 0; } diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c index d68ee7777..6bbfcfdb9 100644 --- a/libmailutils/mime/mimehdr.c +++ b/libmailutils/mime/mimehdr.c @@ -264,6 +264,277 @@ mu_mime_param_assoc_add (mu_assoc_t assoc, const char *name) return mu_assoc_install (assoc, name, NULL); } +static inline char * +getword (struct mu_wordsplit *ws, size_t *pi) +{ + if (*pi == ws->ws_wordc) + return NULL; + return ws->ws_wordv[(*pi)++]; +} + +static int +parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, + struct param_continuation *param_cont, + const char *outcharset, int subset) +{ + size_t klen; + char *key; + char *val; + const char *lang = NULL; + const char *cset = NULL; + char *langp = NULL; + char *csetp = NULL; + char *p; + char *decoded; + int flags = 0; + struct mu_mime_param *param; + int rc; + + key = getword (ws, pi); + if (key == NULL) + return MU_ERR_USER0; + + if (strcmp (key, ";") == 0) + { + /* Reportedly, some MUAs insert several semicolons */ + do + { + key = getword (ws, pi); + if (key == NULL) + return MU_ERR_USER0; + } + while (strcmp (key, ";") == 0); + } + else + return MU_ERR_PARSE; + + p = strchr (key, '='); + if (!p) + val = ""; + else + { + *p++ = 0; + val = p; + } + + klen = strlen (key); + if (klen == 0) + /* 
Ignore empty parameter */ + return 0; + + p = strchr (key, '*'); + if (p) + { + /* It is a parameter value continuation (RFC 2231, Section 3) + or parameter value character set and language information + (ibid., Section 4). */ + klen = p - key; + if (p[1]) + { + if (mu_isdigit (p[1])) + { + char *q; + unsigned long n = strtoul (p + 1, &q, 10); + + if (*q && *q != '*') + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("malformed parameter name %s: skipping"), + key)); + return 0; + } + + if (n != param_cont->param_cind) + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("continuation index out of sequence in %s: " + "skipping"), + key)); + /* Ignore this parameter. Another possibility would be + to drop the continuation assembled so far. That makes + little difference, because the string is malformed + anyway. + + We try to continue just to gather as many information + as possible from this mess. + */ + return 0; + } + + if (n == 0) + { + param_cont->param_name = malloc (klen + 1); + if (!param_cont->param_name) + return ENOMEM; + param_cont->param_length = klen; + memcpy (param_cont->param_name, key, klen); + param_cont->param_name[klen] = 0; + + rc = mu_memory_stream_create (&param_cont->param_value, + MU_STREAM_RDWR); + if (rc) + return rc; + } + else if (param_cont->param_length != klen || + memcmp (param_cont->param_name, key, klen)) + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("continuation name mismatch: %s: " + "skipping"), + key)); + return 0; + } + + if (*q == '*') + flags |= MU_MIMEHDR_CSINFO; + + param_cont->param_cind++; + flags |= MU_MIMEHDR_MULTILINE; + } + } + else + { + flags |= MU_MIMEHDR_CSINFO; + *p = 0; + } + } + else if (param_cont->param_name) + { + rc = flush_param (param_cont, assoc, subset, outcharset); + free_param_continuation (param_cont); + if (rc) + return rc; + } + + if (flags & MU_MIMEHDR_CSINFO) + { + p = strchr (val, '\''); + if (p) + { + char *q = strchr (p + 1, '\''); + if (q) + { + cset = val; + *p++ = 0; + lang = p; 
+ *q++ = 0; + val = q; + } + } + + if ((flags & MU_MIMEHDR_MULTILINE) && param_cont->param_cind == 1) + { + param_cont->param_lang = lang; + param_cont->param_cset = cset; + } + } + + if (flags & MU_MIMEHDR_CSINFO) + { + char *tmp; + + rc = mu_str_url_decode (&tmp, val); + if (rc) + return rc; + if (!(flags & MU_MIMEHDR_MULTILINE)) + { + if (!outcharset || mu_c_strcasecmp (cset, outcharset) == 0) + decoded = tmp; + else + { + rc = _recode_string (tmp, cset, outcharset, &decoded); + free (tmp); + if (rc) + return rc; + } + } + else + decoded = tmp; + } + else + { + struct mu_mime_param *param; + rc = mu_rfc2047_decode_param (outcharset, val, &param); + if (rc) + return rc; + cset = csetp = param->cset; + lang = langp = param->lang; + decoded = param->value; + free (param); + } + val = decoded; + + if (flags & MU_MIMEHDR_MULTILINE) + { + rc = mu_stream_write (param_cont->param_value, val, strlen (val), NULL); + free (decoded); + free (csetp); + free (langp); + return rc; + } + + param = calloc (1, sizeof (*param)); + if (!param) + rc = ENOMEM; + else + { + if (lang) + { + param->lang = strdup (lang); + if (!param->lang) + rc = ENOMEM; + } + + if (rc == 0 && cset) + { + param->cset = strdup (cset); + if (!param->cset) + { + free (param->lang); + rc = ENOMEM; + } + } + + free (csetp); + free (langp); + } + + if (rc) + { + free (decoded); + return rc; + } + + param->value = strdup (val); + free (decoded); + if (!param->value) + { + mu_mime_param_free (param); + return ENOMEM; + } + + if (subset) + { + struct mu_mime_param **p; + if (mu_assoc_lookup_ref (assoc, key, &p) == 0) + *p = param; + else + mu_mime_param_free (param); + } + else + { + rc = mu_assoc_install (assoc, key, param); + if (rc) + { + mu_mime_param_free (param); + return rc; + } + } + + return 0; +} + + /* A working horse of this module. Parses input string, which should be a header field value complying to RFCs 2045, 2183, 2231.3. 
@@ -306,6 +577,12 @@ _mime_header_parse (const char *text, char **pvalue, return MU_ERR_PARSE; } + if (ws.ws_wordc == 0) + { + mu_wordsplit_free (&ws); + return MU_ERR_PARSE; + } + if (!assoc) { if (!pvalue) @@ -318,256 +595,10 @@ _mime_header_parse (const char *text, char **pvalue, } memset (&cont, 0, sizeof (cont)); - for (i = 1; i < ws.ws_wordc; i++) - { - size_t klen; - char *key; - char *val; - const char *lang = NULL; - const char *cset = NULL; - char *langp = NULL; - char *csetp = NULL; - char *p; - char *decoded; - int flags = 0; - struct mu_mime_param *param; - - key = ws.ws_wordv[i]; - if (key[0] == ';') - /* Reportedly, some MUAs insert several semicolons */ - continue; - p = strchr (key, '='); - if (!p) - val = ""; - else - { - *p++ = 0; - val = p; - } - - klen = strlen (key); - if (klen == 0) - continue; - - p = strchr (key, '*'); - if (p) - { - /* It is a parameter value continuation (RFC 2231, Section 3) - or parameter value character set and language information - (ibid., Section 4). */ - klen = p - key; - if (p[1]) - { - if (mu_isdigit (p[1])) - { - char *q; - unsigned long n = strtoul (p + 1, &q, 10); - - if (*q && *q != '*') - { - mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, - (_("malformed parameter name %s: skipping"), - key)); - continue; - } - - if (n != cont.param_cind) - { - mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, - (_("continuation index out of sequence in %s: " - "skipping"), - key)); - /* Ignore this parameter. Another possibility would be - to drop the continuation assembled so far. That makes - little difference, because the string is malformed - anyway. - - We try to continue just to gather as many information - as possible from this mess. 
- */ - continue; - } - - if (n == 0) - { - cont.param_name = malloc (klen + 1); - if (!cont.param_name) - { - rc = ENOMEM; - break; - } - cont.param_length = klen; - memcpy (cont.param_name, key, klen); - cont.param_name[klen] = 0; - - rc = mu_memory_stream_create (&cont.param_value, - MU_STREAM_RDWR); - if (rc) - break; - } - else if (cont.param_length != klen || - memcmp (cont.param_name, key, klen)) - { - mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, - (_("continuation name mismatch: %s: " - "skipping"), - key)); - continue; - } - - if (*q == '*') - flags |= MU_MIMEHDR_CSINFO; - - cont.param_cind++; - flags |= MU_MIMEHDR_MULTILINE; - } - } - else - { - flags |= MU_MIMEHDR_CSINFO; - *p = 0; - } - } - else if (cont.param_name) - { - rc = flush_param (&cont, assoc, subset, outcharset); - free_param_continuation (&cont); - if (rc) - break; - } - - if (flags & MU_MIMEHDR_CSINFO) - { - p = strchr (val, '\''); - if (p) - { - char *q = strchr (p + 1, '\''); - if (q) - { - cset = val; - *p++ = 0; - lang = p; - *q++ = 0; - val = q; - } - } - - if ((flags & MU_MIMEHDR_MULTILINE) && cont.param_cind == 1) - { - cont.param_lang = lang; - cont.param_cset = cset; - } - } - - if (flags & MU_MIMEHDR_CSINFO) - { - char *tmp; - - rc = mu_str_url_decode (&tmp, val); - if (rc) - break; - if (!(flags & MU_MIMEHDR_MULTILINE)) - { - if (!outcharset || mu_c_strcasecmp (cset, outcharset) == 0) - decoded = tmp; - else - { - rc = _recode_string (tmp, cset, outcharset, &decoded); - free (tmp); - } - if (rc) - break; - } - else - decoded = tmp; - } - else - { - struct mu_mime_param *param; - rc = mu_rfc2047_decode_param (outcharset, val, &param); - if (rc) - { - mu_wordsplit_free (&ws); - return rc; - } - cset = csetp = param->cset; - lang = langp = param->lang; - decoded = param->value; - free (param); - } - val = decoded; - - if (flags & MU_MIMEHDR_MULTILINE) - { - rc = mu_stream_write (cont.param_value, val, strlen (val), NULL); - free (decoded); - free (csetp); - free (langp); - if (rc) - break; 
- continue; - } - - param = calloc (1, sizeof (*param)); - if (!param) - rc = ENOMEM; - else - { - if (lang) - { - param->lang = strdup (lang); - if (!param->lang) - rc = ENOMEM; - } - - if (rc == 0 && cset) - { - param->cset = strdup (cset); - if (!param->cset) - { - free (param->lang); - rc = ENOMEM; - } - } - - free (csetp); - free (langp); - } - - if (rc) - { - free (decoded); - break; - } - - param->value = strdup (val); - free (decoded); - if (!param->value) - { - mu_mime_param_free (param); - rc = ENOMEM; - break; - } - - if (subset) - { - struct mu_mime_param **p; - if (mu_assoc_lookup_ref (assoc, key, &p) == 0) - *p = param; - else - mu_mime_param_free (param); - } - else - { - rc = mu_assoc_install (assoc, key, param); - if (rc) - { - mu_mime_param_free (param); - break; - } - } - } - + for (i = 1; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;) + ; + if (rc == MU_ERR_USER0) + rc = 0; if (rc == 0 && cont.param_name) rc = flush_param (&cont, assoc, subset, outcharset); free_param_continuation (&cont); diff --git a/libmailutils/tests/Makefile.am b/libmailutils/tests/Makefile.am index e9f772223..8071aeee9 100644 --- a/libmailutils/tests/Makefile.am +++ b/libmailutils/tests/Makefile.am @@ -94,6 +94,7 @@ TESTSUITE_AT += \ crlf.at\ crlfdot.at\ ctm.at\ + content-type.at\ encode2047.at\ exp.at\ fromflt.at\ diff --git a/libmailutils/tests/content-type.at b/libmailutils/tests/content-type.at new file mode 100644 index 000000000..adeeeadeb --- /dev/null +++ b/libmailutils/tests/content-type.at @@ -0,0 +1,53 @@ +# This file is part of GNU Mailutils. -*- Autotest -*- +# Copyright (C) 2011-2020 Free Software Foundation, Inc. +# +# GNU Mailutils is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3, or (at +# your option) any later version. 
+# +# GNU Mailutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. + +dnl --------------------------------------------------------------------- +dnl CTHDR([NAME], [KW], [INPUT], [STATUS = 0], [STDOUT = `'], [STDERR = `']) +dnl +m4_pushdef([CTHDR],[ +m4_pushdef([MU_TEST_GROUP],[content-type]) +m4_pushdef([MU_TEST_KEYWORDS],[content-type rfc2231]) +m4_pushdef([MU_TEST_COMMAND],[conttype]) +MU_GENERIC_TEST($@) +m4_popdef([MU_TEST_COMMAND]) +m4_popdef([MU_TEST_KEYWORDS]) +m4_popdef([MU_TEST_GROUP]) +]) + +CTHDR([no parameters],[ctparse content-type], +[text/plain], +[0], +[type = text +subtype = plain +]) + +CTHDR([with parameters],[ctparse content-type], +[text/plain; charset=utf-8], +[0], +[type = text +subtype = plain + 0: charset=utf-8 +]) + +CTHDR([missing subtype],[ctparse content-type], +[text], +[1], +[], +[conttype: Parse error +]) + +m4_popdef([CTHDR]) + diff --git a/libmailutils/tests/conttype.c b/libmailutils/tests/conttype.c index a94bfaa99..1f3920b13 100644 --- a/libmailutils/tests/conttype.c +++ b/libmailutils/tests/conttype.c @@ -43,7 +43,7 @@ main (int argc, char **argv) { char *buf = NULL; size_t size = 0, n; - int rc; + int rc, result = 0; mu_set_program_name (argv[0]); mu_stdstream_setup (MU_STDSTREAM_RESET_NONE); @@ -54,8 +54,8 @@ main (int argc, char **argv) { mu_rtrim_class (buf, MU_CTYPE_ENDLN); if (parse (buf)) - rc = 1; + result = 1; } - return rc; + return result; } diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at index ee2b21941..951a35a80 100644 --- a/libmailutils/tests/mimehdr.at +++ b/libmailutils/tests/mimehdr.at @@ -30,6 +30,16 @@ m4_popdef([MU_TEST_COMMAND]) m4_popdef([MU_TEST_KEYWORDS]) 
m4_popdef([MU_TEST_GROUP]) ]) + +m4_pushdef([MIMEHDR_FAIL],[ +m4_pushdef([MU_TEST_GROUP],[mimehdr]) +m4_pushdef([MU_TEST_KEYWORDS],[mimehdr rfc2231]) +m4_pushdef([MU_TEST_COMMAND],[mimehdr $3]) +MU_GENERIC_TEST([$1],[$2],[$4],[2],[$5],[$6]) +m4_popdef([MU_TEST_COMMAND]) +m4_popdef([MU_TEST_KEYWORDS]) +m4_popdef([MU_TEST_GROUP]) +]) dnl --------------------------------------------------------------------- MIMEHDR([simple],[mimehdr00 mimehdr-simple], @@ -243,4 +253,32 @@ MIMEHDR([format: language info 5],[mimehdr12 mimehdr12e], ]) +MIMEHDR([missing parameters],[mimehdr13], +[], +[message], +[message +]) + +MIMEHDR_FAIL([empty input],[mimehdr14], +[], +[], +[], +[mimehdr: mu_mime_header_parse() failed: Parse error +]) + +MIMEHDR_FAIL([missing semicolon after type],[mimehdr15], +[], +[message name="foo"], +[], +[mimehdr: mu_mime_header_parse() failed: Parse error +]) + +MIMEHDR_FAIL([whitespace in type],[mimehdr16], +[], +[TeX file/plain; name=foo], +[], +[mimehdr: mu_mime_header_parse() failed: Parse error +]) + m4_popdef([MIMEHDR]) +m4_popdef([MIMEHDR_FAIL]) diff --git a/libmailutils/tests/mimehdr.c b/libmailutils/tests/mimehdr.c index 6d819f356..a955489a7 100644 --- a/libmailutils/tests/mimehdr.c +++ b/libmailutils/tests/mimehdr.c @@ -55,6 +55,7 @@ int main (int argc, char **argv) { int i; + int rc; mu_stream_t tmp; mu_transport_t trans[2]; char *value; @@ -100,7 +101,12 @@ main (int argc, char **argv) MU_ASSERT (mu_stream_ioctl (tmp, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET, trans)); - MU_ASSERT (mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc)); + rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc); + if (rc) + { + mu_diag_funcall (MU_DIAG_ERROR, "mu_mime_header_parse", NULL, rc); + return 2; + } if (header_name) { diff --git a/libmailutils/tests/testsuite.at b/libmailutils/tests/testsuite.at index 2926459bf..a5e947356 100644 --- a/libmailutils/tests/testsuite.at +++ b/libmailutils/tests/testsuite.at @@ -249,6 +249,7 @@ 
m4_include([fsaftomod.at]) m4_include([modtofsaf.at]) m4_include([mimehdr.at]) +m4_include([content-type.at]) m4_include([msgset.at]) diff --git a/libmailutils/wordsplit b/libmailutils/wordsplit -Subproject 6a7581f2e60a600a4915e4f55b74a15c8070197 +Subproject cf2c7c86debce18ab24c038afa6dde580c9706e |