summaryrefslogtreecommitdiff
path: root/libmailutils
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2020-07-22 13:08:43 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2020-07-22 13:08:43 +0300
commit 5b3431af0bfd1f51224a9c2b0709184a0cc944a7 (patch)
tree d795fc20ad8a30b7e75bded188724ed0bcbf4591 /libmailutils
parent 2d9c9918fce547f88236598804fe5c4286f8cf53 (diff)
download mailutils-5b3431af0bfd1f51224a9c2b0709184a0cc944a7.tar.gz
mailutils-5b3431af0bfd1f51224a9c2b0709184a0cc944a7.tar.bz2
Stricter parsing of structured MIME headers
The mu_content_type_parse function allowed for missing /subtype, which caused grief in case of blatantly invalid inputs.

* decodemail/decodemail.c (message_decode): Handle mu_content_type_parse failures gracefully: return a reference to the input message. Improve diagnostics.
* lib/mdecode.c (message_body_stream): Improve diagnostics.
* libmailutils/base/assoc.c (merge_sort): Bugfix: accept empty input list.
* libmailutils/base/ctparse.c (content_type_parse): Require that both type and subtype be present.
* libmailutils/mime/mimehdr.c (_mime_header_parse): Move parameter parsing to a separate function. Do better syntax checking.
* libmailutils/tests/Makefile.am: Add new tests.
* libmailutils/tests/testsuite.at: Likewise.
* libmailutils/tests/content-type.at: New tests.
* libmailutils/tests/conttype.c: Return 1 if at least one parse fails.
* libmailutils/tests/mimehdr.at: Add tests for invalid inputs.
* libmailutils/tests/mimehdr.c: Better diagnostics.
Diffstat (limited to 'libmailutils')
-rw-r--r-- libmailutils/base/assoc.c 2
-rw-r--r-- libmailutils/base/ctparse.c 3
-rw-r--r-- libmailutils/mime/mimehdr.c 531
-rw-r--r-- libmailutils/tests/Makefile.am 1
-rw-r--r-- libmailutils/tests/content-type.at 53
-rw-r--r-- libmailutils/tests/conttype.c 6
-rw-r--r-- libmailutils/tests/mimehdr.at 38
-rw-r--r-- libmailutils/tests/mimehdr.c 8
-rw-r--r-- libmailutils/tests/testsuite.at 1
m--------- libmailutils/wordsplit 0
10 files changed, 386 insertions, 257 deletions
diff --git a/libmailutils/base/assoc.c b/libmailutils/base/assoc.c
index def2c80f3..439cda286 100644
--- a/libmailutils/base/assoc.c
+++ b/libmailutils/base/assoc.c
@@ -762,7 +762,7 @@ merge_sort (struct _mu_assoc_elem *list, size_t length,
size_t left_len, right_len, i;
struct _mu_assoc_elem *elt;
- if (length == 1)
+ if (length <= 1)
return list;
if (length == 2)
diff --git a/libmailutils/base/ctparse.c b/libmailutils/base/ctparse.c
index 246ad0d5f..a6c93f9f1 100644
--- a/libmailutils/base/ctparse.c
+++ b/libmailutils/base/ctparse.c
@@ -65,8 +65,7 @@ content_type_parse (const char *input, const char *charset,
}
else
{
- ct->type = value;
- ct->subtype = NULL;
+ return MU_ERR_PARSE;
}
return 0;
}
diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c
index d68ee7777..6bbfcfdb9 100644
--- a/libmailutils/mime/mimehdr.c
+++ b/libmailutils/mime/mimehdr.c
@@ -264,6 +264,277 @@ mu_mime_param_assoc_add (mu_assoc_t assoc, const char *name)
return mu_assoc_install (assoc, name, NULL);
}
+static inline char *
+getword (struct mu_wordsplit *ws, size_t *pi)
+{
+ if (*pi == ws->ws_wordc)
+ return NULL;
+ return ws->ws_wordv[(*pi)++];
+}
+
+static int
+parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
+ struct param_continuation *param_cont,
+ const char *outcharset, int subset)
+{
+ size_t klen;
+ char *key;
+ char *val;
+ const char *lang = NULL;
+ const char *cset = NULL;
+ char *langp = NULL;
+ char *csetp = NULL;
+ char *p;
+ char *decoded;
+ int flags = 0;
+ struct mu_mime_param *param;
+ int rc;
+
+ key = getword (ws, pi);
+ if (key == NULL)
+ return MU_ERR_USER0;
+
+ if (strcmp (key, ";") == 0)
+ {
+ /* Reportedly, some MUAs insert several semicolons */
+ do
+ {
+ key = getword (ws, pi);
+ if (key == NULL)
+ return MU_ERR_USER0;
+ }
+ while (strcmp (key, ";") == 0);
+ }
+ else
+ return MU_ERR_PARSE;
+
+ p = strchr (key, '=');
+ if (!p)
+ val = "";
+ else
+ {
+ *p++ = 0;
+ val = p;
+ }
+
+ klen = strlen (key);
+ if (klen == 0)
+ /* Ignore empty parameter */
+ return 0;
+
+ p = strchr (key, '*');
+ if (p)
+ {
+ /* It is a parameter value continuation (RFC 2231, Section 3)
+ or parameter value character set and language information
+ (ibid., Section 4). */
+ klen = p - key;
+ if (p[1])
+ {
+ if (mu_isdigit (p[1]))
+ {
+ char *q;
+ unsigned long n = strtoul (p + 1, &q, 10);
+
+ if (*q && *q != '*')
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("malformed parameter name %s: skipping"),
+ key));
+ return 0;
+ }
+
+ if (n != param_cont->param_cind)
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("continuation index out of sequence in %s: "
+ "skipping"),
+ key));
+ /* Ignore this parameter. Another possibility would be
+ to drop the continuation assembled so far. That makes
+ little difference, because the string is malformed
+ anyway.
+
+ We try to continue just to gather as many information
+ as possible from this mess.
+ */
+ return 0;
+ }
+
+ if (n == 0)
+ {
+ param_cont->param_name = malloc (klen + 1);
+ if (!param_cont->param_name)
+ return ENOMEM;
+ param_cont->param_length = klen;
+ memcpy (param_cont->param_name, key, klen);
+ param_cont->param_name[klen] = 0;
+
+ rc = mu_memory_stream_create (&param_cont->param_value,
+ MU_STREAM_RDWR);
+ if (rc)
+ return rc;
+ }
+ else if (param_cont->param_length != klen ||
+ memcmp (param_cont->param_name, key, klen))
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("continuation name mismatch: %s: "
+ "skipping"),
+ key));
+ return 0;
+ }
+
+ if (*q == '*')
+ flags |= MU_MIMEHDR_CSINFO;
+
+ param_cont->param_cind++;
+ flags |= MU_MIMEHDR_MULTILINE;
+ }
+ }
+ else
+ {
+ flags |= MU_MIMEHDR_CSINFO;
+ *p = 0;
+ }
+ }
+ else if (param_cont->param_name)
+ {
+ rc = flush_param (param_cont, assoc, subset, outcharset);
+ free_param_continuation (param_cont);
+ if (rc)
+ return rc;
+ }
+
+ if (flags & MU_MIMEHDR_CSINFO)
+ {
+ p = strchr (val, '\'');
+ if (p)
+ {
+ char *q = strchr (p + 1, '\'');
+ if (q)
+ {
+ cset = val;
+ *p++ = 0;
+ lang = p;
+ *q++ = 0;
+ val = q;
+ }
+ }
+
+ if ((flags & MU_MIMEHDR_MULTILINE) && param_cont->param_cind == 1)
+ {
+ param_cont->param_lang = lang;
+ param_cont->param_cset = cset;
+ }
+ }
+
+ if (flags & MU_MIMEHDR_CSINFO)
+ {
+ char *tmp;
+
+ rc = mu_str_url_decode (&tmp, val);
+ if (rc)
+ return rc;
+ if (!(flags & MU_MIMEHDR_MULTILINE))
+ {
+ if (!outcharset || mu_c_strcasecmp (cset, outcharset) == 0)
+ decoded = tmp;
+ else
+ {
+ rc = _recode_string (tmp, cset, outcharset, &decoded);
+ free (tmp);
+ if (rc)
+ return rc;
+ }
+ }
+ else
+ decoded = tmp;
+ }
+ else
+ {
+ struct mu_mime_param *param;
+ rc = mu_rfc2047_decode_param (outcharset, val, &param);
+ if (rc)
+ return rc;
+ cset = csetp = param->cset;
+ lang = langp = param->lang;
+ decoded = param->value;
+ free (param);
+ }
+ val = decoded;
+
+ if (flags & MU_MIMEHDR_MULTILINE)
+ {
+ rc = mu_stream_write (param_cont->param_value, val, strlen (val), NULL);
+ free (decoded);
+ free (csetp);
+ free (langp);
+ return rc;
+ }
+
+ param = calloc (1, sizeof (*param));
+ if (!param)
+ rc = ENOMEM;
+ else
+ {
+ if (lang)
+ {
+ param->lang = strdup (lang);
+ if (!param->lang)
+ rc = ENOMEM;
+ }
+
+ if (rc == 0 && cset)
+ {
+ param->cset = strdup (cset);
+ if (!param->cset)
+ {
+ free (param->lang);
+ rc = ENOMEM;
+ }
+ }
+
+ free (csetp);
+ free (langp);
+ }
+
+ if (rc)
+ {
+ free (decoded);
+ return rc;
+ }
+
+ param->value = strdup (val);
+ free (decoded);
+ if (!param->value)
+ {
+ mu_mime_param_free (param);
+ return ENOMEM;
+ }
+
+ if (subset)
+ {
+ struct mu_mime_param **p;
+ if (mu_assoc_lookup_ref (assoc, key, &p) == 0)
+ *p = param;
+ else
+ mu_mime_param_free (param);
+ }
+ else
+ {
+ rc = mu_assoc_install (assoc, key, param);
+ if (rc)
+ {
+ mu_mime_param_free (param);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+
/* A working horse of this module. Parses input string, which should
be a header field value complying to RFCs 2045, 2183, 2231.3.
@@ -306,6 +577,12 @@ _mime_header_parse (const char *text, char **pvalue,
return MU_ERR_PARSE;
}
+ if (ws.ws_wordc == 0)
+ {
+ mu_wordsplit_free (&ws);
+ return MU_ERR_PARSE;
+ }
+
if (!assoc)
{
if (!pvalue)
@@ -318,256 +595,10 @@ _mime_header_parse (const char *text, char **pvalue,
}
memset (&cont, 0, sizeof (cont));
- for (i = 1; i < ws.ws_wordc; i++)
- {
- size_t klen;
- char *key;
- char *val;
- const char *lang = NULL;
- const char *cset = NULL;
- char *langp = NULL;
- char *csetp = NULL;
- char *p;
- char *decoded;
- int flags = 0;
- struct mu_mime_param *param;
-
- key = ws.ws_wordv[i];
- if (key[0] == ';')
- /* Reportedly, some MUAs insert several semicolons */
- continue;
- p = strchr (key, '=');
- if (!p)
- val = "";
- else
- {
- *p++ = 0;
- val = p;
- }
-
- klen = strlen (key);
- if (klen == 0)
- continue;
-
- p = strchr (key, '*');
- if (p)
- {
- /* It is a parameter value continuation (RFC 2231, Section 3)
- or parameter value character set and language information
- (ibid., Section 4). */
- klen = p - key;
- if (p[1])
- {
- if (mu_isdigit (p[1]))
- {
- char *q;
- unsigned long n = strtoul (p + 1, &q, 10);
-
- if (*q && *q != '*')
- {
- mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
- (_("malformed parameter name %s: skipping"),
- key));
- continue;
- }
-
- if (n != cont.param_cind)
- {
- mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
- (_("continuation index out of sequence in %s: "
- "skipping"),
- key));
- /* Ignore this parameter. Another possibility would be
- to drop the continuation assembled so far. That makes
- little difference, because the string is malformed
- anyway.
-
- We try to continue just to gather as many information
- as possible from this mess.
- */
- continue;
- }
-
- if (n == 0)
- {
- cont.param_name = malloc (klen + 1);
- if (!cont.param_name)
- {
- rc = ENOMEM;
- break;
- }
- cont.param_length = klen;
- memcpy (cont.param_name, key, klen);
- cont.param_name[klen] = 0;
-
- rc = mu_memory_stream_create (&cont.param_value,
- MU_STREAM_RDWR);
- if (rc)
- break;
- }
- else if (cont.param_length != klen ||
- memcmp (cont.param_name, key, klen))
- {
- mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
- (_("continuation name mismatch: %s: "
- "skipping"),
- key));
- continue;
- }
-
- if (*q == '*')
- flags |= MU_MIMEHDR_CSINFO;
-
- cont.param_cind++;
- flags |= MU_MIMEHDR_MULTILINE;
- }
- }
- else
- {
- flags |= MU_MIMEHDR_CSINFO;
- *p = 0;
- }
- }
- else if (cont.param_name)
- {
- rc = flush_param (&cont, assoc, subset, outcharset);
- free_param_continuation (&cont);
- if (rc)
- break;
- }
-
- if (flags & MU_MIMEHDR_CSINFO)
- {
- p = strchr (val, '\'');
- if (p)
- {
- char *q = strchr (p + 1, '\'');
- if (q)
- {
- cset = val;
- *p++ = 0;
- lang = p;
- *q++ = 0;
- val = q;
- }
- }
-
- if ((flags & MU_MIMEHDR_MULTILINE) && cont.param_cind == 1)
- {
- cont.param_lang = lang;
- cont.param_cset = cset;
- }
- }
-
- if (flags & MU_MIMEHDR_CSINFO)
- {
- char *tmp;
-
- rc = mu_str_url_decode (&tmp, val);
- if (rc)
- break;
- if (!(flags & MU_MIMEHDR_MULTILINE))
- {
- if (!outcharset || mu_c_strcasecmp (cset, outcharset) == 0)
- decoded = tmp;
- else
- {
- rc = _recode_string (tmp, cset, outcharset, &decoded);
- free (tmp);
- }
- if (rc)
- break;
- }
- else
- decoded = tmp;
- }
- else
- {
- struct mu_mime_param *param;
- rc = mu_rfc2047_decode_param (outcharset, val, &param);
- if (rc)
- {
- mu_wordsplit_free (&ws);
- return rc;
- }
- cset = csetp = param->cset;
- lang = langp = param->lang;
- decoded = param->value;
- free (param);
- }
- val = decoded;
-
- if (flags & MU_MIMEHDR_MULTILINE)
- {
- rc = mu_stream_write (cont.param_value, val, strlen (val), NULL);
- free (decoded);
- free (csetp);
- free (langp);
- if (rc)
- break;
- continue;
- }
-
- param = calloc (1, sizeof (*param));
- if (!param)
- rc = ENOMEM;
- else
- {
- if (lang)
- {
- param->lang = strdup (lang);
- if (!param->lang)
- rc = ENOMEM;
- }
-
- if (rc == 0 && cset)
- {
- param->cset = strdup (cset);
- if (!param->cset)
- {
- free (param->lang);
- rc = ENOMEM;
- }
- }
-
- free (csetp);
- free (langp);
- }
-
- if (rc)
- {
- free (decoded);
- break;
- }
-
- param->value = strdup (val);
- free (decoded);
- if (!param->value)
- {
- mu_mime_param_free (param);
- rc = ENOMEM;
- break;
- }
-
- if (subset)
- {
- struct mu_mime_param **p;
- if (mu_assoc_lookup_ref (assoc, key, &p) == 0)
- *p = param;
- else
- mu_mime_param_free (param);
- }
- else
- {
- rc = mu_assoc_install (assoc, key, param);
- if (rc)
- {
- mu_mime_param_free (param);
- break;
- }
- }
- }
-
+ for (i = 1; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;)
+ ;
+ if (rc == MU_ERR_USER0)
+ rc = 0;
if (rc == 0 && cont.param_name)
rc = flush_param (&cont, assoc, subset, outcharset);
free_param_continuation (&cont);
diff --git a/libmailutils/tests/Makefile.am b/libmailutils/tests/Makefile.am
index e9f772223..8071aeee9 100644
--- a/libmailutils/tests/Makefile.am
+++ b/libmailutils/tests/Makefile.am
@@ -94,6 +94,7 @@ TESTSUITE_AT += \
crlf.at\
crlfdot.at\
ctm.at\
+ content-type.at\
encode2047.at\
exp.at\
fromflt.at\
diff --git a/libmailutils/tests/content-type.at b/libmailutils/tests/content-type.at
new file mode 100644
index 000000000..adeeeadeb
--- /dev/null
+++ b/libmailutils/tests/content-type.at
@@ -0,0 +1,53 @@
+# This file is part of GNU Mailutils. -*- Autotest -*-
+# Copyright (C) 2011-2020 Free Software Foundation, Inc.
+#
+# GNU Mailutils is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3, or (at
+# your option) any later version.
+#
+# GNU Mailutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>.
+
+dnl ---------------------------------------------------------------------
+dnl CTHDR([NAME], [KW], [INPUT], [STATUS = 0], [STDOUT = `'], [STDERR = `'])
+dnl
+m4_pushdef([CTHDR],[
+m4_pushdef([MU_TEST_GROUP],[content-type])
+m4_pushdef([MU_TEST_KEYWORDS],[content-type rfc2231])
+m4_pushdef([MU_TEST_COMMAND],[conttype])
+MU_GENERIC_TEST($@)
+m4_popdef([MU_TEST_COMMAND])
+m4_popdef([MU_TEST_KEYWORDS])
+m4_popdef([MU_TEST_GROUP])
+])
+
+CTHDR([no parameters],[ctparse content-type],
+[text/plain],
+[0],
+[type = text
+subtype = plain
+])
+
+CTHDR([with parameters],[ctparse content-type],
+[text/plain; charset=utf-8],
+[0],
+[type = text
+subtype = plain
+ 0: charset=utf-8
+])
+
+CTHDR([missing subtype],[ctparse content-type],
+[text],
+[1],
+[],
+[conttype: Parse error
+])
+
+m4_popdef([CTHDR])
+
diff --git a/libmailutils/tests/conttype.c b/libmailutils/tests/conttype.c
index a94bfaa99..1f3920b13 100644
--- a/libmailutils/tests/conttype.c
+++ b/libmailutils/tests/conttype.c
@@ -43,7 +43,7 @@ main (int argc, char **argv)
{
char *buf = NULL;
size_t size = 0, n;
- int rc;
+ int rc, result = 0;
mu_set_program_name (argv[0]);
mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);
@@ -54,8 +54,8 @@ main (int argc, char **argv)
{
mu_rtrim_class (buf, MU_CTYPE_ENDLN);
if (parse (buf))
- rc = 1;
+ result = 1;
}
- return rc;
+ return result;
}
diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at
index ee2b21941..951a35a80 100644
--- a/libmailutils/tests/mimehdr.at
+++ b/libmailutils/tests/mimehdr.at
@@ -30,6 +30,16 @@ m4_popdef([MU_TEST_COMMAND])
m4_popdef([MU_TEST_KEYWORDS])
m4_popdef([MU_TEST_GROUP])
])
+
+m4_pushdef([MIMEHDR_FAIL],[
+m4_pushdef([MU_TEST_GROUP],[mimehdr])
+m4_pushdef([MU_TEST_KEYWORDS],[mimehdr rfc2231])
+m4_pushdef([MU_TEST_COMMAND],[mimehdr $3])
+MU_GENERIC_TEST([$1],[$2],[$4],[2],[$5],[$6])
+m4_popdef([MU_TEST_COMMAND])
+m4_popdef([MU_TEST_KEYWORDS])
+m4_popdef([MU_TEST_GROUP])
+])
dnl ---------------------------------------------------------------------
MIMEHDR([simple],[mimehdr00 mimehdr-simple],
@@ -243,4 +253,32 @@ MIMEHDR([format: language info 5],[mimehdr12 mimehdr12e],
])
+MIMEHDR([missing parameters],[mimehdr13],
+[],
+[message],
+[message
+])
+
+MIMEHDR_FAIL([empty input],[mimehdr14],
+[],
+[],
+[],
+[mimehdr: mu_mime_header_parse() failed: Parse error
+])
+
+MIMEHDR_FAIL([missing semicolon after type],[mimehdr15],
+[],
+[message name="foo"],
+[],
+[mimehdr: mu_mime_header_parse() failed: Parse error
+])
+
+MIMEHDR_FAIL([whitespace in type],[mimehdr16],
+[],
+[TeX file/plain; name=foo],
+[],
+[mimehdr: mu_mime_header_parse() failed: Parse error
+])
+
m4_popdef([MIMEHDR])
+m4_popdef([MIMEHDR_FAIL])
diff --git a/libmailutils/tests/mimehdr.c b/libmailutils/tests/mimehdr.c
index 6d819f356..a955489a7 100644
--- a/libmailutils/tests/mimehdr.c
+++ b/libmailutils/tests/mimehdr.c
@@ -55,6 +55,7 @@ int
main (int argc, char **argv)
{
int i;
+ int rc;
mu_stream_t tmp;
mu_transport_t trans[2];
char *value;
@@ -100,7 +101,12 @@ main (int argc, char **argv)
MU_ASSERT (mu_stream_ioctl (tmp, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET,
trans));
- MU_ASSERT (mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc));
+ rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc);
+ if (rc)
+ {
+ mu_diag_funcall (MU_DIAG_ERROR, "mu_mime_header_parse", NULL, rc);
+ return 2;
+ }
if (header_name)
{
diff --git a/libmailutils/tests/testsuite.at b/libmailutils/tests/testsuite.at
index 2926459bf..a5e947356 100644
--- a/libmailutils/tests/testsuite.at
+++ b/libmailutils/tests/testsuite.at
@@ -249,6 +249,7 @@ m4_include([fsaftomod.at])
m4_include([modtofsaf.at])
m4_include([mimehdr.at])
+m4_include([content-type.at])
m4_include([msgset.at])
diff --git a/libmailutils/wordsplit b/libmailutils/wordsplit
-Subproject 6a7581f2e60a600a4915e4f55b74a15c8070197
+Subproject cf2c7c86debce18ab24c038afa6dde580c9706e

Return to:

Send suggestions and report system problems to the System administrator.