summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2020-07-25 18:48:18 +0300
committerSergey Poznyakoff <gray@gnu.org>2020-07-25 18:57:04 +0300
commitb3c4a8be9286967ecfd7c720f2eafd598db06cb3 (patch)
tree97e373886f2d3d099a481fbc42263adce2312d56
parentf111a7add00a0dd6ea5bda1ea3f507377a82bbe8 (diff)
downloadmailutils-b3c4a8be9286967ecfd7c720f2eafd598db06cb3.tar.gz
mailutils-b3c4a8be9286967ecfd7c720f2eafd598db06cb3.tar.bz2
Improve error tolerance when parsing MIME structured headers
* include/mailutils/assoc.h (mu_assoc_set_mark) (mu_assoc_head_set_mark, mu_assoc_tail_set_mark) (mu_assoc_pop, mu_assoc_shift) (mu_assoc_sweep_unset, mu_assoc_pull): New protos. * libmailutils/base/assoc.c: New functions (see above). * libmailutils/mime/mimehdr.c (parse_param): Mark last installed element for sweeping. When seeing a semicolon, unmark it. Return MU_ERR_PARSE on errors. On success, install the created parameter in the assoc array. (_mime_header_parse): Change type of the subset parameter. Attempt error recovery on parse errors. If it succeeds, sweep out the last installed parameter. If subset is specified, use mu_assoc_pull to populate it from the created assoc. (mu_mime_header_parse_subset) (mu_mime_param_assoc_create): Update invocations of _mime_header_parse. * libmailutils/tests/mimehdr.at: Test error tolerance. * libmailutils/tests/mimehdr.c: Use mu_cli_simple to parse command line. Treat positional arguments as names of the parameters to retain on output.
-rw-r--r--include/mailutils/assoc.h12
-rw-r--r--libmailutils/base/assoc.c172
-rw-r--r--libmailutils/mime/mimehdr.c169
-rw-r--r--libmailutils/tests/mimehdr.at9
-rw-r--r--libmailutils/tests/mimehdr.c81
5 files changed, 304 insertions, 139 deletions
diff --git a/include/mailutils/assoc.h b/include/mailutils/assoc.h
index 3e67b617f..a96d6a614 100644
--- a/include/mailutils/assoc.h
+++ b/include/mailutils/assoc.h
@@ -55,9 +55,17 @@ typedef int (*mu_assoc_comparator_t) (const char *, const void *,
int mu_assoc_sort_r (mu_assoc_t assoc, mu_assoc_comparator_t cmp, void *data);
int mu_assoc_mark (mu_assoc_t asc, int (*cond) (char const *, void *, void *),
- void *data);
+ void *data);
+int mu_assoc_set_mark (mu_assoc_t asc, char const *name, int mark);
+int mu_assoc_head_set_mark (mu_assoc_t asc, int mark);
+int mu_assoc_tail_set_mark (mu_assoc_t asc, int mark);
+int mu_assoc_pop (mu_assoc_t asc, char const *name, void *ret_val);
+int mu_assoc_shift (mu_assoc_t asc, char const *name, void *ret_val);
+
int mu_assoc_sweep (mu_assoc_t asc);
-
+int mu_assoc_sweep_unset (mu_assoc_t asc);
+
+void mu_assoc_pull (mu_assoc_t a, mu_assoc_t b);
#ifdef __cplusplus
}
diff --git a/libmailutils/base/assoc.c b/libmailutils/base/assoc.c
index 439cda286..f0c60edac 100644
--- a/libmailutils/base/assoc.c
+++ b/libmailutils/base/assoc.c
@@ -207,6 +207,27 @@ assoc_remove (mu_assoc_t assoc, unsigned idx)
return 0;
}
+/* Find the table slot of ASSOC that holds ELEM and remove that slot
+   via assoc_remove.  If ND is non-zero, the element's data pointer is
+   reset to NULL before the removal.
+
+   Returns 0 if ELEM was found and removed, MU_ERR_NOENT if ELEM is
+   NULL or is not present in the table. */
+static int
+assoc_remove_elem (mu_assoc_t assoc, struct _mu_assoc_elem *elem, int nd)
+{
+  unsigned slot;
+
+  if (elem == NULL)
+    return MU_ERR_NOENT;
+
+  for (slot = 0; slot < hash_size[assoc->hash_num]; slot++)
+    {
+      if (assoc->tab[slot] != elem)
+        continue;
+
+      /* The slot holds ELEM itself, so clearing through ELEM is the
+         same as clearing through the table entry. */
+      if (nd)
+        elem->data = NULL;
+      assoc_remove (assoc, slot);
+      return 0;
+    }
+
+  return MU_ERR_NOENT;
+}
+
#define name_cmp(assoc,a,b) (((assoc)->flags & MU_ASSOC_ICASE) ? \
mu_c_strcasecmp(a,b) : strcmp(a,b))
@@ -502,22 +523,7 @@ itrctl (void *owner, enum mu_itrctl_req req, void *arg)
case mu_itrctl_delete:
case mu_itrctl_delete_nd:
/* Delete current element */
- if (itr->elem)
- {
- unsigned i;
-
- for (i = 0; i < hash_size[assoc->hash_num]; i++)
- {
- if (assoc->tab[i] == itr->elem)
- {
- if (req == mu_itrctl_delete_nd)
- assoc->tab[i]->data = NULL;
- assoc_remove (assoc, i);
- return 0;
- }
- }
- }
- return MU_ERR_NOENT;
+ return assoc_remove_elem (assoc, itr->elem, req == mu_itrctl_delete_nd);
case mu_itrctl_replace:
case mu_itrctl_replace_nd:
@@ -843,12 +849,140 @@ mu_assoc_sweep (mu_assoc_t asc)
if (!asc)
return EINVAL;
- for (i = hash_size[asc->hash_num]; i > 0; i--)
+ if (asc->tab)
+ {
+ for (i = hash_size[asc->hash_num]; i > 0; i--)
+ {
+ if (asc->tab[i-1] && asc->tab[i-1]->mark)
+ assoc_remove (asc, i-1);
+ }
+ }
+
+ return 0;
+}
+
+/* Release the data of every marked element of ASC without removing
+   the elements themselves: the data pointer of each marked element
+   is passed to the array's free function (if one is set) and then
+   reset to NULL.
+
+   Returns EINVAL if ASC is NULL, 0 otherwise.  An array whose table
+   has not been allocated is left untouched. */
+int
+mu_assoc_sweep_unset (mu_assoc_t asc)
+{
+  unsigned i;
+
+  if (!asc)
+    return EINVAL;
+
+  if (asc->tab)
     {
-      if (asc->tab[i-1] && asc->tab[i-1]->mark)
-        assoc_remove (asc, i-1);
+      /* I counts down from the table size to 1, so the slot under
+         inspection is I-1.  Slot I must never be touched: on the
+         first pass it lies one past the end of the table. */
+      for (i = hash_size[asc->hash_num]; i > 0; i--)
+        {
+          if (asc->tab[i-1] && asc->tab[i-1]->mark)
+            {
+              if (asc->free)
+                asc->free (asc->tab[i-1]->data);
+              asc->tab[i-1]->data = NULL;
+            }
+        }
     }
   return 0;
 }
+/* Set (MARK non-zero) or clear (MARK zero) the mark of the element
+   of ASC named NAME.
+
+   Returns EINVAL if ASC or NAME is NULL; otherwise returns the status
+   of the slot lookup, leaving the array unchanged when it fails. */
+int
+mu_assoc_set_mark (mu_assoc_t asc, char const *name, int mark)
+{
+  unsigned slot;
+  int status;
+
+  if (asc == NULL || name == NULL)
+    return EINVAL;
+
+  status = assoc_find_slot (asc, name, NULL, &slot);
+  if (status != 0)
+    return status;
+
+  /* Normalize MARK to 0 or 1 before storing it. */
+  asc->tab[slot]->mark = mark ? 1 : 0;
+  return 0;
+}
+
+/* Set (MARK non-zero) or clear (MARK zero) the mark of the head
+   element of ASC.  Does nothing if the array has no head element.
+
+   Returns EINVAL if ASC is NULL, 0 otherwise. */
+int
+mu_assoc_head_set_mark (mu_assoc_t asc, int mark)
+{
+  if (asc == NULL)
+    return EINVAL;
+
+  if (asc->head != NULL)
+    asc->head->mark = (mark != 0);
+
+  return 0;
+}
+
+/* Set (MARK non-zero) or clear (MARK zero) the mark of the tail
+   element of ASC.  Does nothing if the array has no tail element.
+
+   Returns EINVAL if ASC is NULL, 0 otherwise. */
+int
+mu_assoc_tail_set_mark (mu_assoc_t asc, int mark)
+{
+  if (asc == NULL)
+    return EINVAL;
+
+  if (asc->tail != NULL)
+    asc->tail->mark = (mark != 0);
+
+  return 0;
+}
+
+/* Remove the tail element of ASC.
+
+   If RET_VAL is not NULL, it is treated as the address of a pointer:
+   the element's data pointer is stored there and the element is then
+   removed with its data pointer reset to NULL.  If RET_VAL is NULL,
+   the element is removed as is.
+
+   Returns EINVAL if ASC or NAME is NULL, MU_ERR_NOENT if the array
+   has no tail element, 0 on success.
+
+   NOTE(review): NAME is checked for NULL but is not otherwise used
+   here -- confirm what it is meant for. */
+int
+mu_assoc_pop (mu_assoc_t asc, char const *name, void *ret_val)
+{
+  int want_data = (ret_val != NULL);
+
+  if (asc == NULL || name == NULL)
+    return EINVAL;
+
+  if (want_data && asc->tail != NULL)
+    *(void **) ret_val = asc->tail->data;
+
+  return assoc_remove_elem (asc, asc->tail, want_data);
+}
+
+/* Remove the head element of ASC.
+
+   If RET_VAL is not NULL, it is treated as the address of a pointer:
+   the element's data pointer is stored there and the element is then
+   removed with its data pointer reset to NULL.  If RET_VAL is NULL,
+   the element is removed as is.
+
+   Returns EINVAL if ASC or NAME is NULL, MU_ERR_NOENT if the array
+   has no head element, 0 on success.
+
+   NOTE(review): NAME is checked for NULL but is not otherwise used
+   here -- confirm what it is meant for. */
+int
+mu_assoc_shift (mu_assoc_t asc, char const *name, void *ret_val)
+{
+  int want_data = (ret_val != NULL);
+
+  if (asc == NULL || name == NULL)
+    return EINVAL;
+
+  if (want_data && asc->head != NULL)
+    *(void **) ret_val = asc->head->data;
+
+  return assoc_remove_elem (asc, asc->head, want_data);
+}
+
+/* Given A and B - two associative arrays keeping the same kind of data,
+   move from B to A all elements whose names are present in A.  Remove
+   from A all elements not thus updated.
+
+   For each name found in both arrays, the data previously held by A
+   is released with A's free function (if set), A takes over the data
+   pointer from B, and the element is removed from B with its data
+   pointer cleared first.
+
+   Does nothing if A or B is NULL, or if A's table has not been
+   allocated (in which case A holds nothing to update or remove).
+*/
+void
+mu_assoc_pull (mu_assoc_t a, mu_assoc_t b)
+{
+  unsigned i;
+
+  /* The table of an array may be unallocated; never index it then. */
+  if (!a || !b || !a->tab)
+    return;
+
+  for (i = 0; i < hash_size[a->hash_num]; i++)
+    {
+      if (a->tab[i])
+        {
+          unsigned j;
+          int rc;
+
+          rc = assoc_find_slot (b, a->tab[i]->name, NULL, &j);
+          if (rc == 0)
+            {
+              if (a->free)
+                a->free (a->tab[i]->data);
+              a->tab[i]->data = b->tab[j]->data;
+              /* Ownership has moved to A: detach the data from B's
+                 element before that element is removed. */
+              b->tab[j]->data = NULL;
+              assoc_remove (b, j);
+            }
+          else
+            {
+              /* NOTE(review): this removes from A while scanning it
+                 forward.  If assoc_remove can move another element
+                 into slot I, that element would not be examined --
+                 confirm against assoc_remove. */
+              assoc_remove (a, i);
+            }
+        }
+    }
+}
+
+/* TODO
+ mu_assoc_union (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b)
+ Computes the union of A and B. Stores the result in R.
+ mu_assoc_intersect (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b)
+ Computes the intersection of A and B.
+ mu_assoc_symdiff (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b)
+ Computes the symmetric difference (disjunctive union) of the two
+ arrays.
+*/
diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c
index b057a48a5..7d3052f29 100644
--- a/libmailutils/mime/mimehdr.c
+++ b/libmailutils/mime/mimehdr.c
@@ -152,20 +152,13 @@ free_param_continuation (struct param_continuation *p)
is already in ASSOC. If OUTCHARSET is not NULL, the value from
CONT->param_value will be recoded to that charset before storing it. */
static int
-flush_param (struct param_continuation *cont, mu_assoc_t assoc, int subset,
+flush_param (struct param_continuation *cont, mu_assoc_t assoc,
const char *outcharset)
{
int rc;
- struct mu_mime_param *param, **param_slot;
+ struct mu_mime_param *param;
mu_off_t size;
- if (subset)
- {
- rc = mu_assoc_lookup_ref (assoc, cont->param_name, &param_slot);
- if (rc)
- return 0;
- }
-
param = calloc (1, sizeof *param);
if (!param)
return errno;
@@ -231,16 +224,9 @@ flush_param (struct param_continuation *cont, mu_assoc_t assoc, int subset,
param->value = tmp;
}
- if (subset)
- {
- *param_slot = param;
- }
- else
- {
- rc = mu_assoc_install (assoc, cont->param_name, param);
- if (rc)
- mu_mime_param_free (param);
- }
+ rc = mu_assoc_install (assoc, cont->param_name, param);
+ if (rc)
+ mu_mime_param_free (param);
return rc;
}
@@ -275,7 +261,7 @@ getword (struct mu_wordsplit *ws, size_t *pi)
static int
parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
struct param_continuation *param_cont,
- const char *outcharset, int subset)
+ const char *outcharset)
{
size_t klen;
char *key;
@@ -296,6 +282,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
if (strcmp (key, ";") == 0)
{
+ mu_assoc_tail_set_mark (assoc, 0);
/* Reportedly, some MUAs insert several semicolons */
do
{
@@ -306,7 +293,11 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
while (strcmp (key, ";") == 0);
}
else
- return MU_ERR_PARSE;
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("semicolon missing (found %s)"), key));
+ return MU_ERR_PARSE;
+ }
p = strchr (key, '=');
if (p)
@@ -318,7 +309,11 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
val = p;
}
else if ((val = getword (ws, pi)) == NULL)
- return MU_ERR_PARSE;
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("missing parameter value")));
+ return MU_ERR_PARSE;
+ }
/* key= WSP val */
}
else
@@ -332,11 +327,19 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
val = p + 1;
}
else if ((val = getword (ws, pi)) == NULL)
- return MU_ERR_PARSE;
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("missing parameter value")));
+ return MU_ERR_PARSE;
+ }
/* key WSP = WSP val */
}
else
- return MU_ERR_PARSE;
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("missing = after parameter name")));
+ return MU_ERR_PARSE;
+ }
}
klen = strlen (key);
@@ -361,9 +364,9 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
if (*q && *q != '*')
{
mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
- (_("malformed parameter name %s: skipping"),
+ (_("malformed parameter name %s"),
key));
- return 0;
+ return MU_ERR_PARSE;
}
if (n != param_cont->param_cind)
@@ -372,15 +375,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
(_("continuation index out of sequence in %s: "
"skipping"),
key));
- /* Ignore this parameter. Another possibility would be
- to drop the continuation assembled so far. That makes
- little difference, because the string is malformed
- anyway.
-
- We try to continue just to gather as many information
- as possible from this mess.
- */
- return 0;
+ return MU_ERR_PARSE;
}
if (n == 0)
@@ -401,10 +396,8 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
memcmp (param_cont->param_name, key, klen))
{
mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
- (_("continuation name mismatch: %s: "
- "skipping"),
- key));
- return 0;
+ (_("continuation name mismatch: %s"), key));
+ return MU_ERR_PARSE;
}
if (*q == '*')
@@ -422,7 +415,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
}
else if (param_cont->param_name)
{
- rc = flush_param (param_cont, assoc, subset, outcharset);
+ rc = flush_param (param_cont, assoc, outcharset);
free_param_continuation (param_cont);
if (rc)
return rc;
@@ -535,23 +528,13 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
return ENOMEM;
}
- if (subset)
- {
- struct mu_mime_param **p;
- if (mu_assoc_lookup_ref (assoc, key, &p) == 0)
- *p = param;
- else
- mu_mime_param_free (param);
- }
- else
+ rc = mu_assoc_install (assoc, key, param);
+ if (rc)
{
- rc = mu_assoc_install (assoc, key, param);
- if (rc)
- {
- mu_mime_param_free (param);
- return rc;
- }
+ mu_mime_param_free (param);
+ return rc;
}
+ mu_assoc_tail_set_mark (assoc, 1);
return 0;
}
@@ -581,7 +564,8 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc,
*/
static int
_mime_header_parse (const char *text, char **pvalue,
- mu_assoc_t assoc, const char *outcharset, int subset)
+ mu_assoc_t assoc, const char *outcharset,
+ mu_assoc_t subset)
{
int rc = 0;
struct mu_wordsplit ws;
@@ -636,13 +620,42 @@ _mime_header_parse (const char *text, char **pvalue,
}
memset (&cont, 0, sizeof (cont));
- for (i = 0; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;)
- ;
- if (rc == MU_ERR_USER0)
- rc = 0;
+ i = 0;
+ while (1)
+ {
+ rc = parse_param (&ws, &i, assoc, &cont, outcharset);
+ if (rc)
+ {
+ if (rc == MU_ERR_PARSE)
+ {
+ char *p;
+ mu_assoc_sweep (assoc);
+ /* Attempt error recovery */
+ do
+ p = getword (&ws, &i);
+ while (p && strcmp (p, ";"));
+ if (p)
+ {
+ mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0,
+ (_("finished error recovery at ; %s"),
+ ws.ws_wordv[i]));
+ /* put the semicolon back */
+ i--;
+ continue;
+ }
+ rc = 0;
+ }
+ else if (rc == MU_ERR_USER0)
+ rc = 0;
+ break;
+ }
+ }
+
if (rc == 0 && cont.param_name)
- rc = flush_param (&cont, assoc, subset, outcharset);
+ rc = flush_param (&cont, assoc, outcharset);
free_param_continuation (&cont);
+ mu_assoc_tail_set_mark (assoc, 0);
+
if (rc == 0)
{
if (pvalue)
@@ -653,26 +666,7 @@ _mime_header_parse (const char *text, char **pvalue,
mu_wordsplit_free (&ws);
if (subset)
- {
- /* Eliminate empty elements. */
- mu_iterator_t itr;
-
- rc = mu_assoc_get_iterator (assoc, &itr);
- if (rc == 0)
- {
- for (mu_iterator_first (itr); !mu_iterator_is_done (itr);
- mu_iterator_next (itr))
- {
- const char *name;
- struct mu_mime_param *p;
-
- mu_iterator_current_kv (itr, (const void **)&name, (void**)&p);
- if (!p)
- mu_iterator_ctl (itr, mu_itrctl_delete, NULL);
- }
- mu_iterator_destroy (&itr);
- }
- }
+ mu_assoc_pull (subset, assoc);
return rc;
}
@@ -699,7 +693,14 @@ int
mu_mime_header_parse_subset (const char *text, const char *cset,
char **pvalue, mu_assoc_t assoc)
{
- return _mime_header_parse (text, pvalue, assoc, cset, 1);
+ mu_assoc_t tmp;
+ int rc = mu_mime_param_assoc_create (&tmp);
+ if (rc == 0)
+ {
+ rc = _mime_header_parse (text, pvalue, tmp, cset, assoc);
+ mu_assoc_destroy (&tmp);
+ }
+ return rc;
}
/* Parse header value from TEXT and return its value and parameters.
@@ -725,7 +726,7 @@ mu_mime_header_parse (const char *text, char const *cset, char **pvalue,
rc = mu_mime_param_assoc_create (&assoc);
if (rc == 0)
{
- rc = _mime_header_parse (text, pvalue, assoc, cset, 0);
+ rc = _mime_header_parse (text, pvalue, assoc, cset, NULL);
if (rc || !passoc)
mu_assoc_destroy (&assoc);
else
@@ -733,7 +734,7 @@ mu_mime_header_parse (const char *text, char const *cset, char **pvalue,
}
return rc;
-}
+}
/* TEXT is a value of a structured MIME header, e.g. Content-Type.
This function returns the `disposition part' of it. In other
diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at
index ea6fab5f4..2b6bf4a99 100644
--- a/libmailutils/tests/mimehdr.at
+++ b/libmailutils/tests/mimehdr.at
@@ -290,5 +290,14 @@ MIMEHDR([whitespace in type],[mimehdr17],
name=foo
])
+MIMEHDR([error tolerance],[mimehdr18],
+[],
+[application/x-stuff;name="one";title some text ; charset= utf8;
+output = foo bar],
+[application/x-stuff
+charset=utf8
+name=one
+])
+
m4_popdef([MIMEHDR])
m4_popdef([MIMEHDR_FAIL])
diff --git a/libmailutils/tests/mimehdr.c b/libmailutils/tests/mimehdr.c
index a955489a7..463db8d96 100644
--- a/libmailutils/tests/mimehdr.c
+++ b/libmailutils/tests/mimehdr.c
@@ -31,6 +31,7 @@
#include <mailutils/cctype.h>
#include <mailutils/error.h>
#include <mailutils/errno.h>
+#include <mailutils/cli.h>
static int
sort_names (char const *aname, void const *adata,
@@ -51,57 +52,69 @@ print_param (const char *name, void *item, void *data)
return 0;
}
+/* Option callback for "debug": hand the option argument ARG to
+   mu_debug_parse_spec.  PO and OPT are not used. */
+static void
+cli_debug (struct mu_parseopt *po, struct mu_option *opt, char const *arg)
+{
+  (void) po;
+  (void) opt;
+
+  mu_debug_parse_spec (arg);
+}
+
+char *charset;
+char *header_name;
+unsigned long width = 76;
+
+struct mu_option options[] = {
+ { "debug", 0, "SPEC", MU_OPTION_DEFAULT,
+ "set debug level", mu_c_string, NULL, cli_debug },
+ { "charset", 0, "NAME", MU_OPTION_DEFAULT,
+ "convert values to this charset", mu_c_string, &charset },
+ { "header", 0, "NAME", MU_OPTION_DEFAULT,
+ "set header name", mu_c_string, &header_name },
+ { "width", 0, "N", MU_OPTION_DEFAULT,
+ "output width", mu_c_ulong, &width },
+ MU_OPTION_END
+};
+
int
main (int argc, char **argv)
{
- int i;
int rc;
mu_stream_t tmp;
mu_transport_t trans[2];
char *value;
mu_assoc_t assoc;
- char *charset = NULL;
- char *header_name = NULL;
- unsigned long width = 76;
mu_set_program_name (argv[0]);
- for (i = 1; i < argc; i++)
- {
- char *opt = argv[i];
-
- if (strncmp (opt, "-debug=", 7) == 0)
- mu_debug_parse_spec (opt + 7);
- else if (strncmp (opt, "-charset=", 9) == 0)
- charset = opt + 9;
- else if (strcmp (opt, "-h") == 0 || strcmp (opt, "-help") == 0)
- {
- mu_printf ("usage: %s [-charset=cs] [-debug=SPEC] [-header=NAME] [-width=N]", mu_program_name);
- return 0;
- }
- else if (strncmp (opt, "-header=", 8) == 0)
- header_name = opt + 8;
- else if (strncmp (opt, "-width=", 7) == 0)
- width = strtoul (opt + 7, NULL, 10);
- else
- {
- mu_error ("unknown option %s", opt);
- return 1;
- }
- }
-
- if (i != argc)
- {
- mu_error ("too many arguments");
- return 1;
- }
+ mu_cli_simple (argc, argv,
+ MU_CLI_OPTION_OPTIONS, options,
+ MU_CLI_OPTION_SINGLE_DASH,
+ MU_CLI_OPTION_PROG_DOC, "mu_mime_header_parse test",
+ MU_CLI_OPTION_PROG_ARGS, "[PARAM...]",
+ MU_CLI_OPTION_EXTRA_INFO, "If arguments (PARAM) are "
+ "specified, only the matching parameters will be displayed.",
+ MU_CLI_OPTION_RETURN_ARGC, &argc,
+ MU_CLI_OPTION_RETURN_ARGV, &argv,
+ MU_CLI_OPTION_END);
MU_ASSERT (mu_memory_stream_create (&tmp, MU_STREAM_RDWR));
MU_ASSERT (mu_stream_copy (tmp, mu_strin, 0, NULL));
MU_ASSERT (mu_stream_write (tmp, "", 1, NULL));
MU_ASSERT (mu_stream_ioctl (tmp, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET,
trans));
+
+ if (argc)
+ {
+ int i;
- rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc);
+ MU_ASSERT (mu_mime_param_assoc_create (&assoc));
+ for (i = 0; i < argc; i++)
+ mu_assoc_install (assoc, argv[i], NULL);
+ rc = mu_mime_header_parse_subset ((char*)trans[0], charset, &value,
+ assoc);
+ }
+ else
+ {
+ rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc);
+ }
if (rc)
{
mu_diag_funcall (MU_DIAG_ERROR, "mu_mime_header_parse", NULL, rc);

Return to:

Send suggestions and report system problems to the System administrator.