diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2020-07-25 18:48:18 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2020-07-25 18:57:04 +0300 |
commit | b3c4a8be9286967ecfd7c720f2eafd598db06cb3 (patch) | |
tree | 97e373886f2d3d099a481fbc42263adce2312d56 | |
parent | f111a7add00a0dd6ea5bda1ea3f507377a82bbe8 (diff) | |
download | mailutils-b3c4a8be9286967ecfd7c720f2eafd598db06cb3.tar.gz mailutils-b3c4a8be9286967ecfd7c720f2eafd598db06cb3.tar.bz2 |
Improve error tolerance when parsing MIME structured headers
* include/mailutils/assoc.h (mu_assoc_set_mark)
(mu_assoc_head_set_mark, mu_assoc_tail_set_mark)
(mu_assoc_pop, mu_assoc_shift)
(mu_assoc_sweep_unset, mu_assoc_pull): New protos.
* libmailutils/base/assoc.c: New functions (see above).
* libmailutils/mime/mimehdr.c (parse_param): Mark last installed
element for sweeping. When seeing a semicolon, unmark it. Return
MU_ERR_PARSE on errors. On success, install the created parameter
in the assoc array.
(_mime_header_parse): Change type of the subset parameter.
Attempt error recovery on parse errors. If it succeeds, sweep out
the last installed parameter.
If subset is specified, use mu_assoc_pull to populate it from the
created assoc.
(mu_mime_header_parse_subset)
(mu_mime_header_parse): Update invocations of _mime_header_parse.
* libmailutils/tests/mimehdr.at: Test error tolerance.
* libmailutils/tests/mimehdr.c: Use mu_cli_simple to parse command line.
Treat positional arguments as names of the parameters to retain on
output.
-rw-r--r-- | include/mailutils/assoc.h | 12 | ||||
-rw-r--r-- | libmailutils/base/assoc.c | 172 | ||||
-rw-r--r-- | libmailutils/mime/mimehdr.c | 169 | ||||
-rw-r--r-- | libmailutils/tests/mimehdr.at | 9 | ||||
-rw-r--r-- | libmailutils/tests/mimehdr.c | 81 |
5 files changed, 304 insertions, 139 deletions
diff --git a/include/mailutils/assoc.h b/include/mailutils/assoc.h index 3e67b617f..a96d6a614 100644 --- a/include/mailutils/assoc.h +++ b/include/mailutils/assoc.h @@ -55,9 +55,17 @@ typedef int (*mu_assoc_comparator_t) (const char *, const void *, int mu_assoc_sort_r (mu_assoc_t assoc, mu_assoc_comparator_t cmp, void *data); int mu_assoc_mark (mu_assoc_t asc, int (*cond) (char const *, void *, void *), - void *data); + void *data); +int mu_assoc_set_mark (mu_assoc_t asc, char const *name, int mark); +int mu_assoc_head_set_mark (mu_assoc_t asc, int mark); +int mu_assoc_tail_set_mark (mu_assoc_t asc, int mark); +int mu_assoc_pop (mu_assoc_t asc, char const *name, void *ret_val); +int mu_assoc_shift (mu_assoc_t asc, char const *name, void *ret_val); + int mu_assoc_sweep (mu_assoc_t asc); - +int mu_assoc_sweep_unset (mu_assoc_t asc); + +void mu_assoc_pull (mu_assoc_t a, mu_assoc_t b); #ifdef __cplusplus } diff --git a/libmailutils/base/assoc.c b/libmailutils/base/assoc.c index 439cda286..f0c60edac 100644 --- a/libmailutils/base/assoc.c +++ b/libmailutils/base/assoc.c @@ -207,6 +207,27 @@ assoc_remove (mu_assoc_t assoc, unsigned idx) return 0; } +static int +assoc_remove_elem (mu_assoc_t assoc, struct _mu_assoc_elem *elem, int nd) +{ + unsigned i; + + if (elem) + { + for (i = 0; i < hash_size[assoc->hash_num]; i++) + { + if (assoc->tab[i] == elem) + { + if (nd) + assoc->tab[i]->data = NULL; + assoc_remove (assoc, i); + return 0; + } + } + } + return MU_ERR_NOENT; +} + #define name_cmp(assoc,a,b) (((assoc)->flags & MU_ASSOC_ICASE) ? 
\ mu_c_strcasecmp(a,b) : strcmp(a,b)) @@ -502,22 +523,7 @@ itrctl (void *owner, enum mu_itrctl_req req, void *arg) case mu_itrctl_delete: case mu_itrctl_delete_nd: /* Delete current element */ - if (itr->elem) - { - unsigned i; - - for (i = 0; i < hash_size[assoc->hash_num]; i++) - { - if (assoc->tab[i] == itr->elem) - { - if (req == mu_itrctl_delete_nd) - assoc->tab[i]->data = NULL; - assoc_remove (assoc, i); - return 0; - } - } - } - return MU_ERR_NOENT; + return assoc_remove_elem (assoc, itr->elem, req == mu_itrctl_delete_nd); case mu_itrctl_replace: case mu_itrctl_replace_nd: @@ -843,12 +849,140 @@ mu_assoc_sweep (mu_assoc_t asc) if (!asc) return EINVAL; - for (i = hash_size[asc->hash_num]; i > 0; i--) + if (asc->tab) + { + for (i = hash_size[asc->hash_num]; i > 0; i--) + { + if (asc->tab[i-1] && asc->tab[i-1]->mark) + assoc_remove (asc, i-1); + } + } + + return 0; +} + +int +mu_assoc_sweep_unset (mu_assoc_t asc) +{ + unsigned i; + + if (!asc) + return EINVAL; + + if (asc->tab) { - if (asc->tab[i-1] && asc->tab[i-1]->mark) - assoc_remove (asc, i-1); + for (i = hash_size[asc->hash_num]; i > 0; i--) + { + if (asc->tab[i-1] && asc->tab[i-1]->mark) + { + if (asc->free) + asc->free (asc->tab[i-1]->data); + asc->tab[i-1]->data = NULL; + } + } } return 0; } +int +mu_assoc_set_mark (mu_assoc_t asc, char const *name, int mark) +{ + int rc; + unsigned i; + + if (!asc || !name) + return EINVAL; + + rc = assoc_find_slot (asc, name, NULL, &i); + if (rc == 0) + asc->tab[i]->mark = !!mark; + return rc; +} + +int +mu_assoc_head_set_mark (mu_assoc_t asc, int mark) +{ + if (!asc) + return EINVAL; + if (asc->head) + asc->head->mark = !!mark; + return 0; +} + +int +mu_assoc_tail_set_mark (mu_assoc_t asc, int mark) +{ + if (!asc) + return EINVAL; + if (asc->tail) + asc->tail->mark = !!mark; + return 0; +} + +int +mu_assoc_pop (mu_assoc_t asc, char const *name, void *ret_val) +{ + if (!asc || !name) + return EINVAL; + + if (asc->tail && ret_val != NULL) + { + *(void**)ret_val = 
asc->tail->data; + } + return assoc_remove_elem (asc, asc->tail, ret_val != NULL); +} + +int +mu_assoc_shift (mu_assoc_t asc, char const *name, void *ret_val) +{ + if (!asc || !name) + return EINVAL; + + if (asc->head && ret_val != NULL) + { + *(void**)ret_val = asc->head->data; + } + return assoc_remove_elem (asc, asc->head, ret_val != NULL); +} + +/* Given A and B - two associative arrays keeping the same kind of data, + move from B to A all elements whose names are present in A. Remove + from A all elements not thus updated. +*/ +void +mu_assoc_pull (mu_assoc_t a, mu_assoc_t b) +{ + unsigned i; + + for (i = 0; i < hash_size[a->hash_num]; i++) + { + if (a->tab[i]) + { + unsigned j; + int rc; + + rc = assoc_find_slot (b, a->tab[i]->name, NULL, &j); + if (rc == 0) + { + if (a->free) + a->free (a->tab[i]->data); + a->tab[i]->data = b->tab[j]->data; + b->tab[j]->data = NULL; + assoc_remove (b, j); + } + else + assoc_remove (a, i); + } + } +} + +/* TODO + mu_assoc_union (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b) + Computes the union of A and B. Stores the result in R. + mu_assoc_intersect (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b) + Computes the intersection of A and B. + mu_assoc_symdiff (mu_assoc_t *r, mu_assoc_t a, mu_assoc_t b) + Computes the symmetric difference (disjunctive union) of the two + arrays. +*/ diff --git a/libmailutils/mime/mimehdr.c b/libmailutils/mime/mimehdr.c index b057a48a5..7d3052f29 100644 --- a/libmailutils/mime/mimehdr.c +++ b/libmailutils/mime/mimehdr.c @@ -152,20 +152,13 @@ free_param_continuation (struct param_continuation *p) is already in ASSOC. If OUTCHARSET is not NULL, the value from CONT->param_value will be recoded to that charset before storing it. 
*/ static int -flush_param (struct param_continuation *cont, mu_assoc_t assoc, int subset, +flush_param (struct param_continuation *cont, mu_assoc_t assoc, const char *outcharset) { int rc; - struct mu_mime_param *param, **param_slot; + struct mu_mime_param *param; mu_off_t size; - if (subset) - { - rc = mu_assoc_lookup_ref (assoc, cont->param_name, ¶m_slot); - if (rc) - return 0; - } - param = calloc (1, sizeof *param); if (!param) return errno; @@ -231,16 +224,9 @@ flush_param (struct param_continuation *cont, mu_assoc_t assoc, int subset, param->value = tmp; } - if (subset) - { - *param_slot = param; - } - else - { - rc = mu_assoc_install (assoc, cont->param_name, param); - if (rc) - mu_mime_param_free (param); - } + rc = mu_assoc_install (assoc, cont->param_name, param); + if (rc) + mu_mime_param_free (param); return rc; } @@ -275,7 +261,7 @@ getword (struct mu_wordsplit *ws, size_t *pi) static int parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, struct param_continuation *param_cont, - const char *outcharset, int subset) + const char *outcharset) { size_t klen; char *key; @@ -296,6 +282,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, if (strcmp (key, ";") == 0) { + mu_assoc_tail_set_mark (assoc, 0); /* Reportedly, some MUAs insert several semicolons */ do { @@ -306,7 +293,11 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, while (strcmp (key, ";") == 0); } else - return MU_ERR_PARSE; + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("semicolon missing (found %s)"), key)); + return MU_ERR_PARSE; + } p = strchr (key, '='); if (p) @@ -318,7 +309,11 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, val = p; } else if ((val = getword (ws, pi)) == NULL) - return MU_ERR_PARSE; + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("missing parameter value"))); + return MU_ERR_PARSE; + } /* key= WSP val */ } else @@ -332,11 +327,19 @@ parse_param (struct mu_wordsplit *ws, size_t 
*pi, mu_assoc_t assoc, val = p + 1; } else if ((val = getword (ws, pi)) == NULL) - return MU_ERR_PARSE; + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("missing parameter value"))); + return MU_ERR_PARSE; + } /* key WSP = WSP val */ } else - return MU_ERR_PARSE; + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("missing = after parameter name"))); + return MU_ERR_PARSE; + } } klen = strlen (key); @@ -361,9 +364,9 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, if (*q && *q != '*') { mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, - (_("malformed parameter name %s: skipping"), + (_("malformed parameter name %s"), key)); - return 0; + return MU_ERR_PARSE; } if (n != param_cont->param_cind) @@ -372,15 +375,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, (_("continuation index out of sequence in %s: " "skipping"), key)); - /* Ignore this parameter. Another possibility would be - to drop the continuation assembled so far. That makes - little difference, because the string is malformed - anyway. - - We try to continue just to gather as many information - as possible from this mess. 
- */ - return 0; + return MU_ERR_PARSE; } if (n == 0) @@ -401,10 +396,8 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, memcmp (param_cont->param_name, key, klen)) { mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, - (_("continuation name mismatch: %s: " - "skipping"), - key)); - return 0; + (_("continuation name mismatch: %s"), key)); + return MU_ERR_PARSE; } if (*q == '*') @@ -422,7 +415,7 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, } else if (param_cont->param_name) { - rc = flush_param (param_cont, assoc, subset, outcharset); + rc = flush_param (param_cont, assoc, outcharset); free_param_continuation (param_cont); if (rc) return rc; @@ -535,23 +528,13 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, return ENOMEM; } - if (subset) - { - struct mu_mime_param **p; - if (mu_assoc_lookup_ref (assoc, key, &p) == 0) - *p = param; - else - mu_mime_param_free (param); - } - else + rc = mu_assoc_install (assoc, key, param); + if (rc) { - rc = mu_assoc_install (assoc, key, param); - if (rc) - { - mu_mime_param_free (param); - return rc; - } + mu_mime_param_free (param); + return rc; } + mu_assoc_tail_set_mark (assoc, 1); return 0; } @@ -581,7 +564,8 @@ parse_param (struct mu_wordsplit *ws, size_t *pi, mu_assoc_t assoc, */ static int _mime_header_parse (const char *text, char **pvalue, - mu_assoc_t assoc, const char *outcharset, int subset) + mu_assoc_t assoc, const char *outcharset, + mu_assoc_t subset) { int rc = 0; struct mu_wordsplit ws; @@ -636,13 +620,42 @@ _mime_header_parse (const char *text, char **pvalue, } memset (&cont, 0, sizeof (cont)); - for (i = 0; (rc = parse_param (&ws, &i, assoc, &cont, outcharset, subset)) == 0;) - ; - if (rc == MU_ERR_USER0) - rc = 0; + i = 0; + while (1) + { + rc = parse_param (&ws, &i, assoc, &cont, outcharset); + if (rc) + { + if (rc == MU_ERR_PARSE) + { + char *p; + mu_assoc_sweep (assoc); + /* Attempt error recovery */ + do + p = getword (&ws, &i); + while (p && 
strcmp (p, ";")); + if (p) + { + mu_debug (MU_DEBCAT_MIME, MU_DEBUG_TRACE0, + (_("finished error recovery at ; %s"), + ws.ws_wordv[i])); + /* put the semicolon back */ + i--; + continue; + } + rc = 0; + } + else if (rc == MU_ERR_USER0) + rc = 0; + break; + } + } + if (rc == 0 && cont.param_name) - rc = flush_param (&cont, assoc, subset, outcharset); + rc = flush_param (&cont, assoc, outcharset); free_param_continuation (&cont); + mu_assoc_tail_set_mark (assoc, 0); + if (rc == 0) { if (pvalue) @@ -653,26 +666,7 @@ _mime_header_parse (const char *text, char **pvalue, mu_wordsplit_free (&ws); if (subset) - { - /* Eliminate empty elements. */ - mu_iterator_t itr; - - rc = mu_assoc_get_iterator (assoc, &itr); - if (rc == 0) - { - for (mu_iterator_first (itr); !mu_iterator_is_done (itr); - mu_iterator_next (itr)) - { - const char *name; - struct mu_mime_param *p; - - mu_iterator_current_kv (itr, (const void **)&name, (void**)&p); - if (!p) - mu_iterator_ctl (itr, mu_itrctl_delete, NULL); - } - mu_iterator_destroy (&itr); - } - } + mu_assoc_pull (subset, assoc); return rc; } @@ -699,7 +693,14 @@ int mu_mime_header_parse_subset (const char *text, const char *cset, char **pvalue, mu_assoc_t assoc) { - return _mime_header_parse (text, pvalue, assoc, cset, 1); + mu_assoc_t tmp; + int rc = mu_mime_param_assoc_create (&tmp); + if (rc == 0) + { + rc = _mime_header_parse (text, pvalue, tmp, cset, assoc); + mu_assoc_destroy (&tmp); + } + return rc; } /* Parse header value from TEXT and return its value and parameters. 
@@ -725,7 +726,7 @@ mu_mime_header_parse (const char *text, char const *cset, char **pvalue, rc = mu_mime_param_assoc_create (&assoc); if (rc == 0) { - rc = _mime_header_parse (text, pvalue, assoc, cset, 0); + rc = _mime_header_parse (text, pvalue, assoc, cset, NULL); if (rc || !passoc) mu_assoc_destroy (&assoc); else @@ -733,7 +734,7 @@ mu_mime_header_parse (const char *text, char const *cset, char **pvalue, } return rc; -} +} /* TEXT is a value of a structured MIME header, e.g. Content-Type. This function returns the `disposition part' of it. In other diff --git a/libmailutils/tests/mimehdr.at b/libmailutils/tests/mimehdr.at index ea6fab5f4..2b6bf4a99 100644 --- a/libmailutils/tests/mimehdr.at +++ b/libmailutils/tests/mimehdr.at @@ -290,5 +290,14 @@ MIMEHDR([whitespace in type],[mimehdr17], name=foo ]) +MIMEHDR([error tolerance],[mimehdr18], +[], +[application/x-stuff;name="one";title some text ; charset= utf8; +output = foo bar], +[application/x-stuff +charset=utf8 +name=one +]) + m4_popdef([MIMEHDR]) m4_popdef([MIMEHDR_FAIL]) diff --git a/libmailutils/tests/mimehdr.c b/libmailutils/tests/mimehdr.c index a955489a7..463db8d96 100644 --- a/libmailutils/tests/mimehdr.c +++ b/libmailutils/tests/mimehdr.c @@ -31,6 +31,7 @@ #include <mailutils/cctype.h> #include <mailutils/error.h> #include <mailutils/errno.h> +#include <mailutils/cli.h> static int sort_names (char const *aname, void const *adata, @@ -51,57 +52,69 @@ print_param (const char *name, void *item, void *data) return 0; } +static void +cli_debug (struct mu_parseopt *po, struct mu_option *opt, char const *arg) +{ + mu_debug_parse_spec (arg); +} + +char *charset; +char *header_name; +unsigned long width = 76; + +struct mu_option options[] = { + { "debug", 0, "SPEC", MU_OPTION_DEFAULT, + "set debug level", mu_c_string, NULL, cli_debug }, + { "charset", 0, "NAME", MU_OPTION_DEFAULT, + "convert values to this charset", mu_c_string, &charset }, + { "header", 0, "NAME", MU_OPTION_DEFAULT, + "set header name", 
mu_c_string, &header_name }, + { "width", 0, "N", MU_OPTION_DEFAULT, + "output width", mu_c_ulong, &width }, + MU_OPTION_END +}; + int main (int argc, char **argv) { - int i; int rc; mu_stream_t tmp; mu_transport_t trans[2]; char *value; mu_assoc_t assoc; - char *charset = NULL; - char *header_name = NULL; - unsigned long width = 76; mu_set_program_name (argv[0]); - for (i = 1; i < argc; i++) - { - char *opt = argv[i]; - - if (strncmp (opt, "-debug=", 7) == 0) - mu_debug_parse_spec (opt + 7); - else if (strncmp (opt, "-charset=", 9) == 0) - charset = opt + 9; - else if (strcmp (opt, "-h") == 0 || strcmp (opt, "-help") == 0) - { - mu_printf ("usage: %s [-charset=cs] [-debug=SPEC] [-header=NAME] [-width=N]", mu_program_name); - return 0; - } - else if (strncmp (opt, "-header=", 8) == 0) - header_name = opt + 8; - else if (strncmp (opt, "-width=", 7) == 0) - width = strtoul (opt + 7, NULL, 10); - else - { - mu_error ("unknown option %s", opt); - return 1; - } - } - - if (i != argc) - { - mu_error ("too many arguments"); - return 1; - } + mu_cli_simple (argc, argv, + MU_CLI_OPTION_OPTIONS, options, + MU_CLI_OPTION_SINGLE_DASH, + MU_CLI_OPTION_PROG_DOC, "mu_mime_header_parse test", + MU_CLI_OPTION_PROG_ARGS, "[PARAM...]", + MU_CLI_OPTION_EXTRA_INFO, "If arguments (PARAM) are " + "specified, only the matching parameters will be displayed.", + MU_CLI_OPTION_RETURN_ARGC, &argc, + MU_CLI_OPTION_RETURN_ARGV, &argv, + MU_CLI_OPTION_END); MU_ASSERT (mu_memory_stream_create (&tmp, MU_STREAM_RDWR)); MU_ASSERT (mu_stream_copy (tmp, mu_strin, 0, NULL)); MU_ASSERT (mu_stream_write (tmp, "", 1, NULL)); MU_ASSERT (mu_stream_ioctl (tmp, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET, trans)); + + if (argc) + { + int i; - rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc); + MU_ASSERT (mu_mime_param_assoc_create (&assoc)); + for (i = 0; i < argc; i++) + mu_assoc_install (assoc, argv[i], NULL); + rc = mu_mime_header_parse_subset ((char*)trans[0], charset, &value, + assoc); + } + 
else + { + rc = mu_mime_header_parse ((char*)trans[0], charset, &value, &assoc); + } if (rc) { mu_diag_funcall (MU_DIAG_ERROR, "mu_mime_header_parse", NULL, rc); |