summaryrefslogtreecommitdiff
path: root/libmailutils/mimehdr.c
diff options
context:
space:
mode:
Diffstat (limited to 'libmailutils/mimehdr.c')
-rw-r--r--libmailutils/mimehdr.c695
1 files changed, 695 insertions, 0 deletions
diff --git a/libmailutils/mimehdr.c b/libmailutils/mimehdr.c
new file mode 100644
index 000000000..0314734fc
--- /dev/null
+++ b/libmailutils/mimehdr.c
@@ -0,0 +1,695 @@
+/* GNU Mailutils -- a suite of utilities for electronic mail
+ Copyright (C) 1999, 2000, 2001, 2004, 2005, 2007, 2009, 2010 Free
+ Software Foundation, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General
+ Public License along with this library. If not,
+ see <http://www.gnu.org/licenses/>. */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <mailutils/cctype.h>
+#include <mailutils/cstr.h>
+#include <mailutils/errno.h>
+#include <mailutils/message.h>
+#include <mailutils/header.h>
+#include <mailutils/stream.h>
+#include <mailutils/url.h> /* FIXME: for mu_url_decode, which should
+ be renamed! */
+#include <mailutils/mime.h>
+#include <mailutils/filter.h>
+#include <mailutils/mutil.h>
+
+/* See RFC 2045, 5.1. Syntax of the Content-Type Header Field */
+#define _ISSPECIAL(c) !!strchr ("()<>@,;:\\\"/[]?=", c)
+
+/* _header_get_param - an auxiliary function to extract values from
+ Content-Type, Content-Disposition and similar headers.
+
+ Arguments:
+
+ FIELD_BODY Header value, complying to RFCs 2045, 2183, 2231.3;
+ DISP Disposition. Unless it is NULL, the disposition part
+ of FIELD_BODY is compared with it. If they differ,
+ the function returns MU_ERR_NOENT.
+ PARAM Name of the parameter to extract from FIELD_BODY;
+ BUF Where to extract the value to;
+ BUFSZ Size of BUF;
+ PRET Pointer to the memory location for the return buffer (see
+ below).
+ PLEN Pointer to the return size.
+ PFLAGS On return, flags describing the parameter are stored there.
+ The MU_MIMEHDR_MULTILINE bit is set if the parameter value
+ was multiline (RFC 2231.3). The MU_MIMEHDR_CSINFO bit is set
+ if the parameter value includes charset/language
+ information (RFC 2231.4).
+
+ The function parses FIELD_BODY and extracts the value of the parameter
+ PARAM.
+
+ If BUF is not NULL and BUFSZ is not 0, the extracted value is stored into
+ BUF. At most BUFSZ-1 bytes are copied.
+
+ Otherwise, if PRET is not NULL, the function allocates enough memory to
+ hold the extracted value, copies there the result, and stores the
+ pointer to the allocated memory into the location pointed to by PRET.
+
+ If PLEN is not NULL, the size of the extracted value (without terminating
+ NUL character) is stored there.
+
+ If BUF==NULL *and* PRET==NULL, no memory is allocated, but PLEN is
+ honored anyway, i.e. unless it is NULL it receives size of the result.
+ This can be used to estimate the needed buffer size.
+
+ Return values:
+ 0 on success.
+ MU_ERR_NOENT, requested parameter not found, or disposition does
+ not match DISP.
+ MU_ERR_PARSE, if FIELD_BODY does not comply to any of the abovemntioned
+ RFCs.
+ ENOMEM , if unable to allocate memory.
+*/
+
+/* Internal flag used by _header_get_param to delay increasing
+ estimated continuation index. */
+#define _MU_MIMEHDR_INCR_CIND 0x8000
+
+int
+_header_get_param (const char *field_body,
+ const char *disp,
+ const char *param,
+ char *buf, size_t bufsz,
+ char **pret, size_t *plen,
+ int *pflags)
+{
+ int res = MU_ERR_NOENT; /* Return value, pessimistic default */
+ size_t param_len = strlen (param);
+ char *p;
+ size_t size;
+ char *mem = NULL; /* Allocated memory storage */
+ size_t retlen = 0; /* Total number of bytes copied */
+ unsigned long cind = 0; /* Expected continued parameter index.
+ See RFC 2231, Section 3,
+ "Parameter Value Continuations" */
+ int flags = 0;
+
+ if (field_body == NULL)
+ return EINVAL;
+
+ if (bufsz == 0) /* Make sure buf value is meaningful */
+ buf = NULL;
+
+ p = strchr (field_body, ';');
+ if (!p)
+ return MU_ERR_NOENT;
+ /* Allow for possible whitespace before the semicolon */
+ for (size = p - field_body;
+ size > 0 && mu_isblank (field_body[size-1]); size--)
+ ;
+ /* Remove surrounding quotes.
+ FIXME: unescape the quoted contents. */
+ if (field_body[0] == '"' && field_body[size-1] == '"')
+ {
+ field_body++;
+ size -= 2;
+ }
+ if (disp && mu_c_strncasecmp (field_body, disp, size))
+ return MU_ERR_NOENT;
+
+ while (p && *p)
+ {
+ char *v, *e, *ep, *cp;
+ size_t len, escaped_chars = 0;
+
+ if (*p != ';')
+ {
+ res = MU_ERR_PARSE;
+ break;
+ }
+
+ /* walk upto start of param */
+ p = mu_str_skip_class (p + 1, MU_CTYPE_SPACE);
+
+ /* Reportedly, some MUAs insert several semicolons */
+ if (*p == ';')
+ continue;
+
+ /* Ignore stray characters */
+ if (_ISSPECIAL (*p))
+ {
+ p = strchr (p, ';');
+ continue;
+ }
+
+ if ((ep = strchr (p, '=')) == NULL)
+ break;
+ /* Allow for optional whitespace after '=' */
+ v = mu_str_skip_class (ep + 1, MU_CTYPE_SPACE);
+ /* Find end of the parameter */
+ if (*v == '"')
+ {
+ /* Quoted string */
+ for (e = ++v; *e != '"'; e++)
+ {
+ if (*e == 0) /* Malformed header */
+ {
+ res = MU_ERR_PARSE;
+ break;
+ }
+ if (*e == '\\')
+ {
+ if (*++e == 0)
+ {
+ res = MU_ERR_PARSE;
+ break;
+ }
+ escaped_chars++;
+ }
+ }
+ if (res == MU_ERR_PARSE)
+ break;
+ len = e - v;
+ e++;
+ }
+ else
+ {
+ for (e = v + 1; *e && !(*e == ';' || mu_isspace (*e)); e++)
+ ;
+ len = e - v;
+ }
+
+ /* Is it our parameter? */
+ if (mu_c_strncasecmp (p, param, param_len))
+ { /* nope, jump to next */
+ p = strchr (e, ';');
+ continue;
+ }
+
+ cp = p + param_len;
+
+ if (*cp == '*')
+ {
+ cp++;
+ /* It is a parameter value continuation (RFC 2231, Section 3)
+ or parameter value character set and language information
+ (ibid., Section 4). */
+ if (mu_isdigit (*cp))
+ {
+ /* See if the index is OK */
+
+ char *end;
+ unsigned long n = strtoul (cp, &end, 10);
+
+ if (*end == '*')
+ {
+ flags |= MU_MIMEHDR_CSINFO;
+ end++;
+ }
+ if (n != cind)
+ {
+ res = MU_ERR_PARSE;
+ break;
+ }
+ /* Everything OK, mark this as a multiline (continued)
+ parameter. We also need to increment the estimation,
+ but it cannot be done right now because its value is
+ used below to decide whether to do flag cleanup
+ on error. So we set _MU_MIMEHDR_INCR_CIND flag instead
+ and increment cind later. */
+ flags |= (MU_MIMEHDR_MULTILINE|_MU_MIMEHDR_INCR_CIND);
+ /* And point cp to the last character: there are more
+ checks ahead. */
+ cp = end;
+ }
+ else
+ flags |= MU_MIMEHDR_CSINFO;
+ }
+ /* Allow for optional whitespace before '=' */
+ cp = mu_str_skip_class (cp, MU_CTYPE_SPACE);
+ /* cp must now point to the equals sign */
+ if (cp != ep)
+ {
+ /* Clean up everything, unless we're in the middle of a
+ parameter continuation. */
+ if (cind == 0)
+ flags = 0;
+
+ /* Try next parameter */
+ p = strchr (e, ';');
+ continue;
+ }
+
+ if (flags & _MU_MIMEHDR_INCR_CIND)
+ {
+ /* Increase the estimation. */
+ flags &= ~_MU_MIMEHDR_INCR_CIND;
+ cind++;
+ }
+
+ res = 0; /* Indicate success */
+
+ /* Prepare P for the next iteration */
+ p = e;
+
+ /* Escape characters that appear in quoted-pairs are
+ semantically "invisible" (RFC 2822, Section 3.2.2,
+ "Quoted characters") */
+ len -= escaped_chars;
+
+ /* Adjust len if nearing end of the buffer */
+ if (bufsz && len >= bufsz)
+ len = bufsz - 1;
+
+ if (pret)
+ {
+ /* The caller wants us to allocate the memory */
+ if (!buf && !mem)
+ {
+ mem = malloc (len + 1);
+ if (!mem)
+ {
+ res = ENOMEM;
+ break;
+ }
+ buf = mem;
+ }
+ else if (mem)
+ {
+ /* If we got here, it means we are iterating over
+ a parameter value continuation, and cind=0 has
+ already been passed. Reallocate the memory to
+ accomodate next chunk of data. */
+ char *newmem = realloc (mem, retlen + len + 1);
+ if (!newmem)
+ {
+ res = ENOMEM;
+ break;
+ }
+ buf = mem = newmem;
+ }
+ }
+
+ if (buf)
+ {
+ /* Actually copy the data. Buf is not NULL either because
+ the user passed it as an argument, or because we allocated
+ memory for it. */
+ if (escaped_chars)
+ {
+ int i;
+ for (i = 0; i < len; i++)
+ {
+ if (*v == '\\')
+ ++v;
+ buf[retlen + i] = *v++;
+ }
+ }
+ else
+ memcpy (buf + retlen, v, len);
+ }
+ /* Adjust total result size ... */
+ retlen += len;
+ /* ... and remaining buffer size, if necessary */
+ if (bufsz)
+ {
+ bufsz -= len;
+ if (bufsz == 0)
+ break;
+ }
+ }
+
+ if (res == 0)
+ {
+ /* Everything OK, prepare the returned data. */
+ if (buf)
+ buf[retlen] = 0;
+ if (plen)
+ *plen = retlen;
+ if (pret)
+ *pret = mem;
+ if (pflags)
+ *pflags = flags;
+ }
+ else if (mem)
+ free (mem);
+ return res;
+}
+
+static size_t
+disp_segment_len (const char *str)
+{
+ char *p = strchr (str, ';');
+ size_t size;
+
+ if (!p)
+ size = strlen (str);
+ else
+ size = p - str;
+ while (size > 0 && mu_isblank (str[size-1]))
+ size--;
+ return size;
+}
+
+/* STR is a value of a structured MIME header, e.g. Content-Type.
+ This function returns the `disposition part' of it. In other
+ words, it returns disposition, if STR is a Content-Disposition
+ value, and `type/subtype' part, if it is a Content-Type value.
+*/
+int
+mu_mimehdr_get_disp (const char *str, char *buf, size_t bufsz, size_t *retsz)
+{
+ size_t size;
+
+ str = mu_str_skip_class (str, MU_CTYPE_BLANK);
+ size = disp_segment_len (str);
+ if (size > 2 && str[0] == '"' && str[size-1] == '"')
+ {
+ str++;
+ size -= 2;
+ }
+ if (buf)
+ size = mu_cpystr (buf, str, size);
+ if (retsz)
+ *retsz = size;
+ return 0;
+}
+
+/* Same as mu_mimehdr_get_disp, but allocates memory */
+int
+mu_mimehdr_aget_disp (const char *str, char **pvalue)
+{
+ char *p;
+ size_t size;
+
+ str = mu_str_skip_class (str, MU_CTYPE_BLANK);
+ size = disp_segment_len (str);
+ if (size > 2 && str[0] == '"' && str[size-1] == '"')
+ {
+ str++;
+ size -= 2;
+ }
+
+ p = malloc (size + 1);
+ if (!p)
+ return ENOMEM;
+ memcpy (p, str, size);
+ p[size] = 0;
+ *pvalue = p;
+ return 0;
+}
+
+/* Get the value of the parameter PARAM from STR, which must be
+ a value of a structured MIME header.
+ At most BUFSZ-1 of data are stored in BUF. A terminating NUL
+ character is appended to it.
+
+ Unless NULL, RETSZ is filled with the actual length of the
+ returned data (not including the NUL terminator).
+
+ Unless PFLAGS is null it will contain, on return, the flags describing
+ the parameter. The MU_MIMEHDR_MULTILINE bit is set if the parameter value
+ was multiline (RFC 2231.3). The MU_MIMEHDR_CSINFO bit is set if the
+ parameter value includes charset/language information (RFC 2231.4).
+
+ BUF may be NULL, in which case the function will only fill
+ RETSZ and PFLAGS, as described above. */
+int
+mu_mimehdr_get_param (const char *str, const char *param,
+ char *buf, size_t bufsz, size_t *retsz,
+ int *pflags)
+{
+ return _header_get_param (str, NULL, param, buf, bufsz, NULL, retsz,
+ pflags);
+}
+
+/* Same as mu_mimehdr_get_param, but allocates memory. */
+int
+mu_mimehdr_aget_param (const char *str, const char *param,
+ char **pval, int *pflags)
+{
+ return _header_get_param (str, NULL, param, NULL, 0, pval, NULL, pflags);
+}
+
+/* Decode a parameter value. Arguments:
+
+ Input:
+ VALUE Parameter value.
+ FLAGS Flags obtained from a previous call to one of the functions
+ above.
+ CHARSET Output charset.
+
+ Output:
+ PVAL A pointer to the decoded value is stored there.
+ The memory is allocated using malloc.
+ PLANG If language information was present in VALUE, its
+ malloc'ed copy is stored in the memory location pointed
+ to by this variable. If there was no language information,
+ *PLANG is set to NULL.
+
+ Both PVAL and PLANG may be NULL if that particular piece of information
+ is not needed. */
+int
+mu_mimehdr_decode_param (const char *value, int flags,
+ const char *charset, char **pval, char **plang)
+{
+ char *decoded;
+ int rc;
+ char *lang = NULL;
+ char *data;
+
+ if (flags == 0)
+ {
+ rc = mu_rfc2047_decode (charset, value, &decoded);
+ if (rc)
+ return rc;
+ }
+ else
+ {
+ decoded = mu_url_decode (value);
+ if (!decoded)
+ return ENOMEM;
+
+ if ((flags & MU_MIMEHDR_CSINFO)
+ && (lang = strchr (decoded, '\''))
+ && (data = strchr (lang + 1, '\'')))
+ {
+ char *source_cs = decoded;
+
+ *lang++ = 0;
+ *data++ = 0;
+
+ lang = lang[0] ? strdup (lang) : NULL;
+
+ if (source_cs[0] && charset && mu_c_strcasecmp (source_cs, charset))
+ {
+ char *outval = NULL;
+ mu_stream_t instr = NULL;
+ mu_stream_t outstr = NULL;
+ mu_stream_t cvt = NULL;
+ char iobuf[512];
+
+ do
+ {
+ size_t total = 0, pos;
+ size_t nbytes;
+
+ rc = mu_memory_stream_create (&instr, 0);
+ if (rc)
+ break;
+ rc = mu_stream_write (instr, data, strlen (data), NULL);
+ if (rc)
+ break;
+
+ rc = mu_memory_stream_create (&outstr, 0);
+ if (rc)
+ break;
+
+ rc = mu_filter_iconv_create (&cvt, instr, source_cs, charset,
+ 0,
+ mu_default_fallback_mode);
+ if (rc)
+ break;
+
+ rc = mu_stream_open (cvt);
+ if (rc)
+ break;
+
+ while (mu_stream_read (cvt, iobuf, sizeof (iobuf),
+ &nbytes) == 0
+ && nbytes)
+ {
+ rc = mu_stream_write (outstr, iobuf, nbytes, NULL);
+ if (rc)
+ break;
+ total += nbytes;
+ }
+
+ if (rc)
+ break;
+
+ outval = malloc (total + 1);
+ if (!outval)
+ {
+ rc = ENOMEM;
+ break;
+ }
+
+ mu_stream_seek (outstr, 0, MU_SEEK_SET, NULL);
+ pos = 0;
+ while (mu_stream_read (outstr, outval + pos,
+ total - pos, &nbytes) == 0
+ && nbytes)
+ pos += nbytes;
+ outval[pos] = 0;
+ }
+ while (0);
+
+ mu_stream_close (cvt);
+ mu_stream_destroy (&cvt);
+ mu_stream_close (instr);
+ mu_stream_destroy (&instr);
+ mu_stream_close (outstr);
+ mu_stream_destroy (&outstr);
+
+ free (decoded);
+
+ if (rc)
+ {
+ /* Cleanup after an error. */
+ free (lang);
+ free (outval);
+ return rc;
+ }
+ decoded = outval;
+ }
+ else
+ memmove (decoded, data, strlen (data) + 1);
+ }
+ }
+
+ if (pval)
+ *pval = decoded;
+ else
+ free (decoded);
+
+ if (plang)
+ *plang = lang;
+ return 0;
+}
+
+/* Similar to mu_mimehdr_aget_param, but the returned value is decoded
+ according to the CHARSET. Unless PLANG is NULL, it receives malloc'ed
+ language name from STR. If there was no language name, *PLANG is set
+ to NULL.
+*/
+int
+mu_mimehdr_aget_decoded_param (const char *str, const char *param,
+ const char *charset,
+ char **pval, char **plang)
+{
+ char *value;
+ int rc;
+ int flags;
+
+ rc = mu_mimehdr_aget_param (str, param, &value, &flags);
+ if (rc == 0)
+ {
+ rc = mu_mimehdr_decode_param (value, flags, charset, pval, plang);
+ free (value);
+ }
+ return rc;
+}
+
+/* Get the attachment name from MSG. See _header_get_param, for a
+ description of the rest of arguments. */
+static int
+_get_attachment_name (mu_message_t msg, char *buf, size_t bufsz,
+ char **pbuf, size_t *sz, int *pflags)
+{
+ int ret = EINVAL;
+ mu_header_t hdr;
+ char *value = NULL;
+
+ if (!msg)
+ return ret;
+
+ if ((ret = mu_message_get_header (msg, &hdr)) != 0)
+ return ret;
+
+ ret = mu_header_aget_value_unfold (hdr, "Content-Disposition", &value);
+
+ /* If the header wasn't there, we'll fall back to Content-Type, but
+ other errors are fatal. */
+ if (ret != 0 && ret != MU_ERR_NOENT)
+ return ret;
+
+ if (ret == 0 && value != NULL)
+ {
+ ret = _header_get_param (value, "attachment",
+ "filename", buf, bufsz, pbuf, sz, pflags);
+ free (value);
+ value = NULL;
+ if (ret == 0 || ret != MU_ERR_NOENT)
+ return ret;
+ }
+
+ /* If we didn't get the name, we fall back on the Content-Type name
+ parameter. */
+
+ free (value);
+ ret = mu_header_aget_value_unfold (hdr, "Content-Type", &value);
+ if (ret == 0)
+ ret = _header_get_param (value, NULL, "name", buf, bufsz, pbuf, sz,
+ pflags);
+ free (value);
+
+ return ret;
+}
+
+int
+mu_message_aget_attachment_name (mu_message_t msg, char **name, int *pflags)
+{
+ if (name == NULL)
+ return MU_ERR_OUT_PTR_NULL;
+ return _get_attachment_name (msg, NULL, 0, name, NULL, pflags);
+}
+
+int
+mu_message_aget_decoded_attachment_name (mu_message_t msg,
+ const char *charset,
+ char **pval,
+ char **plang)
+{
+ char *value;
+ int flags;
+ int rc = mu_message_aget_attachment_name (msg, &value, &flags);
+ if (rc == 0)
+ {
+ rc = mu_mimehdr_decode_param (value, flags, charset, pval, plang);
+ free (value);
+ }
+ return rc;
+}
+
+int
+mu_message_get_attachment_name (mu_message_t msg, char *buf, size_t bufsz,
+ size_t *sz, int *pflags)
+{
+ return _get_attachment_name (msg, buf, bufsz, NULL, sz, pflags);
+}
+

Return to:

Send suggestions and report system problems to the System administrator.