/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 1999, 2000, 2001, 2005, 2007, 2009, 2010, 2011 Free
Software Foundation, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General
Public License along with this library. If not, see
<http://www.gnu.org/licenses/>. */
/*
FIXME: what is the status of this TODO list?
Things to consider:
- When parsing phrase, should I ignore non-ascii, or replace with a
'?' character? Right now parsing fails.
--> Should ignore non-ascii, it is Unicode or ISO 8859-1.
- Are comments allowed in domain-literals?
- Need a way to mark the *end* of a group. Maybe add a field to _mu_address,
int group_end;, so if you care, you can search for the end of
a group with address_is_group_end();
--> Groups no longer show up in the mu_address_t list.
- Need a way to parse ",,,", it's a valid address-list, it just doesn't
have any addresses.
- The personal for ""Sam"" is "Sam", and for "'s@b'"
is 's@b', should I strip those outside parentheses, or is that
too intrusive? Maybe it is the application's business, if it wants to?
- Should we do best effort parsing, so parsing "sam@locahost, foo@"
gets one address, or just say it is or it isn't in RFC format?
Right now we're strict, we'll see how it goes.
- parse Received: field?
- test for memory leaks on malloc failure
- fix the realloc, try a struct _string { char* b, size_t sz };
The lexer finds consecutive sequences of characters, so it should
define:
struct parse822_token_t {
const char* b; // beginning of token
const char* e; // one past end of token
}
typedef struct parse822_token_t TOK;
Then I can have str_append_token(), and the lexer functions can
look like:
int mu_parse822_atom(const char** p, const char* e, TOK* atom);
Just a quick thought, I'll have to see how many functions that will
actually help.
- get example addresses from rfc2822, and from the perl code.
*/
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include
#include
#ifdef HAVE_STRINGS_H
# include
#endif
#include
#include
#include
#include
#include
/* Force EOK to mean "success" (zero) in this file, replacing any earlier
   definition of the name. */
#ifdef EOK
# undef EOK
#endif
#define EOK 0
/* Short name for the status returned when input does not match the
   RFC 822 grammar. */
#define EPARSE MU_ERR_BAD_822_FORMAT
/*
* Some convenience functions for dealing with dynamically re-sized
* strings.
*/
/* Append the first N bytes of FROM to the heap string *TO, allocating
   *TO when it is NULL and growing it otherwise.  The result is always
   NUL-terminated.

   TO itself may be NULL, in which case the data is silently discarded
   and EOK is returned.

   Returns EOK on success, or ENOMEM when memory cannot be obtained; on
   failure *TO is left exactly as it was on entry.  */
static int
str_append_n (char **to, const char *from, size_t n)
{
  size_t l = 0;
  char *bigger;

  /* if not to, then silently discard data */
  if (!to)
    return EOK;

  if (*to)
    l = strlen (*to);

  /* realloc (NULL, size) behaves like malloc (size), so one checked call
     covers both the first allocation and the growth case.  */
  bigger = realloc (*to, l + n + 1);
  if (!bigger)
    return ENOMEM;
  *to = bigger;

  strncpy (&to[0][l], from, n);
  /* strncpy does not terminate when it copies N full bytes, so
     nul terminate our buffer explicitly */
  to[0][l + n] = 0;
  return EOK;
}
/* Append the NUL-terminated string FROM to *TO.  A NULL FROM is a no-op
   that reports success; otherwise the result of str_append_n is
   returned.  */
static int
str_append (char **to, const char *from)
{
  size_t len;

  if (from == NULL)
    return 0;

  len = strlen (from);
  return str_append_n (to, from, len);
}
/* Append the single character C to *TO; returns what str_append_n
   returns.  */
static int
str_append_char (char **to, char c)
{
  const char *src = &c;

  return str_append_n (to, src, 1);
}
/* Append the bytes in the half-open range [B, E) to *TO; returns what
   str_append_n returns.  */
static int
str_append_range (char **to, const char *b, const char *e)
{
  size_t span = e - b;

  return str_append_n (to, b, span);
}
/* Release the string *S and reset *S to a null pointer.  Does nothing
   when S or *S is null.  */
static void
str_free (char **s)
{
  if (s == NULL || *s == NULL)
    return;

  free (*s);
  *s = 0;
}
/*
* Character Classification - could be rewritten in a C library
* independent way, my system's C library matches the RFC
* definitions. I don't know if that's guaranteed.
*
* Note that all return values are:
* 1 -> TRUE
* 0 -> FALSE
* This may appear different from the 0 == success return
* values of the other functions, but I was getting lost in
* boolean arithmetic.
*/
/* Returns the result of mu_isascii for C (the RFC 822 CHAR class is the
   ASCII character set).  */
int
mu_parse822_is_char (char c)
{
  int ascii = mu_isascii (c);

  return ascii;
}
/* Returns the result of mu_isdigit for C.  */
int
mu_parse822_is_digit (char c)
{
  /* digit = <any ASCII decimal digit> */
  unsigned code = (unsigned) c;

  return mu_isdigit (code);
}
/* Returns 1 when C is a control character according to mu_iscntrl, or
   is DEL (127); 0 otherwise.  */
int
mu_parse822_is_ctl (char c)
{
  if (mu_iscntrl ((unsigned) c))
    return 1;

  return c == 127;		/* DEL */
}
/* Returns 1 when C is the space character, 0 otherwise.  */
int
mu_parse822_is_space (char c)
{
  return (c == ' ') ? 1 : 0;
}
/* Returns 1 when C is the horizontal tab character, 0 otherwise.  */
int
mu_parse822_is_htab (char c)
{
  return (c == '\t') ? 1 : 0;
}
/* LWSP-char = SPACE / HTAB.  Returns 1 for either, 0 otherwise.  */
int
mu_parse822_is_lwsp_char (char c)
{
  if (mu_parse822_is_space (c))
    return 1;

  return mu_parse822_is_htab (c) ? 1 : 0;
}
/* Returns 1 when C is one of the RFC 822 "specials"
   ( ) < > @ , ; : \ " . [ ]  and 0 otherwise.

   NUL is rejected explicitly: strchr treats the terminating NUL as part
   of the searched string, so without the guard '\0' would be reported
   as a special.  */
int
mu_parse822_is_special (char c)
{
  if (c == '\0')
    return 0;

  return strchr ("()<>@,;:\\\".[]", c) ? 1 : 0;
}
/* Relaxed atom-character test: returns 1 for any character that is not
   a special, not a space and not a control character.  Unlike
   mu_parse822_is_atom_char it does not require C to be ASCII.  */
int
parse822_is_atom_char_ex (char c)
{
  if (mu_parse822_is_special (c))
    return 0;
  if (mu_parse822_is_space (c))
    return 0;
  if (mu_parse822_is_ctl (c))
    return 0;
  return 1;
}
/* Strict atom-character test: C must be a CHAR (see mu_parse822_is_char)
   and also pass parse822_is_atom_char_ex.  Returns 1 or 0.  */
int
mu_parse822_is_atom_char (char c)
{
  if (!mu_parse822_is_char (c))
    return 0;

  return parse822_is_atom_char_ex (c) ? 1 : 0;
}
/* qtext: any CHAR except the double quote, the backslash and CR.
   Returns 1 or 0.  */
int
mu_parse822_is_q_text (char c)
{
  switch (c)
    {
    case '"':
    case '\\':
    case '\r':
      return 0;

    default:
      return mu_parse822_is_char (c) ? 1 : 0;
    }
}
/* dtext: any CHAR except "[", "]", the backslash and CR.
   Returns 1 or 0.  */
int
mu_parse822_is_d_text (char c)
{
  switch (c)
    {
    case '[':
    case ']':
    case '\\':
    case '\r':
      return 0;

    default:
      return mu_parse822_is_char (c) ? 1 : 0;
    }
}
/*
* SMTP's version of qtext, called <q> in the RFC 821 syntax,
* also excludes <LF>.
*/
/* Returns 1 when C is qtext (see mu_parse822_is_q_text) and is not a
   line feed; 0 otherwise.  */
int
mu_parse822_is_smtp_q (char c)
{
  if (c == '\n')
    return 0;

  return mu_parse822_is_q_text (c) ? 1 : 0;
}
/***** From RFC 822, 3.3 Lexical Tokens *****/
/* Skip one new-line at *P, where a new-line is either a CRLF pair (as
   the RFC requires) or a bare LF.  E is one past the end of input.

   Returns EOK and advances *P past the new-line, or EPARSE with *P
   unchanged when there is no new-line at *P.  */
int
mu_parse822_skip_nl (const char **p, const char *e)
{
  const char *cur = *p;

  if (cur + 1 < e && cur[0] == '\r' && cur[1] == '\n')
    *p = cur + 2;
  else if (cur < e && cur[0] == '\n')
    *p = cur + 1;
  else
    return EPARSE;

  return EOK;
}
/* Skip a single space or tab at *P.  Returns EOK and advances *P by
   one, or EPARSE with *P unchanged.  */
int
mu_parse822_skip_lwsp_char (const char **p, const char *e)
{
  if (*p >= e || !mu_parse822_is_lwsp_char (**p))
    return EPARSE;

  ++*p;
  return EOK;
}
/* Skip linear white space at *P.

     linear-white-space = 1*([[CR]LF] LWSP-char)

   A bare LF is accepted in place of CRLF (see mu_parse822_skip_nl),
   since on a Unix system CRLF may already have been translated to the
   local convention.

   Returns EOK when at least one space or tab was skipped outside a
   folded new-line, EPARSE otherwise.  When a new-line is not followed
   by a space or tab, *P is put back to just before that new-line and
   EPARSE is returned, even if white space was skipped before it.  */
int
mu_parse822_skip_lwsp (const char **p, const char *e)
{
  int seen_space = 0;

  for (;;)
    {
      const char *mark;

      if (*p == e)
	break;
      mark = *p;

      if (mu_parse822_skip_lwsp_char (p, e) == EOK)
	{
	  seen_space = 1;
	  continue;
	}

      /* Not a space or tab: only a folded line may continue the run.  */
      if (mu_parse822_skip_nl (p, e) != EOK)
	break;

      if (mu_parse822_skip_lwsp_char (p, e) != EOK)
	{
	  /* New-line without continuation: un-consume it.  */
	  *p = mark;
	  return EPARSE;
	}
    }

  return seen_space ? EOK : EPARSE;
}
/* Skip any number of consecutive comments at *P, discarding their
   text.  Always returns EOK.  */
int
mu_parse822_skip_comments (const char **p, const char *e)
{
  while (mu_parse822_comment (p, e, 0) == EOK)
    continue;

  return EOK;
}
/* Parse a run of decimal digits at *P into *DIGITS.

   At least MIN digits are required.  At most MAX digits are consumed;
   a MAX of 0 means no upper limit.  DIGITS must not be NULL.

   Returns EOK with *P advanced past the digits, or EPARSE with *P
   restored when fewer than MIN digits were found (*DIGITS then holds
   whatever was accumulated).  */
int
mu_parse822_digits (const char **p, const char *e, int min, int max, int *digits)
{
  const char *start = *p;
  int count;

  assert (digits);
  *digits = 0;

  for (count = 0; *p < e && mu_parse822_is_digit (**p); )
    {
      *digits = *digits * 10 + **p - '0';
      ++*p;
      ++count;
      if (max != 0 && count == max)
	break;
    }

  if (count < min)
    {
      *p = start;
      return EPARSE;
    }
  return EOK;
}
/* Skip linear white space, then expect the character C at *P.

   Comments are not skipped here, because a comment itself starts with
   a special.  Returns EOK and steps over C, or EPARSE; on EPARSE any
   white space that was skipped stays skipped.  */
int
mu_parse822_special (const char **p, const char *e, char c)
{
  mu_parse822_skip_lwsp (p, e);

  if (*p == e || **p != c)
    return EPARSE;

  ++*p;
  return EOK;
}
/* Parse one (possibly nested) RFC 822 comment at *P; E is one past the
 * end of input.
 *
 * The comment text, without the enclosing parentheses, is appended to
 * *COMMENT.  COMMENT may be NULL, in which case the text is discarded.
 *
 * Returns EOK with *P just past the closing ")".  If no "(" is found,
 * the status of mu_parse822_special is returned.  If the comment is not
 * closed before E, or appending fails, *P is restored to its value on
 * entry and a non-EOK status is returned; text already appended to
 * *COMMENT is not removed.
 */
int
mu_parse822_comment (const char **p, const char *e, char **comment)
{
/* comment = "(" *(ctext / quoted-pair / comment) ")"
* ctext = <any CHAR excluding "(", ")", "\" & CR, including linear-white-space>
*/
const char *save = *p;
int rc;
/* The opening parenthesis is mandatory; white space before it is skipped. */
if ((rc = mu_parse822_special (p, e, '(')))
{
return rc;
}
while (*p != e)
{
char c = **p;
if (c == ')')
{
*p += 1;
return EOK; /* found end-of-comment */
}
else if (c == '(')
{
/* nested comment: its text goes into the same buffer */
rc = mu_parse822_comment (p, e, comment);
}
else if (c == '\\')
{
rc = mu_parse822_quoted_pair (p, e, comment);
}
else if (c == '\r')
{
/* invalid character... skipped without being copied */
*p += 1;
}
else if (mu_parse822_is_char (c))
{
rc = str_append_char (comment, c);
*p += 1;
}
else
{
/* invalid character... skipped without being copied */
*p += 1;
}
if (rc != EOK)
break;
}
if (*p == e)
{
rc = EPARSE; /* end-of-comment not found */
}
/* Every path that reaches this point is a failure: roll back. */
*p = save;
assert (rc != EOK);
return rc;
}
/* Parse an atom at *P, after skipping leading comments, and append it
   to *ATOM.

     atom = 1*<any CHAR except specials, SPACE and CTLs>

   In addition to atom characters this function also accepts ".".

   Returns EPARSE when no character was consumed, EOK on success, or
   the str_append_char status on failure, in which case *P is put back
   to the position after the leading comments.  */
int
mu_parse822_atom (const char **p, const char *e, char **atom)
{
  const char *start;
  int rc = EPARSE;

  mu_parse822_skip_comments (p, e);
  start = *p;

  while (*p != e)
    {
      char c = **p;

      if (c != '.' && !mu_parse822_is_atom_char (c))
	break;

      rc = str_append_char (atom, c);
      ++*p;
      if (rc != EOK)
	{
	  *p = start;
	  break;
	}
    }

  return rc;
}
/* Relaxed variant of atom parsing: after skipping comments, consume the
   longest run of characters accepted by parse822_is_atom_char_ex and
   append it to *ATOM.

   Returns EPARSE when the run is empty, otherwise the status of
   str_append_n; *P is advanced only on success.  */
int
parse822_atom_ex (const char **p, const char *e, char **atom)
{
  const char *end;
  int rc;

  mu_parse822_skip_comments (p, e);

  end = *p;
  while (end != e && parse822_is_atom_char_ex (*end))
    end++;

  if (end == *p)
    return EPARSE;

  rc = str_append_n (atom, *p, end - *p);
  if (rc == 0)
    *p = end;
  return rc;
}
/* Parse a quoted-pair at *P and append the quoted character (without
   the backslash) to *QPAIR.

     quoted-pair = "\" char

   Returns EPARSE when fewer than two characters remain or *P is not at
   a backslash, the str_append_char status on append failure, or EOK
   with *P advanced by two.  */
int
mu_parse822_quoted_pair (const char **p, const char *e, char **qpair)
{
  int rc;

  /* both the backslash and the quoted character must be available */
  if (e - *p < 2 || **p != '\\')
    return EPARSE;

  rc = str_append_char (qpair, (*p)[1]);
  if (rc)
    return rc;

  *p += 2;
  return EOK;
}
/* Parse a quoted string at *P, after skipping leading comments, and
   append its content (without the quotes) to *QSTR.

     quoted-string = <"> *(qtext/quoted-pair) <">
     qtext = char except <">, "\", & CR, including lwsp-char

   CR and non-CHAR characters inside the string are skipped without
   being copied.

   Returns EOK with *P past the closing quote.  If no opening quote is
   found the status of mu_parse822_special is returned.  If the string
   is unterminated (EPARSE) or appending fails, *P is put back to the
   position after the leading comments and *QSTR is freed.  */
int
mu_parse822_quoted_string (const char **p, const char *e, char **qstr)
{
  const char *start;
  int rc;

  mu_parse822_skip_comments (p, e);
  start = *p;

  rc = mu_parse822_special (p, e, '"');
  if (rc)
    return rc;

  for (; *p != e; )
    {
      char c = **p;

      if (c == '"')
	{
	  /* found end-of-qstr */
	  ++*p;
	  return EOK;
	}

      if (c == '\\')
	rc = mu_parse822_quoted_pair (p, e, qstr);
      else
	{
	  /* copy valid characters, silently drop invalid ones */
	  if (c != '\r' && mu_parse822_is_char (c))
	    rc = str_append_char (qstr, c);
	  ++*p;
	}

      if (rc)
	break;
    }

  /* either an error occurred or end-of-qstr was not found */
  *p = start;
  str_free (qstr);
  return rc ? rc : EPARSE;
}
/* Parse a word at *P, after skipping leading comments, and append it
   to *WORD.

     word = atom / quoted-string

   A quoted string is tried first.  If that yields EPARSE, the input is
   retried as an atom using parse822_atom_ex, which accepts
   non-rfc-compliant atoms:

   "Be liberal in what you accept, and conservative in what you send."

   Returns EOK, EPARSE when neither form matches, or another status on
   failure.  When appending to *WORD fails, *P is put back to the
   position after the leading comments.  */
int
mu_parse822_word (const char **p, const char *e, char **word)
{
  const char *start;
  char *quoted = 0;
  char *atom = 0;
  int rc;

  mu_parse822_skip_comments (p, e);
  start = *p;

  /* first alternative: quoted-string */
  rc = mu_parse822_quoted_string (p, e, &quoted);
  if (rc == EOK && quoted)
    {
      rc = str_append (word, quoted);
      str_free (&quoted);
      if (rc != EOK)
	*p = start;
      return rc;
    }
  assert (quoted == NULL);

  if (rc != EPARSE)
    /* it's fatal */
    return rc;

  /* second alternative: atom.  The quoted-string attempt rolled itself
     back, so the atom parser sees the same input.  */
  if (parse822_atom_ex (p, e, &atom) == EOK)
    {
      rc = str_append (word, atom);
      str_free (&atom);
      if (rc != EOK)
	*p = start;
      return rc;
    }
  assert (atom == NULL);

  return EPARSE;
}
/* Parse a word followed by any number of "." characters, appending both
   to *WORD.  Some mailers do not quote the personal part even when it
   contains dots, so trailing dots are accepted as part of the word.

   Returns the status of mu_parse822_word, or of str_append if adding a
   dot fails.  */
int
parse822_word_dot (const char **p, const char *e, char **word)
{
  int rc = mu_parse822_word (p, e, word);

  while (rc == 0 && *p != e && **p == '.')
    {
      rc = str_append (word, ".");
      ++*p;
    }

  return rc;
}
/* Parse a phrase at *P and append it to *PHRASE, with the words
   separated by single spaces.

     phrase = 1*word

   Returns the status of the first word if it cannot be parsed.  After
   that, running out of words is not an error; any other failure puts
   *P back to its value on entry and is returned.  */
int
mu_parse822_phrase (const char **p, const char *e, char **phrase)
{
  const char *start = *p;
  char *next = 0;
  int rc;

  /* the first word is mandatory */
  rc = parse822_word_dot (p, e, phrase);
  if (rc)
    return rc;

  /* ok, got the 1 word, now append all the others we can */
  for (;;)
    {
      rc = parse822_word_dot (p, e, &next);
      if (rc != EOK)
	break;

      rc = str_append_char (phrase, ' ');
      if (rc == EOK)
	rc = str_append (phrase, next);
      str_free (&next);

      if (rc != EOK)
	break;
    }
  assert (next == NULL);

  if (rc == EPARSE)
    rc = EOK;			/* its not an error to find no more words */

  if (rc)
    *p = start;
  return rc;
}
/***** From RFC 822, 6.1 Address Specification Syntax *****/
/* Allocate a zero-filled address node.  Returns NULL when calloc
   fails.  */
static mu_address_t
new_mb (void)
{
  mu_address_t mb = calloc (1, sizeof (struct mu_address));

  return mb;
}
/* Return the string field of ADDR selected by MASK, which must be
   exactly one MU_ADDR_HINT_* value.  Any other MASK (including 0)
   yields NULL.  */
static char *
addr_field_by_mask (mu_address_t addr, int mask)
{
  if (mask == MU_ADDR_HINT_ADDR)
    return addr->addr;
  if (mask == MU_ADDR_HINT_COMMENTS)
    return addr->comments;
  if (mask == MU_ADDR_HINT_PERSONAL)
    return addr->personal;
  if (mask == MU_ADDR_HINT_EMAIL)
    return addr->email;
  if (mask == MU_ADDR_HINT_LOCAL)
    return addr->local_part;
  if (mask == MU_ADDR_HINT_DOMAIN)
    return addr->domain;
  if (mask == MU_ADDR_HINT_ROUTE)
    return addr->route;

  return NULL;
}
/* Choose the value for one address field.

   A non-NULL VALUE is returned as is.  Otherwise, if HINT is given,
   HFLAGS has MASK set and the corresponding HINT field is non-NULL, a
   strdup'ed copy of that field is returned and MASK is OR-ed into
   *MEMFLAG (when MEMFLAG is non-NULL) to record that the result was
   allocated here.  In every other case NULL is returned.  */
static char *
get_val (mu_address_t hint, int hflags, char *value, int mask, int *memflag)
{
  char *fallback;

  if (value || !hint || !(hflags & mask))
    return value;

  fallback = addr_field_by_mask (hint, mask);
  if (!fallback)
    return value;

  if (memflag)
    *memflag |= mask;
  return strdup (fallback);
}
/* Free every string field of A whose MU_ADDR_HINT_* bit is set in
   MEMFLAG.  The fields themselves are not reset.  */
static void
addr_free_fields (mu_address_t a, int memflag)
{
  const int masks[] = {
    MU_ADDR_HINT_ADDR,
    MU_ADDR_HINT_COMMENTS,
    MU_ADDR_HINT_PERSONAL,
    MU_ADDR_HINT_EMAIL,
    MU_ADDR_HINT_LOCAL,
    MU_ADDR_HINT_DOMAIN,
    MU_ADDR_HINT_ROUTE
  };
  size_t i;

  for (i = 0; i < sizeof masks / sizeof masks[0]; i++)
    {
      /* a cleared bit turns the mask into 0, which selects no field */
      char *field = addr_field_by_mask (a, memflag & masks[i]);

      if (field)
	free (field);
    }
}
/* Create a new address node from the given parts and store it in *PA.

   COMMENTS, PERSONAL, LOCAL and DOMAIN are heap strings or NULL.  For
   each one that is NULL, a copy of the matching field of HINT is used
   instead when HINT is given and the field's bit is set in HFLAGS (see
   get_val).  If a local part is available, the email field is built as
   the (quoted if necessary) local part, followed by "@" and the domain
   when there is one.

   Returns EOK on success; the node then owns the argument strings.
   Returns ENOMEM or another status on failure; *PA is not modified,
   the arguments have NOT been freed (the caller still owns them), and
   everything allocated here is released.  */
static int
fill_mb (mu_address_t *pa,
         char *comments, char *personal, char *local, char *domain,
         mu_address_t hint, int hflags)
{
  int rc = EOK;
  int memflag = 0;		/* fields that were copied from HINT */
  mu_address_t a = new_mb ();

  if (!a)
    return ENOMEM;

  a->comments = get_val (hint, hflags, comments, MU_ADDR_HINT_COMMENTS,
                         &memflag);
  a->personal = get_val (hint, hflags, personal, MU_ADDR_HINT_PERSONAL,
                         &memflag);
  domain = get_val (hint, hflags, domain, MU_ADDR_HINT_DOMAIN,
                    &memflag);
  local = get_val (hint, hflags, local, MU_ADDR_HINT_LOCAL,
                   &memflag);

  /* without a local part there is no email to construct */
  if (local)
    {
      rc = mu_parse822_quote_local_part (&a->email, local);
      if (rc == EOK && domain)
        {
          rc = str_append (&a->email, "@");
          if (rc == EOK)
            rc = str_append (&a->email, domain);
        }
    }

  a->local_part = local;
  a->domain = domain;

  if (rc != EOK)
    {
      /* The email string may be partly built (e.g. the local part was
         stored but appending "@" failed); it belongs to us alone, so
         release it here.  */
      str_free (&a->email);
      /* Release only the fields copied from HINT; note that the
         arguments have NOT been freed, we only own them on success.  */
      addr_free_fields (a, memflag);
      free (a);
      return rc;
    }

  *pa = a;
  return EOK;
}
/* Parse the NUL-terminated string S as an RFC 822 address list and
 * store the resulting chain of addresses in *A.
 *
 * HINT and HFLAGS are passed on to mu_parse822_address for every
 * address.
 *
 * Empty list elements are accepted, so ",,a@b" parses.  Returns EOK on
 * success.  On failure the status is returned and mu_address_destroy is
 * called on A.  Note that if the first address does not parse and S has
 * nothing left to scan, the EPARSE from that first attempt is what gets
 * returned.
 */
int
mu_parse822_address_list (mu_address_t *a, const char *s,
mu_address_t hint, int hflags)
{
/* address-list = #(address) */
const char **p = &s;
const char *e = &s[strlen (s)];
int rc = EOK;
mu_address_t *n = a; /* the next address we'll be creating */
rc = mu_parse822_address (p, e, n, hint, hflags);
/* A list may start with a leading <,>, we'll find out if
* that's not the case at the top of the while, but give
* this a conditional OK unless there was some other kind
* of error.
*/
if (rc != EOK && rc != EPARSE)
{
return rc;
}
while (*p < e)
{
mu_parse822_skip_comments (p, e);
/* An address can contain a group, so an entire
* list of addresses may have been appended, or no
* addresses at all. Walk to the end.
*/
while (*n)
{
n = &(*n)->next;
}
/* Remember that ',,a@b' is a valid list! So, we must find
* the <,>, but the address after it can be empty.
*/
if ((rc = mu_parse822_special (p, e, ',')))
{
/* input remains but it is not a separator: the list is malformed */
break;
}
mu_parse822_skip_comments (p, e);
rc = mu_parse822_address (p, e, n, hint, hflags);
if (rc == EOK || rc == EPARSE)
{
/* that's cool, it may be a <,>, we'll find out if it isn't
* at the top of the loop
*/
rc = EOK;
}
else
{
/* anything else is a fatal error, break out */
break;
}
}
if (rc)
{
/* discard whatever was built before the failure */
mu_address_destroy (a);
}
return rc;
}
/* Parse a single address at *P into *A.

     address = mailbox / group / unix-mbox

   The alternatives are tried in that order; the next one is attempted
   only when the previous one returned EPARSE.  On success, if the
   resulting address has no route, one is taken from HINT when HFLAGS
   allows it (see get_val).  Returns the status of the last alternative
   tried.  */
int
mu_parse822_address (const char **p, const char *e, mu_address_t *a,
		     mu_address_t hint, int hflags)
{
  int rc = mu_parse822_mail_box (p, e, a, hint, hflags);

  if (rc == EPARSE)
    rc = mu_parse822_group (p, e, a, hint, hflags);
  if (rc == EPARSE)
    rc = mu_parse822_unix_mbox (p, e, a, hint, hflags);

  if (rc == 0 && *a && !(*a)->route)
    (*a)->route = get_val (hint, hflags, NULL, MU_ADDR_HINT_ROUTE, NULL);

  return rc;
}
/* No longer put groups into an address node, it wasn't useful, was
* troublesome, and since there wasn't an end-group marker, wasn't
* even conceivably useful.
*/
#undef ADD_GROUPS
/* Parse a group at *P and append its member mailboxes to the chain at
 * *A.
 *
 * With ADD_GROUPS undefined (as it is here) the group's phrase is parsed
 * and then discarded, so only the members end up in the list.
 *
 * Returns EOK with *P past the closing ";".  If the phrase or the ":"
 * is missing, the corresponding status is returned and nothing is
 * added.  If parsing fails later, *P is restored and
 * mu_address_destroy is called on the caller's A.
 */
int
mu_parse822_group (const char **p, const char *e, mu_address_t *a,
mu_address_t hint, int hflags)
{
/* group = phrase ":" [#mailbox] ";" */
const char *save = *p;
mu_address_t *asave = a; /* so we can destroy these if parsing fails */
int rc;
char *phrase = 0;
/* NOTE(review): the position is reset right after this skip, so the
* call has no lasting effect on *p. */
mu_parse822_skip_comments (p, e);
*p = save;
if ((rc = mu_parse822_phrase (p, e, &phrase)))
{
return rc;
}
mu_parse822_skip_comments (p, e);
if ((rc = mu_parse822_special (p, e, ':')))
{
*p = save;
str_free (&phrase);
return rc;
}
#ifdef ADD_GROUPS
/* fake up an address node for the group's descriptive phrase, if
* it fails, clean-up will happen after the loop
*/
if ((rc = fill_mb (a, 0, phrase, 0, 0, hint, hflags)) == EOK)
{
a = &(*a)->next;
}
else
{
str_free (&phrase);
}
#else
/* the phrase is only needed to recognise the group syntax */
str_free (&phrase);
#endif
/* Basically, on each loop, we may find a mailbox, but we must find
* a comma after the mailbox, otherwise we've popped off the end
* of the list.
*/
while (!rc)
{
mu_parse822_skip_comments (p, e);
/* it's ok not be a mailbox, but other errors are fatal */
rc = mu_parse822_mail_box (p, e, a, hint, hflags);
if (rc == EOK)
{
/* step to the slot for the next member */
a = &(*a)->next;
mu_parse822_skip_comments (p, e);
}
else if (rc != EPARSE)
{
break;
}
if ((rc = mu_parse822_special (p, e, ',')))
{
/* the commas aren't optional */
break;
}
}
if (rc == EPARSE)
{
rc = EOK; /* ok, as long as we find the ";" next */
}
if (rc || (rc = mu_parse822_special (p, e, ';')))
{
*p = save;
mu_address_destroy (asave);
}
return rc;
}
/* Parse a mailbox at *P into *A.
 *
 * Two forms are tried in order: a bare addr-spec optionally followed by
 * a comment, which is stored as the personal name; then an optional
 * phrase followed by a route-addr, where the phrase becomes the
 * personal name.
 *
 * Returns EOK on success.  On failure the status is returned; *P is
 * restored on the failure paths that follow a successful addr-spec or
 * a failed route-addr.
 */
int
mu_parse822_mail_box (const char **p, const char *e, mu_address_t *a,
mu_address_t hint, int hflags)
{
/* mailbox =
* addr-spec [ "(" comment ")" ] /
* [phrase] route-addr
*
* Note: we parse the ancient comment on the right since
* it's such "common practice". :-(
* Note: phrase is called display-name in drums.
* Note: phrase is optional in drums, though not in RFC 822.
*/
const char *save = *p;
int rc;
/* -> addr-spec */
if ((rc = mu_parse822_addr_spec (p, e, a, hint, hflags)) == EOK)
{
mu_parse822_skip_lwsp (p, e);
/* yuck. */
/* the comment text is appended to the personal field */
if ((rc = mu_parse822_comment (p, e, &(*a)->personal)) == EPARSE)
{
rc = EOK;
/* cool if there's no comment, */
}
/* but if something else is wrong, destroy the address */
if (rc)
{
mu_address_destroy (a);
*p = save;
}
return rc;
}
/* -> phrase route-addr */
{
char *phrase = 0;
rc = mu_parse822_phrase (p, e, &phrase);
/* a missing phrase (EPARSE) is fine; anything else is fatal */
if (rc != EPARSE && rc != EOK)
{
return rc;
}
if ((rc = mu_parse822_route_addr (p, e, a, hint, hflags)) == EOK)
{
/* add the phrase */
/* NOTE(review): this overwrites any personal value that fill_mb
* copied from the hint -- confirm that is intended. */
(*a)->personal = phrase;
return EOK;
}
/* some internal error, fail out */
str_free (&phrase);
*p = save;
return rc;
}
/* not reached: both branches of the block above return */
return rc;
}
/* Parse a route-addr at *P into *A.
 *
 * The empty form "<>" is accepted and produces an address whose email
 * is the empty string.  The route is optional; when none is parsed,
 * one may be taken from HINT (see get_val).
 *
 * Returns EOK on success.  If "<", the addr-spec or ">" is missing, *P
 * is restored and the status returned; in the missing ">" case the
 * address already built is destroyed.
 */
int
mu_parse822_route_addr (const char **p, const char *e, mu_address_t *a,
mu_address_t hint, int hflags)
{
/* route-addr = "<" [route] addr-spec ">" */
const char *save = *p;
char *route = NULL;
int rc;
int memflag = 0; /* written by get_val below, not read afterwards */
mu_parse822_skip_comments (p, e);
if ((rc = mu_parse822_special (p, e, '<')))
{
*p = save;
return rc;
}
/* "<>": an empty address */
if (!(rc = mu_parse822_special (p, e, '>')))
{
if ((rc = fill_mb (a, 0, 0, 0, 0, hint, hflags)) == EOK)
rc = str_append (&(*a)->email, "");
return rc;
}
/* the route is optional, so the status is deliberately not checked */
mu_parse822_route (p, e, &route);
if ((rc = mu_parse822_addr_spec (p, e, a, hint, hflags)))
{
*p = save;
str_free (&route);
return rc;
}
(*a)->route = get_val (hint, hflags, route, MU_ADDR_HINT_ROUTE,
&memflag);
mu_parse822_skip_comments (p, e);
if ((rc = mu_parse822_special (p, e, '>')))
{
*p = save;
mu_address_destroy (a);
return rc;
}
return EOK;
}
/* Parse a route at *P and append it to *ROUTE, formatted as
   "@domain, @domain".

     route = 1#("@" domain ) ":"

   Returns EOK with *P past the ":".  On any failure *P is restored to
   its value on entry, nothing is appended to *ROUTE, and the status is
   returned.  */
int
mu_parse822_route (const char **p, const char *e, char **route)
{
  const char *start = *p;
  char *acc = 0;		/* route text collected so far */
  int rc;

  do
    {
      mu_parse822_skip_comments (p, e);
      rc = mu_parse822_special (p, e, '@');
      if (rc)
	break;

      rc = str_append (&acc, "@");
      if (rc)
	break;

      mu_parse822_skip_comments (p, e);
      rc = mu_parse822_domain (p, e, &acc);
      if (rc)
	/* it looked like a route, but there's no domain! */
	break;

      mu_parse822_skip_comments (p, e);
      rc = mu_parse822_special (p, e, ',');
      if (rc == EPARSE)
	{
	  /* no more routes, but we got one so its ok */
	  rc = EOK;
	  break;
	}

      rc = str_append (&acc, ", ");
    }
  while (rc == EOK);

  mu_parse822_skip_comments (p, e);

  if (!rc)
    rc = mu_parse822_special (p, e, ':');
  if (!rc)
    rc = str_append (route, acc);
  if (rc)
    *p = start;

  str_free (&acc);
  return rc;
}
/* Parse an addr-spec at *P and create an address node for it in *A.

     addr-spec = local-part "@" domain

   On success the new node owns the parsed local part and domain.  On
   failure *P is restored, both strings are freed and the status is
   returned.  */
int
mu_parse822_addr_spec (const char **p, const char *e, mu_address_t *a,
		       mu_address_t hint, int hflags)
{
  const char *start = *p;
  char *local = 0;
  char *dom = 0;
  int rc;

  rc = mu_parse822_local_part (p, e, &local);
  mu_parse822_skip_comments (p, e);

  if (rc == EOK)
    rc = mu_parse822_special (p, e, '@');
  if (rc == EOK)
    rc = mu_parse822_domain (p, e, &dom);
  if (rc == EOK)
    rc = fill_mb (a, 0, 0, local, dom, hint, hflags);

  if (rc)
    {
      *p = start;
      str_free (&local);
      str_free (&dom);
    }
  return rc;
}
/* Parse a bare mailbox name at *P and create an address node for it in
   *A, with the name as local part and no domain.

     unix-mbox = atom

   On failure *P is restored, the parsed name is freed and the status
   is returned.  */
int
mu_parse822_unix_mbox (const char **p, const char *e, mu_address_t *a,
		       mu_address_t hint, int hflags)
{
  const char *start = *p;
  char *name = 0;
  int rc;

  mu_parse822_skip_comments (p, e);

  rc = mu_parse822_atom (p, e, &name);
  if (rc == EOK)
    rc = fill_mb (a, 0, 0, name, 0, hint, hflags);
  if (rc == EOK)
    return rc;

  *p = start;
  str_free (&name);
  return rc;
}
/* Parse a local-part at *P and append it to *LOCAL_PART.

     local-part = word *("." word)

   Implemented recursively as  word ["." local-part].

   Returns EOK on success.  If the first word is missing, *P is
   restored and the status returned.  A "." that is not followed by
   another local-part is not an error: *P is left just before it.  Any
   other failure restores *P, frees *LOCAL_PART and is returned.  */
int
mu_parse822_local_part (const char **p, const char *e, char **local_part)
{
  const char *start = *p;
  const char *after_word;
  int rc;

  mu_parse822_skip_comments (p, e);

  rc = mu_parse822_word (p, e, local_part);
  if (rc)
    {
      *p = start;
      return rc;
    }

  /* We've got a local-part, but keep looking for more. */
  mu_parse822_skip_comments (p, e);

  /* A parse error from here on rolls back to this point only. */
  after_word = *p;

  rc = mu_parse822_special (p, e, '.');
  if (rc == EOK)
    {
      char *rest = 0;

      rc = mu_parse822_local_part (p, e, &rest);
      if (rc == EOK)
	rc = str_append (local_part, ".");
      if (rc == EOK)
	rc = str_append (local_part, rest);
      str_free (&rest);
    }

  if (rc == EPARSE)
    {
      /* if we didn't get more ("." word) pairs, that's ok */
      *p = after_word;
      return EOK;
    }

  if (rc)
    {
      /* if anything else failed, that's real */
      *p = start;
      str_free (local_part);
    }
  return rc;
}
/* Parse a domain at *P and append it to *DOMAIN.

     domain = sub-domain *("." sub-domain)

   Implemented recursively as  sub-domain ["." domain].

   Returns EOK on success.  If the first sub-domain is missing, *P is
   restored and the status returned.  When no further ".sub-domain"
   follows, *P is left right after the last sub-domain, before any
   trailing comment, so that the caller can still see that comment.
   Any other failure restores *P, frees *DOMAIN and is returned.  */
int
mu_parse822_domain (const char **p, const char *e, char **domain)
{
  const char *start = *p;
  const char *after_sub;
  int rc;

  mu_parse822_skip_comments (p, e);

  rc = mu_parse822_sub_domain (p, e, domain);
  if (rc)
    {
      *p = start;
      return rc;
    }

  /* Remember the position before skipping comments to preserve the
     comment at the end of a domain; the addr-spec may want to abuse it
     for a personal name.  */
  after_sub = *p;

  /* we've got the 1, keep looking for more */
  mu_parse822_skip_comments (p, e);

  rc = mu_parse822_special (p, e, '.');
  if (rc == EOK)
    {
      char *rest = 0;

      rc = mu_parse822_domain (p, e, &rest);
      if (rc == EOK)
	rc = str_append (domain, ".");
      if (rc == EOK)
	rc = str_append (domain, rest);
      str_free (&rest);
    }

  if (rc == EPARSE)
    {
      /* we didn't parse more ("." sub-domain) pairs, that's ok */
      *p = after_sub;
      return EOK;
    }

  if (rc)
    {
      /* something else failed, roll it all back */
      *p = start;
      str_free (domain);
    }
  return rc;
}
/* Parse a sub-domain at *P and append it to *SUB_DOMAIN.

     sub-domain = domain-ref / domain-literal

   The literal form is tried only when the domain-ref returned
   EPARSE.  */
int
mu_parse822_sub_domain (const char **p, const char *e, char **sub_domain)
{
  int rc = mu_parse822_domain_ref (p, e, sub_domain);

  if (rc != EPARSE)
    return rc;

  return mu_parse822_domain_literal (p, e, sub_domain);
}
/* Parse a domain-ref at *P and append it to *DOMAIN_REF.

     domain-ref = atom

   Returns the status of mu_parse822_atom.  */
int
mu_parse822_domain_ref (const char **p, const char *e, char **domain_ref)
{
  int rc = mu_parse822_atom (p, e, domain_ref);

  return rc;
}
/* Consume the longest run of dtext characters at *P and append it to
   *DTEXT.

     d-text = 1*dtext

   Note: dtext is only defined as a character class in RFC822, but this
   definition is more useful for slurping domain literals.

   Returns EPARSE when the run is empty, otherwise the status of the
   append; *P is advanced only on success.  */
int
mu_parse822_d_text (const char **p, const char *e, char **dtext)
{
  const char *start = *p;
  const char *cur = start;
  int rc;

  while (cur < e && mu_parse822_is_d_text (*cur))
    cur++;

  if (cur == start)
    return EPARSE;

  rc = str_append_range (dtext, start, cur);
  if (rc == EOK)
    *p = cur;
  return rc;
}
/* Parse a domain literal at *P and append it, brackets included, to
   *DOMAIN_LITERAL.

     domain-literal = "[" *(dtext / quoted-pair) "]"

   Returns EOK on success.  If "[" is missing, the status of
   mu_parse822_special is returned.  On any later failure *P is
   restored, nothing is appended to *DOMAIN_LITERAL and the status is
   returned.  */
int
mu_parse822_domain_literal (const char **p, const char *e, char **domain_literal)
{
  const char *start = *p;
  char *lit = 0;
  int rc;

  rc = mu_parse822_special (p, e, '[');
  if (rc)
    return rc;

  rc = str_append_char (&lit, '[');
  if (rc)
    {
      *p = start;
      return rc;
    }

  /* Eat all the dtext runs and quoted pairs we can get! */
  for (;;)
    {
      rc = mu_parse822_d_text (p, e, &lit);
      if (rc == EOK)
	continue;
      rc = mu_parse822_quoted_pair (p, e, &lit);
      if (rc != EOK)
	break;
    }

  /* running out of content is expected; the "]" must come next */
  if (rc == EPARSE)
    rc = EOK;

  if (!rc)
    rc = mu_parse822_special (p, e, ']');
  if (!rc)
    rc = str_append_char (&lit, ']');
  if (!rc)
    rc = str_append (domain_literal, lit);

  str_free (&lit);

  if (rc)
    *p = start;
  return rc;
}
/***** From RFC 822, 5.1 Date and Time Specification Syntax *****/
/* Parse a three-letter day-of-week name at *P, after skipping comments.
   The comparison ignores case.

     day = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"

   On success *P is advanced by three, *DAY (if DAY is non-NULL) is set
   to 0 for Sunday through 6 for Saturday, and EOK is returned.
   Otherwise EPARSE is returned.  */
int
mu_parse822_day (const char **p, const char *e, int *day)
{
  const char *days[] = {
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    NULL
  };
  int d = 0;

  mu_parse822_skip_comments (p, e);

  if (e - *p < 3)
    return EPARSE;

  while (days[d] && mu_c_strncasecmp (*p, days[d], 3) != 0)
    d++;

  if (!days[d])
    return EPARSE;

  *p += 3;
  if (day)
    *day = d;
  return EOK;
}
/* Parse a date at *P.

     date  = 1*2DIGIT month 2*4DIGIT
     month = "Jan" / "Feb" / "Mar" / "Apr"
           / "May" / "Jun" / "Jul" / "Aug"
           / "Sep" / "Oct" / "Nov" / "Dec"

   DAY must not be NULL (mu_parse822_digits asserts on it); MON and YEAR
   may be NULL.  On success *DAY is the day of month, *MON is 0..11 and
   *YEAR is the year minus 1900.

   Returns EOK with *P past the year.  On every failure *P is restored
   to its value on entry and the status is returned.  */
int
mu_parse822_date (const char **p, const char *e, int *day, int *mon, int *year)
{
  const char *mons[] = {
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
    NULL
  };
  const char *save = *p;
  int rc = EOK;
  int m = 0;
  int yr = 0;
  const char *yrbeg = 0;

  mu_parse822_skip_comments (p, e);

  if ((rc = mu_parse822_digits (p, e, 1, 2, day)))
    {
      *p = save;
      return rc;
    }

  mu_parse822_skip_comments (p, e);

  if ((e - *p) < 3)
    {
      /* Too short for a month name: roll back like every other failure
         path, so the caller never sees a half-consumed date.  */
      *p = save;
      return EPARSE;
    }

  for (m = 0; mons[m]; m++)
    {
      if (mu_c_strncasecmp (*p, mons[m], 3) == 0)
        {
          *p += 3;
          if (mon)
            *mon = m;
          break;
        }
    }

  if (!mons[m])
    {
      *p = save;
      return EPARSE;
    }

  mu_parse822_skip_comments (p, e);

  /* We need to count how many digits there were, and adjust the
   * interpretation of the year accordingly. This is from RFC 2822,
   * Section 4.3, Obsolete Date and Time. */
  yrbeg = *p;

  if ((rc = mu_parse822_digits (p, e, 2, 4, &yr)))
    {
      *p = save;
      return rc;
    }

  /* rationalize year to four digit, then adjust to tz notation */
  switch (*p - yrbeg)
    {
    case 2:
      if (yr >= 0 && yr <= 49)
        {
          yr += 2000;
          break;
        }
      /* two-digit years 50..99 fall through and are treated like
         three-digit years */
    case 3:
      yr += 1900;
      break;

    default:
      /* four digits: already a full year */
      break;
    }

  if (year)
    *year = yr - 1900;

  return EOK;
}
/* Parse a time of day with an optional zone at *P.

   HOUR, MIN and SEC must not be NULL.  TZ and TZ_NAME may be NULL.
   On success *HOUR, *MIN and *SEC hold the parsed values (*SEC is 0
   when the seconds are omitted) and *TZ holds the zone offset in
   seconds (0 when the zone is missing or not understood).  *TZ_NAME is
   set only when the zone is one of the names in the table below.

   Returns EOK on success.  If the hour, the first ":" or the minutes
   cannot be parsed, or a second ":" is not followed by digits, *P is
   restored and the status returned.  */
int
mu_parse822_time (const char **p, const char *e,
                  int *hour, int *min, int *sec, int *tz, const char **tz_name)
{
  /* time = hour zone
   * hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT] ; 00:00:00 - 23:59:59
   * zone = "UT" / "GMT" ; Universal Time
   * ; North American : UT
   * / "EST" / "EDT" ; Eastern: - 5/ - 4
   * / "CST" / "CDT" ; Central: - 6/ - 5
   * / "MST" / "MDT" ; Mountain: - 7/ - 6
   * / "PST" / "PDT" ; Pacific: - 8/ - 7
   * / 1ALPHA ; RFC 822 was wrong, RFC 2822
   * ; says treat these all as -0000.
   * / ( ("+" / "-") 4DIGIT ) ; Local differential
   * ; hours+min. (HHMM)
   */
  static const struct
  {
    const char *tzname;
    int tz;
  }
  tzs[] =
  {
    { "UT", 0 * 60 * 60 },
    { "UTC", 0 * 60 * 60 },
    { "GMT", 0 * 60 * 60 },
    { "EST", -5 * 60 * 60 },
    { "EDT", -4 * 60 * 60 },
    { "CST", -6 * 60 * 60 },
    { "CDT", -5 * 60 * 60 },
    { "MST", -7 * 60 * 60 },
    { "MDT", -6 * 60 * 60 },
    { "PST", -8 * 60 * 60 },
    { "PDT", -7 * 60 * 60 },
    { NULL, 0}
  };
  const char *save = *p;
  int rc = EOK;
  int z = 0;
  char *zone = NULL;

  mu_parse822_skip_comments (p, e);

  if ((rc = mu_parse822_digits (p, e, 1, 2, hour)))
    {
      *p = save;
      return rc;
    }

  if ((rc = mu_parse822_special (p, e, ':')))
    {
      *p = save;
      return rc;
    }

  if ((rc = mu_parse822_digits (p, e, 1, 2, min)))
    {
      *p = save;
      return rc;
    }

  /* the seconds are optional */
  if ((rc = mu_parse822_special (p, e, ':')))
    {
      *sec = 0;
    }
  else if ((rc = mu_parse822_digits (p, e, 1, 2, sec)))
    {
      *p = save;
      return rc;
    }

  mu_parse822_skip_comments (p, e);

  if ((rc = mu_parse822_atom (p, e, &zone)))
    {
      /* zone is optional */
      str_free (&zone);
      if (tz)
        *tz = 0;
      return EOK;
    }

  /* see if it's a timezone */
  for (; tzs[z].tzname; z++)
    {
      if (mu_c_strcasecmp (zone, tzs[z].tzname) == 0)
        break;
    }

  if (tzs[z].tzname)
    {
      if (tz_name)
        *tz_name = tzs[z].tzname;

      if (tz)
        *tz = tzs[z].tz;
    }
  else if (strlen (zone) > 5 || strlen (zone) < 4)
    {
      /* Test the pointer, not the value it points to: TZ is allowed to
         be NULL.  */
      if (tz)
        *tz = 0; /* Assume UTC */
    }
  else
    {
      /* zone = ( + / - ) hhmm */
      int hh;
      int mm;
      int sign;
      const char *zp = zone;

      switch (zp[0])
        {
        case '-':
          sign = -1;
          zp++;
          break;
        case '+':
          sign = +1;
          zp++;
          break;
        default:
          sign = 1;
          break;
        }

      if (strspn (zp, "0123456789") == 4)
        {
          /* convert to seconds from UTC.  Index from ZP, which points
             at the first digit whether or not a sign was present.  */
          hh = (zp[0] - '0') * 10 + (zp[1] - '0');
          mm = (zp[2] - '0') * 10 + (zp[3] - '0');
        }
      else
        {
          hh = mm = 0; /* Consider equivalent to -0000 */
        }

      if (tz)
        *tz = sign * (hh * 60 * 60 + mm * 60);
    }

  str_free (&zone);
  return EOK;
}
#if 0
For reference, especially for the required range and values of the
integer fields.
struct tm
{
int tm_sec; /* Seconds. [0-60] (1 leap second) */
int tm_min; /* Minutes. [0-59] */
int tm_hour; /* Hours. [0-23] */
int tm_mday; /* Day. [1-31] */
int tm_mon; /* Month. [0-11] */
int tm_year; /* Year - 1900. */
int tm_wday; /* Day of week. [0-6] */
int tm_yday; /* Days in year.[0-365] */
int tm_isdst; /* DST. [-1/0/1]*/
int tm_gmtoff; /* Seconds east of UTC. */
const char *tm_zone; /* Timezone abbreviation. */
};
#endif
/* Parse a full RFC 822 date-time at *P.

     date-time = [ day "," ] date time

   On success, TM (if non-NULL) is cleared and filled with the parsed
   fields, and TZ (if non-NULL) receives the zone offset and name.

   Returns EOK on success.  A non-EPARSE failure of the day-of-week
   parser is returned as is.  If a day name is present but not followed
   by ",", or the date or time cannot be parsed, *P is restored and the
   status is returned.  */
int
mu_parse822_date_time (const char **p, const char *e, struct tm *tm,
		       struct mu_timezone *tz)
{
  const char *start = *p;
  int rc;
  int wday = 0, mday = 0, mon = 0, year = 0;
  int hour = 0, min = 0, sec = 0;
  int tzoffset = 0;
  const char *tz_name = 0;

  rc = mu_parse822_day (p, e, &wday);
  if (rc == EOK)
    {
      /* If we got a day, we MUST have a ','. */
      mu_parse822_skip_comments (p, e);
      rc = mu_parse822_special (p, e, ',');
      if (rc)
	{
	  *p = start;
	  return rc;
	}
    }
  else if (rc != EPARSE)
    return rc;

  rc = mu_parse822_date (p, e, &mday, &mon, &year);
  if (rc == EOK)
    rc = mu_parse822_time (p, e, &hour, &min, &sec, &tzoffset, &tz_name);
  if (rc)
    {
      *p = start;
      return rc;
    }

  if (tm)
    {
      memset (tm, 0, sizeof (*tm));

      tm->tm_wday = wday;
      tm->tm_mday = mday;
      tm->tm_mon = mon;
      tm->tm_year = year;
      tm->tm_hour = hour;
      tm->tm_min = min;
      tm->tm_sec = sec;
#ifdef HAVE_STRUCT_TM_TM_ISDST
      tm->tm_isdst = -1;	/* unknown whether it's dst or not */
#endif
#ifdef HAVE_STRUCT_TM_TM_GMTOFF
      tm->tm_gmtoff = tzoffset;
#endif
#ifdef HAVE_STRUCT_TM_TM_ZONE
      tm->tm_zone = (char*) tz_name;
#endif
    }

  if (tz)
    {
      tz->utc_offset = tzoffset;
      tz->tz_name = tz_name;
    }

  return EOK;
}
/***** From RFC 822, 3.2 Header Field Definitions *****/
/* Parse a header field name and the ":" that follows it.

     field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> ":"

   On success a newly allocated copy of the name (without the colon) is
   stored in *FIELDNAME, *P is left just past the ":" and EOK is
   returned.

   On failure *P is restored, *FIELDNAME is not modified and the status
   is returned: EPARSE when the name is empty or the ":" is missing, or
   the str_append_char status when the name cannot be stored.  */
int
mu_parse822_field_name (const char **p, const char *e, char **fieldname)
{
  const char *save = *p;
  char *fn = NULL;
  int rc = EOK;

  while (*p != e)
    {
      char c = **p;

      if (!mu_parse822_is_char (c)
          || mu_parse822_is_ctl (c)
          || mu_parse822_is_space (c)
          || c == ':')
        break;

      rc = str_append_char (&fn, c);
      if (rc != EOK)
        break;
      *p += 1;
    }

  /* must be at least one char in the field name */
  if (rc == EOK && !fn)
    rc = EPARSE;

  if (rc == EOK)
    {
      mu_parse822_skip_comments (p, e);
      /* mu_parse822_special returns EOK (zero) when the colon IS
         present, so any non-zero status means it is missing.  */
      rc = mu_parse822_special (p, e, ':');
    }

  if (rc != EOK)
    {
      *p = save;
      str_free (&fn);
      return rc;
    }

  *fieldname = fn;
  return EOK;
}
/* Collect a (possibly folded) header field body starting at *P.

     field-body = *text [CRLF lwsp-char field-body]

   Text is copied up to each CRLF.  The CRLF itself is dropped, and
   copying continues when the next line starts with a space or tab.

   On success a newly allocated string is stored in *FIELDBODY, *P is
   left after the last CRLF consumed (or at E) and EOK is returned.
   If the text cannot be stored, *P is restored, *FIELDBODY is not
   modified and the str_append_range status is returned.  */
int
mu_parse822_field_body (const char **p, const char *e, char **fieldbody)
{
  const char *save = *p;
  char *fb = NULL;
  int rc;

  for (;;)
    {
      const char *eol = *p;

      /* find the CRLF that ends this line, or the end of input */
      while (eol != e)
        {
          if (eol[0] == '\r' && (eol + 1) != e && eol[1] == '\n')
            break;
          ++eol;
        }

      rc = str_append_range (&fb, *p, eol);
      if (rc != EOK)
        {
          /* do not report success with a truncated or missing body */
          *p = save;
          str_free (&fb);
          return rc;
        }

      *p = eol;

      if (eol == e)
        break; /* no more, so we're done */

      /* step over the CRLF */
      *p += 2;

      if (*p == e)
        break; /* no more, so we're done */

      /* check if next line is a continuation line */
      if (**p != ' ' && **p != '\t')
        break;
    }

  *fieldbody = fb;
  return EOK;
}
/***** RFC 822 Quoting Functions *****/
/* Store in *QUOTED an RFC 822 quoted-string representation of RAW.

     quoted-string = <"> *(qtext/quoted-pair) <">

   RAW is wrapped in double quotes and every character that is not
   qtext is preceded by a backslash.

   Returns EINVAL when RAW or QUOTED is NULL or *QUOTED is not NULL.
   On an append failure *QUOTED is freed and the status is returned.  */
int
mu_parse822_quote_string (char **quoted, const char *raw)
{
  const char *s;
  int rc;

  if (!raw || !quoted || *quoted)
    return EINVAL;

  rc = str_append_char (quoted, '"');

  for (s = raw; !rc && *s; s++)
    {
      if (!mu_parse822_is_q_text (*s))
	rc = str_append_char (quoted, '\\');
      if (!rc)
	rc = str_append_char (quoted, *s);
    }

  if (!rc)
    rc = str_append_char (quoted, '"');

  if (rc)
    str_free (quoted);
  return rc;
}
/* Store in *QUOTED a form of RAW that is valid as an RFC 822
   local-part.

     local-part = word * ("." word)
     word = atom / quoted-string

   If every character of RAW is a "." or an atom character, RAW is
   copied as is; otherwise the whole string is quoted with
   mu_parse822_quote_string, for simplicity.

   Returns EINVAL when RAW or QUOTED is NULL or *QUOTED is not NULL;
   otherwise the status of the copy or quoting operation.  */
int
mu_parse822_quote_local_part (char **quoted, const char *raw)
{
  const char *s;

  if (!raw || !quoted || *quoted)
    return EINVAL;

  for (s = raw; *s; s++)
    if (*s != '.' && !mu_parse822_is_atom_char (*s))
      return mu_parse822_quote_string (quoted, raw);

  /* if we don't have to quote it, just copy it over */
  return str_append (quoted, raw);
}