/* GNU Mailutils -- a suite of utilities for electronic mail
Copyright (C) 1999-2001, 2005, 2007, 2009-2012, 2014-2018 Free
Software Foundation, Inc.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General
Public License along with this library. If not, see
<http://www.gnu.org/licenses/>. */
/*
FIXME: what is the status of this TODO list?
Things to consider:
- When parsing phrase, should I ignore non-ascii, or replace with a
'?' character? Right now parsing fails.
--> Should ignore non-ascii, it is unicode or iso8859-1.
- Are comments allowed in domain-literals?
- Need a way to mark the *end* of a group. Maybe add a field to _mu_address,
int group_end;, so if you care, you can search for the end of
a group with address_is_group_end();
--> Groups no longer show up in the mu_address_t list.
- Need a way to parse ",,,", it's a valid address-list, it just doesn't
have any addresses.
- The personal for ""Sam"" <sam@here> is "Sam", and for "'s@b'" <s@b>
is 's@b', should I strip those outside parentheses, or is that
too intrusive? Maybe that is the application's business, if it wants to?
- Should we do best effort parsing, so parsing "sam@localhost, foo@"
gets one address, or just say it is or it isn't in RFC format?
Right now we're strict, we'll see how it goes.
- parse Received: field?
- test for memory leaks on malloc failure
- fix the realloc, try a struct _string { char* b, size_t sz };
The lexer finds consecutive sequences of characters, so it should
define:
struct parse822_token_t {
const char* b; // beginning of token
const char* e; // one past end of token
}
typedef struct parse822_token_t TOK;
Then I can have str_append_token(), and the lexer functions can
look like:
int mu_parse822_atom(const char** p, const char* e, TOK* atom);
Just a quick thought, I'll have to see how many functions that will
actually help.
- get example addresses from rfc2822, and from the perl code.
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#ifdef HAVE_STRINGS_H
# include <strings.h>
#endif
#include <mailutils/cctype.h>
#include <mailutils/cstr.h>
#include <mailutils/errno.h>
#include <mailutils/parse822.h>
#include <mailutils/address.h>
/* Success status used throughout this file.  Any EOK macro already in
 * effect at this point is dropped first so that EOK is always 0 here.
 */
#ifdef EOK
# undef EOK
#endif
#define EOK 0
/* Failure status returned by the parsing routines in this file when the
 * input at the current position does not match what they expect.
 */
#define EPARSE MU_ERR_INVALID_EMAIL
/*
* Some convenience functions for dealing with dynamically re-sized
* strings.
*/
/*
 * Append up to N characters of FROM to the heap string *TO, growing the
 * buffer as needed and keeping it NUL-terminated.
 *
 *   to    address of the string being built; *to may be NULL, in which
 *         case a new buffer is allocated.  If TO itself is NULL the data
 *         is silently discarded.
 *   from  source characters.
 *   n     number of characters to append.
 *
 * Returns EOK on success, or ENOMEM if the buffer cannot be (re)allocated.
 * On ENOMEM *to is left exactly as it was on entry, so the caller still
 * owns (and must eventually free) whatever it held before the call.
 */
static int
str_append_n (char **to, const char *from, size_t n)
{
  size_t used = 0;
  char *buf;

  /* if not to, then silently discard data */
  if (!to)
    return EOK;

  if (*to)
    used = strlen (*to);

  /* Refuse sizes for which used + n + 1 would wrap around size_t. */
  if (n >= (size_t) -1 - used)
    return ENOMEM;

  /* realloc (NULL, size) acts like malloc (size), so a single checked
   * call covers both the first append and every later one.  The result
   * goes into a temporary so the old buffer is not lost on failure.
   */
  buf = realloc (*to, used + n + 1);
  if (!buf)
    return ENOMEM;
  *to = buf;

  /* strncpy stops at a NUL in FROM and pads the rest of the N bytes with
   * NULs; it does not terminate on its own when FROM has N or more
   * characters, hence the explicit terminator below.
   */
  if (n > 0)
    strncpy (buf + used, from, n);
  buf[used + n] = 0;

  return EOK;
}
/*
 * Append the whole NUL-terminated string FROM to *TO.
 *
 * A NULL FROM appends nothing and reports success (0).  Otherwise the
 * result of str_append_n() is returned unchanged.
 */
static int
str_append (char **to, const char *from)
{
  size_t len;

  if (from == NULL)
    return 0;

  len = strlen (from);
  return str_append_n (to, from, len);
}
/*
 * Append the single character C to *TO.
 * Returns whatever str_append_n() returns.
 */
static int
str_append_char (char **to, char c)
{
  char one[1];

  one[0] = c;
  return str_append_n (to, one, sizeof one);
}
/*
 * Append the characters in the half-open range [B, E) to *TO.
 * Returns whatever str_append_n() returns.
 */
static int
str_append_range (char **to, const char *b, const char *e)
{
  size_t span = e - b;

  return str_append_n (to, b, span);
}
/*
 * Release the string held in *S and reset *S to a null pointer.
 * Does nothing when S is NULL or *S is already NULL.
 */
static void
str_free (char **s)
{
  if (s == NULL || *s == NULL)
    return;

  free (*s);
  *s = NULL;
}
/***** From RFC 822, 3.3 Lexical Tokens *****/
/*
 * Skip one new-line at *P, without reading at or beyond E.
 *
 * A new-line is either the CRLF pair required by the RFC or a bare LF.
 * On a match *P is advanced past it and EOK is returned; otherwise *P is
 * left alone and EPARSE is returned.
 */
int
mu_parse822_skip_nl (const char **p, const char *e)
{
  const char *cur = *p;

  if (cur + 1 < e && cur[0] == '\r' && cur[1] == '\n')
    *p = cur + 2;		/* CRLF */
  else if (cur < e && cur[0] == '\n')
    *p = cur + 1;		/* bare LF */
  else
    return EPARSE;

  return EOK;
}
/*
 * Skip exactly one LWSP-char at *P, as classified by
 * mu_parse822_is_lwsp_char().
 *
 * Returns EOK and advances *P by one on a match.  Returns EPARSE, with
 * *P unchanged, when *P has reached E or the character is not LWSP.
 */
int
mu_parse822_skip_lwsp_char (const char **p, const char *e)
{
  if (*p >= e || !mu_parse822_is_lwsp_char (**p))
    return EPARSE;

  ++*p;
  return EOK;
}
/*
 * Skip linear white space at *P, stopping at E.
 *
 *   linear-white-space = 1*([[CR]LF] LWSP-char)
 *
 * A bare LF is accepted as equivalent to the canonical CRLF line ending
 * (see mu_parse822_skip_nl), since on a Unix system CRLF will already
 * have been translated to the local convention.
 *
 * Returns EOK if at least one LWSP-char was skipped directly (that is,
 * not as the character following a new-line), EPARSE otherwise.
 *
 * If a new-line is found that is not followed by an LWSP-char, *P is put
 * back to where that new-line started and EPARSE is returned, even when
 * white space was skipped before it; that earlier white space stays
 * consumed.
 *
 * NOTE(review): a fold (new-line followed by LWSP-char) is consumed but
 * does not by itself make the function return EOK, so input consisting
 * only of folds advances *P and still yields EPARSE.  Unclear whether
 * callers rely on this -- confirm before changing.
 */
int
mu_parse822_skip_lwsp (const char **p, const char *e)
{
  int skipped_plain = 0;

  for (;;)
    {
      const char *start;

      if (*p == e)
	break;
      start = *p;

      /* Ordinary white space character. */
      if (mu_parse822_skip_lwsp_char (p, e) == EOK)
	{
	  skipped_plain = 1;
	  continue;
	}

      /* Anything other than a new-line ends the run. */
      if (mu_parse822_skip_nl (p, e) != EOK)
	break;

      /* A new-line only counts as white space when it is a fold, i.e.
       * when an LWSP-char follows it; otherwise un-read the new-line.
       */
      if (mu_parse822_skip_lwsp_char (p, e) != EOK)
	{
	  *p = start;
	  return EPARSE;
	}
    }

  return skipped_plain ? EOK : EPARSE;
}
/*
 * Skip any number of consecutive comments at *P by calling
 * mu_parse822_comment() until it stops returning EOK.
 *
 * The third argument to mu_parse822_comment() is 0 -- presumably a null
 * output pointer so the comment text is discarded; confirm against that
 * function's definition.
 *
 * Always returns EOK, including when no comment was present.
 */
int
mu_parse822_skip_comments (const char **p, const char *e)
{
  for (;;)
    {
      if (mu_parse822_comment (p, e, 0) != EOK)
	break;
    }

  return EOK;
}
int
mu_parse822
|