summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2020-11-27 12:27:37 +0200
committer Sergey Poznyakoff <gray@gnu.org> 2020-11-27 12:27:37 +0200
commit df29254df82d4aa466f066dd76e00446e540e1cb (patch)
tree 344d437a084650c7ce414c4e67df99d1f7292904
parent d13ccd98c79b9f5dc658c1dd5b1cb2287377affb (diff)
download mailutils-df29254df82d4aa466f066dd76e00446e540e1cb.tar.gz
download mailutils-df29254df82d4aa466f066dd76e00446e540e1cb.tar.bz2
mu_scan_datetime: Fix error recovery. %Z handles both time-zone forms.
The value stored in the memory location pointed to by the endp
argument has changed. On the MU_ERR_PARSE error, it is (as earlier)
the position in input where conversion stopped. On the MU_ERR_FORMAT
error, however, it is the character in fmt where the format error
was detected.

* libmailutils/datetime/scantime.c (peek_state): Return MU_ERR_FORMAT
if mu_list_tail fails (the list is empty). This reflects unbalanced
closing parenthesis or bracket.
(scan_recovery): Fix algorithm.
(mu_scan_datetime): %Z handles both abbreviated time-zones and
time-zones as hour offset from GMT. Improve error handling. On
MU_ERR_PARSE error, point *endp to the character in input where
conversion stopped. On MU_ERR_FORMAT error, point it to the character
in fmt where the format error was detected.
* libmailutils/tests/scantime.c: Improve error handling.
* libmailutils/tests/scantime.at: More testcases.
-rw-r--r-- libmailutils/datetime/scantime.c 154
-rw-r--r-- libmailutils/tests/scantime.at 84
-rw-r--r-- libmailutils/tests/scantime.c 14
3 files changed, 201 insertions, 51 deletions
diff --git a/libmailutils/datetime/scantime.c b/libmailutils/datetime/scantime.c
index 8e19a803b..0f3ba5d11 100644
--- a/libmailutils/datetime/scantime.c
+++ b/libmailutils/datetime/scantime.c
@@ -161,7 +161,7 @@ peek_state (mu_list_t list, int *state, const char **input)
rc = mu_list_tail (list, (void**)&inp);
if (rc)
- return rc;
+ return MU_ERR_FORMAT;
*state = inp->state;
if (input)
*input = inp->input;
@@ -212,6 +212,20 @@ state_to_closing_bracket (int st)
return '?';
}
+/*
+ * Recovery begins with nesting_level 1
+ *
+ * Recovery stops when:
+ * 1. Nesting level falls to 0 and there's nothing on stack:
+ * If tos was ST_OPT - success
+ * If tos was ST_ALT - failure
+ * 2. Nesting level falls to 0 and %| is found:
+ * Success
+ * 3. Nesting level falls to 0 and end of fmt string is encountered
+ * See 1.
+ * 4. Nesting level falls to negative:
+ * Failure
+ */
static int
scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
const char **endp,
@@ -219,7 +233,7 @@ scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
{
int c, rc = 0;
int nesting_level = 1;
- int st;
+ int st = ST_NON;
const char *p;
while (*fmt)
@@ -248,6 +262,15 @@ scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
case ')':
case ']':
+ if (nesting_level == 0)
+ {
+ mu_debug (MU_DEBCAT_MAILBOX, MU_DEBUG_ERROR,
+ ("%s:%d: error in format: %%%c out of context",
+ __FILE__, __LINE__, c));
+ rc = MU_ERR_FORMAT;
+ break;
+ }
+
rc = pop_input (*plist, &st, &p);
if (rc || st != bracket_to_state (c))
{
@@ -257,29 +280,31 @@ scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
rc = MU_ERR_FORMAT;
break;
}
- if (--nesting_level == 0)
+
+ --nesting_level;
+ if (nesting_level == 0)
{
*endp = fmt;
if (skip_alt)
return 0;
*input = p;
- if (st == ST_ALT)
- {
- if (*fmt == '%' && (fmt[1] == '|' || fmt[1] == ']'))
- return 0;
- return MU_ERR_PARSE; /* No match found */
- }
- return 0;
+ if (st == ST_OPT)
+ return 0;
+ if (mu_list_is_empty (*plist))
+ break;
}
break;
case '|':
if (skip_alt)
continue;
- if (nesting_level == 1)
+ if (nesting_level <= 1)
{
+ rc = peek_state (*plist, &st, input);
+ if (rc)
+ fmt -= 2;
*endp = fmt;
- return peek_state (*plist, &st, input);
+ return rc;
}
break;
@@ -296,6 +321,15 @@ scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
}
}
}
+
+ if (nesting_level == 0)
+ {
+ if (st == ST_ALT)
+ /* No match found */
+ return MU_ERR_PARSE;
+ else if (st == ST_OPT)
+ return 0;
+ }
peek_state (*plist, &st, NULL);
mu_debug (MU_DEBCAT_MAILBOX, MU_DEBUG_ERROR,
@@ -305,6 +339,15 @@ scan_recovery (const char *fmt, mu_list_t *plist, int skip_alt,
return MU_ERR_FORMAT;
}
+static inline int
+looks_like_numeric_tz (char const *p)
+{
+ if (p[0] == '+' || p[0] == '-')
+ p++;
+ return (mu_isdigit (p[0]) && mu_isdigit (p[1]) &&
+ mu_isdigit (p[2]) && mu_isdigit (p[3]));
+}
+
int
mu_scan_datetime (const char *input, const char *fmt,
struct tm *tm, struct mu_timezone *tz, char **endp)
@@ -314,7 +357,6 @@ mu_scan_datetime (const char *input, const char *fmt,
int n;
int c;
int st;
- int recovery = 0;
int eof_ok = 0;
int datetime_parts = 0;
mu_list_t save_input_list = NULL;
@@ -506,8 +548,40 @@ mu_scan_datetime (const char *input, const char *fmt,
}
break;
+ case 'Z':
+ if (!looks_like_numeric_tz (input))
+ {
+ /* Time-zone in abbreviated form */
+ char tzs[6];
+ p = mu_str_skip_class_comp (input, MU_CTYPE_SPACE);
+ n = p - input;
+ if (n > sizeof (tzs) - 1)
+ {
+ rc = MU_ERR_PARSE;
+ break;
+ }
+ memcpy (tzs, input, n);
+ tzs[n] = 0;
+ if (mu_timezone_offset (tzs, &n))
+ {
+ rc = MU_ERR_PARSE;
+ break;
+ }
+ if (tz)
+ tz->utc_offset = n;
+ input = p;
+ break;
+ }
+ /* fall through */
case 'z':
- /* The time-zone as hour offset from GMT */
+ /*
+ * The time-zone as hour offset from GMT.
+ * Notice that unless '+' or '-' is used explicitly, the
+ * time-zone in this form can be confused with the year.
+ * However, no one possibly expects emails dated 13th century
+ * and earlier, so the possibility of such confusion is
+ * vanishingly small.
+ */
{
int sign = 1;
int hr;
@@ -539,30 +613,6 @@ mu_scan_datetime (const char *input, const char *fmt,
}
break;
- case 'Z':
- /* Time-zone in abbreviated form */
- {
- char tzs[6];
- p = mu_str_skip_class_comp (input, MU_CTYPE_SPACE);
- n = p - input;
- if (n > sizeof (tzs) - 1)
- {
- rc = MU_ERR_PARSE;
- break;
- }
- memcpy (tzs, input, n);
- tzs[n] = 0;
- if (mu_timezone_offset (tzs, &n))
- {
- rc = MU_ERR_PARSE;
- break;
- }
- if (tz)
- tz->utc_offset = n;
- input = p;
- }
- break;
-
case '%':
if (*input == '%')
input++;
@@ -570,9 +620,6 @@ mu_scan_datetime (const char *input, const char *fmt,
rc = MU_ERR_PARSE;
break;
- rc = push_input (&save_input_list, ST_ALT, (void*)input);
- break;
-
case '(':
case '[':
rc = push_input (&save_input_list, bracket_to_state (c),
@@ -638,7 +685,7 @@ mu_scan_datetime (const char *input, const char *fmt,
if (eof_ok && rc == 0 && *input == 0)
break;
}
- else if (!recovery && *input != *fmt)
+ else if (*input != *fmt)
rc = MU_ERR_PARSE;
else
input++;
@@ -646,12 +693,17 @@ mu_scan_datetime (const char *input, const char *fmt,
if (rc == MU_ERR_PARSE && !mu_list_is_empty (save_input_list))
{
rc = scan_recovery (fmt, &save_input_list, 0, &fmt, &input);
- if (rc == 0)
- --fmt;
+ --fmt;
}
}
- if (!mu_list_is_empty (save_input_list))
+ if (rc == MU_ERR_FORMAT)
+ {
+ mu_debug (MU_DEBCAT_MAILBOX, MU_DEBUG_ERROR,
+ ("%s:%d: error in format string near %s",
+ __FILE__, __LINE__, fmt));
+ }
+ else if (!mu_list_is_empty (save_input_list))
{
mu_debug (MU_DEBCAT_MAILBOX, MU_DEBUG_ERROR,
("%s:%d: error in format: closing bracket missing",
@@ -660,9 +712,6 @@ mu_scan_datetime (const char *input, const char *fmt,
}
mu_list_destroy (&save_input_list);
- if (rc == 0 && recovery)
- rc = MU_ERR_PARSE;
-
if (!eof_ok && rc == 0 && *input == 0 && *fmt)
rc = MU_ERR_PARSE;
@@ -675,9 +724,14 @@ mu_scan_datetime (const char *input, const char *fmt,
tm->tm_yday = mu_datetime_dayofyear (tm->tm_year + 1900,
tm->tm_mon + 1, tm->tm_mday) - 1;
}
-
+
if (endp)
- *endp = (char*) input;
+ {
+ if (rc == MU_ERR_FORMAT)
+ *endp = (char *) fmt;
+ else
+ *endp = (char*) input;
+ }
return rc;
}
diff --git a/libmailutils/tests/scantime.at b/libmailutils/tests/scantime.at
index 3b5a1b1a1..7969158f4 100644
--- a/libmailutils/tests/scantime.at
+++ b/libmailutils/tests/scantime.at
@@ -153,5 +153,89 @@ sec=26,min=25,hour=13,mday=3,mon=4,year=111,wday=2,yday=122,tz=7200
scantime: 5: parse failed near ; 03 May 2011 13:25:26 +0200
])
+# This expanded scan specification takes into account all possible
+# variants of the date/time specification that ever existed in the
+# From_ line of UNIX mbox mailboxes.
+SCANTIME([MBOX From_ time],[from_],
+[[%a %b %e %H:%M%[:%S%] %(%(%z%|%Z%) %Y%|%Y %(%z%|%Z%)%|%Y%)]],
+[Wed Dec 2 05:53 1992
+Wed Dec 2 05:53:22 1992
+Wed Dec 2 05:53 PST 1992
+Wed Dec 2 05:53:22 PST 1992
+Wed Dec 2 05:53 -0700 1992
+Wed Dec 2 05:53:22 -0700 1992
+Wed Dec 2 05:53 1992 PST
+Wed Dec 2 05:53:22 1992 PST
+Wed Dec 2 05:53 1992 -0700
+Wed Dec 2 05:53:22 1992 -0700
+],
+[sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+])
+
+# This one is a simplification of the above, taking into account the
+# fact that %Z handles correctly numeric time zones as well (after
+# 2020-11-27). Notice ordering of the alternative forms.
+SCANTIME([MBOX From_ time (alternative)],[from_],
+[[%a %b %e %H:%M%[:%S%] %(%(%Z %Y%|%Y %Z%)%|%Y%)]],
+[Wed Dec 2 05:53 1992
+Wed Dec 2 05:53:22 1992
+Wed Dec 2 05:53 PST 1992
+Wed Dec 2 05:53:22 PST 1992
+Wed Dec 2 05:53 -0700 1992
+Wed Dec 2 05:53:22 -0700 1992
+Wed Dec 2 05:53 1992 PST
+Wed Dec 2 05:53:22 1992 PST
+Wed Dec 2 05:53 1992 -0700
+Wed Dec 2 05:53:22 1992 -0700
+],
+[sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+])
+
+# This is a final version of the From_ format line. It eliminates
+# one level of alternative specs by joining "%Y %Z" and "%Y" into
+# one spec.
+SCANTIME([MBOX From_ time (simplified)],[from_],
+[[%a %b %e %H:%M%[:%S%] %(%Z %Y%|%Y%[ %Z%]%)]],
+[Wed Dec 2 05:53 1992
+Wed Dec 2 05:53:22 1992
+Wed Dec 2 05:53 PST 1992
+Wed Dec 2 05:53:22 PST 1992
+Wed Dec 2 05:53 -0700 1992
+Wed Dec 2 05:53:22 -0700 1992
+Wed Dec 2 05:53 1992 PST
+Wed Dec 2 05:53:22 1992 PST
+Wed Dec 2 05:53 1992 -0700
+Wed Dec 2 05:53:22 1992 -0700
+],
+[sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=0
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-28800
+sec=0,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+sec=22,min=53,hour=5,mday=2,mon=11,year=92,wday=3,yday=336,tz=-25200
+])
+
m4_popdef([SCANTIME])
dnl ---------------------------------------------------------------------
diff --git a/libmailutils/tests/scantime.c b/libmailutils/tests/scantime.c
index a9a01af71..5a0d51141 100644
--- a/libmailutils/tests/scantime.c
+++ b/libmailutils/tests/scantime.c
@@ -65,13 +65,25 @@ main (int argc, char **argv)
if (!*buf)
continue;
rc = mu_scan_datetime (buf, format, &tm, &tz, &endp);
- if (rc)
+ switch (rc)
{
+ case 0:
+ break;
+
+ case MU_ERR_PARSE:
if (*endp)
mu_error ("%d: parse failed near %s", line, endp);
else
mu_error ("%d: parse failed at end of input", line);
continue;
+
+ case MU_ERR_FORMAT:
+ mu_error ("%d: error in format string near %s", line, endp);
+ continue;
+
+ default:
+ mu_error ("%d: %s", line, mu_strerror (rc));
+ exit (1);
}
if (*endp)
mu_printf ("# %d: stopped at %s\n", line, endp);

Return to:

Send suggestions and report system problems to the System administrator.