diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2019-01-26 00:30:28 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-01-26 01:05:52 +0200 |
commit | 3798a0f09da7fcaa6daa12e9ae483ebf99f7c46a (patch) | |
tree | 97c80866168b24c3c45f2d1961311d80b6ff1823 | |
parent | bc4d023a3810a7da6ede2cc522cdc34fa3129378 (diff) | |
download | mailutils-3798a0f09da7fcaa6daa12e9ae483ebf99f7c46a.tar.gz mailutils-3798a0f09da7fcaa6daa12e9ae483ebf99f7c46a.tar.bz2 |
imap4d: implement SEARCH CHARSET; fix BODY and TEXT searches.
* imap4d/search.c: Implement proper BODY searches. Implement CHARSET.
* imap4d/util.c (util_strcasestr): Remove. Use mu_c_strcasestr instead.
* imap4d/imap4d.h: Likewise.
* NEWS: Document changes.
-rw-r--r-- | NEWS | 11 | ||||
-rw-r--r-- | imap4d/imap4d.h | 1 | ||||
-rw-r--r-- | imap4d/search.c | 262 | ||||
-rw-r--r-- | imap4d/util.c | 8 | ||||
-rw-r--r-- | libmailutils/mailbox/header.c | 0 |
5 files changed, 241 insertions, 41 deletions
@@ -1,2 +1,2 @@ -GNU mailutils NEWS -- history of user-visible changes. 2019-01-23 +GNU mailutils NEWS -- history of user-visible changes. 2019-01-26 Copyright (C) 2002-2019 Free Software Foundation, Inc. @@ -8,2 +8,11 @@ Version 3.5.90 (git) +* imap4d: SEARCH command + +** Implemented SEARCH CHARSET + +** Improved SEARCH BODY and SEARCH TEXT commands + +Both commands now properly descend into multipart message parts and +decode messages, if necessary + * Fixes in the 'mail' utility diff --git a/imap4d/imap4d.h b/imap4d/imap4d.h index 2ad339a30..b90d653a0 100644 --- a/imap4d/imap4d.h +++ b/imap4d/imap4d.h @@ -456,3 +456,2 @@ extern int util_parse_ctime_date (const char *date, time_t *timep, enum datetime_parse_mode flag); -extern char *util_strcasestr (const char *haystack, const char *needle); extern char *util_localname (void); diff --git a/imap4d/search.c b/imap4d/search.c index 1d28b570f..3c243eddf 100644 --- a/imap4d/search.c +++ b/imap4d/search.c @@ -17,2 +17,3 @@ #include "imap4d.h" +#include <mailutils/assoc.h> @@ -226,2 +227,3 @@ struct parsebuf struct mem_chain *alloc; /* Chain of objects allocated during parsing */ + char *charset; /* Charset, other than US-ASCII requested */ @@ -243,2 +245,3 @@ static int search_run (struct parsebuf *pb); static void do_search (struct parsebuf *pb); +static int available_charset (const char *charset); @@ -298,5 +301,11 @@ imap4d_search0 (imap4d_tokbuf_t tok, int isuid, char **err_text) { - *err_text = "Charset not supported"; + parsebuf.charset = parse_strdup (&parsebuf, parsebuf.token); + if (!available_charset (parsebuf.charset)) + { + *err_text = "[BADCHARSET] Charset not supported"; return RESP_NO; } + } + else + parsebuf.charset = NULL; @@ -307,3 +316,2 @@ imap4d_search0 (imap4d_tokbuf_t tok, int isuid, char **err_text) } - } @@ -798,11 +806,34 @@ _scan_header (struct parsebuf *pb, char *name, char *value) { - const char *hval; + char *hval; mu_header_t header = NULL; + int i, rc; + int result = 0; 
mu_message_get_header (pb->msg, &header); - if (mu_header_sget_value (header, name, &hval) == 0) + + for (i = 1; + result == 0 + && (rc = mu_header_aget_value_unfold_n (header, name, i, &hval)) == 0; + i++) + { + if (pb->charset) + { + char *tmp; + rc = mu_rfc2047_decode (pb->charset, hval, &tmp); + if (rc) { - return util_strcasestr (hval, value) != NULL; + mu_diag_funcall (MU_DIAG_ERR, "mu_rfc2047_decode", hval, rc); + free (hval); + continue; } - return 0; + free (hval); + hval = tmp; + } + result = mu_c_strcasestr (hval, value) != NULL; + free (hval); + } + if (!(rc == 0 || rc == MU_ERR_NOENT)) + mu_diag_funcall (MU_DIAG_ERR, "mu_header_aget_value_unfold_n", NULL, rc); + + return result; } @@ -827,3 +858,2 @@ _scan_header_all (struct parsebuf *pb, char *text) { - const char *hval; mu_header_t header = NULL; @@ -831,2 +861,3 @@ _scan_header_all (struct parsebuf *pb, char *text) int i, rc; + int result; @@ -834,36 +865,181 @@ _scan_header_all (struct parsebuf *pb, char *text) mu_header_get_field_count (header, &fcount); - for (i = rc = 0; i < fcount; i++) + result = 0; + for (i = 1; result == 0 && i < fcount; i++) { - if (mu_header_sget_field_value (header, i, &hval) == 0) - rc = util_strcasestr (hval, text) != NULL; + char *hval; + + rc = mu_header_aget_field_value_unfold (header, i, &hval); + if (rc) + { + mu_diag_funcall (MU_DIAG_ERR, "mu_header_aget_field_value_unfold", + NULL, rc); + continue; } - return rc; + + if (pb->charset) + { + char *tmp; + rc = mu_rfc2047_decode (pb->charset, hval, &tmp); + if (rc) + { + mu_diag_funcall (MU_DIAG_ERR, "mu_rfc2047_decode", hval, rc); + free (hval); + continue; + } + + free (hval); + hval = tmp; + } + result = mu_c_strcasestr (hval, text) != NULL; + free (hval); + } + return result; } -/* Scan body of the message for the occurrence of a substring */ -/* FIXME: The algorithm below is broken */ static int -_scan_body (struct parsebuf *pb, char *text) +_match_text (struct parsebuf *pb, mu_message_t msg, mu_content_type_t 
ct, + char const *encoding, + char *text) { - mu_body_t body = NULL; - mu_stream_t stream = NULL; - size_t size = 0, lines = 0; - char buffer[128]; - size_t n = 0; + mu_body_t body; + mu_stream_t str; int rc; + int result; + char *buffer = NULL; + size_t bufsize = 0; + size_t n; + + mu_message_get_body (msg, &body); + mu_body_get_streamref (body, &str); + + if (encoding) + { + mu_stream_t flt; + rc = mu_filter_create (&flt, str, encoding, + MU_FILTER_DECODE, + MU_STREAM_READ); + mu_stream_unref (str); + if (rc) + { + mu_error (_("can't handle encoding %s: %s"), + encoding, mu_strerror (rc)); + return 0; + } + str = flt; + } + + if (pb->charset) + { + struct mu_mime_param *param; + if (mu_assoc_lookup (ct->param, "charset", &param) == 0 + && mu_c_strcasecmp (param->value, pb->charset)) + { + char const *argv[] = { "iconv", NULL, NULL, NULL }; + mu_stream_t flt; - mu_message_get_body (pb->msg, &body); - mu_body_size (body, &size); - mu_body_lines (body, &lines); - mu_body_get_streamref (body, &stream); - rc = 0; - while (rc == 0 - && mu_stream_read (stream, buffer, sizeof(buffer)-1, &n) == 0 + argv[1] = param->value; + argv[2] = pb->charset; + rc = mu_filter_chain_create (&flt, str, + MU_FILTER_ENCODE, + MU_STREAM_READ, + MU_ARRAY_SIZE (argv) - 1, + (char**) argv); + mu_stream_unref (str); + if (rc) + { + mu_error (_("can't convert from charset %s to %s"), + param->value, pb->charset); + return 0; + } + str = flt; + } + } + + result = 0; + while ((rc = mu_stream_getline (str, &buffer, &bufsize, &n)) == 0 && n > 0) { - buffer[n] = 0; - rc = util_strcasestr (buffer, text) != NULL; + result = mu_c_strcasestr (buffer, text) != NULL; + if (result) + break; + } + mu_stream_destroy (&str); + if (rc) + mu_diag_funcall (MU_DIAG_ERR, "mu_stream_getline", NULL, rc); + return result; +} + +static int +_match_multipart (struct parsebuf *pb, mu_message_t msg, char *text) +{ + mu_header_t hdr; + char *encoding; + int ismp; + int result; + mu_content_type_t ct; + char *buf; + int rc; + 
+ if (mu_message_is_multipart (msg, &ismp)) + return 0; + if (mu_message_get_header (msg, &hdr)) + return 0; + + if (mu_header_aget_value_unfold (hdr, MU_HEADER_CONTENT_TYPE, &buf)) + { + buf = strdup ("text/plain"); + if (!buf) + return 0; + } + rc = mu_content_type_parse (buf, NULL, &ct); + free (buf); + if (rc) + return 0; + + if (mu_header_aget_value_unfold (hdr, MU_HEADER_CONTENT_TRANSFER_ENCODING, + &encoding)) + encoding = NULL; + + if (ismp) + { + size_t i, nparts; + + mu_message_get_num_parts (msg, &nparts); + + for (i = 1; i <= nparts; i++) + { + mu_message_t submsg = NULL; + + if (mu_message_get_part (msg, i, &submsg) == 0) + { + result = _match_multipart (pb, submsg, text); + if (result) + break; + } + } + } + else if (mu_c_strcasecmp (ct->type, "message") == 0 + && mu_c_strcasecmp (ct->subtype, "rfc822") == 0) + { + mu_message_t submsg = NULL; + + if (mu_message_unencapsulate (msg, &submsg, NULL) == 0) + { + result = _match_multipart (pb, submsg, text); + } } - mu_stream_destroy (&stream); - return rc; + else if (mu_c_strcasecmp (ct->type, "text") == 0) + result = _match_text (pb, msg, ct, encoding, text); + + free (encoding); + mu_content_type_destroy (&ct); + + return result; +} + +/* Scan body of the message for the occurrence of a substring */ +static int +_scan_body (struct parsebuf *pb, char *text) +{ + return _match_multipart (pb, pb->msg, text); } @@ -936,3 +1112,3 @@ cond_from (struct parsebuf *pb, struct search_node *node, struct value *arg, if (mu_envelope_sget_sender (env, &from) == 0) - rc = util_strcasestr (from, s) != NULL; + rc = mu_c_strcasestr (from, s) != NULL; @@ -1103 +1279,25 @@ cond_uid (struct parsebuf *pb, struct search_node *node, struct value *arg, +/* Return 1 if the CHARSET is available. + This function assumes that charset is available if it is possible + to create a filter for encoding ASCII data into it. 
+ */ +static int +available_charset (const char *charset) +{ + int rc; + mu_stream_t flt; + mu_stream_t null; + char const *argv[] = { "iconv", "US-ASCII", NULL, NULL }; + + rc = mu_nullstream_create (&null, MU_STREAM_READ); + if (rc) + return 0; + argv[2] = charset; + rc = mu_filter_chain_create (&flt, null, MU_FILTER_ENCODE, MU_STREAM_READ, + MU_ARRAY_SIZE (argv) - 1, (char**) argv); + mu_stream_unref (null); + if (rc) + return 0; + mu_stream_destroy (&flt); + return 1; +} diff --git a/imap4d/util.c b/imap4d/util.c index 19a7d82fd..cc494788b 100644 --- a/imap4d/util.c +++ b/imap4d/util.c @@ -158,10 +158,2 @@ util_parse_ctime_date (const char *date, time_t *timep, -/* Return the first ocurrence of NEEDLE in HAYSTACK. Case insensitive - comparison */ -char * -util_strcasestr (const char *haystack, const char *needle) -{ - return mu_c_strcasestr (haystack, needle); -} - void diff --git a/libmailutils/mailbox/header.c b/libmailutils/mailbox/header.c index 66af14209..98433b740 100644 --- a/libmailutils/mailbox/header.c +++ b/libmailutils/mailbox/header.c |