diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2019-01-29 14:56:24 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2019-01-29 15:02:15 +0200 |
commit | 1a29c40ae9f491fe1f6895b383d3391450bc3736 (patch) | |
tree | 6cc2c9c3190ff526555fe3afc2290476269046db | |
parent | 059fab534ac609e67ee54020ba764ec7fafddf96 (diff) | |
download | mailutils-1a29c40ae9f491fe1f6895b383d3391450bc3736.tar.gz mailutils-1a29c40ae9f491fe1f6895b383d3391450bc3736.tar.bz2 |
imap4d: Proper case-insensitive searches over multibyte strings.
This requires libunistring.
* NEWS: Update.
* README: Update.
* configure.ac: Check for libunistring.
* libmailutils/base/version.c: Reflect libunistring usage.
* imap4d/Makefile.am: Link with libunistring, if available.
* imap4d/imap4d.h (unistr_downcase)
(unistr_is_substring): New protos.
* imap4d/search.c (_scan_header)
(_scan_header_all,_match_text): Use unistr_is_substring for
searches.
* imap4d/unistr_c.c: New file.
* imap4d/unistr_u8.c: New file.
* imap4d/tests/search.at: Add new test.
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | configure.ac | 44 | ||||
-rw-r--r-- | imap4d/Makefile.am | 11 | ||||
-rw-r--r-- | imap4d/imap4d.h | 3 | ||||
-rw-r--r-- | imap4d/search.c | 26 | ||||
-rw-r--r-- | imap4d/tests/.gitignore | 6 | ||||
-rw-r--r-- | imap4d/tests/search.at | 11 | ||||
-rw-r--r-- | imap4d/unistr_c.c | 30 | ||||
-rw-r--r-- | imap4d/unistr_u8.c | 43 | ||||
-rw-r--r-- | libmailutils/base/version.c | 3 |
11 files changed, 175 insertions, 13 deletions
@@ -1,4 +1,4 @@ -GNU mailutils NEWS -- history of user-visible changes. 2019-01-26 +GNU mailutils NEWS -- history of user-visible changes. 2019-01-29 Copyright (C) 2002-2019 Free Software Foundation, Inc. See the end of file for copying conditions. @@ -13,7 +13,7 @@ Version 3.5.90 (git) ** Improved SEARCH BODY and SEARCH TEXT commands Both commands now properly descend into multipart message parts and -decode messages, if necessary +decode messages, if necessary. This requires libiconv and libunistring. * Fixes in the 'mail' utility @@ -151,6 +151,13 @@ mailutils-specific configuration options: Build 'mail' without readline support. + --without-unistring + + Build 'imap4d' without libunistring. Note, that libunistring + is necessary for SEARCH command to properly work on multibyte + string. The use of this option is therefore discouraged. It + is here mainly for development and debugging purposes. + --without-gnutls Disable the TLS/SSL encryption via GnuTLS (a Transport Layer diff --git a/configure.ac b/configure.ac index 0ef4a235d..4efd44198 100644 --- a/configure.ac +++ b/configure.ac @@ -1057,8 +1057,8 @@ char *crypt(const char *key, const char *salt); AC_CHECK_LIB(crypt, crypt) -#When using thread support some platforms need -D_REENTRANT to get the -#right prototypes including errno. +# When using thread support some platforms need -D_REENTRANT to get the +# right prototypes including errno. dnl Check threading support # We have to rearrange things a little, it appears that the new autoconf # does not like long cascading AC_CHECK_LIB. @@ -1132,6 +1132,44 @@ fi AM_CONDITIONAL([MU_COND_READLINE], [test "$status_readline" = "yes"]) +# Check for libunistring +AC_ARG_WITH([unistring], + AC_HELP_STRING([--without-unistring], + [do not use unistring]), + [ +case "${withval}" in + yes) status_unistring=yes ;; + no) status_unistring=no ;; + *) AC_MSG_ERROR(bad value ${withval} for --without-unistring) ;; +esac],[status_unistring=probe]) + +AC_SUBST(UNISTRING_LIBS) + +if test "$status_unistring" != "no"; then + AC_CHECK_HEADERS([unicase.h unistr.h]) + + if test "$ac_cv_header_unicase_h$ac_cv_header_unistr_h" = yesyes; then + AC_CHECK_LIB(unistring, u8_tolower, + [UNISTRING_LIBS=-lunistring + status_unistring=yes], + [if test "$status_unistring" = "yes"; then + AC_MSG_ERROR(required library libunistring not found) + else + status_unistring=no + fi]) + elif test "$status_unistring" = "yes"; then + AC_MSG_ERROR(header files for the required library libunistring not found) + else + status_unistring=no + fi +fi + +AH_TEMPLATE([WITH_UNISTRING],[Define to 1 if using libunistring]) +if test "$status_unistring" = "yes"; then + AC_DEFINE(WITH_UNISTRING,1,[Using libunistring]) +fi +AM_CONDITIONAL([MU_COND_UNISTRING],[test "$status_unistring" = "yes"]) + AH_BOTTOM([ /* Newer versions of readline have rl_completion_matches */ #ifndef HAVE_RL_COMPLETION_MATCHES @@ -1381,6 +1419,7 @@ Use GSSAPI .................... $status_gssapi Use TCP wrappers .............. $status_tcpwrap Pthread support ............... $status_pthread Readline support .............. $status_readline +Libunistring support .......... $status_unistring MySQL support ................. $status_mysql PostgreSQL support ............ $status_pgsql LDAP support .................. $status_ldap @@ -1435,6 +1474,7 @@ status_cxx=$mu_cv_enable_cxx status_tcpwrap=$status_tcpwrap status_pthread=$usepthread status_readline=$status_readline +status_unistring=$status_unistring status_mysql=$status_mysql status_pgsql=$status_pgsql status_radius=$mu_cv_enable_radius diff --git a/imap4d/Makefile.am b/imap4d/Makefile.am index a1212a84a..e23c2fb63 100644 --- a/imap4d/Makefile.am +++ b/imap4d/Makefile.am @@ -78,7 +78,16 @@ imap4d_LDADD = \ ${MU_LIB_AUTH}\ @MU_AUTHLIBS@ \ ${MU_LIB_MAILUTILS}\ - @SERV_AUTHLIBS@ @MU_COMMON_LIBRARIES@ @MU_TCPWRAP_LIBRARIES@ + @SERV_AUTHLIBS@\ + @MU_COMMON_LIBRARIES@\ + @MU_TCPWRAP_LIBRARIES@ + +if MU_COND_UNISTRING + imap4d_LDADD += -lunistring + imap4d_SOURCES += unistr_u8.c +else + imap4d_SOURCES += unistr_c.c +endif ## This kludge is necessary to correctly establish imap4d -> MU_AUTHLIBS ## dependencies. Automake stupidly refuses to include them. diff --git a/imap4d/imap4d.h b/imap4d/imap4d.h index b90d653a0..2bfae7a61 100644 --- a/imap4d/imap4d.h +++ b/imap4d/imap4d.h @@ -467,6 +467,9 @@ int is_atom (const char *s); int util_isdelim (const char *str); int util_trim_nl (char *s, size_t len); +void unistr_downcase (char const *input, char **output); +int unistr_is_substring (char const *haystack, char const *needle); + int set_xscript_level (int xlev); int imap4d_init_tls_server (struct mu_tls_config *); diff --git a/imap4d/search.c b/imap4d/search.c index 47acabbd7..8bc9770c1 100644 --- a/imap4d/search.c +++ b/imap4d/search.c @@ -813,9 +813,12 @@ _scan_header (struct parsebuf *pb, char *name, char *value) mu_header_t header = NULL; int i, rc; int result = 0; - + char *needle; + mu_message_get_header (pb->msg, &header); + unistr_downcase (value, &needle); + for (i = 1; result == 0 && (rc = mu_header_aget_value_unfold_n (header, name, i, &hval)) == 0; @@ -834,12 +837,13 @@ _scan_header (struct parsebuf *pb, char *name, char *value) free (hval); hval = tmp; } - result = mu_c_strcasestr (hval, value) != NULL; + result = unistr_is_substring (hval, needle); free (hval); } if (!(rc == 0 || rc == MU_ERR_NOENT)) mu_diag_funcall (MU_DIAG_ERR, "mu_header_aget_value_unfold_n", NULL, rc); - + free (needle); + return result; } @@ -865,9 +869,11 @@ _scan_header_all (struct parsebuf *pb, char *text) size_t fcount = 0; int i, rc; int result; - + char *needle; + mu_message_get_header (pb->msg, &header); mu_header_get_field_count (header, &fcount); + unistr_downcase (text, &needle); result = 0; for (i = 1; result == 0 && i < fcount; i++) { @@ -895,9 +901,10 @@ _scan_header_all (struct parsebuf *pb, char *text) free (hval); hval = tmp; } - result = mu_c_strcasestr (hval, text) != NULL; - free (hval); + result = unistr_is_substring (hval, needle); + free (hval); } + free (needle); return result; } @@ -913,7 +920,8 @@ _match_text (struct parsebuf *pb, mu_message_t msg, mu_content_type_t ct, char *buffer = NULL; size_t bufsize = 0; size_t n; - + char *needle; + mu_message_get_body (msg, &body); mu_body_get_streamref (body, &str); @@ -960,14 +968,16 @@ _match_text (struct parsebuf *pb, mu_message_t msg, mu_content_type_t ct, } } + unistr_downcase (text, &needle); result = 0; while ((rc = mu_stream_getline (str, &buffer, &bufsize, &n)) == 0 && n > 0) { - result = mu_c_strcasestr (buffer, text) != NULL; + result = unistr_is_substring (buffer, needle); if (result) break; } + free (needle); mu_stream_destroy (&str); if (rc) mu_diag_funcall (MU_DIAG_ERR, "mu_stream_getline", NULL, rc); diff --git a/imap4d/tests/.gitignore b/imap4d/tests/.gitignore index 9a0dcda59..8e0167aeb 100644 --- a/imap4d/tests/.gitignore +++ b/imap4d/tests/.gitignore @@ -1 +1,7 @@ ckiconv +atconfig +atlocal +package.m4 +testsuite +testsuite.dir +testsuite.log diff --git a/imap4d/tests/search.at b/imap4d/tests/search.at index d2369cead..69664e93f 100644 --- a/imap4d/tests/search.at +++ b/imap4d/tests/search.at @@ -194,5 +194,16 @@ ICONV_TEST) SEARCH_CHECK([TEXT],[search30], [TEXT how], [2 3]) + +IMAP4D_WITH_PREREQ( +[test `echo "seo=" | ckiconv iso-8859-2 utf-8` = "xIXEmQ==" \ + && imap4d --show-config-options | grep WITH_UNISTRING >/dev/null], +[ +SEARCH_CHECK([BODY CHARSET (case-insensitive UTF)],[search31], +[CHARSET utf-8 BODY "ĆWICZĄC, NAJWYŻEJ SŁYSZĘ"], +[4], +ICONV_TEST) +]) + m4_popdef([SEARCH_MBOX]) diff --git a/imap4d/unistr_c.c b/imap4d/unistr_c.c new file mode 100644 index 000000000..6c999f142 --- /dev/null +++ b/imap4d/unistr_c.c @@ -0,0 +1,30 @@ +/* GNU Mailutils -- a suite of utilities for electronic mail + Copyright (C) 1999-2019 Free Software Foundation, Inc. + + GNU Mailutils is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GNU Mailutils is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */ + +#include "imap4d.h" + +void +unistr_downcase (char const *input, char **output) +{ + /* nothing */ + *output = mu_strdup (input); +} + +int +unistr_is_substring (char const *haystack, char const *needle) +{ + return mu_c_strcasestr (haystack, needle) != NULL; +} diff --git a/imap4d/unistr_u8.c b/imap4d/unistr_u8.c new file mode 100644 index 000000000..5d29f2152 --- /dev/null +++ b/imap4d/unistr_u8.c @@ -0,0 +1,43 @@ +/* GNU Mailutils -- a suite of utilities for electronic mail + Copyright (C) 1999-2019 Free Software Foundation, Inc. + + GNU Mailutils is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GNU Mailutils is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */ + +#include "imap4d.h" +#include <stdlib.h> +#include <unicase.h> +#include <unistr.h> +#include <string.h> + +void +unistr_downcase (char const *input, char **output) +{ + size_t len; + uint8_t *result = u8_tolower ((const uint8_t *)input, strlen (input)+1, + NULL, NULL, NULL, &len); + *output = (char*)result; +} + +int +unistr_is_substring (char const *haystack, char const *needle) +{ + char *lc; + int result; + + unistr_downcase (haystack, &lc); + result = u8_strstr ((const uint8_t*) lc, (const uint8_t*) needle) != NULL; + free (lc); + return result; +} + diff --git a/libmailutils/base/version.c b/libmailutils/base/version.c index 0903b9966..df3f5ad5f 100644 --- a/libmailutils/base/version.c +++ b/libmailutils/base/version.c @@ -85,6 +85,9 @@ static struct mu_conf_option mu_conf_option[] = { #ifdef WITH_READLINE { "WITH_READLINE", N_("GNU Readline") }, #endif +#ifdef WITH_UNISTRING + { "WITH_UNISTRING", N_("Using GNU libunistring") }, +#endif #ifdef HAVE_MYSQL { "HAVE_MYSQL", N_("MySQL") }, #endif |