summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2019-01-29 14:56:24 +0200
committerSergey Poznyakoff <gray@gnu.org>2019-01-29 15:02:15 +0200
commit1a29c40ae9f491fe1f6895b383d3391450bc3736 (patch)
tree6cc2c9c3190ff526555fe3afc2290476269046db
parent059fab534ac609e67ee54020ba764ec7fafddf96 (diff)
downloadmailutils-1a29c40ae9f491fe1f6895b383d3391450bc3736.tar.gz
mailutils-1a29c40ae9f491fe1f6895b383d3391450bc3736.tar.bz2
imap4d: Proper case-insensitive searches over multibyte strings.
This requires libunistring. * NEWS: Update. * README: Update. * configure.ac: Check for libunistring. * libmailutils/base/version.c: Reflect libunistring usage. * imap4d/Makefile.am: Link with libunistring, if available. * imap4d/imap4d.h (unistr_downcase) (unistr_is_substring): New protos. * imap4d/search.c (_scan_header) (_scan_header_all,_match_text): Use unistr_is_substring for searches. * imap4d/unistr_c.c: New file. * imap4d/unistr_u8.c: New file. * imap4d/tests/search.at: Add new test.
-rw-r--r--NEWS4
-rw-r--r--README7
-rw-r--r--configure.ac44
-rw-r--r--imap4d/Makefile.am11
-rw-r--r--imap4d/imap4d.h3
-rw-r--r--imap4d/search.c26
-rw-r--r--imap4d/tests/.gitignore6
-rw-r--r--imap4d/tests/search.at11
-rw-r--r--imap4d/unistr_c.c30
-rw-r--r--imap4d/unistr_u8.c43
-rw-r--r--libmailutils/base/version.c3
11 files changed, 175 insertions, 13 deletions
diff --git a/NEWS b/NEWS
index 4f5c0dec9..972b9dacf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-GNU mailutils NEWS -- history of user-visible changes. 2019-01-26
+GNU mailutils NEWS -- history of user-visible changes. 2019-01-29
Copyright (C) 2002-2019 Free Software Foundation, Inc.
See the end of file for copying conditions.
@@ -13,7 +13,7 @@ Version 3.5.90 (git)
** Improved SEARCH BODY and SEARCH TEXT commands
Both commands now properly descend into multipart message parts and
-decode messages, if necessary
+decode messages, if necessary. This requires libiconv and libunistring.
* Fixes in the 'mail' utility
diff --git a/README b/README
index 117b05d0e..ff4a322de 100644
--- a/README
+++ b/README
@@ -151,6 +151,13 @@ mailutils-specific configuration options:
Build 'mail' without readline support.
+ --without-unistring
+
+ Build 'imap4d' without libunistring. Note that libunistring
+ is necessary for the SEARCH command to work properly on multibyte
+ strings. The use of this option is therefore discouraged. It
+ is here mainly for development and debugging purposes.
+
--without-gnutls
Disable the TLS/SSL encryption via GnuTLS (a Transport Layer
diff --git a/configure.ac b/configure.ac
index 0ef4a235d..4efd44198 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1057,8 +1057,8 @@ char *crypt(const char *key, const char *salt);
AC_CHECK_LIB(crypt, crypt)
-#When using thread support some platforms need -D_REENTRANT to get the
-#right prototypes including errno.
+# When using thread support some platforms need -D_REENTRANT to get the
+# right prototypes including errno.
dnl Check threading support
# We have to rearrange things a little, it appears that the new autoconf
# does not like long cascading AC_CHECK_LIB.
@@ -1132,6 +1132,44 @@ fi
AM_CONDITIONAL([MU_COND_READLINE], [test "$status_readline" = "yes"])
+# Check for libunistring
+AC_ARG_WITH([unistring],
+ AC_HELP_STRING([--without-unistring],
+ [do not use unistring]),
+ [
+case "${withval}" in
+ yes) status_unistring=yes ;;
+ no) status_unistring=no ;;
+ *) AC_MSG_ERROR(bad value ${withval} for --without-unistring) ;;
+esac],[status_unistring=probe])
+
+AC_SUBST(UNISTRING_LIBS)
+
+if test "$status_unistring" != "no"; then
+ AC_CHECK_HEADERS([unicase.h unistr.h])
+
+ if test "$ac_cv_header_unicase_h$ac_cv_header_unistr_h" = yesyes; then
+ AC_CHECK_LIB(unistring, u8_tolower,
+ [UNISTRING_LIBS=-lunistring
+ status_unistring=yes],
+ [if test "$status_unistring" = "yes"; then
+ AC_MSG_ERROR(required library libunistring not found)
+ else
+ status_unistring=no
+ fi])
+ elif test "$status_unistring" = "yes"; then
+ AC_MSG_ERROR(header files for the required library libunistring not found)
+ else
+ status_unistring=no
+ fi
+fi
+
+AH_TEMPLATE([WITH_UNISTRING],[Define to 1 if using libunistring])
+if test "$status_unistring" = "yes"; then
+ AC_DEFINE(WITH_UNISTRING,1,[Using libunistring])
+fi
+AM_CONDITIONAL([MU_COND_UNISTRING],[test "$status_unistring" = "yes"])
+
AH_BOTTOM([
/* Newer versions of readline have rl_completion_matches */
#ifndef HAVE_RL_COMPLETION_MATCHES
@@ -1381,6 +1419,7 @@ Use GSSAPI .................... $status_gssapi
Use TCP wrappers .............. $status_tcpwrap
Pthread support ............... $status_pthread
Readline support .............. $status_readline
+Libunistring support .......... $status_unistring
MySQL support ................. $status_mysql
PostgreSQL support ............ $status_pgsql
LDAP support .................. $status_ldap
@@ -1435,6 +1474,7 @@ status_cxx=$mu_cv_enable_cxx
status_tcpwrap=$status_tcpwrap
status_pthread=$usepthread
status_readline=$status_readline
+status_unistring=$status_unistring
status_mysql=$status_mysql
status_pgsql=$status_pgsql
status_radius=$mu_cv_enable_radius
diff --git a/imap4d/Makefile.am b/imap4d/Makefile.am
index a1212a84a..e23c2fb63 100644
--- a/imap4d/Makefile.am
+++ b/imap4d/Makefile.am
@@ -78,7 +78,16 @@ imap4d_LDADD = \
${MU_LIB_AUTH}\
@MU_AUTHLIBS@ \
${MU_LIB_MAILUTILS}\
- @SERV_AUTHLIBS@ @MU_COMMON_LIBRARIES@ @MU_TCPWRAP_LIBRARIES@
+ @SERV_AUTHLIBS@\
+ @MU_COMMON_LIBRARIES@\
+ @MU_TCPWRAP_LIBRARIES@
+
+## UNISTRING_LIBS is substituted by configure; it holds the linker flags
+## for libunistring whenever MU_COND_UNISTRING is true.
+if MU_COND_UNISTRING
+ imap4d_LDADD += @UNISTRING_LIBS@
+ imap4d_SOURCES += unistr_u8.c
+else
+ imap4d_SOURCES += unistr_c.c
+endif
## This kludge is necessary to correctly establish imap4d -> MU_AUTHLIBS
## dependencies. Automake stupidly refuses to include them.
diff --git a/imap4d/imap4d.h b/imap4d/imap4d.h
index b90d653a0..2bfae7a61 100644
--- a/imap4d/imap4d.h
+++ b/imap4d/imap4d.h
@@ -467,6 +467,9 @@ int is_atom (const char *s);
int util_isdelim (const char *str);
int util_trim_nl (char *s, size_t len);
+void unistr_downcase (char const *input, char **output);
+int unistr_is_substring (char const *haystack, char const *needle);
+
int set_xscript_level (int xlev);
int imap4d_init_tls_server (struct mu_tls_config *);
diff --git a/imap4d/search.c b/imap4d/search.c
index 47acabbd7..8bc9770c1 100644
--- a/imap4d/search.c
+++ b/imap4d/search.c
@@ -813,9 +813,12 @@ _scan_header (struct parsebuf *pb, char *name, char *value)
mu_header_t header = NULL;
int i, rc;
int result = 0;
-
+ char *needle;
+
mu_message_get_header (pb->msg, &header);
+ unistr_downcase (value, &needle);
+
for (i = 1;
result == 0
&& (rc = mu_header_aget_value_unfold_n (header, name, i, &hval)) == 0;
@@ -834,12 +837,13 @@ _scan_header (struct parsebuf *pb, char *name, char *value)
free (hval);
hval = tmp;
}
- result = mu_c_strcasestr (hval, value) != NULL;
+ result = unistr_is_substring (hval, needle);
free (hval);
}
if (!(rc == 0 || rc == MU_ERR_NOENT))
mu_diag_funcall (MU_DIAG_ERR, "mu_header_aget_value_unfold_n", NULL, rc);
-
+ free (needle);
+
return result;
}
@@ -865,9 +869,11 @@ _scan_header_all (struct parsebuf *pb, char *text)
size_t fcount = 0;
int i, rc;
int result;
-
+ char *needle;
+
mu_message_get_header (pb->msg, &header);
mu_header_get_field_count (header, &fcount);
+ unistr_downcase (text, &needle);
result = 0;
for (i = 1; result == 0 && i < fcount; i++)
{
@@ -895,9 +901,10 @@ _scan_header_all (struct parsebuf *pb, char *text)
free (hval);
hval = tmp;
}
- result = mu_c_strcasestr (hval, text) != NULL;
- free (hval);
+ result = unistr_is_substring (hval, needle);
+ free (hval);
}
+ free (needle);
return result;
}
@@ -913,7 +920,8 @@ _match_text (struct parsebuf *pb, mu_message_t msg, mu_content_type_t ct,
char *buffer = NULL;
size_t bufsize = 0;
size_t n;
-
+ char *needle;
+
mu_message_get_body (msg, &body);
mu_body_get_streamref (body, &str);
@@ -960,14 +968,16 @@ _match_text (struct parsebuf *pb, mu_message_t msg, mu_content_type_t ct,
}
}
+ unistr_downcase (text, &needle);
result = 0;
while ((rc = mu_stream_getline (str, &buffer, &bufsize, &n)) == 0
&& n > 0)
{
- result = mu_c_strcasestr (buffer, text) != NULL;
+ result = unistr_is_substring (buffer, needle);
if (result)
break;
}
+ free (needle);
mu_stream_destroy (&str);
if (rc)
mu_diag_funcall (MU_DIAG_ERR, "mu_stream_getline", NULL, rc);
diff --git a/imap4d/tests/.gitignore b/imap4d/tests/.gitignore
index 9a0dcda59..8e0167aeb 100644
--- a/imap4d/tests/.gitignore
+++ b/imap4d/tests/.gitignore
@@ -1 +1,7 @@
ckiconv
+atconfig
+atlocal
+package.m4
+testsuite
+testsuite.dir
+testsuite.log
diff --git a/imap4d/tests/search.at b/imap4d/tests/search.at
index d2369cead..69664e93f 100644
--- a/imap4d/tests/search.at
+++ b/imap4d/tests/search.at
@@ -194,5 +194,16 @@ ICONV_TEST)
SEARCH_CHECK([TEXT],[search30],
[TEXT how],
[2 3])
+
+IMAP4D_WITH_PREREQ(
+[test `echo "seo=" | ckiconv iso-8859-2 utf-8` = "xIXEmQ==" \
+ && imap4d --show-config-options | grep WITH_UNISTRING >/dev/null],
+[
+SEARCH_CHECK([BODY CHARSET (case-insensitive UTF)],[search31],
+[CHARSET utf-8 BODY "ĆWICZĄC, NAJWYŻEJ SŁYSZĘ"],
+[4],
+ICONV_TEST)
+])
+
m4_popdef([SEARCH_MBOX])
diff --git a/imap4d/unistr_c.c b/imap4d/unistr_c.c
new file mode 100644
index 000000000..6c999f142
--- /dev/null
+++ b/imap4d/unistr_c.c
@@ -0,0 +1,30 @@
+/* GNU Mailutils -- a suite of utilities for electronic mail
+ Copyright (C) 1999-2019 Free Software Foundation, Inc.
+
+ GNU Mailutils is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GNU Mailutils is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "imap4d.h"
+
+/* Variant built when libunistring is not in use.  No case conversion
+   takes place here: *OUTPUT simply receives a copy of INPUT made with
+   mu_strdup.  The comparison itself is left to mu_c_strcasestr, called
+   from unistr_is_substring.  */
+void
+unistr_downcase (char const *input, char **output)
+{
+  char *copy = mu_strdup (input);
+
+  *output = copy;
+}
+
+/* Return 1 if mu_c_strcasestr finds NEEDLE in HAYSTACK, 0 otherwise.  */
+int
+unistr_is_substring (char const *haystack, char const *needle)
+{
+  if (mu_c_strcasestr (haystack, needle) == NULL)
+    return 0;
+  return 1;
+}
diff --git a/imap4d/unistr_u8.c b/imap4d/unistr_u8.c
new file mode 100644
index 000000000..5d29f2152
--- /dev/null
+++ b/imap4d/unistr_u8.c
@@ -0,0 +1,43 @@
+/* GNU Mailutils -- a suite of utilities for electronic mail
+ Copyright (C) 1999-2019 Free Software Foundation, Inc.
+
+ GNU Mailutils is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GNU Mailutils is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "imap4d.h"
+#include <stdlib.h>
+#include <unicase.h>
+#include <unistr.h>
+#include <string.h>
+
+/* Store in *OUTPUT the lower-case form of the NUL-terminated string
+   INPUT, as produced by u8_tolower.  The terminating NUL is handed to
+   u8_tolower together with the text (hence strlen + 1).  *OUTPUT
+   receives exactly what u8_tolower returns; callers release it with
+   free.  */
+void
+unistr_downcase (char const *input, char **output)
+{
+  size_t outlen;  /* filled in by u8_tolower; not used afterwards */
+  size_t inlen = strlen (input) + 1;
+
+  *output = (char *) u8_tolower ((const uint8_t *) input, inlen,
+                                 NULL, NULL, NULL, &outlen);
+}
+
+/* Return 1 if NEEDLE occurs in HAYSTACK, 0 otherwise.
+
+   HAYSTACK is lower-cased here on every call via unistr_downcase;
+   NEEDLE is compared as is, so the caller is expected to have
+   lower-cased it beforehand.
+
+   If either argument is NULL, or if HAYSTACK could not be lower-cased
+   (unistr_downcase stored NULL), no match is reported rather than
+   handing a null pointer to u8_strstr.  */
+int
+unistr_is_substring (char const *haystack, char const *needle)
+{
+  char *lc = NULL;
+  int result;
+
+  if (haystack == NULL || needle == NULL)
+    return 0;
+
+  unistr_downcase (haystack, &lc);
+  if (lc == NULL)
+    return 0;
+
+  result = u8_strstr ((const uint8_t*) lc, (const uint8_t*) needle) != NULL;
+  free (lc);
+  return result;
+}
+
diff --git a/libmailutils/base/version.c b/libmailutils/base/version.c
index 0903b9966..df3f5ad5f 100644
--- a/libmailutils/base/version.c
+++ b/libmailutils/base/version.c
@@ -85,6 +85,9 @@ static struct mu_conf_option mu_conf_option[] = {
#ifdef WITH_READLINE
{ "WITH_READLINE", N_("GNU Readline") },
#endif
+#ifdef WITH_UNISTRING
+ { "WITH_UNISTRING", N_("Using GNU libunistring") },
+#endif
#ifdef HAVE_MYSQL
{ "HAVE_MYSQL", N_("MySQL") },
#endif

Return to:

Send suggestions and report system problems to the System administrator.