about | summary | refs | log | tree | commit | diff
path: root/lib/utf8.c
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org>, 2018-09-16 10:24:48 +0300
committer: Sergey Poznyakoff <gray@gnu.org>, 2018-09-16 10:24:48 +0300
commit: 1c5e14432740e8af3a42e5e83a396e82221499ad (patch)
tree: 622ba85b52b1e6c09e7acd01d22419d2df4b5bfc /lib/utf8.c
parent: b785e98d2a06eb1f74f71337431e38f896f397a2 (diff)
download: dico-1c5e14432740e8af3a42e5e83a396e82221499ad.tar.gz
          dico-1c5e14432740e8af3a42e5e83a396e82221499ad.tar.bz2
utf8_compare: new general-purpose comparator function
* lib/utf8.c (utf8_strcmp_cc)
(utf8_strcmp_alnumspace_cc, utf8_strcmp_alnumspace)
(utf8_strcasecmp_alnumspace): Remove.
(utf8_compare): New function.
(utf8_strcmp, utf8_strcasecmp)
(utf8_strncasecmp): Rewrite as wrappers over utf8_compare.
* include/dico/utf8.h: Update protos.
* modules/dict.org/dictorg.c: Use new utf comparators.
Diffstat (limited to 'lib/utf8.c')
-rw-r--r-- lib/utf8.c 125
1 files changed, 27 insertions, 98 deletions
diff --git a/lib/utf8.c b/lib/utf8.c
index 837a594..3ba6048 100644
--- a/lib/utf8.c
+++ b/lib/utf8.c
@@ -1748,120 +1748,43 @@ urf8_symcasecmp(char *a, char *b)
return 1;
return 0;
}
-
-enum {
- case_sensitive,
- case_insensitive
-};
-
-int
-utf8_strcmp_cc(char *a, char *b, int ci)
-{
- int alen, blen;
-
- for (; *a; a += alen, b += blen) {
- unsigned wa, wb;
-
- if (*b == 0)
- return 1;
-
- alen = utf8_char_width(a);
- if (alen == 0)
- return -1;
- utf8_mbtowc(&wa, a, alen);
- blen = utf8_char_width(b);
- if (blen == 0)
- return 1;
- utf8_mbtowc(&wb, b, blen);
- if (ci == case_insensitive) {
- wa = utf8_wc_toupper(wa);
- wb = utf8_wc_toupper(wb);
- }
- if (wa < wb)
- return -1;
- if (wa > wb)
- return 1;
-
- }
- if (*b)
- return -1;
- return 0;
-}
-
-int
-utf8_strcmp(char *a, char *b)
-{
- return utf8_strcmp_cc(a, b, case_sensitive);
-}
-
-int
-utf8_strcasecmp(char *a, char *b)
-{
- return utf8_strcmp_cc(a, b, case_insensitive);
-}
-
-int
-utf8_strncasecmp(char *a, char *b, size_t maxlen)
-{
- int alen, blen;
- unsigned asz = 0, bsz = 0;
-
- while (asz < maxlen) {
- unsigned wa, wb;
-
- if (*a == 0)
- return (*b == 0) ? 0 : -1;
-
- if (*b == 0)
- return 1;
-
- alen = utf8_char_width(a);
- if (alen == 0)
- return -1;
- utf8_mbtowc(&wa, a, alen);
- blen = utf8_char_width(b);
- if (blen == 0)
- return 1;
- utf8_mbtowc(&wb, b, blen);
- wa = utf8_wc_toupper(wa);
- wb = utf8_wc_toupper(wb);
- if (wa < wb)
- return -1;
- if (wa > wb)
- return 1;
- a += alen;
- b += blen;
- asz ++;
- bsz ++;
- }
- return 0;
-}
-#define is_alnumspace(c) (utf8_wc_is_alnum(c) || utf8_wc_is_space(c))
int
-utf8_strcmp_alnumspace_cc(char *a, char *b, int ci)
+utf8_compare(char const *a, char const *b,
+ int ci, size_t maxlen, int (*wcsel)(unsigned))
{
int alen, blen;
+ size_t an = 0, bn = 0;
unsigned wa, wb;
- while (*a) {
+ while (1) {
+ if (maxlen != 0 && an == maxlen)
+ return 0;
+ if (*a == 0)
+ break;
+
alen = utf8_char_width(a);
if (alen == 0)
return -1;
+
utf8_mbtowc(&wa, a, alen);
a += alen;
+ an++;
- if (is_alnumspace(wa)) {
+ if (!wcsel || wcsel(wa)) {
if (*b == 0)
return 1;
while (*b) {
+ if (maxlen != 0 && bn == maxlen)
+ return 0;
blen = utf8_char_width(b);
if (blen == 0)
return 1;
utf8_mbtowc(&wb, b, blen);
b += blen;
+ bn++;
- if (is_alnumspace(wb)) {
+ if (!wcsel || wcsel(wb)) {
if (ci == case_insensitive) {
wa = utf8_wc_toupper(wa);
wb = utf8_wc_toupper(wb);
@@ -1882,23 +1805,29 @@ utf8_strcmp_alnumspace_cc(char *a, char *b, int ci)
return 1;
utf8_mbtowc(&wb, b, blen);
b += blen;
- if (is_alnumspace(wb))
+ if (!wcsel || wcsel(wb))
return -1;
}
return 0;
}
+
+int
+utf8_strcmp(char const *a, char const *b)
+{
+ return utf8_compare(a, b, case_sensitive, 0, NULL);
+}
int
-utf8_strcmp_alnumspace(char *a, char *b)
+utf8_strcasecmp(char const *a, char const *b)
{
- return utf8_strcmp_alnumspace_cc(a, b, case_sensitive);
+ return utf8_compare(a, b, case_insensitive, 0, NULL);
}
int
-utf8_strcasecmp_alnumspace(char *a, char *b)
+utf8_strncasecmp(char const *a, char const *b, size_t maxlen)
{
- return utf8_strcmp_alnumspace_cc(a, b, case_insensitive);
+ return utf8_compare(a, b, case_insensitive, maxlen, NULL);
}
unsigned

Return to:

Send suggestions and report system problems to the System administrator.