From c598bc4dee28a9480ca9b7e9d5a20d75a5baccda Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Mon, 6 Jun 2011 17:37:28 +0300 Subject: Rewrite all elstr- functions to take either elstr or string as arguments. --- src/ellinika/elmorph.c | 217 +++++++++++++++++++++++-------------------------- 1 file changed, 102 insertions(+), 115 deletions(-) (limited to 'src/ellinika/elmorph.c') diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 5785f8d..f55e010 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -102,9 +102,12 @@ _elstr_alloc(const char *instr, int syl) elstr->str = wptr; elstr->len = wlen; elstr->sylmap = NULL; + elstr->nsyl = 0; + elstr->acc_syl = 0; + elstr->acc_pos = 0; if (syl) _elstr_syllabize(elstr); - + SCM_RETURN_NEWSMOB(_elstr_tag, elstr); } @@ -140,6 +143,8 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) { unsigned *wp; + if (src->len == 0) + return; wp = realloc(dest->str, sizeof(dest->str[0]) * (dest->len + src->len)); if (!wp) @@ -219,6 +224,35 @@ _elstr_init() #define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) +static SCM +force_elstr(struct elstr **ep, SCM scm, int sylopt, + int arg, const char *func_name) +{ + struct elstr *elstr; + + if (scm_is_elstr(scm)) { + elstr = (struct elstr*) SCM_CDR(scm); + } else { + SCM newscm; + char *str; + + SCM_ASSERT(scm_is_string(scm), scm, arg, func_name); + str = scm_to_locale_string(scm); + newscm = _elstr_alloc(str, sylopt); + free(str); + if (newscm == SCM_EOL) + scm_misc_error(func_name, + "Invalid input string: ~S", + scm_list_1(scm)); + scm = newscm; + elstr = (struct elstr*) SCM_CDR(newscm); + } + if (ep) + *ep = elstr; + return scm; +} + + SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, (SCM string), "Return true if STRING is an elstr\n") @@ -237,14 +271,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, SCM scm; SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); - str = scm_to_locale_string(string); - scm = _elstr_alloc(str, 1); - free(str); - if (scm == SCM_EOL) - scm_misc_error(FUNC_NAME, - "Invalid input string: ~S", - scm_list_1(string)); - return scm; + return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME); } #undef FUNC_NAME @@ -275,9 +302,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0, #define FUNC_NAME s_scm_elstr_length { struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->len); } #undef FUNC_NAME @@ -289,9 +314,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables", #define FUNC_NAME s_scm_elstr_number_of_syllables { struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->nsyl); } #undef FUNC_NAME @@ -305,8 +328,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop", struct elstr *elstr; unsigned num, start; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); num = scm_to_uint(n); if (num > elstr->nsyl) @@ -330,9 +352,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0, #define FUNC_NAME s_scm_elstr_accent_position { struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->acc_pos); } #undef FUNC_NAME @@ -344,9 +364,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable", #define FUNC_NAME s_scm_elstr_accented_syllable { struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->acc_syl); } #undef FUNC_NAME @@ -362,8 +380,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable", SCM scm; unsigned num, start; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); num = scm_to_uint(n); if (num > elstr->nsyl) @@ -398,8 +415,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character", char r[6]; int len; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); num = scm_to_uint(n); if (num >= elstr->len) @@ -423,13 +439,16 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t), struct elstr *elstr; SCM scm; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); - elstr = (struct elstr*) SCM_CDR(el); - if (destructive) + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); scm = SCM_UNSPECIFIED; - else { - scm = _elstr_dup(elstr); - elstr = (struct elstr*) SCM_CDR(scm); + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { + scm = _elstr_dup(elstr); + elstr = (struct elstr*) SCM_CDR(scm); + } } chgfun(elstr->str, elstr->len); return scm; @@ -482,13 +501,16 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name) unsigned i; SCM scm; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); - elstr = (struct elstr*) SCM_CDR(el); - if (destructive) + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); scm = SCM_UNSPECIFIED; - else { - scm = _elstr_dup(elstr); - elstr = (struct elstr*) SCM_CDR(scm); + elstr = (struct elstr*) SCM_CDR(el); + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { + scm = _elstr_dup(elstr); + elstr = (struct elstr*) SCM_CDR(scm); + } } for (i = 0; i < elstr->len; i++) elstr->str[i] = elchr_deaccent(elstr->str[i]); @@ -526,9 +548,13 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) SCM scm; int dstate; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); + } else + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); - elstr = (struct elstr*) SCM_CDR(el); num = scm_to_uint(n); if (num > elstr->nsyl) scm_misc_error(func_name, @@ -542,7 +568,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) if (destructive) scm = SCM_UNSPECIFIED; - else { + else if (scm == el) { scm = _elstr_dup(elstr); elstr = (struct elstr*) SCM_CDR(scm); } @@ -598,9 +624,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask", struct elstr *elstr; int num; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); num = scm_to_int(n); if (num < 0) num += elstr->len; @@ -684,13 +709,16 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name) unsigned *wc; size_t wclen; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); - elstr = (struct elstr*) SCM_CDR(el); - if (destructive) + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); scm = SCM_UNSPECIFIED; - else { - scm = _elstr_dup(elstr); - elstr = (struct elstr*) SCM_CDR(scm); + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { + scm = _elstr_dup(elstr); + elstr = (struct elstr*) SCM_CDR(scm); + } } if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen)) scm_memory_error(func_name); @@ -726,10 +754,13 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) unsigned len; SCM scm; - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); + } else + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name); - elstr = (struct elstr*) SCM_CDR(el); num = scm_to_int(n); len = scm_to_uint(l); @@ -742,7 +773,7 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) if (destructive) scm = SCM_UNSPECIFIED; - else { + else if (scm == el) { scm = _elstr_dup(elstr); elstr = (struct elstr*) SCM_CDR(scm); } @@ -781,33 +812,13 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", "") #define FUNC_NAME s_scm_elstr_index { - struct elstr *elstr; - unsigned *wc, *wtmp = NULL, *p; - unsigned wlen; - - SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(word); - if (scm_is_elstr(needle)) { - struct elstr *ep = (struct elstr*) SCM_CDR(needle); - wc = ep->str; - wlen = ep->len; - } else { - SCM scm; - char *str; - - SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME); - str = scm_to_locale_string(needle); - if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) { - free(str); - scm_misc_error(FUNC_NAME, - "Invalid needle string: ~S", - scm_list_1(needle)); - } - free(str); - wc = wtmp; - } - p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen); - free(wtmp); + struct elstr *elstr, *ep; + unsigned *p; + + force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); + force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME); + p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, + ep->str, ep->len); if (p) return scm_from_int(p - elstr->str); return SCM_BOOL_F; @@ -815,22 +826,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", #undef FUNC_NAME static int -_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name) +_suffix_matches(struct elstr *elstr, struct elstr *ep) { - struct elstr *ep; - - if (scm_is_elstr(suffix)) { - ep = (struct elstr*) SCM_CDR(suffix); - } else { - SCM scm; - char *str; - - SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name); - str = scm_to_locale_string(suffix); - scm = _elstr_alloc(str, 0); - free(str); - ep = (struct elstr*) SCM_CDR(scm); - } return (ep->len < elstr->len && memcmp(elstr->str + elstr->len - ep->len, ep->str, @@ -843,16 +840,17 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?", "Return #t if WORDS ends with SUFFIX") #define FUNC_NAME s_scm_elstr_suffix_p { - struct elstr *elstr; + struct elstr *elstr, *ep; - SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(word); - if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME)) + force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); + force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME); + if (_suffix_matches(elstr, ep)) return suffix; for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { SCM val = SCM_CAR(rest); - if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME)) + force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME); + if (_suffix_matches(elstr, ep)) return val; } return SCM_BOOL_F; @@ -869,22 +867,11 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", struct elstr *elstr = (struct elstr*) SCM_CDR(ret); for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { + struct elstr *elt; SCM val = SCM_CAR(rest); - if (scm_is_elstr(val)) { - struct elstr *elt = (struct elstr*) SCM_CDR(val); - _elstr_concat(elstr, elt, FUNC_NAME); - } else if (scm_is_string(val)) { - char *s = scm_to_locale_string(val); - if (s[0]) { - SCM tmp = _elstr_alloc(s, 0); - free(s); - _elstr_concat(elstr, - (struct elstr*) SCM_CDR(tmp), - FUNC_NAME); - } else - free(s); - } else - scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest); + + force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME); + _elstr_concat(elstr, elt, FUNC_NAME); } _elstr_syllabize(elstr); return ret; -- cgit v1.2.1