diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-06 17:37:28 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-06 17:37:28 +0300 |
commit | c598bc4dee28a9480ca9b7e9d5a20d75a5baccda (patch) | |
tree | c768bf56be6fda3e5cac9ed3f56d5289be5905c9 /src/ellinika | |
parent | 8a7e9b26e073731c82c02594d081c57aa474eade (diff) | |
download | ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.gz ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.bz2 |
Rewrite all elstr- functions to take either elstr or string as arguments.
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/elmorph.c | 185 | ||||
-rw-r--r-- | src/ellinika/elmorph.scm4 | 10 |
2 files changed, 94 insertions, 101 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 5785f8d..f55e010 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -104,2 +104,5 @@ _elstr_alloc(const char *instr, int syl) elstr->sylmap = NULL; + elstr->nsyl = 0; + elstr->acc_syl = 0; + elstr->acc_pos = 0; if (syl) @@ -142,2 +145,4 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) + if (src->len == 0) + return; wp = realloc(dest->str, @@ -221,2 +226,31 @@ _elstr_init() +static SCM +force_elstr(struct elstr **ep, SCM scm, int sylopt, + int arg, const char *func_name) +{ + struct elstr *elstr; + + if (scm_is_elstr(scm)) { + elstr = (struct elstr*) SCM_CDR(scm); + } else { + SCM newscm; + char *str; + + SCM_ASSERT(scm_is_string(scm), scm, arg, func_name); + str = scm_to_locale_string(scm); + newscm = _elstr_alloc(str, sylopt); + free(str); + if (newscm == SCM_EOL) + scm_misc_error(func_name, + "Invalid input string: ~S", + scm_list_1(scm)); + scm = newscm; + elstr = (struct elstr*) SCM_CDR(newscm); + } + if (ep) + *ep = elstr; + return scm; +} + + SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, @@ -239,10 +273,3 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); - str = scm_to_locale_string(string); - scm = _elstr_alloc(str, 1); - free(str); - if (scm == SCM_EOL) - scm_misc_error(FUNC_NAME, - "Invalid input string: ~S", - scm_list_1(string)); - return scm; + return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME); } @@ -277,5 +304,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0, struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->len); @@ -291,5 +316,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables", struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->nsyl); @@ -307,4 +330,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop", - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); @@ -332,5 +354,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0, struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->acc_pos); @@ -346,5 +366,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable", struct elstr *elstr; - - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->acc_syl); @@ -364,4 +382,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable", - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); @@ -400,4 +417,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character", - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); @@ -425,7 +441,9 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t), + if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); elstr = (struct elstr*) SCM_CDR(el); - if (destructive) scm = SCM_UNSPECIFIED; - else { + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { scm = _elstr_dup(elstr); @@ -433,2 +451,3 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t), } + } chgfun(elstr->str, elstr->len); @@ -484,7 +503,9 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name) + if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); - elstr = (struct elstr*) SCM_CDR(el); - if (destructive) scm = SCM_UNSPECIFIED; - else { + elstr = (struct elstr*) SCM_CDR(el); + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { scm = _elstr_dup(elstr); @@ -492,2 +513,3 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name) } + } for (i = 0; i < elstr->len; i++) @@ -528,5 +550,9 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) + if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); - SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); elstr = (struct elstr*) SCM_CDR(el); + } else + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + + SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); num = scm_to_uint(n); @@ -544,3 +570,3 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) scm = SCM_UNSPECIFIED; - else { + else if (scm == el) { scm = _elstr_dup(elstr); @@ -600,5 +626,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask", - SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); + force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(el); num = scm_to_int(n); @@ -686,7 +711,9 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name) + if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); elstr = (struct elstr*) SCM_CDR(el); - if (destructive) scm = SCM_UNSPECIFIED; - else { + } else { + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); + if (scm == el) { scm = _elstr_dup(elstr); @@ -694,2 +721,3 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name) } + } if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen)) @@ -728,6 +756,9 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) + if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); + } else + scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name); - elstr = (struct elstr*) SCM_CDR(el); num = scm_to_int(n); @@ -744,3 +775,3 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) scm = SCM_UNSPECIFIED; - else { + else if (scm == el) { scm = _elstr_dup(elstr); @@ -783,29 +814,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", { - struct elstr *elstr; - unsigned *wc, *wtmp = NULL, *p; - unsigned wlen; - - SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(word); - if (scm_is_elstr(needle)) { - struct elstr *ep = (struct elstr*) SCM_CDR(needle); - wc = ep->str; - wlen = ep->len; - } else { - SCM scm; - char *str; + struct elstr *elstr, *ep; + unsigned *p; - SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME); - str = scm_to_locale_string(needle); - if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) { - free(str); - scm_misc_error(FUNC_NAME, - "Invalid needle string: ~S", - scm_list_1(needle)); - } - free(str); - wc = wtmp; - } - p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen); - free(wtmp); + force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); + force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME); + p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, + ep->str, ep->len); if (p) @@ -817,18 +828,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", static int -_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name) +_suffix_matches(struct elstr *elstr, struct elstr *ep) { - struct elstr *ep; - - if (scm_is_elstr(suffix)) { - ep = (struct elstr*) SCM_CDR(suffix); - } else { - SCM scm; - char *str; - - SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name); - str = scm_to_locale_string(suffix); - scm = _elstr_alloc(str, 0); - free(str); - ep = (struct elstr*) SCM_CDR(scm); - } return (ep->len < elstr->len && @@ -845,7 +842,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?", { - struct elstr *elstr; + struct elstr *elstr, *ep; - SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); - elstr = (struct elstr*) SCM_CDR(word); - if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME)) + force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); + force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME); + if (_suffix_matches(elstr, ep)) return suffix; @@ -854,3 +851,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?", SCM val = SCM_CAR(rest); - if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME)) + force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME); + if (_suffix_matches(elstr, ep)) return val; @@ -871,18 +869,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { + struct elstr *elt; SCM val = SCM_CAR(rest); - if (scm_is_elstr(val)) { - struct elstr *elt = (struct elstr*) SCM_CDR(val); + + force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME); _elstr_concat(elstr, elt, FUNC_NAME); - } else if (scm_is_string(val)) { - char *s = scm_to_locale_string(val); - if (s[0]) { - SCM tmp = _elstr_alloc(s, 0); - free(s); - _elstr_concat(elstr, - (struct elstr*) SCM_CDR(tmp), - FUNC_NAME); - } else - free(s); - } else - scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest); } diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4 index e3ed4b5..f916d1c 100644 --- a/src/ellinika/elmorph.scm4 +++ b/src/ellinika/elmorph.scm4 @@ -23,2 +23,5 @@ (define-public (elstr-trim word n) + (let ((word (if (string? word) + (string->elstr word) + word))) (cond @@ -29,5 +32,8 @@ (else - word))) + word)))) (define-public (elstr-trim! word n) + (let ((word (if (string? word) + (string->elstr word) + word))) (cond @@ -36,3 +42,3 @@ ((< n 0) - (elstr-slice! word 0 (+ (elstr-length word) n))))) + (elstr-slice! word 0 (+ (elstr-length word) n)))))) |