author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-06 14:37:28 (GMT) |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-06 14:37:28 (GMT) |
commit | c598bc4dee28a9480ca9b7e9d5a20d75a5baccda (patch) (unidiff) | |
tree | c768bf56be6fda3e5cac9ed3f56d5289be5905c9 /src/ellinika/elmorph.c | |
parent | 8a7e9b26e073731c82c02594d081c57aa474eade (diff) | |
download | ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.gz ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.bz2 |
Rewrite all elstr- functions to take either elstr or string as arguments.
-rw-r--r-- | src/ellinika/elmorph.c | 217 |
1 files changed, 102 insertions, 115 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 5785f8d..f55e010 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c | |||
@@ -104,5 +104,8 @@ _elstr_alloc(const char *instr, int syl) | |||
104 | elstr->sylmap = NULL; | 104 | elstr->sylmap = NULL; |
105 | elstr->nsyl = 0; | ||
106 | elstr->acc_syl = 0; | ||
107 | elstr->acc_pos = 0; | ||
105 | if (syl) | 108 | if (syl) |
106 | _elstr_syllabize(elstr); | 109 | _elstr_syllabize(elstr); |
107 | 110 | ||
108 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); | 111 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); |
@@ -142,2 +145,4 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) | |||
142 | 145 | ||
146 | if (src->len == 0) | ||
147 | return; | ||
143 | wp = realloc(dest->str, | 148 | wp = realloc(dest->str, |
@@ -221,2 +226,31 @@ _elstr_init() | |||
221 | 226 | ||
227 | static SCM | ||
228 | force_elstr(struct elstr **ep, SCM scm, int sylopt, | ||
229 | int arg, const char *func_name) | ||
230 | { | ||
231 | struct elstr *elstr; | ||
232 | |||
233 | if (scm_is_elstr(scm)) { | ||
234 | elstr = (struct elstr*) SCM_CDR(scm); | ||
235 | } else { | ||
236 | SCM newscm; | ||
237 | char *str; | ||
238 | |||
239 | SCM_ASSERT(scm_is_string(scm), scm, arg, func_name); | ||
240 | str = scm_to_locale_string(scm); | ||
241 | newscm = _elstr_alloc(str, sylopt); | ||
242 | free(str); | ||
243 | if (newscm == SCM_EOL) | ||
244 | scm_misc_error(func_name, | ||
245 | "Invalid input string: ~S", | ||
246 | scm_list_1(scm)); | ||
247 | scm = newscm; | ||
248 | elstr = (struct elstr*) SCM_CDR(newscm); | ||
249 | } | ||
250 | if (ep) | ||
251 | *ep = elstr; | ||
252 | return scm; | ||
253 | } | ||
254 | |||
255 | |||
222 | SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, | 256 | SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, |
@@ -239,10 +273,3 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | |||
239 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); | 273 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); |
240 | str = scm_to_locale_string(string); | 274 | return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME); |
241 | scm = _elstr_alloc(str, 1); | ||
242 | free(str); | ||
243 | if (scm == SCM_EOL) | ||
244 | scm_misc_error(FUNC_NAME, | ||
245 | "Invalid input string: ~S", | ||
246 | scm_list_1(string)); | ||
247 | return scm; | ||
248 | } | 275 | } |
@@ -277,5 +304,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0, | |||
277 | struct elstr *elstr; | 304 | struct elstr *elstr; |
278 | 305 | force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); | |
279 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | ||
280 | elstr = (struct elstr*) SCM_CDR(el); | ||
281 | return scm_from_uint(elstr->len); | 306 | return scm_from_uint(elstr->len); |
@@ -291,5 +316,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables", | |||
291 | struct elstr *elstr; | 316 | struct elstr *elstr; |
292 | 317 | force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); | |
293 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | ||
294 | elstr = (struct elstr*) SCM_CDR(el); | ||
295 | return scm_from_uint(elstr->nsyl); | 318 | return scm_from_uint(elstr->nsyl); |
@@ -307,4 +330,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop", | |||
307 | 330 | ||
308 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | 331 | force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); |
309 | elstr = (struct elstr*) SCM_CDR(el); | ||
310 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); | 332 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); |
@@ -332,5 +354,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0, | |||
332 | struct elstr *elstr; | 354 | struct elstr *elstr; |
333 | 355 | force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); | |
334 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | ||
335 | elstr = (struct elstr*) SCM_CDR(el); | ||
336 | return scm_from_uint(elstr->acc_pos); | 356 | return scm_from_uint(elstr->acc_pos); |
@@ -346,5 +366,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable", | |||
346 | struct elstr *elstr; | 366 | struct elstr *elstr; |
347 | 367 | force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); | |
348 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | ||
349 | elstr = (struct elstr*) SCM_CDR(el); | ||
350 | return scm_from_uint(elstr->acc_syl); | 368 | return scm_from_uint(elstr->acc_syl); |
@@ -364,4 +382,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable", | |||
364 | 382 | ||
365 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | 383 | force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); |
366 | elstr = (struct elstr*) SCM_CDR(el); | ||
367 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); | 384 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); |
@@ -400,4 +417,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character", | |||
400 | 417 | ||
401 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | 418 | force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); |
402 | elstr = (struct elstr*) SCM_CDR(el); | ||
403 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); | 419 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); |
@@ -425,9 +441,12 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t), | |||
425 | 441 | ||
426 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | 442 | if (destructive) { |
427 | elstr = (struct elstr*) SCM_CDR(el); | 443 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); |
428 | if (destructive) | 444 | elstr = (struct elstr*) SCM_CDR(el); |
429 | scm = SCM_UNSPECIFIED; | 445 | scm = SCM_UNSPECIFIED; |
430 | else { | 446 | } else { |
431 | scm = _elstr_dup(elstr); | 447 | scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); |
432 | elstr = (struct elstr*) SCM_CDR(scm); | 448 | if (scm == el) { |
449 | scm = _elstr_dup(elstr); | ||
450 | elstr = (struct elstr*) SCM_CDR(scm); | ||
451 | } | ||
433 | } | 452 | } |
@@ -484,9 +503,12 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name) | |||
484 | 503 | ||
485 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | 504 | if (destructive) { |
486 | elstr = (struct elstr*) SCM_CDR(el); | 505 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); |
487 | if (destructive) | ||
488 | scm = SCM_UNSPECIFIED; | 506 | scm = SCM_UNSPECIFIED; |
489 | else { | 507 | elstr = (struct elstr*) SCM_CDR(el); |
490 | scm = _elstr_dup(elstr); | 508 | } else { |
491 | elstr = (struct elstr*) SCM_CDR(scm); | 509 | scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); |
510 | if (scm == el) { | ||
511 | scm = _elstr_dup(elstr); | ||
512 | elstr = (struct elstr*) SCM_CDR(scm); | ||
513 | } | ||
492 | } | 514 | } |
@@ -528,5 +550,9 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) | |||
528 | 550 | ||
529 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | 551 | if (destructive) { |
552 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | ||
553 | elstr = (struct elstr*) SCM_CDR(el); | ||
554 | } else | ||
555 | scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); | ||
556 | |||
530 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); | 557 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); |
531 | elstr = (struct elstr*) SCM_CDR(el); | ||
532 | num = scm_to_uint(n); | 558 | num = scm_to_uint(n); |
@@ -544,3 +570,3 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) | |||
544 | scm = SCM_UNSPECIFIED; | 570 | scm = SCM_UNSPECIFIED; |
545 | else { | 571 | else if (scm == el) { |
546 | scm = _elstr_dup(elstr); | 572 | scm = _elstr_dup(elstr); |
@@ -600,5 +626,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask", | |||
600 | 626 | ||
601 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); | 627 | force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); |
602 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); | 628 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); |
603 | elstr = (struct elstr*) SCM_CDR(el); | ||
604 | num = scm_to_int(n); | 629 | num = scm_to_int(n); |
@@ -686,9 +711,12 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name) | |||
686 | 711 | ||
687 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | 712 | if (destructive) { |
688 | elstr = (struct elstr*) SCM_CDR(el); | 713 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); |
689 | if (destructive) | 714 | elstr = (struct elstr*) SCM_CDR(el); |
690 | scm = SCM_UNSPECIFIED; | 715 | scm = SCM_UNSPECIFIED; |
691 | else { | 716 | } else { |
692 | scm = _elstr_dup(elstr); | 717 | scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); |
693 | elstr = (struct elstr*) SCM_CDR(scm); | 718 | if (scm == el) { |
719 | scm = _elstr_dup(elstr); | ||
720 | elstr = (struct elstr*) SCM_CDR(scm); | ||
721 | } | ||
694 | } | 722 | } |
@@ -728,6 +756,9 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) | |||
728 | 756 | ||
729 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | 757 | if (destructive) { |
758 | SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); | ||
759 | elstr = (struct elstr*) SCM_CDR(el); | ||
760 | } else | ||
761 | scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name); | ||
730 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); | 762 | SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); |
731 | SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name); | 763 | SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name); |
732 | elstr = (struct elstr*) SCM_CDR(el); | ||
733 | num = scm_to_int(n); | 764 | num = scm_to_int(n); |
@@ -744,3 +775,3 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name) | |||
744 | scm = SCM_UNSPECIFIED; | 775 | scm = SCM_UNSPECIFIED; |
745 | else { | 776 | else if (scm == el) { |
746 | scm = _elstr_dup(elstr); | 777 | scm = _elstr_dup(elstr); |
@@ -783,29 +814,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", | |||
783 | { | 814 | { |
784 | struct elstr *elstr; | 815 | struct elstr *elstr, *ep; |
785 | unsigned *wc, *wtmp = NULL, *p; | 816 | unsigned *p; |
786 | unsigned wlen; | 817 | |
787 | 818 | force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); | |
788 | SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); | 819 | force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME); |
789 | elstr = (struct elstr*) SCM_CDR(word); | 820 | p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, |
790 | if (scm_is_elstr(needle)) { | 821 | ep->str, ep->len); |
791 | struct elstr *ep = (struct elstr*) SCM_CDR(needle); | ||
792 | wc = ep->str; | ||
793 | wlen = ep->len; | ||
794 | } else { | ||
795 | SCM scm; | ||
796 | char *str; | ||
797 | |||
798 | SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME); | ||
799 | str = scm_to_locale_string(needle); | ||
800 | if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) { | ||
801 | free(str); | ||
802 | scm_misc_error(FUNC_NAME, | ||
803 | "Invalid needle string: ~S", | ||
804 | scm_list_1(needle)); | ||
805 | } | ||
806 | free(str); | ||
807 | wc = wtmp; | ||
808 | } | ||
809 | p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen); | ||
810 | free(wtmp); | ||
811 | if (p) | 822 | if (p) |
@@ -817,18 +828,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", | |||
817 | static int | 828 | static int |
818 | _suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name) | 829 | _suffix_matches(struct elstr *elstr, struct elstr *ep) |
819 | { | 830 | { |
820 | struct elstr *ep; | ||
821 | |||
822 | if (scm_is_elstr(suffix)) { | ||
823 | ep = (struct elstr*) SCM_CDR(suffix); | ||
824 | } else { | ||
825 | SCM scm; | ||
826 | char *str; | ||
827 | |||
828 | SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name); | ||
829 | str = scm_to_locale_string(suffix); | ||
830 | scm = _elstr_alloc(str, 0); | ||
831 | free(str); | ||
832 | ep = (struct elstr*) SCM_CDR(scm); | ||
833 | } | ||
834 | return (ep->len < elstr->len && | 831 | return (ep->len < elstr->len && |
@@ -845,7 +842,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?", | |||
845 | { | 842 | { |
846 | struct elstr *elstr; | 843 | struct elstr *elstr, *ep; |
847 | 844 | ||
848 | SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); | 845 | force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME); |
849 | elstr = (struct elstr*) SCM_CDR(word); | 846 | force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME); |
850 | if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME)) | 847 | if (_suffix_matches(elstr, ep)) |
851 | return suffix; | 848 | return suffix; |
@@ -854,3 +851,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?", | |||
854 | SCM val = SCM_CAR(rest); | 851 | SCM val = SCM_CAR(rest); |
855 | if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME)) | 852 | force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME); |
853 | if (_suffix_matches(elstr, ep)) | ||
856 | return val; | 854 | return val; |
@@ -871,18 +869,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", | |||
871 | for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { | 869 | for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { |
870 | struct elstr *elt; | ||
872 | SCM val = SCM_CAR(rest); | 871 | SCM val = SCM_CAR(rest); |
873 | if (scm_is_elstr(val)) { | 872 | |
874 | struct elstr *elt = (struct elstr*) SCM_CDR(val); | 873 | force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME); |
875 | _elstr_concat(elstr, elt, FUNC_NAME); | 874 | _elstr_concat(elstr, elt, FUNC_NAME); |
876 | } else if (scm_is_string(val)) { | ||
877 | char *s = scm_to_locale_string(val); | ||
878 | if (s[0]) { | ||
879 | SCM tmp = _elstr_alloc(s, 0); | ||
880 | free(s); | ||
881 | _elstr_concat(elstr, | ||
882 | (struct elstr*) SCM_CDR(tmp), | ||
883 | FUNC_NAME); | ||
884 | } else | ||
885 | free(s); | ||
886 | } else | ||
887 | scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest); | ||
888 | } | 875 | } |