summaryrefslogtreecommitdiffabout
path: root/src/ellinika
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-06 14:37:28 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2011-06-06 14:37:28 (GMT)
commitc598bc4dee28a9480ca9b7e9d5a20d75a5baccda (patch) (unidiff)
treec768bf56be6fda3e5cac9ed3f56d5289be5905c9 /src/ellinika
parent8a7e9b26e073731c82c02594d081c57aa474eade (diff)
downloadellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.gz
ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.bz2
Rewrite all elstr- functions to take either elstr or string as arguments.
Diffstat (limited to 'src/ellinika') (more/less context) (ignore whitespace changes)
-rw-r--r--src/ellinika/elmorph.c217
-rw-r--r--src/ellinika/elmorph.scm430
2 files changed, 120 insertions, 127 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -103,7 +103,10 @@ _elstr_alloc(const char *instr, int syl)
103 elstr->len = wlen; 103 elstr->len = wlen;
104 elstr->sylmap = NULL; 104 elstr->sylmap = NULL;
105 elstr->nsyl = 0;
106 elstr->acc_syl = 0;
107 elstr->acc_pos = 0;
105 if (syl) 108 if (syl)
106 _elstr_syllabize(elstr); 109 _elstr_syllabize(elstr);
107 110
108 SCM_RETURN_NEWSMOB(_elstr_tag, elstr); 111 SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
109} 112}
@@ -141,4 +144,6 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
141 unsigned *wp; 144 unsigned *wp;
142 145
146 if (src->len == 0)
147 return;
143 wp = realloc(dest->str, 148 wp = realloc(dest->str,
144 sizeof(dest->str[0]) * (dest->len + src->len)); 149 sizeof(dest->str[0]) * (dest->len + src->len));
@@ -220,4 +225,33 @@ _elstr_init()
220#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) 225#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
221 226
227static SCM
228force_elstr(struct elstr **ep, SCM scm, int sylopt,
229 int arg, const char *func_name)
230{
231 struct elstr *elstr;
232
233 if (scm_is_elstr(scm)) {
234 elstr = (struct elstr*) SCM_CDR(scm);
235 } else {
236 SCM newscm;
237 char *str;
238
239 SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
240 str = scm_to_locale_string(scm);
241 newscm = _elstr_alloc(str, sylopt);
242 free(str);
243 if (newscm == SCM_EOL)
244 scm_misc_error(func_name,
245 "Invalid input string: ~S",
246 scm_list_1(scm));
247 scm = newscm;
248 elstr = (struct elstr*) SCM_CDR(newscm);
249 }
250 if (ep)
251 *ep = elstr;
252 return scm;
253}
254
255
222SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, 256SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
223 (SCM string), 257 (SCM string),
@@ -238,12 +272,5 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
238 272
239 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); 273 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
240 str = scm_to_locale_string(string); 274 return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
241 scm = _elstr_alloc(str, 1);
242 free(str);
243 if (scm == SCM_EOL)
244 scm_misc_error(FUNC_NAME,
245 "Invalid input string: ~S",
246 scm_list_1(string));
247 return scm;
248} 275}
249#undef FUNC_NAME 276#undef FUNC_NAME
@@ -276,7 +303,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
276{ 303{
277 struct elstr *elstr; 304 struct elstr *elstr;
278 305 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
279 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
280 elstr = (struct elstr*) SCM_CDR(el);
281 return scm_from_uint(elstr->len); 306 return scm_from_uint(elstr->len);
282} 307}
@@ -290,7 +315,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
290{ 315{
291 struct elstr *elstr; 316 struct elstr *elstr;
292 317 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
293 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
294 elstr = (struct elstr*) SCM_CDR(el);
295 return scm_from_uint(elstr->nsyl); 318 return scm_from_uint(elstr->nsyl);
296} 319}
@@ -306,6 +329,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
306 unsigned num, start; 329 unsigned num, start;
307 330
308 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 331 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
309 elstr = (struct elstr*) SCM_CDR(el);
310 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 332 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
311 num = scm_to_uint(n); 333 num = scm_to_uint(n);
@@ -331,7 +353,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
331{ 353{
332 struct elstr *elstr; 354 struct elstr *elstr;
333 355 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
334 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
335 elstr = (struct elstr*) SCM_CDR(el);
336 return scm_from_uint(elstr->acc_pos); 356 return scm_from_uint(elstr->acc_pos);
337} 357}
@@ -345,7 +365,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
345{ 365{
346 struct elstr *elstr; 366 struct elstr *elstr;
347 367 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
348 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
349 elstr = (struct elstr*) SCM_CDR(el);
350 return scm_from_uint(elstr->acc_syl); 368 return scm_from_uint(elstr->acc_syl);
351} 369}
@@ -363,6 +381,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
363 unsigned num, start; 381 unsigned num, start;
364 382
365 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 383 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
366 elstr = (struct elstr*) SCM_CDR(el);
367 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 384 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
368 num = scm_to_uint(n); 385 num = scm_to_uint(n);
@@ -399,6 +416,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
399 int len; 416 int len;
400 417
401 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 418 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
402 elstr = (struct elstr*) SCM_CDR(el);
403 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 419 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
404 num = scm_to_uint(n); 420 num = scm_to_uint(n);
@@ -424,11 +440,14 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
424 SCM scm; 440 SCM scm;
425 441
426 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 442 if (destructive) {
427 elstr = (struct elstr*) SCM_CDR(el); 443 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
428 if (destructive) 444 elstr = (struct elstr*) SCM_CDR(el);
429 scm = SCM_UNSPECIFIED; 445 scm = SCM_UNSPECIFIED;
430 else { 446 } else {
431 scm = _elstr_dup(elstr); 447 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
432 elstr = (struct elstr*) SCM_CDR(scm); 448 if (scm == el) {
449 scm = _elstr_dup(elstr);
450 elstr = (struct elstr*) SCM_CDR(scm);
451 }
433 } 452 }
434 chgfun(elstr->str, elstr->len); 453 chgfun(elstr->str, elstr->len);
@@ -483,11 +502,14 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
483 SCM scm; 502 SCM scm;
484 503
485 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 504 if (destructive) {
486 elstr = (struct elstr*) SCM_CDR(el); 505 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
487 if (destructive)
488 scm = SCM_UNSPECIFIED; 506 scm = SCM_UNSPECIFIED;
489 else { 507 elstr = (struct elstr*) SCM_CDR(el);
490 scm = _elstr_dup(elstr); 508 } else {
491 elstr = (struct elstr*) SCM_CDR(scm); 509 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
510 if (scm == el) {
511 scm = _elstr_dup(elstr);
512 elstr = (struct elstr*) SCM_CDR(scm);
513 }
492 } 514 }
493 for (i = 0; i < elstr->len; i++) 515 for (i = 0; i < elstr->len; i++)
@@ -527,7 +549,11 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
527 int dstate; 549 int dstate;
528 550
529 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 551 if (destructive) {
552 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
553 elstr = (struct elstr*) SCM_CDR(el);
554 } else
555 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
556
530 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); 557 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
531 elstr = (struct elstr*) SCM_CDR(el);
532 num = scm_to_uint(n); 558 num = scm_to_uint(n);
533 if (num > elstr->nsyl) 559 if (num > elstr->nsyl)
@@ -543,5 +569,5 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
543 if (destructive) 569 if (destructive)
544 scm = SCM_UNSPECIFIED; 570 scm = SCM_UNSPECIFIED;
545 else { 571 else if (scm == el) {
546 scm = _elstr_dup(elstr); 572 scm = _elstr_dup(elstr);
547 elstr = (struct elstr*) SCM_CDR(scm); 573 elstr = (struct elstr*) SCM_CDR(scm);
@@ -599,7 +625,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
599 int num; 625 int num;
600 626
601 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 627 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
602 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 628 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
603 elstr = (struct elstr*) SCM_CDR(el);
604 num = scm_to_int(n); 629 num = scm_to_int(n);
605 if (num < 0) 630 if (num < 0)
@@ -685,11 +710,14 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
685 size_t wclen; 710 size_t wclen;
686 711
687 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 712 if (destructive) {
688 elstr = (struct elstr*) SCM_CDR(el); 713 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
689 if (destructive) 714 elstr = (struct elstr*) SCM_CDR(el);
690 scm = SCM_UNSPECIFIED; 715 scm = SCM_UNSPECIFIED;
691 else { 716 } else {
692 scm = _elstr_dup(elstr); 717 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
693 elstr = (struct elstr*) SCM_CDR(scm); 718 if (scm == el) {
719 scm = _elstr_dup(elstr);
720 elstr = (struct elstr*) SCM_CDR(scm);
721 }
694 } 722 }
695 if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen)) 723 if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
@@ -727,8 +755,11 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
727 SCM scm; 755 SCM scm;
728 756
729 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 757 if (destructive) {
758 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
759 elstr = (struct elstr*) SCM_CDR(el);
760 } else
761 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
730 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); 762 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
731 SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name); 763 SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
732 elstr = (struct elstr*) SCM_CDR(el);
733 num = scm_to_int(n); 764 num = scm_to_int(n);
734 len = scm_to_uint(l); 765 len = scm_to_uint(l);
@@ -743,5 +774,5 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
743 if (destructive) 774 if (destructive)
744 scm = SCM_UNSPECIFIED; 775 scm = SCM_UNSPECIFIED;
745 else { 776 else if (scm == el) {
746 scm = _elstr_dup(elstr); 777 scm = _elstr_dup(elstr);
747 elstr = (struct elstr*) SCM_CDR(scm); 778 elstr = (struct elstr*) SCM_CDR(scm);
@@ -782,31 +813,11 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
782#define FUNC_NAME s_scm_elstr_index 813#define FUNC_NAME s_scm_elstr_index
783{ 814{
784 struct elstr *elstr; 815 struct elstr *elstr, *ep;
785 unsigned *wc, *wtmp = NULL, *p; 816 unsigned *p;
786 unsigned wlen; 817
787 818 force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
788 SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); 819 force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
789 elstr = (struct elstr*) SCM_CDR(word); 820 p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
790 if (scm_is_elstr(needle)) { 821 ep->str, ep->len);
791 struct elstr *ep = (struct elstr*) SCM_CDR(needle);
792 wc = ep->str;
793 wlen = ep->len;
794 } else {
795 SCM scm;
796 char *str;
797
798 SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
799 str = scm_to_locale_string(needle);
800 if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
801 free(str);
802 scm_misc_error(FUNC_NAME,
803 "Invalid needle string: ~S",
804 scm_list_1(needle));
805 }
806 free(str);
807 wc = wtmp;
808 }
809 p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
810 free(wtmp);
811 if (p) 822 if (p)
812 return scm_from_int(p - elstr->str); 823 return scm_from_int(p - elstr->str);
@@ -816,20 +827,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
816 827
817static int 828static int
818_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name) 829_suffix_matches(struct elstr *elstr, struct elstr *ep)
819{ 830{
820 struct elstr *ep;
821
822 if (scm_is_elstr(suffix)) {
823 ep = (struct elstr*) SCM_CDR(suffix);
824 } else {
825 SCM scm;
826 char *str;
827
828 SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
829 str = scm_to_locale_string(suffix);
830 scm = _elstr_alloc(str, 0);
831 free(str);
832 ep = (struct elstr*) SCM_CDR(scm);
833 }
834 return (ep->len < elstr->len && 831 return (ep->len < elstr->len &&
835 memcmp(elstr->str + elstr->len - ep->len, 832 memcmp(elstr->str + elstr->len - ep->len,
@@ -844,14 +841,15 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
844#define FUNC_NAME s_scm_elstr_suffix_p 841#define FUNC_NAME s_scm_elstr_suffix_p
845{ 842{
846 struct elstr *elstr; 843 struct elstr *elstr, *ep;
847 844
848 SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME); 845 force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
849 elstr = (struct elstr*) SCM_CDR(word); 846 force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
850 if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME)) 847 if (_suffix_matches(elstr, ep))
851 return suffix; 848 return suffix;
852 849
853 for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { 850 for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
854 SCM val = SCM_CAR(rest); 851 SCM val = SCM_CAR(rest);
855 if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME)) 852 force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
853 if (_suffix_matches(elstr, ep))
856 return val; 854 return val;
857 } 855 }
@@ -870,20 +868,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
870 868
871 for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { 869 for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
870 struct elstr *elt;
872 SCM val = SCM_CAR(rest); 871 SCM val = SCM_CAR(rest);
873 if (scm_is_elstr(val)) { 872
874 struct elstr *elt = (struct elstr*) SCM_CDR(val); 873 force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
875 _elstr_concat(elstr, elt, FUNC_NAME); 874 _elstr_concat(elstr, elt, FUNC_NAME);
876 } else if (scm_is_string(val)) {
877 char *s = scm_to_locale_string(val);
878 if (s[0]) {
879 SCM tmp = _elstr_alloc(s, 0);
880 free(s);
881 _elstr_concat(elstr,
882 (struct elstr*) SCM_CDR(tmp),
883 FUNC_NAME);
884 } else
885 free(s);
886 } else
887 scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
888 } 875 }
889 _elstr_syllabize(elstr); 876 _elstr_syllabize(elstr);
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index e3ed4b5..f916d1c 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -22,18 +22,24 @@
22 22
23(define-public (elstr-trim word n) 23(define-public (elstr-trim word n)
24 (cond 24 (let ((word (if (string? word)
25 ((> n 0) 25 (string->elstr word)
26 (elstr-slice word n (- (elstr-length word) n))) 26 word)))
27 ((< n 0) 27 (cond
28 (elstr-slice word 0 (+ (elstr-length word) n))) 28 ((> n 0)
29 (else 29 (elstr-slice word n (- (elstr-length word) n)))
30 word))) 30 ((< n 0)
31 (elstr-slice word 0 (+ (elstr-length word) n)))
32 (else
33 word))))
31 34
32(define-public (elstr-trim! word n) 35(define-public (elstr-trim! word n)
33 (cond 36 (let ((word (if (string? word)
34 ((> n 0) 37 (string->elstr word)
35 (elstr-slice! word n (- (elstr-length word) n))) 38 word)))
36 ((< n 0) 39 (cond
37 (elstr-slice! word 0 (+ (elstr-length word) n))))) 40 ((> n 0)
41 (elstr-slice! word n (- (elstr-length word) n)))
42 ((< n 0)
43 (elstr-slice! word 0 (+ (elstr-length word) n))))))
38 44
39 45

Return to:

Send suggestions and report system problems to the System administrator.