diff options
Diffstat (limited to 'src/ellinika/elmorph.c')
-rw-r--r-- | src/ellinika/elmorph.c | 141 |
1 files changed, 112 insertions, 29 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 6ff5f01..75e42f9 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c | |||
@@ -48,7 +48,7 @@ _elstr_syllabize(struct elstr *elstr) | |||
48 | elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, | 48 | elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, |
49 | "syllable map"); | 49 | "syllable map"); |
50 | sylmap = elstr->sylmap; | 50 | sylmap = elstr->sylmap; |
51 | 51 | ||
52 | for (i = 0; i < elstr->len; i++) { | 52 | for (i = 0; i < elstr->len; i++) { |
53 | int nstate; | 53 | int nstate; |
54 | 54 | ||
@@ -75,7 +75,18 @@ _elstr_syllabize(struct elstr *elstr) | |||
75 | } | 75 | } |
76 | 76 | ||
77 | static SCM | 77 | static SCM |
78 | _elstr_alloc(const char *instr) | 78 | _elstr_alloc_empty(struct elstr **pelstr) |
79 | { | ||
80 | struct elstr *elstr; | ||
81 | |||
82 | elstr = scm_gc_malloc(sizeof(*elstr), "Elstr"); | ||
83 | memset(elstr, 0, sizeof(*elstr)); | ||
84 | *pelstr = elstr; | ||
85 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); | ||
86 | } | ||
87 | |||
88 | static SCM | ||
89 | _elstr_alloc(const char *instr, int syl) | ||
79 | { | 90 | { |
80 | struct elstr *elstr; | 91 | struct elstr *elstr; |
81 | unsigned *wptr; | 92 | unsigned *wptr; |
@@ -88,7 +99,8 @@ _elstr_alloc(const char *instr) | |||
88 | elstr->str = wptr; | 99 | elstr->str = wptr; |
89 | elstr->len = wlen; | 100 | elstr->len = wlen; |
90 | elstr->sylmap = NULL; | 101 | elstr->sylmap = NULL; |
91 | _elstr_syllabize(elstr); | 102 | if (syl) |
103 | _elstr_syllabize(elstr); | ||
92 | 104 | ||
93 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); | 105 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); |
94 | } | 106 | } |
@@ -116,7 +128,23 @@ _elstr_dup(struct elstr *elstr) | |||
116 | elnew->acc_pos = elstr->acc_pos; | 128 | elnew->acc_pos = elstr->acc_pos; |
117 | SCM_RETURN_NEWSMOB(_elstr_tag, elnew); | 129 | SCM_RETURN_NEWSMOB(_elstr_tag, elnew); |
118 | } | 130 | } |
119 | 131 | ||
132 | static void | ||
133 | _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) | ||
134 | { | ||
135 | unsigned *wp; | ||
136 | |||
137 | wp = realloc(dest->str, | ||
138 | sizeof(dest->str[0]) * (dest->len + src->len)); | ||
139 | if (!wp) | ||
140 | scm_memory_error(func_name); | ||
141 | dest->str = wp; | ||
142 | memcpy(dest->str + dest->len, | ||
143 | src->str, | ||
144 | sizeof(src->str[0]) * src->len); | ||
145 | dest->len += src->len; | ||
146 | } | ||
147 | |||
120 | static scm_sizet | 148 | static scm_sizet |
121 | _elstr_free(SCM smob) | 149 | _elstr_free(SCM smob) |
122 | { | 150 | { |
@@ -134,29 +162,43 @@ _elstr_print(SCM smob, SCM port, scm_print_state *pstate) | |||
134 | int i, j, an; | 162 | int i, j, an; |
135 | char *s; | 163 | char *s; |
136 | 164 | ||
137 | scm_puts("#<elstr ``", port); | 165 | scm_puts("#<elstr ", port); |
138 | an = elstr->nsyl - elstr->acc_syl; | 166 | if (elstr->sylmap) { |
139 | if (an == 0) | 167 | scm_puts("``", port); |
140 | scm_puts("[", port); | 168 | an = elstr->nsyl - elstr->acc_syl; |
141 | for (i = j = 0; i < elstr->len; i++) { | 169 | if (an == 0) |
142 | char r[6]; | 170 | scm_puts("[", port); |
143 | int n; | 171 | for (i = j = 0; i < elstr->len; i++) { |
144 | 172 | char r[6]; | |
145 | if (i == elstr->sylmap[j] + 1) { | 173 | int n; |
146 | if (j == an) | 174 | |
147 | scm_puts("]", port); | 175 | if (i == elstr->sylmap[j] + 1) { |
148 | scm_puts("-", port); | 176 | if (j == an) |
149 | if (++j == an) | 177 | scm_puts("]", port); |
150 | scm_puts("[", port); | 178 | scm_puts("-", port); |
179 | if (++j == an) | ||
180 | scm_puts("[", port); | ||
181 | } | ||
182 | n = utf8_wctomb(r, elstr->str[i]); | ||
183 | if (n == -1) | ||
184 | continue; | ||
185 | r[n] = 0; | ||
186 | scm_puts(r, port); | ||
187 | } | ||
188 | if (j == an) | ||
189 | scm_puts("]", port); | ||
190 | } else { | ||
191 | scm_puts("[NS] ``", port); | ||
192 | for (i = j = 0; i < elstr->len; i++) { | ||
193 | char r[6]; | ||
194 | int n; | ||
195 | n = utf8_wctomb(r, elstr->str[i]); | ||
196 | if (n == -1) | ||
197 | continue; | ||
198 | r[n] = 0; | ||
199 | scm_puts(r, port); | ||
151 | } | 200 | } |
152 | n = utf8_wctomb(r, elstr->str[i]); | ||
153 | if (n == -1) | ||
154 | continue; | ||
155 | r[n] = 0; | ||
156 | scm_puts(r, port); | ||
157 | } | 201 | } |
158 | if (j == an) | ||
159 | scm_puts("]", port); | ||
160 | scm_puts("''>", port); | 202 | scm_puts("''>", port); |
161 | return 1; | 203 | return 1; |
162 | } | 204 | } |
@@ -169,6 +211,17 @@ _elstr_init() | |||
169 | scm_set_smob_print(_elstr_tag, _elstr_print); | 211 | scm_set_smob_print(_elstr_tag, _elstr_print); |
170 | } | 212 | } |
171 | 213 | ||
214 | #define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) | ||
215 | |||
216 | SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, | ||
217 | (SCM string), | ||
218 | "Return true if STRING is an elstr\n") | ||
219 | #define FUNC_NAME s_scm_elstr_p | ||
220 | { | ||
221 | return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F; | ||
222 | } | ||
223 | #undef FUNC_NAME | ||
224 | |||
172 | SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | 225 | SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, |
173 | (SCM string), | 226 | (SCM string), |
174 | "Create new ELSTR from STRING\n") | 227 | "Create new ELSTR from STRING\n") |
@@ -179,7 +232,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | |||
179 | 232 | ||
180 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); | 233 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); |
181 | str = scm_to_locale_string(string); | 234 | str = scm_to_locale_string(string); |
182 | scm = _elstr_alloc(str); | 235 | scm = _elstr_alloc(str, 1); |
183 | free(str); | 236 | free(str); |
184 | if (scm == SCM_EOL) | 237 | if (scm == SCM_EOL) |
185 | scm_misc_error(FUNC_NAME, | 238 | scm_misc_error(FUNC_NAME, |
@@ -189,8 +242,6 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | |||
189 | } | 242 | } |
190 | #undef FUNC_NAME | 243 | #undef FUNC_NAME |
191 | 244 | ||
192 | #define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) | ||
193 | |||
194 | SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, | 245 | SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, |
195 | (SCM el), | 246 | (SCM el), |
196 | "Convert EL to a STRING\n") | 247 | "Convert EL to a STRING\n") |
@@ -475,7 +526,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) | |||
475 | num = scm_to_uint(n); | 526 | num = scm_to_uint(n); |
476 | if (num > elstr->nsyl) | 527 | if (num > elstr->nsyl) |
477 | scm_misc_error(func_name, | 528 | scm_misc_error(func_name, |
478 | "cannot get syllable #~S: not enough syllables: ~S", | 529 | "cannot set accent on syllable #~S: not enough syllables: ~S", |
479 | scm_list_2(el, n)); | 530 | scm_list_2(el, n)); |
480 | acc_num = elstr->nsyl - num; | 531 | acc_num = elstr->nsyl - num; |
481 | if (acc_num == 0) | 532 | if (acc_num == 0) |
@@ -754,6 +805,38 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", | |||
754 | return SCM_BOOL_F; | 805 | return SCM_BOOL_F; |
755 | } | 806 | } |
756 | #undef FUNC_NAME | 807 | #undef FUNC_NAME |
808 | |||
809 | SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", | ||
810 | 0, 0, 1, | ||
811 | (SCM rest), | ||
812 | "") | ||
813 | #define FUNC_NAME s_scm_elstr_append | ||
814 | { | ||
815 | SCM ret = _elstr_alloc("", 0); | ||
816 | struct elstr *elstr = (struct elstr*) SCM_CDR(ret); | ||
817 | |||
818 | for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { | ||
819 | SCM val = SCM_CAR(rest); | ||
820 | if (scm_is_elstr(val)) { | ||
821 | struct elstr *elt = (struct elstr*) SCM_CDR(val); | ||
822 | _elstr_concat(elstr, elt, FUNC_NAME); | ||
823 | } else if (scm_is_string(val)) { | ||
824 | char *s = scm_to_locale_string(val); | ||
825 | if (s[0]) { | ||
826 | SCM tmp = _elstr_alloc(s, 0); | ||
827 | free(s); | ||
828 | _elstr_concat(elstr, | ||
829 | (struct elstr*) SCM_CDR(tmp), | ||
830 | FUNC_NAME); | ||
831 | } else | ||
832 | free(s); | ||
833 | } else | ||
834 | scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest); | ||
835 | } | ||
836 | _elstr_syllabize(elstr); | ||
837 | return ret; | ||
838 | } | ||
839 | #undef FUNC_NAME | ||
757 | 840 | ||
758 | 841 | ||
759 | void |