diff options
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/elchr.c | 4 | ||||
-rw-r--r-- | src/ellinika/elmorph.c | 141 |
2 files changed, 114 insertions, 31 deletions
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 10ea72d..3142b6f 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c | |||
@@ -213,7 +213,7 @@ struct char_info_st el_basic_ctype[] = { | |||
213 | { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ | 213 | { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ |
214 | { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ | 214 | { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ |
215 | { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ | 215 | { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ |
216 | { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B4, 0x0388 }, /* έ */ | 216 | { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */ |
217 | { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ | 217 | { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ |
218 | { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ | 218 | { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ |
219 | { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ | 219 | { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ |
@@ -232,7 +232,7 @@ struct char_info_st el_basic_ctype[] = { | |||
232 | { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ | 232 | { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ |
233 | { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ | 233 | { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ |
234 | 234 | ||
235 | { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, 70, 0x03CC }, /* ο */ | 235 | { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */ |
236 | { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ | 236 | { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ |
237 | { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ | 237 | { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ |
238 | { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ | 238 | { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ |
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 6ff5f01..75e42f9 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c | |||
@@ -48,7 +48,7 @@ _elstr_syllabize(struct elstr *elstr) | |||
48 | elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, | 48 | elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, |
49 | "syllable map"); | 49 | "syllable map"); |
50 | sylmap = elstr->sylmap; | 50 | sylmap = elstr->sylmap; |
51 | 51 | ||
52 | for (i = 0; i < elstr->len; i++) { | 52 | for (i = 0; i < elstr->len; i++) { |
53 | int nstate; | 53 | int nstate; |
54 | 54 | ||
@@ -75,7 +75,18 @@ _elstr_syllabize(struct elstr *elstr) | |||
75 | } | 75 | } |
76 | 76 | ||
77 | static SCM | 77 | static SCM |
78 | _elstr_alloc(const char *instr) | 78 | _elstr_alloc_empty(struct elstr **pelstr) |
79 | { | ||
80 | struct elstr *elstr; | ||
81 | |||
82 | elstr = scm_gc_malloc(sizeof(*elstr), "Elstr"); | ||
83 | memset(elstr, 0, sizeof(*elstr)); | ||
84 | *pelstr = elstr; | ||
85 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); | ||
86 | } | ||
87 | |||
88 | static SCM | ||
89 | _elstr_alloc(const char *instr, int syl) | ||
79 | { | 90 | { |
80 | struct elstr *elstr; | 91 | struct elstr *elstr; |
81 | unsigned *wptr; | 92 | unsigned *wptr; |
@@ -88,7 +99,8 @@ _elstr_alloc(const char *instr) | |||
88 | elstr->str = wptr; | 99 | elstr->str = wptr; |
89 | elstr->len = wlen; | 100 | elstr->len = wlen; |
90 | elstr->sylmap = NULL; | 101 | elstr->sylmap = NULL; |
91 | _elstr_syllabize(elstr); | 102 | if (syl) |
103 | _elstr_syllabize(elstr); | ||
92 | 104 | ||
93 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); | 105 | SCM_RETURN_NEWSMOB(_elstr_tag, elstr); |
94 | } | 106 | } |
@@ -116,7 +128,23 @@ _elstr_dup(struct elstr *elstr) | |||
116 | elnew->acc_pos = elstr->acc_pos; | 128 | elnew->acc_pos = elstr->acc_pos; |
117 | SCM_RETURN_NEWSMOB(_elstr_tag, elnew); | 129 | SCM_RETURN_NEWSMOB(_elstr_tag, elnew); |
118 | } | 130 | } |
119 | 131 | ||
132 | static void | ||
133 | _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) | ||
134 | { | ||
135 | unsigned *wp; | ||
136 | |||
137 | wp = realloc(dest->str, | ||
138 | sizeof(dest->str[0]) * (dest->len + src->len)); | ||
139 | if (!wp) | ||
140 | scm_memory_error(func_name); | ||
141 | dest->str = wp; | ||
142 | memcpy(dest->str + dest->len, | ||
143 | src->str, | ||
144 | sizeof(src->str[0]) * src->len); | ||
145 | dest->len += src->len; | ||
146 | } | ||
147 | |||
120 | static scm_sizet | 148 | static scm_sizet |
121 | _elstr_free(SCM smob) | 149 | _elstr_free(SCM smob) |
122 | { | 150 | { |
@@ -134,29 +162,43 @@ _elstr_print(SCM smob, SCM port, scm_print_state *pstate) | |||
134 | int i, j, an; | 162 | int i, j, an; |
135 | char *s; | 163 | char *s; |
136 | 164 | ||
137 | scm_puts("#<elstr ``", port); | 165 | scm_puts("#<elstr ", port); |
138 | an = elstr->nsyl - elstr->acc_syl; | 166 | if (elstr->sylmap) { |
139 | if (an == 0) | 167 | scm_puts("``", port); |
140 | scm_puts("[", port); | 168 | an = elstr->nsyl - elstr->acc_syl; |
141 | for (i = j = 0; i < elstr->len; i++) { | 169 | if (an == 0) |
142 | char r[6]; | 170 | scm_puts("[", port); |
143 | int n; | 171 | for (i = j = 0; i < elstr->len; i++) { |
144 | 172 | char r[6]; | |
145 | if (i == elstr->sylmap[j] + 1) { | 173 | int n; |
146 | if (j == an) | 174 | |
147 | scm_puts("]", port); | 175 | if (i == elstr->sylmap[j] + 1) { |
148 | scm_puts("-", port); | 176 | if (j == an) |
149 | if (++j == an) | 177 | scm_puts("]", port); |
150 | scm_puts("[", port); | 178 | scm_puts("-", port); |
179 | if (++j == an) | ||
180 | scm_puts("[", port); | ||
181 | } | ||
182 | n = utf8_wctomb(r, elstr->str[i]); | ||
183 | if (n == -1) | ||
184 | continue; | ||
185 | r[n] = 0; | ||
186 | scm_puts(r, port); | ||
187 | } | ||
188 | if (j == an) | ||
189 | scm_puts("]", port); | ||
190 | } else { | ||
191 | scm_puts("[NS] ``", port); | ||
192 | for (i = j = 0; i < elstr->len; i++) { | ||
193 | char r[6]; | ||
194 | int n; | ||
195 | n = utf8_wctomb(r, elstr->str[i]); | ||
196 | if (n == -1) | ||
197 | continue; | ||
198 | r[n] = 0; | ||
199 | scm_puts(r, port); | ||
151 | } | 200 | } |
152 | n = utf8_wctomb(r, elstr->str[i]); | ||
153 | if (n == -1) | ||
154 | continue; | ||
155 | r[n] = 0; | ||
156 | scm_puts(r, port); | ||
157 | } | 201 | } |
158 | if (j == an) | ||
159 | scm_puts("]", port); | ||
160 | scm_puts("''>", port); | 202 | scm_puts("''>", port); |
161 | return 1; | 203 | return 1; |
162 | } | 204 | } |
@@ -169,6 +211,17 @@ _elstr_init() | |||
169 | scm_set_smob_print(_elstr_tag, _elstr_print); | 211 | scm_set_smob_print(_elstr_tag, _elstr_print); |
170 | } | 212 | } |
171 | 213 | ||
214 | #define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) | ||
215 | |||
216 | SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, | ||
217 | (SCM string), | ||
218 | "Return true if STRING is an elstr\n") | ||
219 | #define FUNC_NAME s_scm_elstr_p | ||
220 | { | ||
221 | return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F; | ||
222 | } | ||
223 | #undef FUNC_NAME | ||
224 | |||
172 | SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | 225 | SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, |
173 | (SCM string), | 226 | (SCM string), |
174 | "Create new ELSTR from STRING\n") | 227 | "Create new ELSTR from STRING\n") |
@@ -179,7 +232,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | |||
179 | 232 | ||
180 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); | 233 | SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); |
181 | str = scm_to_locale_string(string); | 234 | str = scm_to_locale_string(string); |
182 | scm = _elstr_alloc(str); | 235 | scm = _elstr_alloc(str, 1); |
183 | free(str); | 236 | free(str); |
184 | if (scm == SCM_EOL) | 237 | if (scm == SCM_EOL) |
185 | scm_misc_error(FUNC_NAME, | 238 | scm_misc_error(FUNC_NAME, |
@@ -189,8 +242,6 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, | |||
189 | } | 242 | } |
190 | #undef FUNC_NAME | 243 | #undef FUNC_NAME |
191 | 244 | ||
192 | #define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) | ||
193 | |||
194 | SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, | 245 | SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, |
195 | (SCM el), | 246 | (SCM el), |
196 | "Convert EL to a STRING\n") | 247 | "Convert EL to a STRING\n") |
@@ -475,7 +526,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) | |||
475 | num = scm_to_uint(n); | 526 | num = scm_to_uint(n); |
476 | if (num > elstr->nsyl) | 527 | if (num > elstr->nsyl) |
477 | scm_misc_error(func_name, | 528 | scm_misc_error(func_name, |
478 | "cannot get syllable #~S: not enough syllables: ~S", | 529 | "cannot set accent on syllable #~S: not enough syllables: ~S", |
479 | scm_list_2(el, n)); | 530 | scm_list_2(el, n)); |
480 | acc_num = elstr->nsyl - num; | 531 | acc_num = elstr->nsyl - num; |
481 | if (acc_num == 0) | 532 | if (acc_num == 0) |
@@ -754,6 +805,38 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", | |||
754 | return SCM_BOOL_F; | 805 | return SCM_BOOL_F; |
755 | } | 806 | } |
756 | #undef FUNC_NAME | 807 | #undef FUNC_NAME |
808 | |||
809 | SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", | ||
810 | 0, 0, 1, | ||