aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika/elmorph.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/ellinika/elmorph.c')
-rw-r--r--src/ellinika/elmorph.c217
1 files changed, 102 insertions, 115 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -101,11 +101,14 @@ _elstr_alloc(const char *instr, int syl)
101 elstr = scm_gc_malloc(sizeof(*elstr), "Elstr"); 101 elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
102 elstr->str = wptr; 102 elstr->str = wptr;
103 elstr->len = wlen; 103 elstr->len = wlen;
104 elstr->sylmap = NULL; 104 elstr->sylmap = NULL;
105 elstr->nsyl = 0;
106 elstr->acc_syl = 0;
107 elstr->acc_pos = 0;
105 if (syl) 108 if (syl)
106 _elstr_syllabize(elstr); 109 _elstr_syllabize(elstr);
107 110
108 SCM_RETURN_NEWSMOB(_elstr_tag, elstr); 111 SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
109} 112}
110 113
111static SCM 114static SCM
@@ -139,8 +142,10 @@ static void
139_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) 142_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
140{ 143{
141 unsigned *wp; 144 unsigned *wp;
142 145
146 if (src->len == 0)
147 return;
143 wp = realloc(dest->str, 148 wp = realloc(dest->str,
144 sizeof(dest->str[0]) * (dest->len + src->len)); 149 sizeof(dest->str[0]) * (dest->len + src->len));
145 if (!wp) 150 if (!wp)
146 scm_memory_error(func_name); 151 scm_memory_error(func_name);
@@ -218,8 +223,37 @@ _elstr_init()
218} 223}
219 224
220#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) 225#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
221 226
227static SCM
228force_elstr(struct elstr **ep, SCM scm, int sylopt,
229 int arg, const char *func_name)
230{
231 struct elstr *elstr;
232
233 if (scm_is_elstr(scm)) {
234 elstr = (struct elstr*) SCM_CDR(scm);
235 } else {
236 SCM newscm;
237 char *str;
238
239 SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
240 str = scm_to_locale_string(scm);
241 newscm = _elstr_alloc(str, sylopt);
242 free(str);
243 if (newscm == SCM_EOL)
244 scm_misc_error(func_name,
245 "Invalid input string: ~S",
246 scm_list_1(scm));
247 scm = newscm;
248 elstr = (struct elstr*) SCM_CDR(newscm);
249 }
250 if (ep)
251 *ep = elstr;
252 return scm;
253}
254
255
222SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, 256SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
223 (SCM string), 257 (SCM string),
224"Return true if STRING is an elstr\n") 258"Return true if STRING is an elstr\n")
225#define FUNC_NAME s_scm_elstr_p 259#define FUNC_NAME s_scm_elstr_p
@@ -236,16 +270,9 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
236 char *str; 270 char *str;
237 SCM scm; 271 SCM scm;
238 272
239 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); 273 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
240 str = scm_to_locale_string(string); 274 return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
241 scm = _elstr_alloc(str, 1);
242 free(str);
243 if (scm == SCM_EOL)
244 scm_misc_error(FUNC_NAME,
245 "Invalid input string: ~S",
246 scm_list_1(string));
247 return scm;
248} 275}
249#undef FUNC_NAME 276#undef FUNC_NAME
250 277
251SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, 278SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
@@ -274,11 +301,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
274"Returns the number of characters in EL\n") 301"Returns the number of characters in EL\n")
275#define FUNC_NAME s_scm_elstr_length 302#define FUNC_NAME s_scm_elstr_length
276{ 303{
277 struct elstr *elstr; 304 struct elstr *elstr;
278 305 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
279 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
280 elstr = (struct elstr*) SCM_CDR(el);
281 return scm_from_uint(elstr->len); 306 return scm_from_uint(elstr->len);
282} 307}
283#undef FUNC_NAME 308#undef FUNC_NAME
284 309
@@ -288,11 +313,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
288"Returns the number of characters in EL\n") 313"Returns the number of characters in EL\n")
289#define FUNC_NAME s_scm_elstr_number_of_syllables 314#define FUNC_NAME s_scm_elstr_number_of_syllables
290{ 315{
291 struct elstr *elstr; 316 struct elstr *elstr;
292 317 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
293 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
294 elstr = (struct elstr*) SCM_CDR(el);
295 return scm_from_uint(elstr->nsyl); 318 return scm_from_uint(elstr->nsyl);
296} 319}
297#undef FUNC_NAME 320#undef FUNC_NAME
298 321
@@ -304,10 +327,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
304{ 327{
305 struct elstr *elstr; 328 struct elstr *elstr;
306 unsigned num, start; 329 unsigned num, start;
307 330
308 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 331 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
309 elstr = (struct elstr*) SCM_CDR(el);
310 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 332 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
311 num = scm_to_uint(n); 333 num = scm_to_uint(n);
312 if (num > elstr->nsyl) 334 if (num > elstr->nsyl)
313 scm_misc_error(FUNC_NAME, 335 scm_misc_error(FUNC_NAME,
@@ -329,11 +351,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
329"Return position of the accented character in EL\n") 351"Return position of the accented character in EL\n")
330#define FUNC_NAME s_scm_elstr_accent_position 352#define FUNC_NAME s_scm_elstr_accent_position
331{ 353{
332 struct elstr *elstr; 354 struct elstr *elstr;
333 355 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
334 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
335 elstr = (struct elstr*) SCM_CDR(el);
336 return scm_from_uint(elstr->acc_pos); 356 return scm_from_uint(elstr->acc_pos);
337} 357}
338#undef FUNC_NAME 358#undef FUNC_NAME
339 359
@@ -343,11 +363,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
343"Return position of the accented syllable in EL\n") 363"Return position of the accented syllable in EL\n")
344#define FUNC_NAME s_scm_elstr_accented_syllable 364#define FUNC_NAME s_scm_elstr_accented_syllable
345{ 365{
346 struct elstr *elstr; 366 struct elstr *elstr;
347 367 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
348 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
349 elstr = (struct elstr*) SCM_CDR(el);
350 return scm_from_uint(elstr->acc_syl); 368 return scm_from_uint(elstr->acc_syl);
351} 369}
352#undef FUNC_NAME 370#undef FUNC_NAME
353 371
@@ -361,10 +379,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
361 char *s; 379 char *s;
362 SCM scm; 380 SCM scm;
363 unsigned num, start; 381 unsigned num, start;
364 382
365 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 383 force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
366 elstr = (struct elstr*) SCM_CDR(el);
367 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 384 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
368 num = scm_to_uint(n); 385 num = scm_to_uint(n);
369 if (num > elstr->nsyl) 386 if (num > elstr->nsyl)
370 scm_misc_error(FUNC_NAME, 387 scm_misc_error(FUNC_NAME,
@@ -397,10 +414,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
397 unsigned num; 414 unsigned num;
398 char r[6]; 415 char r[6];
399 int len; 416 int len;
400 417
401 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME); 418 force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
402 elstr = (struct elstr*) SCM_CDR(el);
403 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME); 419 SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
404 num = scm_to_uint(n); 420 num = scm_to_uint(n);
405 if (num >= elstr->len) 421 if (num >= elstr->len)
406 scm_misc_error(FUNC_NAME, 422 scm_misc_error(FUNC_NAME,
@@ -422,15 +438,18 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
422{ 438{
423 struct elstr *elstr; 439 struct elstr *elstr;
424 SCM scm; 440 SCM scm;
425 441
426 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); 442 if (destructive) {
427 elstr = (struct elstr*) SCM_CDR(el); 443 SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
428 if (destructive) 444 elstr = (struct elstr*) SCM_CDR(el);
429 scm = SCM_UNSPECIFIED; 445 scm = SCM_UNSPECIFIED;
430 else { 446 } else {
431 scm = _elstr_dup(elstr); 447 scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
432 elstr = (struct elstr*) SCM_CDR(scm); 448 if (scm == el) {
449