From 7087cd30afbb6f15c55b8adbc270776f35d4fefb Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Sun, 5 Jun 2011 01:19:53 +0300 Subject: Bugfixes. * data/db.struct (verb): Minor fix. * src/ellinika/elchr.c: Fix base of lower case epsilon. * src/ellinika/elmorph.c (_elstr_alloc_empty): New function. (_elstr_alloc): Take additional argument. All uses changed. (_elstr_concat): New function. (_elstr_print): Correctly print objects with NULL sylmap. (elstr?): New function. (elstr-append): New function. --- data/db.struct | 3 +- src/ellinika/elchr.c | 4 +- src/ellinika/elmorph.c | 141 +++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 116 insertions(+), 32 deletions(-) diff --git a/data/db.struct b/data/db.struct index c4d6460..01507ce 100644 --- a/data/db.struct +++ b/data/db.struct @@ -371,5 +371,6 @@ CREATE TABLE verb( conj char(2), -- REL 9 present varchar(128), -- θέμα ενεστώτα aorist varchar(128), -- θέμα αόριστου - pass varchar(128) -- θέμα αόριστου μεσοπαθητικής + pass varchar(128), -- θέμα αόριστου μεσοπαθητικής + INDEX(word) ); \ No newline at end of file diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 10ea72d..3142b6f 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c @@ -213,7 +213,7 @@ struct char_info_st el_basic_ctype[] = { { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ - { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B4, 0x0388 }, /* έ */ + { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */ { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ @@ -232,7 +232,7 @@ struct char_info_st el_basic_ctype[] = { { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 
50 }, /* ν */ { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ - { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, 70, 0x03CC }, /* ο */ + { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */ { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 6ff5f01..75e42f9 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -48,7 +48,7 @@ _elstr_syllabize(struct elstr *elstr) elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, "syllable map"); sylmap = elstr->sylmap; - + for (i = 0; i < elstr->len; i++) { int nstate; @@ -75,7 +75,18 @@ _elstr_syllabize(struct elstr *elstr) } static SCM -_elstr_alloc(const char *instr) +_elstr_alloc_empty(struct elstr **pelstr) +{ + struct elstr *elstr; + + elstr = scm_gc_malloc(sizeof(*elstr), "Elstr"); + memset(elstr, 0, sizeof(*elstr)); + *pelstr = elstr; + SCM_RETURN_NEWSMOB(_elstr_tag, elstr); +} + +static SCM +_elstr_alloc(const char *instr, int syl) { struct elstr *elstr; unsigned *wptr; @@ -88,7 +99,8 @@ _elstr_alloc(const char *instr) elstr->str = wptr; elstr->len = wlen; elstr->sylmap = NULL; - _elstr_syllabize(elstr); + if (syl) + _elstr_syllabize(elstr); SCM_RETURN_NEWSMOB(_elstr_tag, elstr); } @@ -116,7 +128,23 @@ _elstr_dup(struct elstr *elstr) elnew->acc_pos = elstr->acc_pos; SCM_RETURN_NEWSMOB(_elstr_tag, elnew); } - + +static void +_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) +{ + unsigned *wp; + + wp = realloc(dest->str, + sizeof(dest->str[0]) * (dest->len + src->len)); + if (!wp) + scm_memory_error(func_name); + dest->str = wp; + memcpy(dest->str + dest->len, + src->str, + sizeof(src->str[0]) * src->len); + dest->len += src->len; +} + static scm_sizet _elstr_free(SCM smob) { @@ -134,29 +162,43 @@ 
_elstr_print(SCM smob, SCM port, scm_print_state *pstate) int i, j, an; char *s; - scm_puts("#<elstr ``", port); - an = elstr->nsyl - elstr->acc_syl; - if (an == 0) - scm_puts("[", port); - for (i = j = 0; i < elstr->len; i++) { - char r[6]; - int n; - - if (i == elstr->sylmap[j] + 1) { - if (j == an) - scm_puts("]", port); - scm_puts("-", port); - if (++j == an) - scm_puts("[", port); + scm_puts("#<elstr ", port); + if (elstr->sylmap) { + scm_puts("``", port); + an = elstr->nsyl - elstr->acc_syl; + if (an == 0) + scm_puts("[", port); + for (i = j = 0; i < elstr->len; i++) { + char r[6]; + int n; + + if (i == elstr->sylmap[j] + 1) { + if (j == an) + scm_puts("]", port); + scm_puts("-", port); + if (++j == an) + scm_puts("[", port); + } + n = utf8_wctomb(r, elstr->str[i]); + if (n == -1) + continue; + r[n] = 0; + scm_puts(r, port); + } + if (j == an) + scm_puts("]", port); + } else { + scm_puts("[NS] ``", port); + for (i = j = 0; i < elstr->len; i++) { + char r[6]; + int n; + n = utf8_wctomb(r, elstr->str[i]); + if (n == -1) + continue; + r[n] = 0; + scm_puts(r, port); } - n = utf8_wctomb(r, elstr->str[i]); - if (n == -1) - continue; - r[n] = 0; - scm_puts(r, port); } - if (j == an) - scm_puts("]", port); scm_puts("''>", port); return 1; } @@ -169,6 +211,17 @@ _elstr_init() scm_set_smob_print(_elstr_tag, _elstr_print); } +#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) + +SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0, + (SCM string), +"Return true if STRING is an elstr\n") +#define FUNC_NAME s_scm_elstr_p +{ + return scm_is_elstr(string) ? 
SCM_BOOL_T : SCM_BOOL_F; +} +#undef FUNC_NAME + SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, (SCM string), "Create new ELSTR from STRING\n") @@ -179,7 +232,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); str = scm_to_locale_string(string); - scm = _elstr_alloc(str); + scm = _elstr_alloc(str, 1); free(str); if (scm == SCM_EOL) scm_misc_error(FUNC_NAME, @@ -189,8 +242,6 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, } #undef FUNC_NAME -#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag) - SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, (SCM el), "Convert EL to a STRING\n") @@ -475,7 +526,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) num = scm_to_uint(n); if (num > elstr->nsyl) scm_misc_error(func_name, - "cannot get syllable #~S: not enough syllables: ~S", + "cannot set accent on syllable #~S: not enough syllables: ~S", scm_list_2(el, n)); acc_num = elstr->nsyl - num; if (acc_num == 0) @@ -754,6 +805,38 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index", return SCM_BOOL_F; } #undef FUNC_NAME + +SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append", + 0, 0, 1, + (SCM rest), +"") +#define FUNC_NAME s_scm_elstr_append +{ + SCM ret = _elstr_alloc("", 0); + struct elstr *elstr = (struct elstr*) SCM_CDR(ret); + + for (; !scm_is_null(rest); rest = SCM_CDR(rest)) { + SCM val = SCM_CAR(rest); + if (scm_is_elstr(val)) { + struct elstr *elt = (struct elstr*) SCM_CDR(val); + _elstr_concat(elstr, elt, FUNC_NAME); + } else if (scm_is_string(val)) { + char *s = scm_to_locale_string(val); + if (s[0]) { + SCM tmp = _elstr_alloc(s, 0); + free(s); + _elstr_concat(elstr, + (struct elstr*) SCM_CDR(tmp), + FUNC_NAME); + } else + free(s); + } else + scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest); + } + _elstr_syllabize(elstr); + return ret; +} +#undef FUNC_NAME void -- cgit v1.2.1