aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika/elmorph.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/ellinika/elmorph.c')
-rw-r--r--src/ellinika/elmorph.c141
1 files changed, 112 insertions, 29 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 6ff5f01..75e42f9 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -48,7 +48,7 @@ _elstr_syllabize(struct elstr *elstr)
48 elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len, 48 elstr->sylmap = scm_gc_malloc(sizeof(sylmap[0])*elstr->len,
49 "syllable map"); 49 "syllable map");
50 sylmap = elstr->sylmap; 50 sylmap = elstr->sylmap;
51 51
52 for (i = 0; i < elstr->len; i++) { 52 for (i = 0; i < elstr->len; i++) {
53 int nstate; 53 int nstate;
54 54
@@ -75,7 +75,18 @@ _elstr_syllabize(struct elstr *elstr)
75} 75}
76 76
77static SCM 77static SCM
78_elstr_alloc(const char *instr) 78_elstr_alloc_empty(struct elstr **pelstr)
79{
80 struct elstr *elstr;
81
82 elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
83 memset(elstr, 0, sizeof(*elstr));
84 *pelstr = elstr;
85 SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
86}
87
88static SCM
89_elstr_alloc(const char *instr, int syl)
79{ 90{
80 struct elstr *elstr; 91 struct elstr *elstr;
81 unsigned *wptr; 92 unsigned *wptr;
@@ -88,7 +99,8 @@ _elstr_alloc(const char *instr)
88 elstr->str = wptr; 99 elstr->str = wptr;
89 elstr->len = wlen; 100 elstr->len = wlen;
90 elstr->sylmap = NULL; 101 elstr->sylmap = NULL;
91 _elstr_syllabize(elstr); 102 if (syl)
103 _elstr_syllabize(elstr);
92 104
93 SCM_RETURN_NEWSMOB(_elstr_tag, elstr); 105 SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
94} 106}
@@ -116,7 +128,23 @@ _elstr_dup(struct elstr *elstr)
116 elnew->acc_pos = elstr->acc_pos; 128 elnew->acc_pos = elstr->acc_pos;
117 SCM_RETURN_NEWSMOB(_elstr_tag, elnew); 129 SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
118} 130}
119 131
132static void
133_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
134{
135 unsigned *wp;
136
137 wp = realloc(dest->str,
138 sizeof(dest->str[0]) * (dest->len + src->len));
139 if (!wp)
140 scm_memory_error(func_name);
141 dest->str = wp;
142 memcpy(dest->str + dest->len,
143 src->str,
144 sizeof(src->str[0]) * src->len);
145 dest->len += src->len;
146}
147
120static scm_sizet 148static scm_sizet
121_elstr_free(SCM smob) 149_elstr_free(SCM smob)
122{ 150{
@@ -134,29 +162,43 @@ _elstr_print(SCM smob, SCM port, scm_print_state *pstate)
134 int i, j, an; 162 int i, j, an;
135 char *s; 163 char *s;
136 164
137 scm_puts("#<elstr ``", port); 165 scm_puts("#<elstr ", port);
138 an = elstr->nsyl - elstr->acc_syl; 166 if (elstr->sylmap) {
139 if (an == 0) 167 scm_puts("``", port);
140 scm_puts("[", port); 168 an = elstr->nsyl - elstr->acc_syl;
141 for (i = j = 0; i < elstr->len; i++) { 169 if (an == 0)
142 char r[6]; 170 scm_puts("[", port);
143 int n; 171 for (i = j = 0; i < elstr->len; i++) {
144 172 char r[6];
145 if (i == elstr->sylmap[j] + 1) { 173 int n;
146 if (j == an) 174
147 scm_puts("]", port); 175 if (i == elstr->sylmap[j] + 1) {
148 scm_puts("-", port); 176 if (j == an)
149 if (++j == an) 177 scm_puts("]", port);
150 scm_puts("[", port); 178 scm_puts("-", port);
179 if (++j == an)
180 scm_puts("[", port);
181 }
182 n = utf8_wctomb(r, elstr->str[i]);
183 if (n == -1)
184 continue;
185 r[n] = 0;
186 scm_puts(r, port);
187 }
188 if (j == an)
189 scm_puts("]", port);
190 } else {
191 scm_puts("[NS] ``", port);
192 for (i = j = 0; i < elstr->len; i++) {
193 char r[6];
194 int n;
195 n = utf8_wctomb(r, elstr->str[i]);
196 if (n == -1)
197 continue;
198 r[n] = 0;
199 scm_puts(r, port);
151 } 200 }
152 n = utf8_wctomb(r, elstr->str[i]);
153 if (n == -1)
154 continue;
155 r[n] = 0;
156 scm_puts(r, port);
157 } 201 }
158 if (j == an)
159 scm_puts("]", port);
160 scm_puts("''>", port); 202 scm_puts("''>", port);
161 return 1; 203 return 1;
162} 204}
@@ -169,6 +211,17 @@ _elstr_init()
169 scm_set_smob_print(_elstr_tag, _elstr_print); 211 scm_set_smob_print(_elstr_tag, _elstr_print);
170} 212}
171 213
214#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
215
216SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
217 (SCM string),
218"Return true if STRING is an elstr\n")
219#define FUNC_NAME s_scm_elstr_p
220{
221 return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F;
222}
223#undef FUNC_NAME
224
172SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0, 225SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
173 (SCM string), 226 (SCM string),
174"Create new ELSTR from STRING\n") 227"Create new ELSTR from STRING\n")
@@ -179,7 +232,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
179 232
180 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME); 233 SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
181 str = scm_to_locale_string(string); 234 str = scm_to_locale_string(string);
182 scm = _elstr_alloc(str); 235 scm = _elstr_alloc(str, 1);
183 free(str); 236 free(str);
184 if (scm == SCM_EOL) 237 if (scm == SCM_EOL)
185 scm_misc_error(FUNC_NAME, 238 scm_misc_error(FUNC_NAME,
@@ -189,8 +242,6 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
189} 242}
190#undef FUNC_NAME 243#undef FUNC_NAME
191 244
192#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
193
194SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0, 245SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
195 (SCM el), 246 (SCM el),
196"Convert EL to a STRING\n") 247"Convert EL to a STRING\n")
@@ -475,7 +526,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
475 num = scm_to_uint(n); 526 num = scm_to_uint(n);
476 if (num > elstr->nsyl) 527 if (num > elstr->nsyl)
477 scm_misc_error(func_name, 528 scm_misc_error(func_name,
478 "cannot get syllable #~S: not enough syllables: ~S", 529 "cannot set accent on syllable #~S: not enough syllables: ~S",
479 scm_list_2(el, n)); 530 scm_list_2(el, n));
480 acc_num = elstr->nsyl - num; 531 acc_num = elstr->nsyl - num;
481 if (acc_num == 0) 532 if (acc_num == 0)
@@ -754,6 +805,38 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
754 return SCM_BOOL_F; 805 return SCM_BOOL_F;
755} 806}
756#undef FUNC_NAME 807#undef FUNC_NAME
808
809SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
810 0, 0, 1,
811 (SCM rest),
812"")
813#define FUNC_NAME s_scm_elstr_append
814{
815 SCM ret = _elstr_alloc("", 0);
816 struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
817
818 for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
819 SCM val = SCM_CAR(rest);
820 if (scm_is_elstr(val)) {
821 struct elstr *elt = (struct elstr*) SCM_CDR(val);
822 _elstr_concat(elstr, elt, FUNC_NAME);
823 } else if (scm_is_string(val)) {
824 char *s = scm_to_locale_string(val);
825 if (s[0]) {
826 SCM tmp = _elstr_alloc(s, 0);
827 free(s);
828 _elstr_concat(elstr,
829 (struct elstr*) SCM_CDR(tmp),
830 FUNC_NAME);
831 } else
832 free(s);
833 } else
834 scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
835 }
836 _elstr_syllabize(elstr);
837 return ret;
838}
839#undef FUNC_NAME
757 840
758 841
759void