aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika
diff options
context:
space:
mode:
Diffstat (limited to 'src/ellinika')
-rw-r--r--src/ellinika/elmorph.c217
-rw-r--r--src/ellinika/elmorph.scm430
2 files changed, 120 insertions, 127 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -102,9 +102,12 @@ _elstr_alloc(const char *instr, int syl)
elstr->str = wptr;
elstr->len = wlen;
elstr->sylmap = NULL;
+ elstr->nsyl = 0;
+ elstr->acc_syl = 0;
+ elstr->acc_pos = 0;
if (syl)
_elstr_syllabize(elstr);
-
+
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
@@ -140,6 +143,8 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
{
unsigned *wp;
+ if (src->len == 0)
+ return;
wp = realloc(dest->str,
sizeof(dest->str[0]) * (dest->len + src->len));
if (!wp)
@@ -219,6 +224,35 @@ _elstr_init()
#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
+static SCM
+force_elstr(struct elstr **ep, SCM scm, int sylopt,
+ int arg, const char *func_name)
+{
+ struct elstr *elstr;
+
+ if (scm_is_elstr(scm)) {
+ elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ SCM newscm;
+ char *str;
+
+ SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
+ str = scm_to_locale_string(scm);
+ newscm = _elstr_alloc(str, sylopt);
+ free(str);
+ if (newscm == SCM_EOL)
+ scm_misc_error(func_name,
+ "Invalid input string: ~S",
+ scm_list_1(scm));
+ scm = newscm;
+ elstr = (struct elstr*) SCM_CDR(newscm);
+ }
+ if (ep)
+ *ep = elstr;
+ return scm;
+}
+
+
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
(SCM string),
"Return true if STRING is an elstr\n")
@@ -237,14 +271,7 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- scm = _elstr_alloc(str, 1);
- free(str);
- if (scm == SCM_EOL)
- scm_misc_error(FUNC_NAME,
- "Invalid input string: ~S",
- scm_list_1(string));
- return scm;
+ return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
}
#undef FUNC_NAME
@@ -275,9 +302,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
#define FUNC_NAME s_scm_elstr_length
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->len);
}
#undef FUNC_NAME
@@ -289,9 +314,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
#define FUNC_NAME s_scm_elstr_number_of_syllables
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->nsyl);
}
#undef FUNC_NAME
@@ -305,8 +328,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
struct elstr *elstr;
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
@@ -330,9 +352,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
#define FUNC_NAME s_scm_elstr_accent_position
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_pos);
}
#undef FUNC_NAME
@@ -344,9 +364,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_syl);
}
#undef FUNC_NAME
@@ -362,8 +380,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
SCM scm;
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
@@ -398,8 +415,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
char r[6];
int len;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num >= elstr->len)
@@ -423,13 +439,16 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
struct elstr *elstr;
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
chgfun(elstr->str, elstr->len);
return scm;
@@ -482,13 +501,16 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
unsigned i;
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
@@ -526,9 +548,13 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
SCM scm;
int dstate;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(func_name,
@@ -542,7 +568,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
@@ -598,9 +624,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
struct elstr *elstr;
int num;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
if (num < 0)
num += elstr->len;
@@ -684,13 +709,16 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
unsigned *wc;
size_t wclen;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
scm_memory_error(func_name);
@@ -726,10 +754,13 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
unsigned len;
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
len = scm_to_uint(l);
@@ -742,7 +773,7 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
@@ -781,33 +812,13 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
"")
#define FUNC_NAME s_scm_elstr_index
{
- struct elstr *elstr;
- unsigned *wc, *wtmp = NULL, *p;
- unsigned wlen;
-
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (scm_is_elstr(needle)) {
- struct elstr *ep = (struct elstr*) SCM_CDR(needle);
- wc = ep->str;
- wlen = ep->len;
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
- str = scm_to_locale_string(needle);
- if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
- free(str);
- scm_misc_error(FUNC_NAME,
- "Invalid needle string: ~S",
- scm_list_1(needle));
- }
- free(str);
- wc = wtmp;
- }
- p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
- free(wtmp);
+ struct elstr *elstr, *ep;
+ unsigned *p;
+
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
+ p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
+ ep->str, ep->len);
if (p)
return scm_from_int(p - elstr->str);
return SCM_BOOL_F;
@@ -815,22 +826,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
#undef FUNC_NAME
static int
-_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name)
+_suffix_matches(struct elstr *elstr, struct elstr *ep)
{
- struct elstr *ep;
-
- if (scm_is_elstr(suffix)) {
- ep = (struct elstr*) SCM_CDR(suffix);
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
- str = scm_to_locale_string(suffix);
- scm = _elstr_alloc(str, 0);
- free(str);
- ep = (struct elstr*) SCM_CDR(scm);
- }
return (ep->len < elstr->len &&
memcmp(elstr->str + elstr->len - ep->len,
ep->str,
@@ -843,16 +840,17 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
"Return #t if WORDS ends with SUFFIX")
#define FUNC_NAME s_scm_elstr_suffix_p
{
- struct elstr *elstr;
+ struct elstr *elstr, *ep;
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME))
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return suffix;
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
- if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME))
+ force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return val;
}
return SCM_BOOL_F;
@@ -869,22 +867,11 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
+ struct elstr *elt;
SCM val = SCM_CAR(rest);
- if (scm_is_elstr(val)) {
- struct elstr *elt = (struct elstr*) SCM_CDR(val);
- _elstr_concat(elstr, elt, FUNC_NAME);
- } else if (scm_is_string(val)) {
- char *s = scm_to_locale_string(val);
- if (s[0]) {
- SCM tmp = _elstr_alloc(s, 0);
- free(s);
- _elstr_concat(elstr,
- (struct elstr*) SCM_CDR(tmp),
- FUNC_NAME);
- } else
- free(s);
- } else
- scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
+
+ force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
+ _elstr_concat(elstr, elt, FUNC_NAME);
}
_elstr_syllabize(elstr);
return ret;
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index e3ed4b5..f916d1c 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -21,20 +21,26 @@
"scm_init_ellinika_elmorph_module")
(define-public (elstr-trim word n)
- (cond
- ((> n 0)
- (elstr-slice word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice word 0 (+ (elstr-length word) n)))
- (else
- word)))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice word 0 (+ (elstr-length word) n)))
+ (else
+ word))))
(define-public (elstr-trim! word n)
- (cond
- ((> n 0)
- (elstr-slice! word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice! word 0 (+ (elstr-length word) n)))))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice! word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice! word 0 (+ (elstr-length word) n))))))

Return to:

Send suggestions and report system problems to the System administrator.