summaryrefslogtreecommitdiffabout
path: root/src/ellinika
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-06 14:37:28 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2011-06-06 14:37:28 (GMT)
commitc598bc4dee28a9480ca9b7e9d5a20d75a5baccda (patch) (side-by-side diff)
treec768bf56be6fda3e5cac9ed3f56d5289be5905c9 /src/ellinika
parent8a7e9b26e073731c82c02594d081c57aa474eade (diff)
downloadellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.gz
ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.bz2
Rewrite all elstr- functions to take either elstr or string as arguments.
Diffstat (limited to 'src/ellinika') (more/less context) (ignore whitespace changes)
-rw-r--r--src/ellinika/elmorph.c217
-rw-r--r--src/ellinika/elmorph.scm430
2 files changed, 120 insertions, 127 deletions
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -103,7 +103,10 @@ _elstr_alloc(const char *instr, int syl)
elstr->len = wlen;
elstr->sylmap = NULL;
+ elstr->nsyl = 0;
+ elstr->acc_syl = 0;
+ elstr->acc_pos = 0;
if (syl)
_elstr_syllabize(elstr);
-
+
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
@@ -141,4 +144,6 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
unsigned *wp;
+ if (src->len == 0)
+ return;
wp = realloc(dest->str,
sizeof(dest->str[0]) * (dest->len + src->len));
@@ -220,4 +225,33 @@ _elstr_init()
#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
+static SCM
+force_elstr(struct elstr **ep, SCM scm, int sylopt,
+ int arg, const char *func_name)
+{
+ struct elstr *elstr;
+
+ if (scm_is_elstr(scm)) {
+ elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ SCM newscm;
+ char *str;
+
+ SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
+ str = scm_to_locale_string(scm);
+ newscm = _elstr_alloc(str, sylopt);
+ free(str);
+ if (newscm == SCM_EOL)
+ scm_misc_error(func_name,
+ "Invalid input string: ~S",
+ scm_list_1(scm));
+ scm = newscm;
+ elstr = (struct elstr*) SCM_CDR(newscm);
+ }
+ if (ep)
+ *ep = elstr;
+ return scm;
+}
+
+
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
(SCM string),
@@ -238,12 +272,5 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- scm = _elstr_alloc(str, 1);
- free(str);
- if (scm == SCM_EOL)
- scm_misc_error(FUNC_NAME,
- "Invalid input string: ~S",
- scm_list_1(string));
- return scm;
+ return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
}
#undef FUNC_NAME
@@ -276,7 +303,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->len);
}
@@ -290,7 +315,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->nsyl);
}
@@ -306,6 +329,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
@@ -331,7 +353,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_pos);
}
@@ -345,7 +365,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_syl);
}
@@ -363,6 +381,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
@@ -399,6 +416,5 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
int len;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
@@ -424,11 +440,14 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
chgfun(elstr->str, elstr->len);
@@ -483,11 +502,14 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
for (i = 0; i < elstr->len; i++)
@@ -527,7 +549,11 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
int dstate;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_uint(n);
if (num > elstr->nsyl)
@@ -543,5 +569,5 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
@@ -599,7 +625,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
int num;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
if (num < 0)
@@ -685,11 +710,14 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
size_t wclen;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
@@ -727,8 +755,11 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
SCM scm;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
len = scm_to_uint(l);
@@ -743,5 +774,5 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
@@ -782,31 +813,11 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
#define FUNC_NAME s_scm_elstr_index
{
- struct elstr *elstr;
- unsigned *wc, *wtmp = NULL, *p;
- unsigned wlen;
-
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (scm_is_elstr(needle)) {
- struct elstr *ep = (struct elstr*) SCM_CDR(needle);
- wc = ep->str;
- wlen = ep->len;
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
- str = scm_to_locale_string(needle);
- if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
- free(str);
- scm_misc_error(FUNC_NAME,
- "Invalid needle string: ~S",
- scm_list_1(needle));
- }
- free(str);
- wc = wtmp;
- }
- p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
- free(wtmp);
+ struct elstr *elstr, *ep;
+ unsigned *p;
+
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
+ p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
+ ep->str, ep->len);
if (p)
return scm_from_int(p - elstr->str);
@@ -816,20 +827,6 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
static int
-_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name)
+_suffix_matches(struct elstr *elstr, struct elstr *ep)
{
- struct elstr *ep;
-
- if (scm_is_elstr(suffix)) {
- ep = (struct elstr*) SCM_CDR(suffix);
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
- str = scm_to_locale_string(suffix);
- scm = _elstr_alloc(str, 0);
- free(str);
- ep = (struct elstr*) SCM_CDR(scm);
- }
return (ep->len < elstr->len &&
memcmp(elstr->str + elstr->len - ep->len,
@@ -844,14 +841,15 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
#define FUNC_NAME s_scm_elstr_suffix_p
{
- struct elstr *elstr;
+ struct elstr *elstr, *ep;
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME))
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return suffix;
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
- if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME))
+ force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return val;
}
@@ -870,20 +868,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
+ struct elstr *elt;
SCM val = SCM_CAR(rest);
- if (scm_is_elstr(val)) {
- struct elstr *elt = (struct elstr*) SCM_CDR(val);
- _elstr_concat(elstr, elt, FUNC_NAME);
- } else if (scm_is_string(val)) {
- char *s = scm_to_locale_string(val);
- if (s[0]) {
- SCM tmp = _elstr_alloc(s, 0);
- free(s);
- _elstr_concat(elstr,
- (struct elstr*) SCM_CDR(tmp),
- FUNC_NAME);
- } else
- free(s);
- } else
- scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
+
+ force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
+ _elstr_concat(elstr, elt, FUNC_NAME);
}
_elstr_syllabize(elstr);
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index e3ed4b5..f916d1c 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -22,18 +22,24 @@
(define-public (elstr-trim word n)
- (cond
- ((> n 0)
- (elstr-slice word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice word 0 (+ (elstr-length word) n)))
- (else
- word)))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice word 0 (+ (elstr-length word) n)))
+ (else
+ word))))
(define-public (elstr-trim! word n)
- (cond
- ((> n 0)
- (elstr-slice! word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice! word 0 (+ (elstr-length word) n)))))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice! word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice! word 0 (+ (elstr-length word) n))))))

Return to:

Send suggestions and report system problems to the System administrator.