about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org.ua> 2011-06-06 17:37:28 +0300
committer Sergey Poznyakoff <gray@gnu.org.ua> 2011-06-06 17:37:28 +0300
commit c598bc4dee28a9480ca9b7e9d5a20d75a5baccda (patch)
tree c768bf56be6fda3e5cac9ed3f56d5289be5905c9
parent 8a7e9b26e073731c82c02594d081c57aa474eade (diff)
download ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.gz
ellinika-c598bc4dee28a9480ca9b7e9d5a20d75a5baccda.tar.bz2
Rewrite all elstr- functions to take either elstr or string as arguments.
-rw-r--r-- data/dbverb.struct 17
-rw-r--r-- src/ellinika/elmorph.c 217
-rw-r--r-- src/ellinika/elmorph.scm4 30
3 files changed, 137 insertions, 127 deletions
diff --git a/data/dbverb.struct b/data/dbverb.struct
index 4ab6a37..af9d236 100644
--- a/data/dbverb.struct
+++ b/data/dbverb.struct
@@ -1,2 +1,19 @@
+-- This file is part of Ellinika
+-- Copyright (C) 2004, 2005, 2007 Sergey Poznyakoff
+--
+-- Ellinika is free software; you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation; either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- Ellinika is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with this program. If not, see <http://www.gnu.org/licenses/>.
+--
set names utf8;
+
DROP TABLE IF EXISTS verbflect;
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -104,5 +104,8 @@ _elstr_alloc(const char *instr, int syl)
elstr->sylmap = NULL;
+ elstr->nsyl = 0;
+ elstr->acc_syl = 0;
+ elstr->acc_pos = 0;
if (syl)
_elstr_syllabize(elstr);
-
+
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
@@ -142,2 +145,4 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
+ if (src->len == 0)
+ return;
wp = realloc(dest->str,
@@ -221,2 +226,31 @@ _elstr_init()
+static SCM
+force_elstr(struct elstr **ep, SCM scm, int sylopt,
+ int arg, const char *func_name)
+{
+ struct elstr *elstr;
+
+ if (scm_is_elstr(scm)) {
+ elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ SCM newscm;
+ char *str;
+
+ SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
+ str = scm_to_locale_string(scm);
+ newscm = _elstr_alloc(str, sylopt);
+ free(str);
+ if (newscm == SCM_EOL)
+ scm_misc_error(func_name,
+ "Invalid input string: ~S",
+ scm_list_1(scm));
+ scm = newscm;
+ elstr = (struct elstr*) SCM_CDR(newscm);
+ }
+ if (ep)
+ *ep = elstr;
+ return scm;
+}
+
+
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
@@ -239,10 +273,3 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- scm = _elstr_alloc(str, 1);
- free(str);
- if (scm == SCM_EOL)
- scm_misc_error(FUNC_NAME,
- "Invalid input string: ~S",
- scm_list_1(string));
- return scm;
+ return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
}
@@ -277,5 +304,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->len);
@@ -291,5 +316,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->nsyl);
@@ -307,4 +330,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
@@ -332,5 +354,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_pos);
@@ -346,5 +366,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_syl);
@@ -364,4 +382,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
@@ -400,4 +417,3 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
@@ -425,9 +441,12 @@ _elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
@@ -484,9 +503,12 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
@@ -528,5 +550,9 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_uint(n);
@@ -544,3 +570,3 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
@@ -600,5 +626,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
@@ -686,9 +711,12 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
- else {
- scm = _elstr_dup(elstr);
- elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
+ scm = _elstr_dup(elstr);
+ elstr = (struct elstr*) SCM_CDR(scm);
+ }
}
@@ -728,6 +756,9 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ if (destructive) {
+ SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
@@ -744,3 +775,3 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
@@ -783,29 +814,9 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
{
- struct elstr *elstr;
- unsigned *wc, *wtmp = NULL, *p;
- unsigned wlen;
-
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (scm_is_elstr(needle)) {
- struct elstr *ep = (struct elstr*) SCM_CDR(needle);
- wc = ep->str;
- wlen = ep->len;
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
- str = scm_to_locale_string(needle);
- if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
- free(str);
- scm_misc_error(FUNC_NAME,
- "Invalid needle string: ~S",
- scm_list_1(needle));
- }
- free(str);
- wc = wtmp;
- }
- p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
- free(wtmp);
+ struct elstr *elstr, *ep;
+ unsigned *p;
+
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
+ p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
+ ep->str, ep->len);
if (p)
@@ -817,18 +828,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
static int
-_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name)
+_suffix_matches(struct elstr *elstr, struct elstr *ep)
{
- struct elstr *ep;
-
- if (scm_is_elstr(suffix)) {
- ep = (struct elstr*) SCM_CDR(suffix);
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
- str = scm_to_locale_string(suffix);
- scm = _elstr_alloc(str, 0);
- free(str);
- ep = (struct elstr*) SCM_CDR(scm);
- }
return (ep->len < elstr->len &&
@@ -845,7 +842,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
{
- struct elstr *elstr;
+ struct elstr *elstr, *ep;
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME))
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return suffix;
@@ -854,3 +851,4 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
SCM val = SCM_CAR(rest);
- if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME))
+ force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return val;
@@ -871,18 +869,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
+ struct elstr *elt;
SCM val = SCM_CAR(rest);
- if (scm_is_elstr(val)) {
- struct elstr *elt = (struct elstr*) SCM_CDR(val);
- _elstr_concat(elstr, elt, FUNC_NAME);
- } else if (scm_is_string(val)) {
- char *s = scm_to_locale_string(val);
- if (s[0]) {
- SCM tmp = _elstr_alloc(s, 0);
- free(s);
- _elstr_concat(elstr,
- (struct elstr*) SCM_CDR(tmp),
- FUNC_NAME);
- } else
- free(s);
- } else
- scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
+
+ force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
+ _elstr_concat(elstr, elt, FUNC_NAME);
}
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index e3ed4b5..f916d1c 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -23,16 +23,22 @@
(define-public (elstr-trim word n)
- (cond
- ((> n 0)
- (elstr-slice word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice word 0 (+ (elstr-length word) n)))
- (else
- word)))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice word 0 (+ (elstr-length word) n)))
+ (else
+ word))))
(define-public (elstr-trim! word n)
- (cond
- ((> n 0)
- (elstr-slice! word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice! word 0 (+ (elstr-length word) n)))))
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
+ (cond
+ ((> n 0)
+ (elstr-slice! word n (- (elstr-length word) n)))
+ ((< n 0)
+ (elstr-slice! word 0 (+ (elstr-length word) n))))))

Return to:

Send suggestions and report system problems to the System administrator.