aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--data/dbverb.struct17
-rw-r--r--src/ellinika/elmorph.c185
-rw-r--r--src/ellinika/elmorph.scm410
3 files changed, 111 insertions, 101 deletions
diff --git a/data/dbverb.struct b/data/dbverb.struct
index 4ab6a37..af9d236 100644
--- a/data/dbverb.struct
+++ b/data/dbverb.struct
@@ -1,7 +1,24 @@
+-- This file is part of Ellinika
+-- Copyright (C) 2004, 2005, 2007 Sergey Poznyakoff
+--
+-- Ellinika is free software; you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation; either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- Ellinika is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+-- GNU General Public License for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with this program. If not, see <http://www.gnu.org/licenses/>.
+--
set names utf8;
+
DROP TABLE IF EXISTS verbflect;
CREATE TABLE verbflect(
ident int(32) not null, -- REL 8
sing1 varchar(32),
sing2 varchar(32),
sing3 varchar(32),
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 5785f8d..f55e010 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -99,12 +99,15 @@ _elstr_alloc(const char *instr, int syl)
return SCM_EOL;
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
elstr->str = wptr;
elstr->len = wlen;
elstr->sylmap = NULL;
+ elstr->nsyl = 0;
+ elstr->acc_syl = 0;
+ elstr->acc_pos = 0;
if (syl)
_elstr_syllabize(elstr);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
@@ -137,12 +140,14 @@ _elstr_dup(struct elstr *elstr)
static void
_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
{
unsigned *wp;
+ if (src->len == 0)
+ return;
wp = realloc(dest->str,
sizeof(dest->str[0]) * (dest->len + src->len));
if (!wp)
scm_memory_error(func_name);
dest->str = wp;
memcpy(dest->str + dest->len,
@@ -216,12 +221,41 @@ _elstr_init()
scm_set_smob_free(_elstr_tag, _elstr_free);
scm_set_smob_print(_elstr_tag, _elstr_print);
}
#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
+static SCM
+force_elstr(struct elstr **ep, SCM scm, int sylopt,
+ int arg, const char *func_name)
+{
+ struct elstr *elstr;
+
+ if (scm_is_elstr(scm)) {
+ elstr = (struct elstr*) SCM_CDR(scm);
+ } else {
+ SCM newscm;
+ char *str;
+
+ SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
+ str = scm_to_locale_string(scm);
+ newscm = _elstr_alloc(str, sylopt);
+ free(str);
+ if (newscm == SCM_EOL)
+ scm_misc_error(func_name,
+ "Invalid input string: ~S",
+ scm_list_1(scm));
+ scm = newscm;
+ elstr = (struct elstr*) SCM_CDR(newscm);
+ }
+ if (ep)
+ *ep = elstr;
+ return scm;
+}
+
+
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
(SCM string),
"Return true if STRING is an elstr\n")
#define FUNC_NAME s_scm_elstr_p
{
return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F;
@@ -234,20 +268,13 @@ SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
#define FUNC_NAME s_scm_string__elstr
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
- str = scm_to_locale_string(string);
- scm = _elstr_alloc(str, 1);
- free(str);
- if (scm == SCM_EOL)
- scm_misc_error(FUNC_NAME,
- "Invalid input string: ~S",
- scm_list_1(string));
- return scm;
+ return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
(SCM el),
"Convert EL to a STRING\n")
@@ -272,29 +299,25 @@ SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_length
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->len);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_number_of_syllables
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->nsyl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
2, 0, 0,
@@ -302,14 +325,13 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
"Returns properties of the syllable N in EL\n")
#define FUNC_NAME s_scm_elstr_syllable_prop
{
struct elstr *elstr;
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
@@ -327,29 +349,25 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
(SCM el),
"Return position of the accented character in EL\n")
#define FUNC_NAME s_scm_elstr_accent_position
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_pos);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
1, 0, 0,
(SCM el),
"Return position of the accented syllable in EL\n")
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
-
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_syl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
2, 0, 0,
@@ -359,14 +377,13 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
{
struct elstr *elstr;
char *s;
SCM scm;
unsigned num, start;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
@@ -395,14 +412,13 @@ SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
{
struct elstr *elstr;
unsigned num;
char r[6];
int len;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
@@ -420,20 +436,23 @@ static SCM
_elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
+ if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
+ }
chgfun(elstr->str, elstr->len);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_toupper, "elstr-toupper",
1, 0, 0,
@@ -479,20 +498,23 @@ static SCM
_elstr_deaccent(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
SCM scm;
+ if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
+ }
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
elstr->acc_pos = 0;
elstr->acc_syl = 0;
return scm;
}
@@ -523,15 +545,19 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
struct elstr *elstr;
unsigned i;
unsigned acc_num, num, len, start;
SCM scm;
int dstate;
+ if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
- SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+
+ SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(func_name,
"cannot set accent on syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
acc_num = elstr->nsyl - num;
@@ -539,13 +565,13 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
start = 0;
else
start = elstr->sylmap[acc_num - 1] + 1;
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
/* Clear all accents */
for (i = 0; i < elstr->len; i++)
@@ -595,15 +621,14 @@ SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
"Returns properties of the Nth char in EL, as a bitmask\n")
#define FUNC_NAME s_scm_elstr_char_prop_bitmask
{
struct elstr *elstr;
int num;
- SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
+ force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
if (num < 0)
num += elstr->len;
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
@@ -681,20 +706,23 @@ _elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
unsigned *wc;
size_t wclen;
+ if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
- if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ } else {
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
+ }
if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
scm_memory_error(func_name);
free(elstr->str);
elstr->str = wc;
elstr->len = wclen;
return scm;
@@ -723,29 +751,32 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
{
struct elstr *elstr;
int num;
unsigned len;
SCM scm;
+ if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
+ elstr = (struct elstr*) SCM_CDR(el);
+ } else
+ scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
- elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
len = scm_to_uint(l);
if (num < 0)
num += elstr->len;
if (num < 0 || num >= elstr->len || num + len > elstr->len)
scm_misc_error(func_name,
"invalid offset or length",
SCM_EOL);
if (destructive)
scm = SCM_UNSPECIFIED;
- else {
+ else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
if (num)
memmove(elstr->str, elstr->str + num,
@@ -778,84 +809,51 @@ SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
2, 0, 0,
(SCM word, SCM needle),
"")
#define FUNC_NAME s_scm_elstr_index
{
- struct elstr *elstr;
- unsigned *wc, *wtmp = NULL, *p;
- unsigned wlen;
-
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (scm_is_elstr(needle)) {
- struct elstr *ep = (struct elstr*) SCM_CDR(needle);
- wc = ep->str;
- wlen = ep->len;
- } else {
- SCM scm;
- char *str;
+ struct elstr *elstr, *ep;
+ unsigned *p;
- SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
- str = scm_to_locale_string(needle);
- if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
- free(str);
- scm_misc_error(FUNC_NAME,
- "Invalid needle string: ~S",
- scm_list_1(needle));
- }
- free(str);
- wc = wtmp;
- }
- p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
- free(wtmp);
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
+ p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
+ ep->str, ep->len);
if (p)
return scm_from_int(p - elstr->str);
return SCM_BOOL_F;
}
#undef FUNC_NAME
static int
-_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name)
+_suffix_matches(struct elstr *elstr, struct elstr *ep)
{
- struct elstr *ep;
-
- if (scm_is_elstr(suffix)) {
- ep = (struct elstr*) SCM_CDR(suffix);
- } else {
- SCM scm;
- char *str;
-
- SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
- str = scm_to_locale_string(suffix);
- scm = _elstr_alloc(str, 0);
- free(str);
- ep = (struct elstr*) SCM_CDR(scm);
- }
return (ep->len < elstr->len &&
memcmp(elstr->str + elstr->len - ep->len,
ep->str,
ep->len * sizeof(elstr->str[0])) == 0);
}
SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
2, 0, 1,
(SCM word, SCM suffix, SCM rest),
"Return #t if WORDS ends with SUFFIX")
#define FUNC_NAME s_scm_elstr_suffix_p
{
- struct elstr *elstr;
+ struct elstr *elstr, *ep;
- SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
- elstr = (struct elstr*) SCM_CDR(word);
- if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME))
+ force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
+ force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return suffix;
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
- if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME))
+ force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
+ if (_suffix_matches(elstr, ep))
return val;
}
return SCM_BOOL_F;
}
#undef FUNC_NAME
@@ -866,28 +864,17 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
#define FUNC_NAME s_scm_elstr_append
{
SCM ret = _elstr_alloc("", 0);
struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
+ struct elstr *elt;
SCM val = SCM_CAR(rest);
- if (scm_is_elstr(val)) {
- struct elstr *elt = (struct elstr*) SCM_CDR(val);
+
+ force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
_elstr_concat(elstr, elt, FUNC_NAME);
- } else if (scm_is_string(val)) {
- char *s = scm_to_locale_string(val);
- if (s[0]) {
- SCM tmp = _elstr_alloc(s, 0);
- free(s);
- _elstr_concat(elstr,
- (struct elstr*) SCM_CDR(tmp),
- FUNC_NAME);
- } else
- free(s);
- } else
- scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
}
_elstr_syllabize(elstr);
return ret;
}
#undef FUNC_NAME
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index e3ed4b5..f916d1c 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -18,23 +18,29 @@
(load-extension
"LIBDIR/libguile-elmorph-v-VERSION"
"scm_init_ellinika_elmorph_module")
(define-public (elstr-trim word n)
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
(cond
((> n 0)
(elstr-slice word n (- (elstr-length word) n)))
((< n 0)
(elstr-slice word 0 (+ (elstr-length word) n)))
(else
- word)))
+ word))))
(define-public (elstr-trim! word n)
+ (let ((word (if (string? word)
+ (string->elstr word)
+ word)))
(cond
((> n 0)
(elstr-slice! word n (- (elstr-length word) n)))
((< n 0)
- (elstr-slice! word 0 (+ (elstr-length word) n)))))
+ (elstr-slice! word 0 (+ (elstr-length word) n))))))

Return to:

Send suggestions and report system problems to the System administrator.