/* This file is part of Ellinika project.
Copyright (C) 2011 Sergey Poznyakoff
Ellinika is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Ellinika is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include "utf8.h"
#include "elmorph.h"
struct elstr {
unsigned *str; /* UTF-8 string */
size_t len; /* Its length */
unsigned nsyl; /* Number of syllables. */
unsigned *sylmap; /* Syllable map (nsyl elements) */
unsigned acc_syl; /* Number of the accented syllable
(1-based, from the last syllable) */
unsigned acc_pos; /* Number of the accented character
(0-based, from str[0]) */
};
scm_t_bits _elstr_tag;
static void
_elstr_syllabize(struct elstr *elstr)
{
unsigned *sylmap;
unsigned i, nsyl = 0, accchr = 0;
int accsyl = -1;
int dstate = 0;
int acc = 0;
if (!elstr->sylmap) {
elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0]));
if (!elstr->sylmap)
scm_memory_error("_elstr_syllabize");
}
sylmap = elstr->sylmap;
for (i = 0; i < elstr->len; i++) {
int nstate;
if (elchr_getaccent(elstr->str[i])) {
accsyl = nsyl;
accchr = i;
}
nstate = elchr_diphthong(elstr->str[i], dstate);
if (nstate)
/* skip */;
else if (dstate)
sylmap[nsyl++] = i - 1;
else if (elchr_isvowel(elstr->str[i]))
sylmap[nsyl++] = i;
dstate = nstate;
}
if (dstate)
sylmap[nsyl++] = i - 1;
else if (nsyl)
sylmap[nsyl-1] = i - 1;
elstr->nsyl = nsyl;
elstr->acc_pos = accchr;
elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0;
}
static SCM
_elstr_alloc_empty(struct elstr **pelstr)
{
struct elstr *elstr;
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
memset(elstr, 0, sizeof(*elstr));
*pelstr = elstr;
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
_elstr_alloc(const char *instr, int syl)
{
struct elstr *elstr;
unsigned *wptr;
size_t wlen;
if (utf8_mbstr_to_wc(instr, &wptr, &wlen))
return SCM_EOL;
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
elstr->str = wptr;
elstr->len = wlen;
elstr->sylmap = NULL;
if (syl)
_elstr_syllabize(elstr);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
_elstr_dup(struct elstr *elstr)
{
struct elstr *elnew;
elnew = scm_gc_malloc(sizeof(*elstr), "Elstr");
elnew->str = calloc(elstr->len, sizeof(elnew->str[0]));
if (!elnew->str)
scm_memory_error("_elstr_dup");
if (elstr->sylmap) {
elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0]));
if (!elnew->sylmap) {
free(elnew->str);
scm_memory_error("_elstr_dup");
}
} else
elnew->sylmap = NULL;
memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
elnew->len = elstr->len;
elnew->nsyl = elstr->nsyl;
memcpy(elnew->sylmap, elstr->sylmap,
sizeof(elstr->sylmap[0]) * elstr->nsyl);
elnew->acc_syl = elstr->acc_syl;
elnew->acc_pos = elstr->acc_pos;
SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
}
static void
_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
{
unsigned *wp;
wp = realloc(dest->str,
sizeof(dest->str[0]) * (dest->len + src->len));
if (!wp)
scm_memory_error(func_name);
dest->str = wp;
memcpy(dest->str + dest->len,
src->str,
sizeof(src->str[0]) * src->len);
dest->len += src->len;
}
static scm_sizet
_elstr_free(SCM smob)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
free(elstr->str);
free(elstr->sylmap);
scm_gc_free(elstr, sizeof(struct elstr), "elstr");
return 0;
}
static int
_elstr_print(SCM smob, SCM port, scm_print_state *pstate)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
int i, j, an;
char *s;
scm_puts("#sylmap) {
scm_puts("``", port);
an = elstr->nsyl - elstr->acc_syl;
if (an == 0)
scm_puts("[", port);
for (i = j = 0; i < elstr->len; i++) {
char r[6];
int n;
if (i == elstr->sylmap[j] + 1) {
if (j == an)
scm_puts("]", port);
scm_puts("-", port);
if (++j == an)
scm_puts("[", port);
}
n = utf8_wctomb(r, elstr->str[i]);
if (n == -1)
continue;
r[n] = 0;
scm_puts(r, port);
}
if (j == an)
scm_puts("]", port);
} else {
scm_puts("[NS] ``", port);
for (i = j = 0; i < elstr->len; i++) {
char r[6];
int n;
n = utf8_wctomb(r, elstr->str[i]);
if (n == -1)
continue;
r[n] = 0;
scm_puts(r, port);
}
}
scm_puts("''>", port);
return 1;
}
static void
_elstr_init()
{
_elstr_tag = scm_make_smob_type("Elstr", sizeof(struct elstr));
scm_set_smob_free(_elstr_tag, _elstr_free);
scm_set_smob_print(_elstr_tag, _elstr_print);
}
#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
(SCM string),
"Return true if STRING is an elstr\n")
#define FUNC_NAME s_scm_elstr_p
{
return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
(SCM string),
"Create new ELSTR from STRING\n")
#define FUNC_NAME s_scm_string__elstr
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
scm = _elstr_alloc(str, 1);
free(str);
if (scm == SCM_EOL)
scm_misc_error(FUNC_NAME,
"Invalid input string: ~S",
scm_list_1(string));
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
(SCM el),
"Convert EL to a STRING\n")
#define FUNC_NAME s_scm_elstr__string
{
struct elstr *elstr;
char *s;
SCM scm;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
if (utf8_wc_to_mbstr(elstr->str, elstr->len, &s))
scm_misc_error(FUNC_NAME,
"cannot convert elstr to Scheme",
SCM_EOL);
scm = scm_from_locale_string(s);
free(s);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_length
{
struct elstr *elstr;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
return scm_from_uint(elstr->len);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_number_of_syllables
{
struct elstr *elstr;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
return scm_from_uint(elstr->nsyl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
2, 0, 0,
(SCM el, SCM n),
"Returns properties of the syllable N in EL\n")
#define FUNC_NAME s_scm_elstr_syllable_prop
{
struct elstr *elstr;
unsigned num, start;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
if (num == 0)
start = 0;
else
start = elstr->sylmap[num - 1] + 1;
return scm_cons(scm_from_uint(start),
scm_from_uint(elstr->sylmap[num]));
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
(SCM el),
"Return position of the accented character in EL\n")
#define FUNC_NAME s_scm_elstr_accent_position
{
struct elstr *elstr;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
return scm_from_uint(elstr->acc_pos);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
1, 0, 0,
(SCM el),
"Return position of the accented syllable in EL\n")
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
return scm_from_uint(elstr->acc_syl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
2, 0, 0,
(SCM el, SCM n),
"Return Nth syllable in EL\n")
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
char *s;
SCM scm;
unsigned num, start;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
if (num == 0)
start = 0;
else
start = elstr->sylmap[num - 1] + 1;
if (utf8_wc_to_mbstr(elstr->str + start,
elstr->sylmap[num] - start + 1,
&s))
scm_misc_error(FUNC_NAME,
"cannot convert elstr to Scheme",
SCM_EOL);
scm = scm_from_locale_string(s);
free(s);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
2, 0, 0,
(SCM el, SCM n),
"Return Nth character in EL\n")
#define FUNC_NAME s_scm_elstr_character
{
struct elstr *elstr;
unsigned num;
char r[6];
int len;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
len = utf8_wctomb(r, elstr->str[num]);
if (len <= 0)
scm_misc_error(FUNC_NAME,
"cannot convert elchr to Scheme",
SCM_EOL);
r[len] = 0;
return scm_from_locale_string(r);
}
#undef FUNC_NAME
static SCM
_elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
if (destructive)
scm = SCM_UNSPECIFIED;
else {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
chgfun(elstr->str, elstr->len);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_toupper, "elstr-toupper",
1, 0, 0,
(SCM el),
"Convert EL to upper case\n")
#define FUNC_NAME s_scm_elstr_toupper
{
return _elstr_chgcase(el, utf8_wc_strnupper, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_tolower, "elstr-tolower",
1, 0, 0,
(SCM el),
"Convert EL to lower case\n")
#define FUNC_NAME s_scm_elstr_tolower
{
return _elstr_chgcase(el, utf8_wc_strnlower, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_toupper_x, "elstr-toupper!",
1, 0, 0,
(SCM el),
"Convert EL to upper case (destructive)\n")
#define FUNC_NAME s_scm_elstr_toupper_x
{
return _elstr_chgcase(el, utf8_wc_strnupper, 1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_tolower_x, "elstr-tolower!",
1, 0, 0,
(SCM el),
"Convert EL to lower case (destructive)\n")
#define FUNC_NAME s_scm_elstr_tolower_x
{
return _elstr_chgcase(el, utf8_wc_strnlower, 0, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_deaccent(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
SCM scm;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
if (destructive)
scm = SCM_UNSPECIFIED;
else {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
elstr->acc_pos = 0;
elstr->acc_syl = 0;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_deaccent, "elstr-deaccent",
1, 0, 0,
(SCM el),
"Remove all accents from EL\n")
#define FUNC_NAME s_scm_elstr_deaccent
{
return _elstr_deaccent(el, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_deaccent_x, "elstr-deaccent!",
1, 0, 0,
(SCM el),
"Remove all accents from EL (desctructive)\n")
#define FUNC_NAME s_scm_elstr_deaccent_x
{
return _elstr_deaccent(el, 1, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
unsigned acc_num, num, len, start;
SCM scm;
int dstate;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(func_name,
"cannot set accent on syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
acc_num = elstr->nsyl - num;
if (acc_num == 0)
start = 0;
else
start = elstr->sylmap[acc_num - 1] + 1;
if (destructive)
scm = SCM_UNSPECIFIED;
else {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
/* Clear all accents */
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
len = elstr->sylmap[acc_num] - start + 1;
dstate = 0;
for (i = start; i <= start + len; i++) {
int nstate;
if (!elchr_isvowel(elstr->str[i])) {
if (dstate) {
--i;
break;
}
continue;
}
nstate = elchr_diphthong(elstr->str[i], dstate);
if (!nstate)
break;
dstate = nstate;
}
elstr->str[i] = elchr_accent(elstr->str[i], CHF_OXEIA);
elstr->acc_syl = num;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent, "elstr-set-accent",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth syllable of EL\n")
{
return _elstr_set_accent(el, n, 0, s_scm_elstr_set_accent);
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent_x, "elstr-set-accent!",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth syllable of EL (destructive)\n")
{
return _elstr_set_accent(el, n, 1, s_scm_elstr_set_accent_x);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
2, 0, 0,
(SCM el, SCM n),
"Returns properties of the Nth char in EL, as a bitmask\n")
#define FUNC_NAME s_scm_elstr_char_prop_bitmask
{
struct elstr *elstr;
int num;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
if (num < 0)
num += elstr->len;
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
return scm_from_uint(elchr_flags(elstr->str[num]));
}
#undef FUNC_NAME
static struct deftab {
unsigned val;
char *sym;
} deftab[] = {
{ CHF_OXEIA, "elmorph:oxeia" },
{ CHF_PERISPWMENH, "elmorph:perispwmenh" },
{ CHF_BAREIA, "elmorph:bareia" },
{ CHF_ACCENT_MASK, "elmorph:accent-mask" },
{ CHF_TREMA, "elmorph:trema" },
{ CHF_VOWEL, "elmorph:vowel" },
{ CHF_CONSONANT, "elmorph:consonant" },
{ CHF_SEMIVOWEL, "elmorph:semivowel" },
{ CHF_PUNCT, "elmorph:punct" },
{ CHF_SYMBOL, "elmorph:symbol" },
{ CHF_MODIFIER, "elmorph:modifier" },
{ CHF_ARCHAIC, "elmorph:archaic" },
{ CHF_LOWER, "elmorph:lower" },
{ CHF_UPPER, "elmorph:upper" },
{ CHF_NUMERIC, "elmorph:numeric" },
{ CHF_DIPH1, "elmorph:diph1" },
{ CHF_DIPH2, "elmorph:diph2" }
};
SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
(SCM string),
"Convert STRING to uppercase\n")
#define FUNC_NAME s_scm_utf8_toupper
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_toupper(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to upper case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
(SCM string),
"Convert STRING to lowercase\n")
#define FUNC_NAME s_scm_utf8_tolower
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_tolower(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to lower case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
static SCM
_elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
unsigned *wc;
size_t wclen;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
if (destructive)
scm = SCM_UNSPECIFIED;
else {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
scm_memory_error(func_name);
free(elstr->str);
elstr->str = wc;
elstr->len = wclen;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy, "elstr-thema-aoristoy", 1, 0, 0,
(SCM thema),
"Convert THEMA, which must be a root of present. to an aorist root\n")
#define FUNC_NAME s_scm_elstr_thema_aoristoy
{
return _elstr_thema_aoristoy(thema, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy_x, "elstr-thema-aoristoy!", 1, 0, 0,
(SCM thema),
"Convert THEMA, which must be a root of present. to an aorist root (destructive)\n")
#define FUNC_NAME s_scm_elstr_thema_aoristoy_x
{
return _elstr_thema_aoristoy(thema, 1, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
{
struct elstr *elstr;
int num;
unsigned len;
SCM scm;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
elstr = (struct elstr*) SCM_CDR(el);
num = scm_to_int(n);
len = scm_to_uint(l);
if (num < 0)
num += elstr->len;
if (num < 0 || num >= elstr->len || num + len > elstr->len)
scm_misc_error(func_name,
"invalid offset or length",
SCM_EOL);
if (destructive)
scm = SCM_UNSPECIFIED;
else {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
if (num)
memmove(elstr->str, elstr->str + num,
sizeof(elstr->str[0]) * len);
elstr->len = len;
_elstr_syllabize(elstr);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_slice, "elstr-slice",
3, 0, 0,
(SCM word, SCM off, SCM len),
"Extract LEN characters from WORD starting from position OFF\n")
#define FUNC_NAME s_scm_elstr_slice
{
return _elstr_slice(word, off, len, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
3, 0, 0,
(SCM word, SCM off, SCM len),
"Extract LEN characters from WORD starting from position OFF (destructive)\n")
#define FUNC_NAME s_scm_elstr_slice_x
{
return _elstr_slice(word, off, len, 1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
2, 0, 0,
(SCM word, SCM needle),
"")
#define FUNC_NAME s_scm_elstr_index
{
struct elstr *elstr;
unsigned *wc, *wtmp = NULL, *p;
unsigned wlen;
SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(word);
if (scm_is_elstr(needle)) {
struct elstr *ep = (struct elstr*) SCM_CDR(needle);
wc = ep->str;
wlen = ep->len;
} else {
SCM scm;
char *str;
SCM_ASSERT(scm_is_string(needle), needle, SCM_ARG2, FUNC_NAME);
str = scm_to_locale_string(needle);
if (utf8_mbstr_to_wc(str, &wtmp, &wlen)) {
free(str);
scm_misc_error(FUNC_NAME,
"Invalid needle string: ~S",
scm_list_1(needle));
}
free(str);
wc = wtmp;
}
p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len, wc, wlen);
free(wtmp);
if (p)
return scm_from_int(p - elstr->str);
return SCM_BOOL_F;
}
#undef FUNC_NAME
static int
_suffix_matches(struct elstr *elstr, SCM suffix, int arg, const char *func_name)
{
struct elstr *ep;
if (scm_is_elstr(suffix)) {
ep = (struct elstr*) SCM_CDR(suffix);
} else {
SCM scm;
char *str;
SCM_ASSERT(scm_is_string(suffix), suffix, arg, func_name);
str = scm_to_locale_string(suffix);
scm = _elstr_alloc(str, 0);
free(str);
ep = (struct elstr*) SCM_CDR(scm);
}
return (ep->len < elstr->len &&
memcmp(elstr->str + elstr->len - ep->len,
ep->str,
ep->len * sizeof(elstr->str[0])) == 0);
}
SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
2, 0, 1,
(SCM word, SCM suffix, SCM rest),
"Return #t if WORDS ends with SUFFIX")
#define FUNC_NAME s_scm_elstr_suffix_p
{
struct elstr *elstr;
SCM_ASSERT(scm_is_elstr(word), word, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(word);
if (_suffix_matches(elstr, suffix, SCM_ARG2, FUNC_NAME))
return suffix;
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
if (_suffix_matches(elstr, val, SCM_ARGn, FUNC_NAME))
return val;
}
return SCM_BOOL_F;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
0, 0, 1,
(SCM rest),
"")
#define FUNC_NAME s_scm_elstr_append
{
SCM ret = _elstr_alloc("", 0);
struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
if (scm_is_elstr(val)) {
struct elstr *elt = (struct elstr*) SCM_CDR(val);
_elstr_concat(elstr, elt, FUNC_NAME);
} else if (scm_is_string(val)) {
char *s = scm_to_locale_string(val);
if (s[0]) {
SCM tmp = _elstr_alloc(s, 0);
free(s);
_elstr_concat(elstr,
(struct elstr*) SCM_CDR(tmp),
FUNC_NAME);
} else
free(s);
} else
scm_wrong_type_arg(FUNC_NAME, SCM_ARGn, rest);
}
_elstr_syllabize(elstr);
return ret;
}
#undef FUNC_NAME
void
scm_init_ellinika_elmorph_module()
{
int i;
_elstr_init();
for (i = 0; i < sizeof(deftab)/sizeof(deftab[0]); i++) {
scm_c_define(deftab[i].sym, scm_from_uint(deftab[i].val));
scm_c_export(deftab[i].sym, NULL);
}
#include "elmorph.x"
}