/* This file is part of Ellinika project.
Copyright (C) 2011 Sergey Poznyakoff
Ellinika is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Ellinika is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include "utf8.h"
#include "elmorph.h"
struct elstr {
unsigned *str; /* UTF-8 string */
size_t len; /* Its length */
struct phoneme *phoneme; /* Phonetical map*/
unsigned phoneme_count; /* Number of phonemes */
struct syllable *sylmap; /* Syllable map (nsyl elements) */
unsigned nsyl; /* Number of syllables. */
unsigned acc_syl; /* Number of the accented syllable
(1-based, from the last syllable) */
unsigned acc_pos; /* Number of the accented character
(0-based, from str[0]) */
};
scm_t_bits _elstr_tag;
static void
_elstr_syllabize(struct elstr *elstr, const char *func_name)
{
unsigned i;
free(elstr->phoneme);
free(elstr->sylmap);
if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count,
elstr->str, elstr->len))
scm_misc_error(func_name,
"cannot create phonetic map: ~S",
scm_from_int(errno));
if (syllable_map(&elstr->sylmap, &elstr->nsyl,
elstr->phoneme, elstr->phoneme_count))
scm_misc_error(func_name,
"cannot create syllable map: ~S",
scm_from_int(errno));
for (i = elstr->nsyl; i > 0; i--) {
if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK)
break;
}
elstr->acc_syl = i;
for (i = 0; i < elstr->len; i++)
if (elchr_getaccent(elstr->str[i]))
break;
elstr->acc_pos = i;
}
static void
invalidate_maps(struct elstr *elstr)
{
free(elstr->sylmap);
elstr->sylmap = NULL;
elstr->nsyl = 0;
free(elstr->phoneme);
elstr->phoneme = NULL;
elstr->phoneme_count = 0;
elstr->acc_pos = 0;
elstr->acc_syl = 0;
}
static SCM
_elstr_alloc_empty(struct elstr **pelstr)
{
struct elstr *elstr;
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
memset(elstr, 0, sizeof(*elstr));
*pelstr = elstr;
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
_elstr_alloc(const char *instr, int syl, const char *func_name)
{
struct elstr *elstr;
unsigned *wptr;
size_t wlen;
if (utf8_mbstr_to_wc(instr, &wptr, &wlen))
return SCM_EOL;
elstr = scm_gc_malloc(sizeof(*elstr), "Elstr");
elstr->str = wptr;
elstr->len = wlen;
elstr->sylmap = NULL;
elstr->nsyl = 0;
elstr->acc_syl = 0;
elstr->acc_pos = 0;
elstr->phoneme = 0;
elstr->phoneme_count = 0;
if (syl)
_elstr_syllabize(elstr, func_name);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
_elstr_dup(struct elstr *elstr)
{
struct elstr *elnew;
elnew = scm_gc_malloc(sizeof(*elstr), "Elstr");
elnew->str = calloc(elstr->len, sizeof(elnew->str[0]));
if (!elnew->str)
scm_memory_error("_elstr_dup");
memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
elnew->len = elstr->len;
if (elstr->phoneme) {
elnew->phoneme = calloc(elstr->phoneme_count,
sizeof(elnew->phoneme[0]));
if (!elnew->phoneme) {
free(elnew->str);
scm_memory_error("_elstr_dup");
}
memcpy(elnew->phoneme, elstr->phoneme,
sizeof(elstr->phoneme[0]) * elstr->phoneme_count);
} else
elnew->phoneme = NULL;
elnew->phoneme_count = elstr->phoneme_count;
if (elstr->sylmap) {
elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0]));
if (!elnew->sylmap) {
free(elnew->str);
scm_memory_error("_elstr_dup");
}
memcpy(elnew->sylmap, elstr->sylmap,
sizeof(elstr->sylmap[0]) * elstr->nsyl);
} else
elnew->sylmap = NULL;
elnew->nsyl = elstr->nsyl;
elnew->acc_syl = elstr->acc_syl;
elnew->acc_pos = elstr->acc_pos;
SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
}
static void
_elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
{
unsigned *wp;
if (src->len == 0)
return;
wp = realloc(dest->str,
sizeof(dest->str[0]) * (dest->len + src->len));
if (!wp)
scm_memory_error(func_name);
dest->str = wp;
memcpy(dest->str + dest->len,
src->str,
sizeof(src->str[0]) * src->len);
dest->len += src->len;
}
static scm_sizet
_elstr_free(SCM smob)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
free(elstr->str);
free(elstr->sylmap);
free(elstr->phoneme);
scm_gc_free(elstr, sizeof(struct elstr), "elstr");
return 0;
}
static int
_elstr_print(SCM smob, SCM port, scm_print_state *pstate)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
int i, j;
char *s;
scm_puts("#sylmap) {
scm_puts("``", port);
for (i = 0; i < elstr->nsyl; i++) {
size_t start = elstr->sylmap[i].char_start;
if (i)
scm_puts("-", port);
if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
scm_puts("[", port);
for (j = 0; j < elstr->sylmap[i].char_count; j++) {
char r[6];
int n;
n = utf8_wctomb(r, elstr->str[start+j]);
if (n == -1)
continue;
r[n] = 0;
scm_puts(r, port);
}
if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
scm_puts("]", port);
}
} else {
scm_puts("[NS] ``", port);
for (i = j = 0; i < elstr->len; i++) {
char r[6];
int n;
n = utf8_wctomb(r, elstr->str[i]);
if (n == -1)
continue;
r[n] = 0;
scm_puts(r, port);
}
}
scm_puts("''>", port);
return 1;
}
static void
_elstr_init()
{
_elstr_tag = scm_make_smob_type("Elstr", sizeof(struct elstr));
scm_set_smob_free(_elstr_tag, _elstr_free);
scm_set_smob_print(_elstr_tag, _elstr_print);
}
#define scm_is_elstr(s) (!SCM_IMP(s) && SCM_CELL_TYPE(s) == _elstr_tag)
static SCM
force_elstr(struct elstr **ep, SCM scm, int sylopt,
int arg, const char *func_name)
{
struct elstr *elstr;
if (scm_is_elstr(scm)) {
elstr = (struct elstr*) SCM_CDR(scm);
} else {
SCM newscm;
char *str;
SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
str = scm_to_locale_string(scm);
newscm = _elstr_alloc(str, sylopt, func_name);
free(str);
if (newscm == SCM_EOL)
scm_misc_error(func_name,
"Invalid input string: ~S",
scm_list_1(scm));
scm = newscm;
elstr = (struct elstr*) SCM_CDR(newscm);
}
if (ep)
*ep = elstr;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_p, "elstr?", 1, 0, 0,
(SCM string),
"Return true if STRING is an elstr\n")
#define FUNC_NAME s_scm_elstr_p
{
return scm_is_elstr(string) ? SCM_BOOL_T : SCM_BOOL_F;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_string__elstr, "string->elstr", 1, 0, 0,
(SCM string),
"Create new ELSTR from STRING\n")
#define FUNC_NAME s_scm_string__elstr
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
return force_elstr(NULL, string, 1, SCM_ARG1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr__string, "elstr->string", 1, 0, 0,
(SCM el),
"Convert EL to a STRING\n")
#define FUNC_NAME s_scm_elstr__string
{
struct elstr *elstr;
char *s;
SCM scm;
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, FUNC_NAME);
elstr = (struct elstr*) SCM_CDR(el);
if (utf8_wc_to_mbstr(elstr->str, elstr->len, &s))
scm_misc_error(FUNC_NAME,
"cannot convert elstr to Scheme",
SCM_EOL);
scm = scm_from_locale_string(s);
free(s);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_length, "elstr-length", 1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_length
{
struct elstr *elstr;
force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->len);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_number_of_syllables, "elstr-number-of-syllables",
1, 0, 0,
(SCM el),
"Returns the number of characters in EL\n")
#define FUNC_NAME s_scm_elstr_number_of_syllables
{
struct elstr *elstr;
force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->nsyl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
2, 0, 0,
(SCM el, SCM n),
"Returns properties of the syllable N in EL\n")
#define FUNC_NAME s_scm_elstr_syllable_prop
{
struct elstr *elstr;
unsigned num, start;
force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
return scm_list_3(scm_from_uint(elstr->sylmap[num].char_start),
scm_from_uint(elstr->sylmap[num].char_count),
scm_from_int(elstr->sylmap[num].flags));
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0,
(SCM el),
"Return position of the accented character in EL\n")
#define FUNC_NAME s_scm_elstr_accent_position
{
struct elstr *elstr;
force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_pos);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_accented_syllable, "elstr-accented-syllable",
1, 0, 0,
(SCM el),
"Return position of the accented syllable in EL\n")
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
return scm_from_uint(elstr->acc_syl);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
2, 0, 0,
(SCM el, SCM n),
"Return Nth syllable in EL\n")
#define FUNC_NAME s_scm_elstr_accented_syllable
{
struct elstr *elstr;
char *s;
SCM scm;
unsigned num, start;
force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
if (utf8_wc_to_mbstr(elstr->str + elstr->sylmap[num].char_start,
elstr->sylmap[num].char_count,
&s))
scm_misc_error(FUNC_NAME,
"cannot convert elstr to Scheme",
SCM_EOL);
scm = scm_from_locale_string(s);
free(s);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_character, "elstr-character",
2, 0, 0,
(SCM el, SCM n),
"Return Nth character in EL\n")
#define FUNC_NAME s_scm_elstr_character
{
struct elstr *elstr;
unsigned num;
char r[6];
int len;
force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_uint(n);
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
len = utf8_wctomb(r, elstr->str[num]);
if (len <= 0)
scm_misc_error(FUNC_NAME,
"cannot convert elchr to Scheme",
SCM_EOL);
r[len] = 0;
return scm_from_locale_string(r);
}
#undef FUNC_NAME
static SCM
_elstr_chgcase(SCM el, void (*chgfun)(unsigned *, size_t),
int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
} else {
scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
}
chgfun(elstr->str, elstr->len);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_toupper, "elstr-toupper",
1, 0, 0,
(SCM el),
"Convert EL to upper case\n")
#define FUNC_NAME s_scm_elstr_toupper
{
return _elstr_chgcase(el, utf8_wc_strnupper, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_tolower, "elstr-tolower",
1, 0, 0,
(SCM el),
"Convert EL to lower case\n")
#define FUNC_NAME s_scm_elstr_tolower
{
return _elstr_chgcase(el, utf8_wc_strnlower, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_toupper_x, "elstr-toupper!",
1, 0, 0,
(SCM el),
"Convert EL to upper case (destructive)\n")
#define FUNC_NAME s_scm_elstr_toupper_x
{
return _elstr_chgcase(el, utf8_wc_strnupper, 1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_tolower_x, "elstr-tolower!",
1, 0, 0,
(SCM el),
"Convert EL to lower case (destructive)\n")
#define FUNC_NAME s_scm_elstr_tolower_x
{
return _elstr_chgcase(el, utf8_wc_strnlower, 0, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_deaccent(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
SCM scm;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
scm = SCM_UNSPECIFIED;
elstr = (struct elstr*) SCM_CDR(el);
} else {
scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
}
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
invalidate_maps(elstr);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_deaccent, "elstr-deaccent",
1, 0, 0,
(SCM el),
"Remove all accents from EL\n")
#define FUNC_NAME s_scm_elstr_deaccent
{
return _elstr_deaccent(el, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_deaccent_x, "elstr-deaccent!",
1, 0, 0,
(SCM el),
"Remove all accents from EL (desctructive)\n")
#define FUNC_NAME s_scm_elstr_deaccent_x
{
return _elstr_deaccent(el, 1, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
unsigned acc_num, num, start;
SCM scm;
unsigned pos;
struct phoneme *phoneme = NULL;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
} else
scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
num = scm_to_uint(n);
if (num == 0 | num > elstr->nsyl)
scm_misc_error(func_name,
"cannot set accent on syllable #~S: not enough syllables: ~S",
scm_list_2(n, el));
acc_num = elstr->nsyl - num;
if (destructive)
scm = SCM_UNSPECIFIED;
else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
/* Clear all accents */
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
for (i = 0; i < elstr->nsyl; i++)
elstr->sylmap[i].flags &= ~CHF_ACCENT_MASK;
for (i = 0; i < elstr->phoneme_count; i++)
elstr->phoneme[i].flags &= ~CHF_ACCENT_MASK;
start = elstr->sylmap[acc_num].phoneme_start;
pos = 0;
for (i = elstr->sylmap[acc_num].phoneme_count; i > 0; i--) {
struct phoneme *ph = elstr->phoneme + start + i - 1;
if (ph->flags & CHF_CONSONANT)
/* skip */ ;
else if (ph->flags & CHF_VOWEL) {
phoneme = ph;
break;
}
}
if (!phoneme)
scm_misc_error(func_name,
"cannot set accent on syllable #~S of ~S: "
"INTERNAL ERROR",
scm_list_2(n, el));
else if (phoneme->flags & CHF_DIPHTHONG)
pos = phoneme->start + phoneme->count - 1;
else
pos = phoneme->start;
phoneme->flags |= CHF_OXEIA;
elstr->sylmap[acc_num].flags |= CHF_OXEIA;
elstr->str[pos] = elchr_accent(elstr->str[pos], CHF_OXEIA);
elstr->acc_syl = num;
elstr->acc_pos = pos;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent, "elstr-set-accent",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth syllable of EL\n")
{
return _elstr_set_accent(el, n, 0, s_scm_elstr_set_accent);
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent_x, "elstr-set-accent!",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth syllable of EL (destructive)\n")
{
return _elstr_set_accent(el, n, 1, s_scm_elstr_set_accent_x);
}
static SCM
_elstr_set_accent_on_char(SCM el, SCM n, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
unsigned acc_num, num, len, start;
SCM scm;
int dstate;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
} else
scm = force_elstr(&elstr, el, 0, SCM_ARG1, func_name);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
num = scm_to_uint(n);
if (num > elstr->len)
scm_misc_error(func_name,
"cannot set accent on character #~S: not enough characters: ~S",
scm_list_2(el, n));
if (!elchr_isvowel(elstr->str[num]))
scm_misc_error(func_name,
"cannot set accent on character #~S: not a vowel: ~S",
scm_list_2(el, n));
if (destructive)
scm = SCM_UNSPECIFIED;
else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
/* Clear all accents */
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
elstr->str[num] = elchr_accent(elstr->str[num], CHF_OXEIA);
invalidate_maps(elstr);
_elstr_syllabize(elstr, func_name);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent_character, "elstr-set-accent-character",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth character of EL\n")
{
return _elstr_set_accent_on_char(el, n, 0,
s_scm_elstr_set_accent_character);
}
SCM_DEFINE_PUBLIC(scm_elstr_set_accent_character_x,
"elstr-set-accent-character!",
2, 0, 0,
(SCM el, SCM n),
"Set accent on Nth character of EL (destructive)\n")
{
return _elstr_set_accent_on_char(el, n, 1,
s_scm_elstr_set_accent_character_x);
}
SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask",
2, 0, 0,
(SCM el, SCM n),
"Returns properties of the Nth char in EL, as a bitmask\n")
#define FUNC_NAME s_scm_elstr_char_prop_bitmask
{
struct elstr *elstr;
int num;
force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_int(n);
if (num < 0)
num += elstr->len;
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
return scm_from_uint(elchr_flags(elstr->str[num]));
}
#undef FUNC_NAME
static struct deftab {
unsigned val;
char *sym;
} deftab[] = {
{ CHF_OXEIA, "elmorph:oxeia" },
{ CHF_PERISPWMENH, "elmorph:perispwmenh" },
{ CHF_BAREIA, "elmorph:bareia" },
{ CHF_ACCENT_MASK, "elmorph:accent-mask" },
{ CHF_TREMA, "elmorph:trema" },
{ CHF_VOWEL, "elmorph:vowel" },
{ CHF_CONSONANT, "elmorph:consonant" },
{ CHF_SEMIVOWEL, "elmorph:semivowel" },
{ CHF_PUNCT, "elmorph:punct" },
{ CHF_SYMBOL, "elmorph:symbol" },
{ CHF_MODIFIER, "elmorph:modifier" },
{ CHF_ARCHAIC, "elmorph:archaic" },
{ CHF_LOWER, "elmorph:lower" },
{ CHF_UPPER, "elmorph:upper" },
{ CHF_NUMERIC, "elmorph:numeric" },
{ CHF_DIPHTHONG, "elmorph:diphthong" },
{ CHF_IOTA, "elmorph:iota" },
};
SCM_DEFINE_PUBLIC(scm_elstr_char_phoneme, "elstr-char-phoneme",
2, 0, 0,
(SCM el, SCM n),
"Returns a phoneme code of the Nth char in EL\n")
#define FUNC_NAME s_scm_elstr_char_phoneme
{
struct elstr *elstr;
int num;
force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
num = scm_to_int(n);
if (num < 0)
num += elstr->len;
if (num >= elstr->len)
scm_misc_error(FUNC_NAME,
"cannot get character #~S: not enough characters: ~S",
scm_list_2(el, n));
return scm_from_uint(elchr_phoneme(elstr->str[num]));
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
(SCM string),
"Convert STRING to uppercase\n")
#define FUNC_NAME s_scm_utf8_toupper
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_toupper(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to upper case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_utf8_tolower, "utf8-tolower", 1, 0, 0,
(SCM string),
"Convert STRING to lowercase\n")
#define FUNC_NAME s_scm_utf8_tolower
{
char *str;
SCM scm;
SCM_ASSERT(scm_is_string(string), string, SCM_ARG1, FUNC_NAME);
str = scm_to_locale_string(string);
if (utf8_tolower(str, strlen(str)))
scm_misc_error(FUNC_NAME,
"cannot convert to lower case: ~S",
scm_list_1(string));
scm = scm_from_locale_string(str);
free(str);
return scm;
}
#undef FUNC_NAME
static SCM
_elstr_thema_aoristoy(SCM el, int destructive, const char *func_name)
{
struct elstr *elstr;
SCM scm;
unsigned *wc;
size_t wclen;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
scm = SCM_UNSPECIFIED;
} else {
scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
}
if (elmorph_thema_aoristoy(elstr->str, elstr->len, &wc, &wclen))
scm_memory_error(func_name);
free(elstr->str);
elstr->str = wc;
elstr->len = wclen;
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy, "elstr-thema-aoristoy", 1, 0, 0,
(SCM thema),
"Convert THEMA, which must be a root of present. to an aorist root\n")
#define FUNC_NAME s_scm_elstr_thema_aoristoy
{
return _elstr_thema_aoristoy(thema, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_thema_aoristoy_x, "elstr-thema-aoristoy!", 1, 0, 0,
(SCM thema),
"Convert THEMA, which must be a root of present. to an aorist root (destructive)\n")
#define FUNC_NAME s_scm_elstr_thema_aoristoy_x
{
return _elstr_thema_aoristoy(thema, 1, FUNC_NAME);
}
#undef FUNC_NAME
static SCM
_elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
{
struct elstr *elstr;
int num;
unsigned len;
SCM scm;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
} else
scm = force_elstr(&elstr, el, 0, SCM_ARG1, func_name);
invalidate_maps(elstr);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
num = scm_to_int(n);
len = scm_to_uint(l);
if (num < 0)
num += elstr->len;
if (num < 0 || num >= elstr->len || num + len > elstr->len)
scm_misc_error(func_name,
"invalid offset or length",
SCM_EOL);
if (destructive)
scm = SCM_UNSPECIFIED;
else if (scm == el) {
scm = _elstr_dup(elstr);
elstr = (struct elstr*) SCM_CDR(scm);
}
if (num)
memmove(elstr->str, elstr->str + num,
sizeof(elstr->str[0]) * len);
elstr->len = len;
_elstr_syllabize(elstr, func_name);
return scm;
}
SCM_DEFINE_PUBLIC(scm_elstr_slice, "elstr-slice",
3, 0, 0,
(SCM word, SCM off, SCM len),
"Extract LEN characters from WORD starting from position OFF\n")
#define FUNC_NAME s_scm_elstr_slice
{
return _elstr_slice(word, off, len, 0, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
3, 0, 0,
(SCM word, SCM off, SCM len),
"Extract LEN characters from WORD starting from position OFF (destructive)\n")
#define FUNC_NAME s_scm_elstr_slice_x
{
return _elstr_slice(word, off, len, 1, FUNC_NAME);
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
2, 0, 0,
(SCM word, SCM needle),
"Returns position of NEEDLE in the WORD")
#define FUNC_NAME s_scm_elstr_index
{
struct elstr *elstr, *ep;
unsigned *p;
force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
force_elstr(&ep, needle, 0, SCM_ARG2, FUNC_NAME);
p = (unsigned*)utf8_wc_strnstr(elstr->str, elstr->len,
ep->str, ep->len);
if (p)
return scm_from_int(p - elstr->str);
return SCM_BOOL_F;
}
#undef FUNC_NAME
static int
_suffix_matches(struct elstr *elstr, struct elstr *ep)
{
return (ep->len < elstr->len &&
memcmp(elstr->str + elstr->len - ep->len,
ep->str,
ep->len * sizeof(elstr->str[0])) == 0);
}
SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
2, 0, 1,
(SCM word, SCM suffix, SCM rest),
"Return #t if WORDS ends with SUFFIX")
#define FUNC_NAME s_scm_elstr_suffix_p
{
struct elstr *elstr, *ep;
force_elstr(&elstr, word, 0, SCM_ARG1, FUNC_NAME);
force_elstr(&ep, suffix, 0, SCM_ARG2, FUNC_NAME);
if (_suffix_matches(elstr, ep))
return suffix;
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
SCM val = SCM_CAR(rest);
force_elstr(&ep, val, 0, SCM_ARGn, FUNC_NAME);
if (_suffix_matches(elstr, ep))
return val;
}
return SCM_BOOL_F;
}
#undef FUNC_NAME
SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
0, 0, 1,
(SCM rest),
"Concatenates arguments.\n")
#define FUNC_NAME s_scm_elstr_append
{
SCM ret = _elstr_alloc("", 0, FUNC_NAME);
struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
struct elstr *elt;
SCM val = SCM_CAR(rest);
force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
_elstr_concat(elstr, elt, FUNC_NAME);
}
_elstr_syllabize(elstr, FUNC_NAME);
return ret;
}
#undef FUNC_NAME
static SCM
elmorph_scm_from_phoneme(struct phoneme *phoneme)
{
return scm_list_4(scm_from_int(phoneme->code),
scm_from_uint(phoneme->start),
scm_from_uint(phoneme->count),
scm_from_bool(phoneme->flags));
}
SCM_DEFINE_PUBLIC(scm_elstr__phonetic_map, "elstr->phonetic-map",
1, 0, 0,
(SCM word),
"Converts WORD to a phonetic map.\n")
#define FUNC_NAME s_scm_elstr__phonetic_map
{
struct elstr *elstr;
struct phoneme *phmap;
size_t phlen, i;
SCM head = SCM_EOL, tail = SCM_EOL;
force_elstr(&elstr, word, 1, SCM_ARG1, FUNC_NAME);
phmap = elstr->phoneme;
phlen = elstr->phoneme_count;
for (i = 0; i < phlen; i++) {
SCM elt = scm_cons(elmorph_scm_from_phoneme(phmap + i),
SCM_EOL);
if (scm_is_null(head))
head = tail = elt;
else {
SCM_SETCDR(tail, elt);
tail = elt;
}
}
free(phmap);
return head;
}
#undef FUNC_NAME
void
scm_init_ellinika_elmorph_module()
{
int i;
_elstr_init();
for (i = 0; i < sizeof(deftab)/sizeof(deftab[0]); i++) {
scm_c_define(deftab[i].sym, scm_from_uint(deftab[i].val));
scm_c_export(deftab[i].sym, NULL);
}
#include "elmorph.x"
}