aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-10 23:04:53 +0300
committerSergey Poznyakoff <gray@gnu.org.ua>2011-06-11 00:34:20 +0300
commita1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch)
treef86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika
parent2bae7da012e2125762855ce014e63345ecbbbb18 (diff)
downloadellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz
ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2
Fix syllabification.
* configure.ac: Add AC_PROG_YACC. * src/ellinika/phoneme.y: New file. * src/ellinika/yyrename: New file. * src/ellinika/syllabificator.c: New file. * src/ellinika/.gitignore: Update. * src/ellinika/elchr.c (char_info_st): Move to header. (el_basic_ctype): (elchr_info): Remove static qualifier. Return a pointer to const. (elchr_letter,elchr_phoneme): New functions. (elchr_diphthong): Remove. * src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members. (_elstr_syllabize): Rewrite. (invalidate_maps): New static function. (_elstr_alloc): Initialize new fields, take function name as argument, for diagnostic purposes. (_elstr_print): Rewrite. (deftab): Update. (elstr-syllable-prop,elstr-syllable) (_elstr_set_accent,_elstr_set_accent_on_char): Rewrite. (elstr-char-phoneme,elstr->phonetic-map): New functions. * src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove. (CHF_DIPHTHONG): New flag. (PHON_.*): New constants. (phoneme,syllable): New structures. (char_info_st)<letter,phoneme>: New members. (elchr_info,elchr_letter) (elchr_phoneme,phoneme_map) (syllable_map): New protos. (elchr_diphthong): Remove protos. * src/ellinika/elmorph.scm4: Move public definitions to elmorph-public.scm; include it here. * src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a wrapper over elstr->soundslike. Describe Milesian numbers. * style.css (img.ellinika-img): New class. * xml/lingua.conf.in (IMAGE): New tag. * xml/pl/alfabhta.xml: Describe Milesian numbers. Various fixes. * data/dbverb.struct: fix a typo in flection. Use 'sub' theme for pas/sub/aor. * data/irregular-verbs.xml: Add more verbs. * scm/conjugator.scm: Various fixes. * scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika')
-rw-r--r--src/ellinika/.gitignore2
-rw-r--r--src/ellinika/Makefile.am23
-rw-r--r--src/ellinika/elchr.c273
-rw-r--r--src/ellinika/elmorph-public.scm106
-rw-r--r--src/ellinika/elmorph.c308
-rw-r--r--src/ellinika/elmorph.h82
-rw-r--r--src/ellinika/elmorph.scm425
-rw-r--r--src/ellinika/phoneme.y353
-rw-r--r--src/ellinika/syllabificator.c152
-rw-r--r--src/ellinika/tenses.scm38
-rw-r--r--src/ellinika/xlat.scm113
-rwxr-xr-xsrc/ellinika/yyrename97
12 files changed, 1206 insertions, 366 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore
index 9422f9a..11bf478 100644
--- a/src/ellinika/.gitignore
+++ b/src/ellinika/.gitignore
@@ -1,5 +1,7 @@
cgi.m4
cgi.scm
config.scm
elmorph.scm
elmorph.x
+phoneme.c
+phoneme.h
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
index 274eea8..b8988d4 100644
--- a/src/ellinika/Makefile.am
+++ b/src/ellinika/Makefile.am
@@ -1,8 +1,8 @@
# This file is part of Ellinika project.
-# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff
+# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff
#
# Ellinika is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
@@ -12,13 +12,20 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
guiledir=$(GUILE_SITE)/$(PACKAGE)
-guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm
+guile_DATA=\
+ xlat.scm\
+ cgi.scm\
+ i18n.scm\
+ config.scm\
+ dico.scm\
+ elmorph.scm\
+ tenses.scm
cgi.m4: Makefile
echo 'divert(-1)' > $@
echo 'changequote([,])' >> $@
echo 'changecom([;],[' >> $@
echo '])' >> $@
@@ -36,26 +43,28 @@ cgi.m4: Makefile
echo 'divert(0)dnl' >> $@
echo '@AUTOGENERATED@' >> $@
SUFFIXES = .scm4 .scm .x
.scm4.scm:
- m4 cgi.m4 $< > $@
+ m4 -I$(srcdir) cgi.m4 $< > $@
cgi.scm: cgi.scm4 cgi.m4
config.scm: config.scm4 cgi.m4
-elmorph.scm: elmorph.scm4 cgi.m4
+elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4
pkglib_LTLIBRARIES=libelmorph.la
libelmorph_la_SOURCES = \
aorist.c\
utf8.c\
elchr.c\
elmorph.c\
- elmorph.h
+ elmorph.h\
+ phoneme.y\
+ syllabificator.c
DOT_X_FILES = elmorph.x
BUILT_SOURCES = $(DOT_X_FILES)
DISTCLEANFILES = $(DOT_X_FILES)
@@ -77,7 +86,9 @@ install-data-hook:
test -z "$$dlname" && dlname='lib$$name.so'; \
$(LN_S) -f "$$dlname" libguile-$$name-v-$(VERSION).so; \
fi; \
done; \
cd $$here
-
+AM_YFLAGS = -d
+YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
+EXTRA_DIST = yyrename elmorph-public.scm
\ No newline at end of file
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c
index 3142b6f..621ac03 100644
--- a/src/ellinika/elchr.c
+++ b/src/ellinika/elchr.c
@@ -20,26 +20,13 @@
#endif
#include <errno.h>
#include <stdlib.h>
#include <libguile.h>
#include "utf8.h"
#include "elmorph.h"
-
-struct char_info_st {
- unsigned ch; /* Characters */
- int flags; /* Flags (see above) */
- unsigned base; /* for vowels - a corresponding vowel with all diacritics
- removed */
- unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase
- letter if flags & CHF_LOWER and a corresponding lowerrcase
- letter if flags & CHF_UPPER */
- unsigned numval; /* Numeric value */
- unsigned accented[3]; /* For vowels - corresponding accented variant */
- unsigned deaccent; /* For accented vowels with diaeresis - corresponding
- non-accented character */
-};
+#include "phoneme.h"
/* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */
struct char_info_st el_basic_ctype[] = {
{ 0x0300, },
{ 0x0301, },
{ 0x0302, },
@@ -171,86 +158,155 @@ struct char_info_st el_basic_ctype[] = {
{ 0x0380, },
{ 0x0381, },
{ 0x0382, },
{ 0x0383, },
{ 0x0384, CHF_MODIFIER }, /* Oxeia */
{ 0x0385, CHF_MODIFIER }, /* dialytika */
- { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */
+ { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC,
+ 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */
{ 0x0387, CHF_PUNCT }, /* ano teleia */
- { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */
- { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */
- { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */
+ { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD,
+ 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */
+ { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE,
+ 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */
+ { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF,
+ 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */
{ 0x038B, },
- { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */
+ { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC,
+ 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */
{ 0x038D, },
- { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */
- { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */
- { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */
- { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */
- { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */
- { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */
- { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */
- { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */
- { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */
- { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */
- { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */
- { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */
- { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */
- { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */
- { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */
- { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */
- { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */
- { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */
- { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */
- { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */
+ { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD,
+ 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */
+ { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE,
+ 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */
+ { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0,
+ 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */
+ { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1,
+ 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */
+ { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2,
+ 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */
+ { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3,
+ 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */
+ { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4,
+ 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */
+ { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5,
+ 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */
+ { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6,
+ 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */
+ { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7,
+ 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */
+ { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8,
+ 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */
+ { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9,
+ 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */
+ { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA,
+ 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */
+ { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB,
+ 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */
+ { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC,
+ 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */
+ { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD,
+ 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */
+ { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE,
+ 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */
+ { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF,
+ 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */
+ { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0,
+ 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */
+ { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1,
+ 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */
{ 0x03A2, },
- { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */
- { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */
- { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */
- { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */
- { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */
- { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */
- { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */
- { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */
- { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */
- { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */
- { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */
- { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */
- { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */
- { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */
- { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */
- { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */
- { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */
- { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */
- { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */
- { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */
- { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */
- { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */
- { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */
- { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */
- { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */
- { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */
- { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */
- { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */
+ { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3,
+ 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */
+ { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4,
+ 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */
+ { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5,
+ 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */
+ { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6,
+ 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */
+ { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7,
+ 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */
+ { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8,
+ 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */
+ { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9,
+ 800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */
+ { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA,
+ 0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */
+ { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB,
+ 0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */
+ { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386,
+ 0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */
+ { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388,
+ 0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */
+ { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389,
+ 0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */
+ { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A,
+ 0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */
+ { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0,
+ 0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */
+ { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391,
+ 1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */
+ { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392,
+ 2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */
+ { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393,
+ 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */
+ { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394,
+ 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */
+ { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395,
+ 5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */
+ { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396,
+ 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */
+ { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397,
+ 8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */
+ { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398,
+ 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */
+ { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399,
+ 10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */
+ { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A,
+ 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */
+ { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B,
+ 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */
+ { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C,
+ 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */
+ { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D,
+ 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */
+ { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E,
+ 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */
- { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */
- { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */
- { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */
- { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */
- { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */
- { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */
- { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */
- { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */
- { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */
- { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */
- { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */
- { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */
- { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */
- { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */
- { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */
- { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */
+ { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F,
+ 70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */
+ { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0,
+ 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */
+ { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1,
+ 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */
+ { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3,
+ 0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */
+ { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3,
+ 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */
+ { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4,
+ 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */
+ { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5,
+ 400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */
+ { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6,
+ 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */
+ { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7,
+ 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */
+ { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8,
+ 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */
+ { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9,
+ 800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */
+ { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA,
+ 0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */
+ { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB,
+ 0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */
+ { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C,
+ 0, {0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* ό */
+ { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E,
+ 0, {0, 0, 0}, 0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */
+ { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F,
+ 0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */
{ 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */
{ 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */
{ 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */
{ 0x03D2, CHF_VOWEL|CHF_UPPER, }, /* capital ypsilon with hook */
{ 0x03D3, CHF_VOWEL|CHF_OXEIA, 0x03D2 }, /* capital ypsilon with acute & hook */
{ 0x03D4, CHF_VOWEL|CHF_TREMA, 0x03D2 }, /* capital ypsilon with diaeresis & hook */
@@ -558,30 +614,44 @@ struct char_info_st el_extended_ctype[] = {
{ 0x1FFC, },
{ 0x1FFD, },
{ 0x1FFE, },
{ 0x1FFF, }
};
-static struct char_info_st *
+struct char_info_st const *
elchr_info(unsigned ch)
{
if (ch >= 0x0300 && ch <= 0x03FF)
return el_basic_ctype + ch - 0x0300;
else if (ch >= 0x1F00 && ch <= 0x1FFF)
return el_extended_ctype + ch - 0x1F00;
return NULL;
}
int
elchr_flags(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return ci ? ci->flags : 0;
}
int
+elchr_letter(unsigned ch)
+{
+ struct char_info_st const *ci = elchr_info(ch);
+ return ci ? ci->letter : 0;
+}
+
+int
+elchr_phoneme(unsigned ch)
+{
+ struct char_info_st const *ci = elchr_info(ch);
+ return ci ? ci->phoneme : 0;
+}
+
+int
elchr_isupper(unsigned ch)
{
return elchr_flags(ch) & CHF_UPPER;
}
int
@@ -651,68 +721,47 @@ elchr_isnumeric(unsigned ch)
return elchr_flags(ch) & CHF_NUMERIC;
}
unsigned
elchr_numeric_value(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0;
}
unsigned
elchr_toupper(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch;
}
unsigned
elchr_tolower(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch;
}
unsigned
elchr_base(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch;
}
unsigned
elchr_deaccent(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
if (ci && (ci->flags & CHF_ACCENT_MASK))
return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch;
return ch;
}
unsigned
elchr_accent(unsigned ch, int acc)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch;
}
-int
-elchr_diphthong(unsigned ch, int state)
-{
- struct char_info_st *ci = elchr_info(ch);
-
- if (!ci || !(ci->flags & CHF_VOWEL))
- return 0;
- switch (state) {
- case 0:
- if (ci->flags & CHF_DIPH1)
- state = 1;
- break;
- case 1:
- if (ci->flags & CHF_DIPH2)
- state = 2;
- break;
- default:
- state = 0;
- }
- return state;
-}
diff --git a/src/ellinika/elmorph-public.scm b/src/ellinika/elmorph-public.scm
new file mode 100644
index 0000000..329fe4a
--- /dev/null
+++ b/src/ellinika/elmorph-public.scm
@@ -0,0 +1,106 @@
+;;;; This file is part of Ellinika project.
+;;;; Copyright (C) 2011 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(use-modules ((srfi srfi-1)))
+
+;; Trim WORD, which may be an elstr or a string (a string is first
+;; converted with string->elstr).
+;;   N > 0: result of (elstr-slice w N (- len N))
+;;   N < 0: result of (elstr-slice w 0 (+ len N))
+;;   N = 0: the (possibly converted) word itself
+;; NOTE(review): presumably this drops N characters from the front, or
+;; |N| from the back -- confirm against the elstr-slice contract.
+(define-public (elstr-trim word n)
+  (let ((w (if (string? word) (string->elstr word) word)))
+    (cond
+     ((positive? n) (elstr-slice w n (- (elstr-length w) n)))
+     ((negative? n) (elstr-slice w 0 (+ (elstr-length w) n)))
+     (else w))))
+
+;; Variant of elstr-trim built on elstr-slice!.  WORD may be an elstr or
+;; a string (converted with string->elstr first).
+;;   N > 0: (elstr-slice! w N (- len N))
+;;   N < 0: (elstr-slice! w 0 (+ len N))
+;;   N = 0: nothing is done and the result is unspecified.
+;; NOTE(review): when WORD is a plain string the slice is applied to the
+;; freshly converted elstr, not to the caller's string -- confirm callers
+;; use the return value in that case.
+(define-public (elstr-trim! word n)
+  (let ((w (if (string? word) (string->elstr word) word)))
+    (cond
+     ((positive? n) (elstr-slice! w n (- (elstr-length w) n)))
+     ((negative? n) (elstr-slice! w 0 (+ (elstr-length w) n))))))
+
+;; Return element 0 of the phoneme descriptor PH (a list).
+(define-public (phoneme:code ph)
+  (car ph))
+
+;; Return element 1 of the phoneme descriptor PH (a list).
+(define-public (phoneme:start ph)
+  (cadr ph))
+
+;; Return element 2 of the phoneme descriptor PH (a list).
+(define-public (phoneme:count ph)
+  (caddr ph))
+
+;; Return element 3 of the phoneme descriptor PH (a list).
+(define-public (phoneme:flags ph)
+  (cadddr ph))
+
+;; Return the accent bits of phoneme PH (its flags masked with
+;; elmorph:accent-mask) when any of them is set, and #f otherwise.
+;; The bare logand result cannot be used as a predicate value: in Scheme
+;; the integer 0 counts as true, so every phoneme would look accented.
+;; Non-zero results are still returned unchanged for callers that
+;; inspect the bits.
+(define-public (phoneme:accented? ph)
+  (let ((bits (logand (phoneme:flags ph) elmorph:accent-mask)))
+    (and (not (zero? bits)) bits)))
+
+;; Return #t if the flags of phoneme PH have a bit in common with
+;; elmorph:vowel, #f otherwise.
+;; The one-argument form (= x) is always #t, so the masked value has to
+;; be tested against zero explicitly.
+(define-public (phoneme:vowel? ph)
+  (not (zero? (logand (phoneme:flags ph) elmorph:vowel))))
+
+;; Return #t if the flags of phoneme PH have a bit in common with
+;; elmorph:consonant, #f otherwise.
+;; The one-argument form (= x) is always #t, so the masked value has to
+;; be tested against zero explicitly.
+(define-public (phoneme:consonant? ph)
+  (not (zero? (logand (phoneme:flags ph) elmorph:consonant))))
+
+;; Return #t if the flags of phoneme PH have a bit in common with
+;; elmorph:diphthong, #f otherwise.
+;; The one-argument form (= x) is always #t, so the masked value has to
+;; be tested against zero explicitly.
+(define-public (phoneme:diphthong? ph)
+  (not (zero? (logand (phoneme:flags ph) elmorph:diphthong))))
+
+
+;; Association list mapping an integer phoneme code to the Latin-letter
+;; string that elstr->soundslike emits for it.  Codes without an entry
+;; are dropped from the output by elstr->soundslike.
+;; Several codes share one string: 6 and 23 give "b", 7 and 25 give "g",
+;; 8 and 24 give "d", 10 and 19 give "t".
+;; NOTE(review): the numbers presumably mirror the PHON_* constants from
+;; elmorph.h -- confirm, and keep both in sync when either changes.
+(define soundslike-transcription-list
+ '((1 . "a")
+ (2 . "e")
+ (3 . "i")
+ (4 . "o")
+ (5 . "u")
+ (6 . "b")
+ (7 . "g")
+ (8 . "d")
+ (9 . "z")
+ (10 . "t")
+ (11 . "k")
+ (12 . "l")
+ (13 . "m")
+ (14 . "n")
+ (15 . "x")
+ (16 . "p")
+ (17 . "r")
+ (18 . "s")
+ (19 . "t")
+ (20 . "f")
+ (21 . "h")
+ (22 . "P")
+ (23 . "b")
+ (24 . "d")
+ (25 . "g")
+ (26 . "sm")
+ (27 . "ts")
+ (28 . "tz")
+ (29 . "ngz")
+ (30 . "au")
+ (31 . "eu")))
+
+;; Build the "sounds like" transcription of WORD: take its phonetic map
+;; (elstr->phonetic-map), look each phoneme code up in
+;; soundslike-transcription-list, skip codes that have no entry, and
+;; concatenate the resulting strings in order.
+(define-public (elstr->soundslike word)
+  (let* ((transcribe (lambda (ph) (assoc-ref soundslike-transcription-list (phoneme:code ph))))
+         (pieces (filter-map transcribe (elstr->phonetic-map word))))
+    (apply string-append pieces)))
+
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 1831610..5a8acdf 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -23,61 +23,69 @@
#include "utf8.h"
#include "elmorph.h"
struct elstr {
unsigned *str; /* UTF-8 string */
size_t len; /* Its length */
+
+ struct phoneme *phoneme; /* Phonetical map*/
+ unsigned phoneme_count; /* Number of phonemes */
+
+ struct syllable *sylmap; /* Syllable map (nsyl elements) */
unsigned nsyl; /* Number of syllables. */
- unsigned *sylmap; /* Syllable map (nsyl elements) */
unsigned acc_syl; /* Number of the accented syllable
(1-based, from the last syllable) */
unsigned acc_pos; /* Number of the accented character
(0-based, from str[0]) */
+
};
scm_t_bits _elstr_tag;
static void
-_elstr_syllabize(struct elstr *elstr)
-{
- unsigned *sylmap;
- unsigned i, nsyl = 0, accchr = 0;
- int accsyl = -1;
- int dstate = 0;
- int acc = 0;
-
- if (!elstr->sylmap) {
- elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0]));
- if (!elstr->sylmap)
- scm_memory_error("_elstr_syllabize");
- }
- sylmap = elstr->sylmap;
+_elstr_syllabize(struct elstr *elstr, const char *func_name)
+{
+ unsigned i;
+
+ free(elstr->phoneme);
+ free(elstr->sylmap);
- for (i = 0; i < elstr->len; i++) {
- int nstate;
-
- if (elchr_getaccent(elstr->str[i])) {
- accsyl = nsyl;
- accchr = i;
- }
- nstate = elchr_diphthong(elstr->str[i], dstate);
- if (nstate)
- /* skip */;
- else if (dstate)
- sylmap[nsyl++] = i - 1;
- else if (elchr_isvowel(elstr->str[i]))
- sylmap[nsyl++] = i;
- dstate = nstate;
+ if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count,
+ elstr->str, elstr->len))
+ scm_misc_error(func_name,
+ "cannot create phonetic map: ~S",
+ scm_from_int(errno));
+
+ if (syllable_map(&elstr->sylmap, &elstr->nsyl,
+ elstr->phoneme, elstr->phoneme_count))
+ scm_misc_error(func_name,
+ "cannot create syllable map: ~S",
+ scm_from_int(errno));
+
+ for (i = elstr->nsyl; i > 0; i--) {
+ if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK)
+ break;
}
- if (dstate)
- sylmap[nsyl++] = i - 1;
- else if (nsyl)
- sylmap[nsyl-1] = i - 1;
- elstr->nsyl = nsyl;
- elstr->acc_pos = accchr;
- elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0;
+ elstr->acc_syl = i;
+ for (i = 0; i < elstr->len; i++)
+ if (elchr_getaccent(elstr->str[i]))
+ break;
+ elstr->acc_pos = i;
+}
+
+static void
+invalidate_maps(struct elstr *elstr)
+{
+ free(elstr->sylmap);
+ elstr->sylmap = NULL;
+ elstr->nsyl = 0;
+ free(elstr->phoneme);
+ elstr->phoneme = NULL;
+ elstr->phoneme_count = 0;
+ elstr->acc_pos = 0;
+ elstr->acc_syl = 0;
}
static SCM
_elstr_alloc_empty(struct elstr **pelstr)
{
struct elstr *elstr;
@@ -86,13 +94,13 @@ _elstr_alloc_empty(struct elstr **pelstr)
memset(elstr, 0, sizeof(*elstr));
*pelstr = elstr;
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
-_elstr_alloc(const char *instr, int syl)
+_elstr_alloc(const char *instr, int syl, const char *func_name)
{
struct elstr *elstr;
unsigned *wptr;
size_t wlen;
if (utf8_mbstr_to_wc(instr, &wptr, &wlen))
@@ -102,14 +110,16 @@ _elstr_alloc(const char *instr, int syl)
elstr->str = wptr;
elstr->len = wlen;
elstr->sylmap = NULL;
elstr->nsyl = 0;
elstr->acc_syl = 0;
elstr->acc_pos = 0;
+ elstr->phoneme = 0;
+ elstr->phoneme_count = 0;
if (syl)
- _elstr_syllabize(elstr);
+ _elstr_syllabize(elstr, func_name);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
static SCM
_elstr_dup(struct elstr *elstr)
@@ -117,25 +127,40 @@ _elstr_dup(struct elstr *elstr)
struct elstr *elnew;
elnew = scm_gc_malloc(sizeof(*elstr), "Elstr");
elnew->str = calloc(elstr->len, sizeof(elnew->str[0]));
if (!elnew->str)
scm_memory_error("_elstr_dup");
+ memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
+ elnew->len = elstr->len;
+
+ if (elstr->phoneme) {
+ elnew->phoneme = calloc(elstr->phoneme_count,
+ sizeof(elnew->phoneme[0]));
+ if (!elnew->phoneme) {
+ free(elnew->str);
+ scm_memory_error("_elstr_dup");
+ }
+ memcpy(elnew->phoneme, elstr->phoneme,
+ sizeof(elstr->phoneme[0]) * elstr->phoneme_count);
+ } else
+ elnew->phoneme = NULL;
+ elnew->phoneme_count = elstr->phoneme_count;
+
if (elstr->sylmap) {
elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0]));
if (!elnew->sylmap) {
free(elnew->str);
scm_memory_error("_elstr_dup");
}
+ memcpy(elnew->sylmap, elstr->sylmap,
+ sizeof(elstr->sylmap[0]) * elstr->nsyl);
} else
elnew->sylmap = NULL;
- memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
- elnew->len = elstr->len;
elnew->nsyl = elstr->nsyl;
- memcpy(elnew->sylmap, elstr->sylmap,
- sizeof(elstr->sylmap[0]) * elstr->nsyl);
+
elnew->acc_syl = elstr->acc_syl;
elnew->acc_pos = elstr->acc_pos;
SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
}
static void
@@ -159,48 +184,46 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name)
static scm_sizet
_elstr_free(SCM smob)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
free(elstr->str);
free(elstr->sylmap);
+ free(elstr->phoneme);
scm_gc_free(elstr, sizeof(struct elstr), "elstr");
return 0;
}
static int
_elstr_print(SCM smob, SCM port, scm_print_state *pstate)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
- int i, j, an;
+ int i, j;
char *s;
scm_puts("#<elstr ", port);
if (elstr->sylmap) {
scm_puts("``", port);
- an = elstr->nsyl - elstr->acc_syl;
- if (an == 0)
- scm_puts("[", port);
- for (i = j = 0; i < elstr->len; i++) {
- char r[6];
- int n;
-
- if (i == elstr->sylmap[j] + 1) {
- if (j == an)
- scm_puts("]", port);
+ for (i = 0; i < elstr->nsyl; i++) {
+ size_t start = elstr->sylmap[i].char_start;
+ if (i)
scm_puts("-", port);
- if (++j == an)
- scm_puts("[", port);
+ if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+ scm_puts("[", port);
+ for (j = 0; j < elstr->sylmap[i].char_count; j++) {
+ char r[6];
+ int n;
+
+ n = utf8_wctomb(r, elstr->str[start+j]);
+ if (n == -1)
+ continue;
+ r[n] = 0;
+ scm_puts(r, port);
}
- n = utf8_wctomb(r, elstr->str[i]);
- if (n == -1)
- continue;
- r[n] = 0;
- scm_puts(r, port);
+ if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+ scm_puts("]", port);
}
- if (j == an)
- scm_puts("]", port);
} else {
scm_puts("[NS] ``", port);
for (i = j = 0; i < elstr->len; i++) {
char r[6];
int n;
n = utf8_wctomb(r, elstr->str[i]);
@@ -235,13 +258,13 @@ force_elstr(struct elstr **ep, SCM scm, int sylopt,
} else {
SCM newscm;
char *str;
SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
str = scm_to_locale_string(scm);
- newscm = _elstr_alloc(str, sylopt);
+ newscm = _elstr_alloc(str, sylopt, func_name);
free(str);
if (newscm == SCM_EOL)
scm_misc_error(func_name,
"Invalid input string: ~S",
scm_list_1(scm));
scm = newscm;
@@ -333,19 +356,16 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
num = scm_to_uint(n);
if (num > elstr->nsyl)
scm_misc_error(FUNC_NAME,
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
- if (num == 0)
- start = 0;
- else
- start = elstr->sylmap[num - 1] + 1;
- return scm_cons(scm_from_uint(start),
- scm_from_uint(elstr->sylmap[num]));
+ return scm_list_3(scm_from_uint