diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-10 23:04:53 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-11 00:34:20 +0300 |
commit | a1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch) | |
tree | f86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika | |
parent | 2bae7da012e2125762855ce014e63345ecbbbb18 (diff) | |
download | ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2 |
Fix syllabification.
* configure.ac: Add AC_PROG_YACC
* src/ellinika/phoneme.y: New file.
* src/ellinika/yyrename: New file.
* src/ellinika/syllabificator.c: New file.
* src/ellinika/.gitignore: Update.
* src/ellinika/elchr.c (char_info_st): Move to header.
(el_basic_ctype):
(elchr_info): Remove static qualifier.
Return a pointer to const.
(elchr_letter,elchr_phoneme): New functions.
(elchr_diphthong): Remove.
* src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members.
(_elstr_syllabize): Rewrite.
(invalidate_maps): New static function.
(_elstr_alloc): Initialize new fields, take function name
as argument, for diagnostic purposes.
(_elstr_print): Rewrite.
(deftab): Update.
(elstr-syllable-prop,elstr-syllable)
(_elstr_set_accent,_elstr_set_accent_on_char): Rewrite.
(elstr-char-phoneme,elstr->phonetic-map): New functions.
* src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove.
(CHF_DIPHTHONG): New flag.
(PHON_.*): New constants.
(phoneme,syllable): New structures.
(char_info_st)<letter,phoneme>: New members.
(elchr_info,elchr_letter)
(elchr_phoneme,phoneme_map)
(syllable_map): New protos.
(elchr_diphthong): Remove protos.
* src/ellinika/elmorph.scm4: Move public definitions
to elmorph-public.scm; include it here.
* src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a
wrapper over elstr->soundslike.
Describe Milesian numbers.
* style.css (img.ellinika-img): New class.
* xml/lingua.conf.in (IMAGE): New tag.
* xml/pl/alfabhta.xml: Describe Milesian numbers.
Various fixes.
* data/dbverb.struct: Fix a typo in flection.
Use 'sub' theme for pas/sub/aor.
* data/irregular-verbs.xml: Add more verbs.
* scm/conjugator.scm: Various fixes.
* scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/.gitignore | 2 | ||||
-rw-r--r-- | src/ellinika/Makefile.am | 23 | ||||
-rw-r--r-- | src/ellinika/elchr.c | 273 | ||||
-rw-r--r-- | src/ellinika/elmorph-public.scm | 106 | ||||
-rw-r--r-- | src/ellinika/elmorph.c | 308 | ||||
-rw-r--r-- | src/ellinika/elmorph.h | 82 | ||||
-rw-r--r-- | src/ellinika/elmorph.scm4 | 25 | ||||
-rw-r--r-- | src/ellinika/phoneme.y | 353 | ||||
-rw-r--r-- | src/ellinika/syllabificator.c | 152 | ||||
-rw-r--r-- | src/ellinika/tenses.scm | 38 | ||||
-rw-r--r-- | src/ellinika/xlat.scm | 113 | ||||
-rwxr-xr-x | src/ellinika/yyrename | 97 |
12 files changed, 1206 insertions, 366 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore index 9422f9a..11bf478 100644 --- a/src/ellinika/.gitignore +++ b/src/ellinika/.gitignore @@ -1,5 +1,7 @@ cgi.m4 cgi.scm config.scm elmorph.scm elmorph.x +phoneme.c +phoneme.h diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am index 274eea8..b8988d4 100644 --- a/src/ellinika/Makefile.am +++ b/src/ellinika/Makefile.am @@ -1,8 +1,8 @@ # This file is part of Ellinika project. -# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff +# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff # # Ellinika is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # @@ -12,13 +12,20 @@ # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
guiledir=$(GUILE_SITE)/$(PACKAGE) -guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm +guile_DATA=\ + xlat.scm\ + cgi.scm\ + i18n.scm\ + config.scm\ + dico.scm\ + elmorph.scm\ + tenses.scm cgi.m4: Makefile echo 'divert(-1)' > $@ echo 'changequote([,])' >> $@ echo 'changecom([;],[' >> $@ echo '])' >> $@ @@ -36,26 +43,28 @@ cgi.m4: Makefile echo 'divert(0)dnl' >> $@ echo '@AUTOGENERATED@' >> $@ SUFFIXES = .scm4 .scm .x .scm4.scm: - m4 cgi.m4 $< > $@ + m4 -I$(srcdir) cgi.m4 $< > $@ cgi.scm: cgi.scm4 cgi.m4 config.scm: config.scm4 cgi.m4 -elmorph.scm: elmorph.scm4 cgi.m4 +elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4 pkglib_LTLIBRARIES=libelmorph.la libelmorph_la_SOURCES = \ aorist.c\ utf8.c\ elchr.c\ elmorph.c\ - elmorph.h + elmorph.h\ + phoneme.y\ + syllabificator.c DOT_X_FILES = elmorph.x BUILT_SOURCES = $(DOT_X_FILES) DISTCLEANFILES = $(DOT_X_FILES) @@ -77,7 +86,9 @@ install-data-hook: test -z "$$dlname" && dlname='lib$$name.so'; \ $(LN_S) -f "$$dlname" libguile-$$name-v-$(VERSION).so; \ fi; \ done; \ cd $$here - +AM_YFLAGS = -d +YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)' +EXTRA_DIST = yyrename elmorph-public.scm
\ No newline at end of file diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 3142b6f..621ac03 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c @@ -20,26 +20,13 @@ #endif #include <errno.h> #include <stdlib.h> #include <libguile.h> #include "utf8.h" #include "elmorph.h" - -struct char_info_st { - unsigned ch; /* Characters */ - int flags; /* Flags (see above) */ - unsigned base; /* for vowels - a corresponding vowel with all diacritics - removed */ - unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase - letter if flags & CHF_LOWER and a corresponding lowerrcase - letter if flags & CHF_UPPER */ - unsigned numval; /* Numeric value */ - unsigned accented[3]; /* For vowels - corresponding accented variant */ - unsigned deaccent; /* For accented vowels with diaeresis - corresponding - non-accented character */ -}; +#include "phoneme.h" /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ struct char_info_st el_basic_ctype[] = { { 0x0300, }, { 0x0301, }, { 0x0302, }, @@ -171,86 +158,155 @@ struct char_info_st el_basic_ctype[] = { { 0x0380, }, { 0x0381, }, { 0x0382, }, { 0x0383, }, { 0x0384, CHF_MODIFIER }, /* Oxeia */ { 0x0385, CHF_MODIFIER }, /* dialytika */ - { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ + { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC, + 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */ { 0x0387, CHF_PUNCT }, /* ano teleia */ - { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ - { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ - { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ + { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD, + 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */ + { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE, + 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */ + { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF, + 0, { 0, 0, 0}, 0, LETTER_I_ACC, 
PHON_I }, /* Ί */ { 0x038B, }, - { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ + { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC, + 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */ { 0x038D, }, - { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ - { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ - { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ - { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ - { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ - { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ - { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ - { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ - { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ - { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ - { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ - { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ - { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ - { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ - { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ - { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ - { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ - { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ - { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ - { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ + { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD, + 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */ + { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE, + 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */ + { 
0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, + 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */ + { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1, + 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */ + { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */ + { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */ + { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */ + { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5, + 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */ + { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */ + { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, + 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */ + { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */ + { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9, + 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */ + { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */ + { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */ + { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */ + { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */ + { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */ + { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, + 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */ + { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */ + { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */ { 0x03A2, }, - { 0x03A3, 
CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ - { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ - { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ - { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ - { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ - { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ - { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ - { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ - { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ - { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ - { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */ - { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ - { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ - { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ - { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */ - { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */ - { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */ - { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */ - { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */ - { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */ - { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */ - { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */ - { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */ - { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */ - { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */ - { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 
0x039C, 40 }, /* μ */ - { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ - { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ + { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */ + { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */ + { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5, + 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */ + { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */ + { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */ + { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */ + { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, + 800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */ + { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA, + 0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */ + { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB, + 0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */ + { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386, + 0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */ + { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388, + 0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */ + { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389, + 0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */ + { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A, + 0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */ + { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, + 0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */ + { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391, + 1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */ + { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */ + { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 
0x0393, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */ + { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */ + { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395, + 5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */ + { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */ + { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397, + 8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */ + { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */ + { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, + 10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */ + { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */ + { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */ + { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */ + { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */ + { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */ - { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */ - { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ - { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ - { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ - { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */ - { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */ - { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */ - { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */ - { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */ - { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, 
/* ψ */ - { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */ - { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */ - { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */ - { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */ - { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */ - { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */ + { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, + 70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */ + { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */ + { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */ + { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3, + 0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */ + { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */ + { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */ + { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5, + 400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */ + { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */ + { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */ + { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */ + { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, + 800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */ + { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA, + 0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */ + { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, + 0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */ + { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C, + 0, {0, 0, 0}, 0, 
LETTER_OMICRON_ACC, PHON_O }, /* ό */ + { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E, + 0, {0, 0, 0}, 0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */ + { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F, + 0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */ { 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */ { 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */ { 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */ { 0x03D2, CHF_VOWEL|CHF_UPPER, }, /* capital ypsilon with hook */ { 0x03D3, CHF_VOWEL|CHF_OXEIA, 0x03D2 }, /* capital ypsilon with acute & hook */ { 0x03D4, CHF_VOWEL|CHF_TREMA, 0x03D2 }, /* capital ypsilon with diaeresis & hook */ @@ -558,30 +614,44 @@ struct char_info_st el_extended_ctype[] = { { 0x1FFC, }, { 0x1FFD, }, { 0x1FFE, }, { 0x1FFF, } }; -static struct char_info_st * +struct char_info_st const * elchr_info(unsigned ch) { if (ch >= 0x0300 && ch <= 0x03FF) return el_basic_ctype + ch - 0x0300; else if (ch >= 0x1F00 && ch <= 0x1FFF) return el_extended_ctype + ch - 0x1F00; return NULL; } int elchr_flags(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return ci ? ci->flags : 0; } int +elchr_letter(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->letter : 0; +} + +int +elchr_phoneme(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->phoneme : 0; +} + +int elchr_isupper(unsigned ch) { return elchr_flags(ch) & CHF_UPPER; } int @@ -651,68 +721,47 @@ elchr_isnumeric(unsigned ch) return elchr_flags(ch) & CHF_NUMERIC; } unsigned elchr_numeric_value(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0; } unsigned elchr_toupper(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_LOWER)) ? 
ci->trans: ch; } unsigned elchr_tolower(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch; } unsigned elchr_base(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch; } unsigned elchr_deaccent(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); if (ci && (ci->flags & CHF_ACCENT_MASK)) return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch; return ch; } unsigned elchr_accent(unsigned ch, int acc) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch; } -int -elchr_diphthong(unsigned ch, int state) -{ - struct char_info_st *ci = elchr_info(ch); - - if (!ci || !(ci->flags & CHF_VOWEL)) - return 0; - switch (state) { - case 0: - if (ci->flags & CHF_DIPH1) - state = 1; - break; - case 1: - if (ci->flags & CHF_DIPH2) - state = 2; - break; - default: - state = 0; - } - return state; -} diff --git a/src/ellinika/elmorph-public.scm b/src/ellinika/elmorph-public.scm new file mode 100644 index 0000000..329fe4a --- /dev/null +++ b/src/ellinika/elmorph-public.scm @@ -0,0 +1,106 @@ +;;;; This file is part of Ellinika project. +;;;; Copyright (C) 2011 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +(use-modules ((srfi srfi-1))) + +(define-public (elstr-trim word n) + (let ((word (if (string? word) + (string->elstr word) + word))) + (cond + ((> n 0) + (elstr-slice word n (- (elstr-length word) n))) + ((< n 0) + (elstr-slice word 0 (+ (elstr-length word) n))) + (else + word)))) + +(define-public (elstr-trim! word n) + (let ((word (if (string? word) + (string->elstr word) + word))) + (cond + ((> n 0) + (elstr-slice! word n (- (elstr-length word) n))) + ((< n 0) + (elstr-slice! word 0 (+ (elstr-length word) n)))))) + +(define-public (phoneme:code ph) + (list-ref ph 0)) + +(define-public (phoneme:start ph) + (list-ref ph 1)) + +(define-public (phoneme:count ph) + (list-ref ph 2)) + +(define-public (phoneme:flags ph) + (list-ref ph 3)) + +(define-public (phoneme:accented? ph) + (logand (phoneme:flags ph) elmorph:accent-mask)) + +(define-public (phoneme:vowel? ph) + (= (logand (phoneme:flags ph) elmorph:vowel))) + +(define-public (phoneme:consonant? ph) + (= (logand (phoneme:flags ph) elmorph:consonant))) + +(define-public (phoneme:diphthong? ph) + (= (logand (phoneme:flags ph) elmorph:diphthong))) + + +(define soundslike-transcription-list + '((1 . "a") + (2 . "e") + (3 . "i") + (4 . "o") + (5 . "u") + (6 . "b") + (7 . "g") + (8 . "d") + (9 . "z") + (10 . "t") + (11 . "k") + (12 . "l") + (13 . "m") + (14 . "n") + (15 . "x") + (16 . "p") + (17 . "r") + (18 . "s") + (19 . "t") + (20 . "f") + (21 . "h") + (22 . "P") + (23 . "b") + (24 . "d") + (25 . "g") + (26 . "sm") + (27 . "ts") + (28 . "tz") + (29 . "ngz") + (30 . "au") + (31 . 
"eu"))) + +(define-public (elstr->soundslike word) + (let ((phon-map (elstr->phonetic-map word))) + (apply string-append + (filter-map + (lambda (elt) + (assoc-ref soundslike-transcription-list (phoneme:code elt))) + phon-map)))) + diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 1831610..5a8acdf 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -23,61 +23,69 @@ #include "utf8.h" #include "elmorph.h" struct elstr { unsigned *str; /* UTF-8 string */ size_t len; /* Its length */ + + struct phoneme *phoneme; /* Phonetical map*/ + unsigned phoneme_count; /* Number of phonemes */ + + struct syllable *sylmap; /* Syllable map (nsyl elements) */ unsigned nsyl; /* Number of syllables. */ - unsigned *sylmap; /* Syllable map (nsyl elements) */ unsigned acc_syl; /* Number of the accented syllable (1-based, from the last syllable) */ unsigned acc_pos; /* Number of the accented character (0-based, from str[0]) */ + }; scm_t_bits _elstr_tag; static void -_elstr_syllabize(struct elstr *elstr) -{ - unsigned *sylmap; - unsigned i, nsyl = 0, accchr = 0; - int accsyl = -1; - int dstate = 0; - int acc = 0; - - if (!elstr->sylmap) { - elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0])); - if (!elstr->sylmap) - scm_memory_error("_elstr_syllabize"); - } - sylmap = elstr->sylmap; +_elstr_syllabize(struct elstr *elstr, const char *func_name) +{ + unsigned i; + + free(elstr->phoneme); + free(elstr->sylmap); - for (i = 0; i < elstr->len; i++) { - int nstate; - - if (elchr_getaccent(elstr->str[i])) { - accsyl = nsyl; - accchr = i; - } - nstate = elchr_diphthong(elstr->str[i], dstate); - if (nstate) - /* skip */; - else if (dstate) - sylmap[nsyl++] = i - 1; - else if (elchr_isvowel(elstr->str[i])) - sylmap[nsyl++] = i; - dstate = nstate; + if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count, + elstr->str, elstr->len)) + scm_misc_error(func_name, + "cannot create phonetic map: ~S", + scm_from_int(errno)); + + if (syllable_map(&elstr->sylmap, &elstr->nsyl, 
+ elstr->phoneme, elstr->phoneme_count)) + scm_misc_error(func_name, + "cannot create syllable map: ~S", + scm_from_int(errno)); + + for (i = elstr->nsyl; i > 0; i--) { + if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK) + break; } - if (dstate) - sylmap[nsyl++] = i - 1; - else if (nsyl) - sylmap[nsyl-1] = i - 1; - elstr->nsyl = nsyl; - elstr->acc_pos = accchr; - elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0; + elstr->acc_syl = i; + for (i = 0; i < elstr->len; i++) + if (elchr_getaccent(elstr->str[i])) + break; + elstr->acc_pos = i; +} + +static void +invalidate_maps(struct elstr *elstr) +{ + free(elstr->sylmap); + elstr->sylmap = NULL; + elstr->nsyl = 0; + free(elstr->phoneme); + elstr->phoneme = NULL; + elstr->phoneme_count = 0; + elstr->acc_pos = 0; + elstr->acc_syl = 0; } static SCM _elstr_alloc_empty(struct elstr **pelstr) { struct elstr *elstr; @@ -86,13 +94,13 @@ _elstr_alloc_empty(struct elstr **pelstr) memset(elstr, 0, sizeof(*elstr)); *pelstr = elstr; SCM_RETURN_NEWSMOB(_elstr_tag, elstr); } static SCM -_elstr_alloc(const char *instr, int syl) +_elstr_alloc(const char *instr, int syl, const char *func_name) { struct elstr *elstr; unsigned *wptr; size_t wlen; if (utf8_mbstr_to_wc(instr, &wptr, &wlen)) @@ -102,14 +110,16 @@ _elstr_alloc(const char *instr, int syl) elstr->str = wptr; elstr->len = wlen; elstr->sylmap = NULL; elstr->nsyl = 0; elstr->acc_syl = 0; elstr->acc_pos = 0; + elstr->phoneme = 0; + elstr->phoneme_count = 0; if (syl) - _elstr_syllabize(elstr); + _elstr_syllabize(elstr, func_name); SCM_RETURN_NEWSMOB(_elstr_tag, elstr); } static SCM _elstr_dup(struct elstr *elstr) @@ -117,25 +127,40 @@ _elstr_dup(struct elstr *elstr) struct elstr *elnew; elnew = scm_gc_malloc(sizeof(*elstr), "Elstr"); elnew->str = calloc(elstr->len, sizeof(elnew->str[0])); if (!elnew->str) scm_memory_error("_elstr_dup"); + memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len); + elnew->len = elstr->len; + + if (elstr->phoneme) { + 
elnew->phoneme = calloc(elstr->phoneme_count, + sizeof(elnew->phoneme[0])); + if (!elnew->phoneme) { + free(elnew->str); + scm_memory_error("_elstr_dup"); + } + memcpy(elnew->phoneme, elstr->phoneme, + sizeof(elstr->phoneme[0]) * elstr->phoneme_count); + } else + elnew->phoneme = NULL; + elnew->phoneme_count = elstr->phoneme_count; + if (elstr->sylmap) { elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0])); if (!elnew->sylmap) { free(elnew->str); scm_memory_error("_elstr_dup"); } + memcpy(elnew->sylmap, elstr->sylmap, + sizeof(elstr->sylmap[0]) * elstr->nsyl); } else elnew->sylmap = NULL; - memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len); - elnew->len = elstr->len; elnew->nsyl = elstr->nsyl; - memcpy(elnew->sylmap, elstr->sylmap, - sizeof(elstr->sylmap[0]) * elstr->nsyl); + elnew->acc_syl = elstr->acc_syl; elnew->acc_pos = elstr->acc_pos; SCM_RETURN_NEWSMOB(_elstr_tag, elnew); } static void @@ -159,48 +184,46 @@ _elstr_concat(struct elstr *dest, struct elstr *src, const char *func_name) static scm_sizet _elstr_free(SCM smob) { struct elstr *elstr = (struct elstr *) SCM_CDR(smob); free(elstr->str); free(elstr->sylmap); + free(elstr->phoneme); scm_gc_free(elstr, sizeof(struct elstr), "elstr"); return 0; } static int _elstr_print(SCM smob, SCM port, scm_print_state *pstate) { struct elstr *elstr = (struct elstr *) SCM_CDR(smob); - int i, j, an; + int i, j; char *s; scm_puts("#<elstr ", port); if (elstr->sylmap) { scm_puts("``", port); - an = elstr->nsyl - elstr->acc_syl; - if (an == 0) - scm_puts("[", port); - for (i = j = 0; i < elstr->len; i++) { - char r[6]; - int n; - - if (i == elstr->sylmap[j] + 1) { - if (j == an) - scm_puts("]", port); + for (i = 0; i < elstr->nsyl; i++) { + size_t start = elstr->sylmap[i].char_start; + if (i) scm_puts("-", port); - if (++j == an) - scm_puts("[", port); + if (elstr->sylmap[i].flags & CHF_ACCENT_MASK) + scm_puts("[", port); + for (j = 0; j < elstr->sylmap[i].char_count; j++) { + char r[6]; + int n; + 
+ n = utf8_wctomb(r, elstr->str[start+j]); + if (n == -1) + continue; + r[n] = 0; + scm_puts(r, port); } - n = utf8_wctomb(r, elstr->str[i]); - if (n == -1) - continue; - r[n] = 0; - scm_puts(r, port); + if (elstr->sylmap[i].flags & CHF_ACCENT_MASK) + scm_puts("]", port); } - if (j == an) - scm_puts("]", port); } else { scm_puts("[NS] ``", port); for (i = j = 0; i < elstr->len; i++) { char r[6]; int n; n = utf8_wctomb(r, elstr->str[i]); @@ -235,13 +258,13 @@ force_elstr(struct elstr **ep, SCM scm, int sylopt, } else { SCM newscm; char *str; SCM_ASSERT(scm_is_string(scm), scm, arg, func_name); str = scm_to_locale_string(scm); - newscm = _elstr_alloc(str, sylopt); + newscm = _elstr_alloc(str, sylopt, func_name); free(str); if (newscm == SCM_EOL) scm_misc_error(func_name, "Invalid input string: ~S", scm_list_1(scm)); scm = newscm; @@ -333,19 +356,16 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop", num = scm_to_uint(n); if (num > elstr->nsyl) scm_misc_error(FUNC_NAME, "cannot get syllable #~S: not enough syllables: ~S", scm_list_2(el, n)); num = elstr->nsyl - num; - if (num == 0) - start = 0; - else - start = elstr->sylmap[num - 1] + 1; - return scm_cons(scm_from_uint(start), - scm_from_uint(elstr->sylmap[num])); + return scm_list_3(scm_from_uint |