diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ellinika/.gitignore | 2 | ||||
-rw-r--r-- | src/ellinika/Makefile.am | 23 | ||||
-rw-r--r-- | src/ellinika/elchr.c | 273 | ||||
-rw-r--r-- | src/ellinika/elmorph-public.scm | 106 | ||||
-rw-r--r-- | src/ellinika/elmorph.c | 308 | ||||
-rw-r--r-- | src/ellinika/elmorph.h | 82 | ||||
-rw-r--r-- | src/ellinika/elmorph.scm4 | 25 | ||||
-rw-r--r-- | src/ellinika/phoneme.y | 353 | ||||
-rw-r--r-- | src/ellinika/syllabificator.c | 152 | ||||
-rw-r--r-- | src/ellinika/tenses.scm | 38 | ||||
-rw-r--r-- | src/ellinika/xlat.scm | 113 | ||||
-rwxr-xr-x | src/ellinika/yyrename | 97 |
12 files changed, 1206 insertions, 366 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore index 9422f9a..11bf478 100644 --- a/src/ellinika/.gitignore +++ b/src/ellinika/.gitignore @@ -3,3 +3,5 @@ cgi.scm config.scm elmorph.scm elmorph.x +phoneme.c +phoneme.h diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am index 274eea8..b8988d4 100644 --- a/src/ellinika/Makefile.am +++ b/src/ellinika/Makefile.am @@ -1,5 +1,5 @@ # This file is part of Ellinika project. -# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff +# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff # # Ellinika is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -15,7 +15,14 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. guiledir=$(GUILE_SITE)/$(PACKAGE) -guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm +guile_DATA=\ + xlat.scm\ + cgi.scm\ + i18n.scm\ + config.scm\ + dico.scm\ + elmorph.scm\ + tenses.scm cgi.m4: Makefile echo 'divert(-1)' > $@ @@ -39,11 +46,11 @@ cgi.m4: Makefile SUFFIXES = .scm4 .scm .x .scm4.scm: - m4 cgi.m4 $< > $@ + m4 -I$(srcdir) cgi.m4 $< > $@ cgi.scm: cgi.scm4 cgi.m4 config.scm: config.scm4 cgi.m4 -elmorph.scm: elmorph.scm4 cgi.m4 +elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4 pkglib_LTLIBRARIES=libelmorph.la @@ -52,7 +59,9 @@ libelmorph_la_SOURCES = \ utf8.c\ elchr.c\ elmorph.c\ - elmorph.h + elmorph.h\ + phoneme.y\ + syllabificator.c DOT_X_FILES = elmorph.x @@ -80,4 +89,6 @@ install-data-hook: done; \ cd $$here - +AM_YFLAGS = -d +YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)' +EXTRA_DIST = yyrename elmorph-public.scm
\ No newline at end of file diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 3142b6f..621ac03 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c @@ -23,20 +23,7 @@ #include <libguile.h> #include "utf8.h" #include "elmorph.h" - -struct char_info_st { - unsigned ch; /* Characters */ - int flags; /* Flags (see above) */ - unsigned base; /* for vowels - a corresponding vowel with all diacritics - removed */ - unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase - letter if flags & CHF_LOWER and a corresponding lowerrcase - letter if flags & CHF_UPPER */ - unsigned numval; /* Numeric value */ - unsigned accented[3]; /* For vowels - corresponding accented variant */ - unsigned deaccent; /* For accented vowels with diaeresis - corresponding - non-accented character */ -}; +#include "phoneme.h" /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ struct char_info_st el_basic_ctype[] = { @@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = { { 0x0383, }, { 0x0384, CHF_MODIFIER }, /* Oxeia */ { 0x0385, CHF_MODIFIER }, /* dialytika */ - { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ + { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC, + 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */ { 0x0387, CHF_PUNCT }, /* ano teleia */ - { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ - { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ - { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ + { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD, + 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */ + { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE, + 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */ + { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF, + 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */ { 0x038B, }, - { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ + { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC, + 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */ { 0x038D, }, - { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ - { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ - { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ - { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ - { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ - { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ - { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ - { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ - { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ - { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ - { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ - { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ - { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ - { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ - { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ - { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ - { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ - { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ - { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ - { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ + { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD, + 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */ + { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE, + 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */ + { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, + 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */ + { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1, + 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */ + { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */ + { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */ + { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */ + { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5, + 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */ + { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */ + { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, + 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */ + { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */ + { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9, + 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */ + { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */ + { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */ + { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */ + { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */ + { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */ + { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, + 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */ + { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */ + { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */ { 0x03A2, }, - { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ - { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ - { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ - { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ - { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ - { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ - { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ - { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ - { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ - { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ - { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */ - { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ - { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ - { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ - { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */ - { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */ - { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */ - { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */ - { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */ - { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */ - { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */ - { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */ - { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */ - { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */ - { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */ - { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */ - { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ - { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ + { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */ + { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */ + { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5, + 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */ + { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */ + { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */ + { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */ + { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, + 800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */ + { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA, + 0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */ + { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB, + 0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */ + { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386, + 0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */ + { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388, + 0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */ + { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389, + 0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */ + { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A, + 0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */ + { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, + 0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */ + { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391, + 1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */ + { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */ + { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */ + { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */ + { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395, + 5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */ + { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */ + { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397, + 8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */ + { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */ + { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, + 10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */ + { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */ + { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */ + { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */ + { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */ + { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */ - { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */ - { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ - { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ - { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ - { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */ - { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */ - { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */ - { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */ - { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */ - { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */ - { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */ - { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */ - { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */ - { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */ - { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */ - { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */ + { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, + 70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */ + { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */ + { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */ + { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3, + 0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */ + { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */ + { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */ + { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5, + 400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */ + { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */ + { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */ + { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */ + { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, + 800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */ + { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA, + 0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */ + { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, + 0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */ + { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C, + 0, {0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* ό */ + { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E, + 0, {0, 0, 0}, 0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */ + { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F, + 0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */ { 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */ { 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */ { 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */ @@ -561,7 +617,7 @@ struct char_info_st el_extended_ctype[] = { { 0x1FFF, } }; -static struct char_info_st * +struct char_info_st const * elchr_info(unsigned ch) { if (ch >= 0x0300 && ch <= 0x03FF) @@ -574,11 +630,25 @@ elchr_info(unsigned ch) int elchr_flags(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return ci ? ci->flags : 0; } int +elchr_letter(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->letter : 0; +} + +int +elchr_phoneme(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->phoneme : 0; +} + +int elchr_isupper(unsigned ch) { return elchr_flags(ch) & CHF_UPPER; @@ -654,35 +724,35 @@ elchr_isnumeric(unsigned ch) unsigned elchr_numeric_value(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0; } unsigned elchr_toupper(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch; } unsigned elchr_tolower(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch; } unsigned elchr_base(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch; } unsigned elchr_deaccent(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); if (ci && (ci->flags & CHF_ACCENT_MASK)) return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch; return ch; @@ -691,28 +761,7 @@ elchr_deaccent(unsigned ch) unsigned elchr_accent(unsigned ch, int acc) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch; } -int -elchr_diphthong(unsigned ch, int state) -{ - struct char_info_st *ci = elchr_info(ch); - - if (!ci || !(ci->flags & CHF_VOWEL)) - return 0; - switch (state) { - case 0: - if (ci->flags & CHF_DIPH1) - state = 1; - break; - case 1: - if (ci->flags & CHF_DIPH2) - state = 2; - break; - default: - state = 0; - } - return state; -} diff --git a/src/ellinika/elmorph-public.scm b/src/ellinika/elmorph-public.scm new file mode 100644 index 0000000..329fe4a --- /dev/null +++ b/src/ellinika/elmorph-public.scm @@ -0,0 +1,106 @@ +;;;; This file is part of Ellinika project. +;;;; Copyright (C) 2011 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +(use-modules ((srfi srfi-1))) + +(define-public (elstr-trim word n) + (let ((word (if (string? word) + (string->elstr word) + word))) + (cond + ((> n 0) + (elstr-slice word n (- (elstr-length word) n))) + ((< n 0) + (elstr-slice word 0 (+ (elstr-length word) n))) + (else + word)))) + +(define-public (elstr-trim! word n) + (let ((word (if (string? word) + (string->elstr word) + word))) + (cond + ((> n 0) + (elstr-slice! word n (- (elstr-length word) n))) + ((< n 0) + (elstr-slice! word 0 (+ (elstr-length word) n)))))) + +(define-public (phoneme:code ph) + (list-ref ph 0)) + +(define-public (phoneme:start ph) + (list-ref ph 1)) + +(define-public (phoneme:count ph) + (list-ref ph 2)) + +(define-public (phoneme:flags ph) + (list-ref ph 3)) + +(define-public (phoneme:accented? ph) + (logand (phoneme:flags ph) elmorph:accent-mask)) + +(define-public (phoneme:vowel? ph) + (= (logand (phoneme:flags ph) elmorph:vowel))) + +(define-public (phoneme:consonant? ph) + (= (logand (phoneme:flags ph) elmorph:consonant))) + +(define-public (phoneme:diphthong? ph) + (= (logand (phoneme:flags ph) elmorph:diphthong))) + + +(define soundslike-transcription-list + '((1 . "a") + (2 . "e") + (3 . "i") + (4 . "o") + (5 . "u") + (6 . "b") + (7 . "g") + (8 . "d") + (9 . "z") + (10 . "t") + (11 . "k") + (12 . "l") + (13 . "m") + (14 . "n") + (15 . "x") + (16 . "p") + (17 . "r") + (18 . "s") + (19 . "t") + (20 . "f") + (21 . "h") + (22 . "P") + (23 . "b") + (24 . "d") + (25 . "g") + (26 . "sm") + (27 . "ts") + (28 . "tz") + (29 . "ngz") + (30 . "au") + (31 . "eu"))) + +(define-public (elstr->soundslike word) + (let ((phon-map (elstr->phonetic-map word))) + (apply string-append + (filter-map + (lambda (elt) + (assoc-ref soundslike-transcription-list (phoneme:code elt))) + phon-map)))) + diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index 1831610..5a8acdf 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -26,55 +26,63 @@ struct elstr { unsigned *str; /* UTF-8 string */ size_t len; /* Its length */ + + struct phoneme *phoneme; /* Phonetical map*/ + unsigned phoneme_count; /* Number of phonemes */ + + struct syllable *sylmap; /* Syllable map (nsyl elements) */ unsigned nsyl; /* Number of syllables. */ - unsigned *sylmap; /* Syllable map (nsyl elements) */ unsigned acc_syl; /* Number of the accented syllable (1-based, from the last syllable) */ unsigned acc_pos; /* Number of the accented character (0-based, from str[0]) */ + }; scm_t_bits _elstr_tag; static void -_elstr_syllabize(struct elstr *elstr) -{ - unsigned *sylmap; - unsigned i, nsyl = 0, accchr = 0; - int accsyl = -1; - int dstate = 0; - int acc = 0; - - if (!elstr->sylmap) { - elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0])); - if (!elstr->sylmap) - scm_memory_error("_elstr_syllabize"); - } - sylmap = elstr->sylmap; +_elstr_syllabize(struct elstr *elstr, const char *func_name) +{ + unsigned i; + + free(elstr->phoneme); + free(elstr->sylmap); - for (i = 0; i < elstr->len; i++) { - int nstate; - - if (elchr_getaccent(elstr->str[i])) { - accsyl = nsyl; - accchr = i; - } - nstate = elchr_diphthong(elstr->str[i], dstate); - if (nstate) - /* skip */; - else if (dstate) - sylmap[nsyl++] = i - 1; - else if (elchr_isvowel(elstr->str[i])) - sylmap[nsyl++] = i; - dstate = nstate; + if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count, + elstr->str, elstr->len)) + scm_misc_error(func_name, + "cannot create phonetic map: ~S", + scm_from_int(errno)); + + if (syllable_map(&elstr->sylmap, &elstr->nsyl, + elstr->phoneme, elstr->phoneme_count)) + scm_misc_error(func_name, + "cannot create syllable map: ~S", + scm_from_int(errno)); + + for (i = elstr->nsyl; i > 0; i--) { + if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK) + break; } - if (dstate) - sylmap[nsyl++] = i - 1; - else if (nsyl) - sylmap[nsyl-1] = i - 1; - elstr->nsyl = nsyl; - elstr->acc_pos = accchr; - elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0; + elstr->acc_syl = i; + for (i = 0; i < elstr->len; i++) + if (elchr_getaccent(elstr->str[i])) + break; + elstr->acc_pos = i; +} + +static void +invalidate_maps(struct elstr *elstr) +{ + free(elstr->sylmap); + elstr->sylmap = NULL; + elstr->nsyl = 0; + free(elstr->phoneme); + elstr->phoneme = NULL; + elstr->phoneme_count = 0; + elstr->acc_pos = 0; + elstr->acc_syl = 0; } static SCM @@ -89,7 +97,7 @@ _elstr_alloc_empty(struct elstr **pelstr) } static SCM -_elstr_alloc(const char *instr, int syl) +_elstr_alloc(const char *instr, int syl, const char *func_name) { struct elstr *elstr; unsigned *wptr; @@ -105,8 +113,10 @@ _elstr_alloc(const char *instr, int syl) elstr->nsyl = 0; elstr->acc_syl = 0; elstr->acc_pos = 0; + elstr->phoneme = 0; + elstr->phoneme_count = 0; if (syl) - _elstr_syllabize(elstr); + _elstr_syllabize(elstr, func_name); SCM_RETURN_NEWSMOB(_elstr_tag, elstr); } @@ -120,19 +130,34 @@ _elstr_dup(struct elstr *elstr) elnew->str = calloc(elstr->len, sizeof(elnew->str[0])); if (!elnew->str) scm_memory_error("_elstr_dup"); + memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len); + elnew->len = elstr->len; + + if (elstr->phoneme) { + elnew->phoneme = calloc(elstr->phoneme_count, + sizeof(elnew->phoneme[0])); + if (!elnew->phoneme) { + free(elnew->str); + scm_memory_error("_elstr_dup"); + } + memcpy(elnew->phoneme, elstr->phoneme, + sizeof(elstr->phoneme[0]) * elstr->phoneme_count); + } else + elnew->phoneme = NULL; + elnew->phoneme_count = elstr->phoneme_count; + if (elstr->sylmap) { elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0])); if (!elnew->sylmap) { free(elnew->str); scm_memory_error("_elstr_dup"); } + memcpy(elnew->sylmap, elstr->sylmap, + sizeof(elstr->sylmap[0]) * elstr->nsyl); } else elnew->sylmap = NULL; - memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len); - elnew->len = elstr->len; elnew->nsyl = elstr->nsyl; - memcpy(elnew->sylmap, elstr->sylmap, - sizeof(elstr->sylmap[0]) * elstr->nsyl); + elnew->acc_syl = elstr->acc_syl; elnew->acc_pos = elstr->acc_pos; SCM_RETURN_NEWSMOB(_elstr_tag, elnew); @@ -162,6 +187,7 @@ _elstr_free(SCM smob) struct elstr *elstr = (struct elstr *) SCM_CDR(smob); free(elstr->str); free(elstr->sylmap); + free(elstr->phoneme); scm_gc_free(elstr, sizeof(struct elstr), "elstr"); return 0; } @@ -170,34 +196,31 @@ static int _elstr_print(SCM smob, SCM port, scm_print_state *pstate) { struct elstr *elstr = (struct elstr *) SCM_CDR(smob); - int i, j, an; + int i, j; char *s; scm_puts("#<elstr ", port); if (elstr->sylmap) { scm_puts("``", port); - an = elstr->nsyl - elstr->acc_syl; - if (an == 0) - scm_puts("[", port); - for (i = j = 0; i < elstr->len; i++) { - char r[6]; - int n; - - if (i == elstr->sylmap[j] + 1) { - if (j == an) - scm_puts("]", port); + for (i = 0; i < elstr->nsyl; i++) { + size_t start = elstr->sylmap[i].char_start; + if (i) scm_puts("-", port); - if (++j == an) - scm_puts("[", port); + if (elstr->sylmap[i].flags & CHF_ACCENT_MASK) + scm_puts("[", port); + for (j = 0; j < elstr->sylmap[i].char_count; j++) { + char r[6]; + int n; + + n = utf8_wctomb(r, elstr->str[start+j]); + if (n == -1) + continue; + r[n] = 0; + scm_puts(r, port); } - n = utf8_wctomb(r, elstr->str[i]); - if (n == -1) - continue; - r[n] = 0; - scm_puts(r, port); + if (elstr->sylmap[i].flags & CHF_ACCENT_MASK) + scm_puts("]", port); } - if (j == an) - scm_puts("]", port); } else { scm_puts("[NS] ``", port); for (i = j = 0; i < elstr->len; i++) { @@ -238,7 +261,7 @@ force_elstr(struct elstr **ep, SCM scm, int sylopt, SCM_ASSERT(scm_is_string(scm), scm, arg, func_name); str = scm_to_locale_string(scm); - newscm = _elstr_alloc(str, sylopt); + newscm = _elstr_alloc(str, sylopt, func_name); free(str); if (newscm == SCM_EOL) scm_misc_error(func_name, @@ -336,13 +359,10 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop", "cannot get syllable #~S: not enough syllables: ~S", scm_list_2(el, n)); num = elstr->nsyl - num; - if (num == 0) - start = 0; - else - start = elstr->sylmap[num - 1] + 1; - return scm_cons(scm_from_uint(start), - scm_from_uint(elstr->sylmap[num])); + return scm_list_3(scm_from_uint(elstr->sylmap[num].char_start), + scm_from_uint(elstr->sylmap[num].char_count), + scm_from_int(elstr->sylmap[num].flags)); } #undef FUNC_NAME @@ -388,12 +408,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable", "cannot get syllable #~S: not enough syllables: ~S", scm_list_2(el, n)); num = elstr->nsyl - num; - if (num == 0) - start = 0; - else - start = elstr->sylmap[num - 1] + 1; - if (utf8_wc_to_mbstr(elstr->str + start, - elstr->sylmap[num] - start + 1, + if (utf8_wc_to_mbstr(elstr->str + elstr->sylmap[num].char_start, + elstr->sylmap[num].char_count, &s)) scm_misc_error(FUNC_NAME, "cannot convert elstr to Scheme", @@ -514,8 +530,7 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name) } for (i = 0; i < elstr->len; i++) elstr->str[i] = elchr_deaccent(elstr->str[i]); - elstr->acc_pos = 0; - elstr->acc_syl = 0; + invalidate_maps(elstr); return scm; } @@ -544,9 +559,10 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) { struct elstr *elstr; unsigned i; - unsigned acc_num, num, len, start; + unsigned acc_num, num, start; SCM scm; - int dstate; + unsigned pos; + struct phoneme *phoneme = NULL; if (destructive) { SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); @@ -556,15 +572,11 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); num = scm_to_uint(n); - if (num > elstr->nsyl) + if (num == 0 | num > elstr->nsyl) scm_misc_error(func_name, "cannot set accent on syllable #~S: not enough syllables: ~S", scm_list_2(n, el)); acc_num = elstr->nsyl - num; - if (acc_num == 0) - start = 0; - else - start = elstr->sylmap[acc_num - 1] + 1; if (destructive) scm = SCM_UNSPECIFIED; @@ -576,25 +588,38 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) /* Clear all accents */ for (i = 0; i < elstr->len; i++) elstr->str[i] = elchr_deaccent(elstr->str[i]); - len = elstr->sylmap[acc_num] - start + 1; - dstate = 0; - for (i = start; i <= start + len; i++) { - int nstate; - - if (!elchr_isvowel(elstr->str[i])) { - if (dstate) { - --i; - break; - } - continue; - } - nstate = elchr_diphthong(elstr->str[i], dstate); - if (!nstate) + for (i = 0; i < elstr->nsyl; i++) + elstr->sylmap[i].flags &= ~CHF_ACCENT_MASK; + for (i = 0; i < elstr->phoneme_count; i++) + elstr->phoneme[i].flags &= ~CHF_ACCENT_MASK; + + start = elstr->sylmap[acc_num].phoneme_start; + pos = 0; + for (i = 0; i < elstr->sylmap[acc_num].phoneme_count; i++) { + struct phoneme *ph = elstr->phoneme + start + i; + if (ph->flags & CHF_CONSONANT) + /* skip */ ; + else if (ph->flags & CHF_DIPHTHONG) { + phoneme = ph; + pos = ph->start + 1; + break; + } else if (ph->flags & CHF_VOWEL) { + phoneme = ph; + pos = ph->start; break; - dstate = nstate; + } } - elstr->str[i] = elchr_accent(elstr->str[i], CHF_OXEIA); + if (!phoneme) + scm_misc_error(func_name, + "cannot set accent on syllable #~S of ~S: " + "INTERNAL ERROR", + scm_list_2(n, el)); + phoneme->flags |= CHF_OXEIA; + elstr->sylmap[acc_num].flags |= CHF_OXEIA; + elstr->str[pos] = elchr_accent(elstr->str[pos], CHF_OXEIA); + elstr->acc_syl = num; + elstr->acc_pos = pos; return scm; } @@ -652,7 +677,8 @@ _elstr_set_accent_on_char(SCM el, SCM n, int destructive, const char *func_name) elstr->str[i] = elchr_deaccent(elstr->str[i]); elstr->str[num] = elchr_accent(elstr->str[num], CHF_OXEIA); - _elstr_syllabize(elstr); + invalidate_maps(elstr); + _elstr_syllabize(elstr, func_name); return scm; } @@ -716,11 +742,31 @@ static struct deftab { { CHF_LOWER, "elmorph:lower" }, { CHF_UPPER, |