diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-10 23:04:53 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-11 00:34:20 +0300 |
commit | a1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch) | |
tree | f86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika/elchr.c | |
parent | 2bae7da012e2125762855ce014e63345ecbbbb18 (diff) | |
download | ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2 |
Fix syllabification.
* configure.ac: Add AC_PROG_YACC
* src/ellinika/phoneme.y: New file.
* src/ellinika/yyrename: New file.
* src/ellinika/syllabificator.c: New file.
* src/ellinika/.gitignore: Update.
* src/ellinika/elchr.c (char_info_st): Move to header.
(el_basic_ctype):
(elchr_info): Remove static qualifier.
Return a pointer to const.
(elchr_letter,elchr_phoneme): New functions.
(elchr_diphthong): Remove.
* src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members.
(_elstr_syllabize): Rewrite.
(invalidate_maps): New static function.
(_elstr_alloc): Initialize new fields, take function name
as argument, for diagnostic purposes.
(_elstr_print): Rewrite.
(deftab): Update.
(elstr-syllable-prop,elstr-syllable)
(_elstr_set_accent,_elstr_set_accent_on_char): Rewrite.
(elstr-char-phoneme,elstr->phonetic-map): New functions.
* src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove.
(CHF_DIPHTHONG): New flag.
(PHON_.*): New constants.
(phoneme,syllable): New structures.
(char_info_st)<letter,phoneme>: New members.
(elchr_info,elchr_letter)
(elchr_phoneme,phoneme_map)
(syllable_map): New protos.
(elchr_diphthong): Remove protos.
* src/ellinika/elmorph.scm4: Move public definitions
to elmorph-public.scm; include it here.
* src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a
wrapper over elstr->soundslike.
Describe Milesian numbers.
* style.css (img.ellinika-img): New class.
* xml/lingua.conf.in (IMAGE): New tag.
* xml/pl/alfabhta.xml: Describe Milesian numbers.
Various fixes.
* data/dbverb.struct: Fix a typo in flection.
Use 'sub' theme for pas/sub/aor.
* data/irregular-verbs.xml: Add more verbs.
* scm/conjugator.scm: Various fixes.
* scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika/elchr.c')
-rw-r--r-- | src/ellinika/elchr.c | 273 |
1 files changed, 161 insertions, 112 deletions
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 3142b6f..621ac03 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c @@ -23,20 +23,7 @@ #include <libguile.h> #include "utf8.h" #include "elmorph.h" - -struct char_info_st { - unsigned ch; /* Characters */ - int flags; /* Flags (see above) */ - unsigned base; /* for vowels - a corresponding vowel with all diacritics - removed */ - unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase - letter if flags & CHF_LOWER and a corresponding lowerrcase - letter if flags & CHF_UPPER */ - unsigned numval; /* Numeric value */ - unsigned accented[3]; /* For vowels - corresponding accented variant */ - unsigned deaccent; /* For accented vowels with diaeresis - corresponding - non-accented character */ -}; +#include "phoneme.h" /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ struct char_info_st el_basic_ctype[] = { @@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = { { 0x0383, }, { 0x0384, CHF_MODIFIER }, /* Oxeia */ { 0x0385, CHF_MODIFIER }, /* dialytika */ - { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ + { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC, + 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */ { 0x0387, CHF_PUNCT }, /* ano teleia */ - { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ - { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ - { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ + { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD, + 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */ + { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE, + 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */ + { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF, + 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */ { 0x038B, }, - { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ + { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 
0x03CC, + 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */ { 0x038D, }, - { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ - { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ - { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ - { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ - { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ - { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ - { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ - { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ - { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ - { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ - { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ - { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ - { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ - { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ - { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ - { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ - { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ - { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ - { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ - { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ + { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD, + 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */ + { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE, + 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */ + { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, + 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */ + { 0x0391, 
CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1, + 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */ + { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */ + { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */ + { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */ + { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5, + 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */ + { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */ + { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, + 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */ + { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */ + { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9, + 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */ + { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */ + { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */ + { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */ + { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */ + { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */ + { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, + 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */ + { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */ + { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */ { 0x03A2, }, - { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ - { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ 
- { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ - { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ - { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ - { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ - { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ - { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ - { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ - { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ - { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */ - { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ - { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ - { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ - { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */ - { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */ - { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */ - { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */ - { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */ - { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */ - { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */ - { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */ - { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */ - { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */ - { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */ - { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */ - { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ - { 0x03BE, 
CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ + { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */ + { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */ + { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5, + 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */ + { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */ + { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */ + { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */ + { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, + 800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */ + { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA, + 0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */ + { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB, + 0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */ + { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386, + 0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */ + { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388, + 0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */ + { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389, + 0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */ + { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A, + 0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */ + { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, + 0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */ + { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391, + 1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */ + { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, + 2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */ + { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, + 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */ + { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 
0x0394, + 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */ + { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395, + 5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */ + { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, + 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */ + { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397, + 8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */ + { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, + 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */ + { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, + 10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */ + { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, + 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */ + { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, + 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */ + { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, + 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */ + { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, + 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */ + { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, + 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */ - { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */ - { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ - { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ - { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ - { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */ - { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */ - { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */ - { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */ - { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */ - { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */ - { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */ - { 0x03CA, 
CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */ - { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */ - { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */ - { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */ - { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */ + { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, + 70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */ + { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, + 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */ + { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, + 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */ + { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3, + 0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */ + { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, + 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */ + { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, + 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */ + { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5, + 400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */ + { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, + 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */ + { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, + 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */ + { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, + 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */ + { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, + 800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */ + { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA, + 0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */ + { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, + 0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */ + { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C, + 0, {0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* ό */ + { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E, + 0, {0, 0, 0}, 
0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */ + { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F, + 0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */ { 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */ { 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */ { 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */ @@ -561,7 +617,7 @@ struct char_info_st el_extended_ctype[] = { { 0x1FFF, } }; -static struct char_info_st * +struct char_info_st const * elchr_info(unsigned ch) { if (ch >= 0x0300 && ch <= 0x03FF) @@ -574,11 +630,25 @@ elchr_info(unsigned ch) int elchr_flags(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return ci ? ci->flags : 0; } int +elchr_letter(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->letter : 0; +} + +int +elchr_phoneme(unsigned ch) +{ + struct char_info_st const *ci = elchr_info(ch); + return ci ? ci->phoneme : 0; +} + +int elchr_isupper(unsigned ch) { return elchr_flags(ch) & CHF_UPPER; @@ -654,35 +724,35 @@ elchr_isnumeric(unsigned ch) unsigned elchr_numeric_value(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0; } unsigned elchr_toupper(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch; } unsigned elchr_tolower(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch; } unsigned elchr_base(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? 
ci->base : ch; } unsigned elchr_deaccent(unsigned ch) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); if (ci && (ci->flags & CHF_ACCENT_MASK)) return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch; return ch; @@ -691,28 +761,7 @@ elchr_deaccent(unsigned ch) unsigned elchr_accent(unsigned ch, int acc) { - struct char_info_st *ci = elchr_info(ch); + struct char_info_st const *ci = elchr_info(ch); return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch; } -int -elchr_diphthong(unsigned ch, int state) -{ - struct char_info_st *ci = elchr_info(ch); - - if (!ci || !(ci->flags & CHF_VOWEL)) - return 0; - switch (state) { - case 0: - if (ci->flags & CHF_DIPH1) - state = 1; - break; - case 1: - if (ci->flags & CHF_DIPH2) - state = 2; - break; - default: - state = 0; - } - return state; -} |