/* This file is part of Ellinika project. Copyright (C) 2011 Sergey Poznyakoff Ellinika is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. Ellinika is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include #include "utf8.h" #include "elmorph.h" struct char_info_st { unsigned ch; /* Characters */ int flags; /* Flags (see above) */ unsigned base; /* for vowels - a corresponding vowel with all diacritics removed */ unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase letter if flags & CHF_LOWER and a corresponding lowerrcase letter if flags & CHF_UPPER */ unsigned numval; /* Numeric value */ unsigned accented[3]; /* For vowels - corresponding accented variant */ unsigned deaccent; /* For accented vowels with diaeresis - corresponding non-accented character */ }; /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ struct char_info_st el_basic_ctype[] = { { 0x0300, }, { 0x0301, }, { 0x0302, }, { 0x0303, }, { 0x0304, }, { 0x0305, }, { 0x0306, }, { 0x0307, }, { 0x0308, }, { 0x0309, }, { 0x030A, }, { 0x030B, }, { 0x030C, }, { 0x030D, }, { 0x030E, }, { 0x030F, }, { 0x0310, }, { 0x0311, }, { 0x0312, }, { 0x0313, }, { 0x0314, }, { 0x0315, }, { 0x0316, }, { 0x0317, }, { 0x0318, }, { 0x0319, }, { 0x031A, }, { 0x031B, }, { 0x031C, }, { 0x031D, }, { 0x031E, }, { 0x031F, }, { 0x0320, }, { 0x0321, }, { 0x0322, }, { 0x0323, }, { 0x0324, }, { 0x0325, }, { 0x0326, }, { 0x0327, }, { 0x0328, }, { 0x0329, }, { 0x032A, }, { 0x032B, }, { 0x032C, }, { 0x032D, }, { 0x032E, }, { 0x032F, }, { 0x0330, }, { 0x0331, }, { 0x0332, }, { 0x0333, }, { 0x0334, }, { 0x0335, }, { 0x0336, }, { 0x0337, }, { 0x0338, }, { 0x0339, }, { 0x033A, }, { 0x033B, }, { 0x033C, }, { 0x033D, }, { 0x033E, }, { 0x033F, }, { 0x0340, }, { 0x0341, }, { 0x0342, }, { 0x0343, }, { 0x0344, }, { 0x0345, }, { 0x0346, }, { 0x0347, }, { 0x0348, }, { 0x0349, }, { 0x034A, }, { 0x034B, }, { 0x034C, }, { 0x034D, }, { 0x034E, }, { 0x034F, }, { 0x0350, }, { 0x0351, }, { 0x0352, }, { 0x0353, }, { 0x0354, }, { 0x0355, }, { 0x0356, }, { 0x0357, }, { 0x0358, }, { 0x0359, }, { 0x035A, }, { 0x035B, }, { 0x035C, }, { 0x035D, }, { 0x035E, }, { 0x035F, }, { 0x0360, }, { 0x0361, }, { 0x0362, }, { 0x0363, }, { 0x0364, }, { 0x0365, }, { 0x0366, }, { 0x0367, }, { 0x0368, }, { 0x0369, }, { 0x036A, }, { 0x036B, }, { 0x036C, }, { 0x036D, }, { 0x036E, }, { 0x036F, }, { 0x0370, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER, 0, 0x0371 }, /* CAPITAL HETTA */ { 0x0371, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER, 0, 0x0370 }, /* SMALL HETA */ { 0x0372, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER, 0, 0x0373 }, /* CAPITAL SAMPI */ { 0x0373, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER, 0, 0x0372 }, /* SMALL SAMPI */ { 0x0374, CHF_MODIFIER|CHF_UPPER, 0, 0x0375 }, /* NUMERAL SIGN = dexia keraia */ { 0x0375, CHF_MODIFIER|CHF_LOWER, 0, 0x0374 }, /* aristeri keraia */ { 0x0376, CHF_ARCHAIC|CHF_SEMIVOWEL|CHF_UPPER, 0, 0x0377}, /* CAPITAL PAMPHYLIAN DIGAMMA */ { 0x0377, CHF_ARCHAIC|CHF_SEMIVOWEL|CHF_LOWER, 0, 0x0376}, /* SMALL PAMPHYLIAN DIGAMMA */ { 0x0378, }, { 0x0379, }, { 0x037A, CHF_ARCHAIC|CHF_MODIFIER }, /* YPOGEGRAMMENI */ { 0x037B, CHF_SYMBOL, 0, 0x03FD }, /* SMALL REVERSED LUNATE SIGMA */ { 0x037C, CHF_SYMBOL, 0, 0x03FE }, /* SMALL DOTTED LUNATE SIGMA */ { 0x037D, CHF_SYMBOL, 0, 0x03FF }, /* SMALL REVERSED DOTTED LUNATE SIGMA */ { 0x037E, CHF_PUNCT }, /* erotimatiko */ { 0x037F, }, { 0x0380, }, { 0x0381, }, { 0x0382, }, { 0x0383, }, { 0x0384, CHF_MODIFIER }, /* Oxeia */ { 0x0385, CHF_MODIFIER }, /* dialytika */ { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ { 0x0387, CHF_PUNCT }, /* ano teleia */ { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ { 0x038B, }, { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ { 0x038D, }, { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ { 0x03A2, }, { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */ { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B4, 0x0388 }, /* έ */ { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */ { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */ { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */ { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */ { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */ { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */ { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */ { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */ { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */ { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */ { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */ { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */ { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */ { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */ { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */ { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */ { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F, 70, 0x03CC }, /* ο */ { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */ { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */ { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */ { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */ { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */ { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */ { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */ { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */ { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */ { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */ { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */ { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */ { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */ { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */ { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */ { 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */ { 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */ { 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */ { 0x03D2, CHF_VOWEL|CHF_UPPER, }, /* capital ypsilon with hook */ { 0x03D3, CHF_VOWEL|CHF_OXEIA, 0x03D2 }, /* capital ypsilon with acute & hook */ { 0x03D4, CHF_VOWEL|CHF_TREMA, 0x03D2 }, /* capital ypsilon with diaeresis & hook */ { 0x03D5, CHF_CONSONANT|CHF_LOWER, 0, 0x03A6 }, /* phi */ { 0x03D6, CHF_CONSONANT|CHF_LOWER, 0, 0x03A0 }, /* pi */ { 0x03D7, CHF_SYMBOL|CHF_LOWER, 0, 0x03CF }, /* kai */ { 0x03D8, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER, 0, 0x03D9 }, /* QOPPA */ { 0x03D9, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER, 0, 0x03D8 }, /* qoppa */ { 0x03DA, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03DB, 6 }, /* STIGMA */ { 0x03DB, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03DA, 6 }, /* stigma */ { 0x03DC, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03DD, 6 }, /* DIGAMMA */ { 0x03DD, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03DC, 6 }, /* digamma */ { 0x03DE, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03DF, 6 }, /* KOPPA */ { 0x03DF, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03DE, 6 }, /* koppa */ { 0x03E0, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03E1, 900 }, /* SAMPI */ { 0x03E1, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03E0, 900 }, /* sampi */ { 0x03E2, }, { 0x03E3, }, { 0x03E4, }, { 0x03E5, }, { 0x03E6, }, { 0x03E7, }, { 0x03E8, }, { 0x03E9, }, { 0x03EA, }, { 0x03EB, }, { 0x03EC, }, { 0x03ED, }, { 0x03EE, }, { 0x03EF, }, { 0x03F0, CHF_CONSONANT|CHF_LOWER, 0, 0x039A }, /* kappa */ { 0x03F1, CHF_CONSONANT|CHF_LOWER, 0, 0x03A1 }, /* tailed rho */ { 0x03F2, CHF_CONSONANT, 0, 0x03F9 }, /* lunate sigma */ { 0x03F3, CHF_SEMIVOWEL|CHF_LOWER, }, /* yot */ { 0x03F4, CHF_CONSONANT|CHF_UPPER, 0, 0x03B8 }, /* THETA */ { 0x03F5, CHF_SYMBOL|CHF_LOWER, 0, 0x0395 }, /* lunate epsilon */ { 0x03F6, CHF_SYMBOL|CHF_LOWER, }, /* reversed lunate epsilon */ { 0x03F7, }, { 0x03F8, }, { 0x03F9, CHF_CONSONANT|CHF_UPPER, 0, 0x03F2 }, /* LUNATE SIGMA */ { 0x03FA, CHF_ARCHAIC|CHF_CONSONANT|CHF_UPPER, 0, 0x03FB }, /* SAN */ { 0x03FB, CHF_ARCHAIC|CHF_CONSONANT|CHF_LOWER, 0, 0x03FA }, /* san */ { 0x03FC, CHF_SYMBOL|CHF_CONSONANT|CHF_LOWER, }, /* rho with stroke */ { 0x03FD, CHF_SYMBOL|CHF_CONSONANT|CHF_UPPER, 0, 0x037B}, /* CAPITAL REV. LUNATE SIGMA antisigma */ { 0x03FE, CHF_SYMBOL|CHF_CONSONANT|CHF_UPPER, 0, 0x037C }, /* CAPITAL DOTTED LUNATE SIGMA sigma periestigmenon */ { 0x03FF, CHF_SYMBOL|CHF_CONSONANT|CHF_UPPER, 0, 0x037D }, /* antisigma periestigmenon */ }; /* FIXME: Implement http://www.unicode.org/charts/PDF/U1F00.pdf */ struct char_info_st el_extended_ctype[] = { { 0x1F00, }, { 0x1F01, }, { 0x1F02, }, { 0x1F03, }, { 0x1F04, }, { 0x1F05, }, { 0x1F06, }, { 0x1F07, }, { 0x1F08, }, { 0x1F09, }, { 0x1F0A, }, { 0x1F0B, }, { 0x1F0C, }, { 0x1F0D, }, { 0x1F0E, }, { 0x1F0F, }, { 0x1F10, }, { 0x1F11, }, { 0x1F12, }, { 0x1F13, }, { 0x1F14, }, { 0x1F15, }, { 0x1F16, }, { 0x1F17, }, { 0x1F18, }, { 0x1F19, }, { 0x1F1A, }, { 0x1F1B, }, { 0x1F1C, }, { 0x1F1D, }, { 0x1F1E, }, { 0x1F1F, }, { 0x1F20, }, { 0x1F21, }, { 0x1F22, }, { 0x1F23, }, { 0x1F24, }, { 0x1F25, }, { 0x1F26, }, { 0x1F27, }, { 0x1F28, }, { 0x1F29, }, { 0x1F2A, }, { 0x1F2B, }, { 0x1F2C, }, { 0x1F2D, }, { 0x1F2E, }, { 0x1F2F, }, { 0x1F30, }, { 0x1F31, }, { 0x1F32, }, { 0x1F33, }, { 0x1F34, }, { 0x1F35, }, { 0x1F36, }, { 0x1F37, }, { 0x1F38, }, { 0x1F39, }, { 0x1F3A, }, { 0x1F3B, }, { 0x1F3C, }, { 0x1F3D, }, { 0x1F3E, }, { 0x1F3F, }, { 0x1F40, }, { 0x1F41, }, { 0x1F42, }, { 0x1F43, }, { 0x1F44, }, { 0x1F45, }, { 0x1F46, }, { 0x1F47, }, { 0x1F48, }, { 0x1F49, }, { 0x1F4A, }, { 0x1F4B, }, { 0x1F4C, }, { 0x1F4D, }, { 0x1F4E, }, { 0x1F4F, }, { 0x1F50, }, { 0x1F51, }, { 0x1F52, }, { 0x1F53, }, { 0x1F54, }, { 0x1F55, }, { 0x1F56, }, { 0x1F57, }, { 0x1F58, }, { 0x1F59, }, { 0x1F5A, }, { 0x1F5B, }, { 0x1F5C, }, { 0x1F5D, }, { 0x1F5E, }, { 0x1F5F, }, { 0x1F60, }, { 0x1F61, }, { 0x1F62, }, { 0x1F63, }, { 0x1F64, }, { 0x1F65, }, { 0x1F66, }, { 0x1F67, }, { 0x1F68, }, { 0x1F69, }, { 0x1F6A, }, { 0x1F6B, }, { 0x1F6C, }, { 0x1F6D, }, { 0x1F6E, }, { 0x1F6F, }, { 0x1F70, }, { 0x1F71, }, { 0x1F72, }, { 0x1F73, }, { 0x1F74, }, { 0x1F75, }, { 0x1F76, }, { 0x1F77, }, { 0x1F78, }, { 0x1F79, }, { 0x1F7A, }, { 0x1F7B, }, { 0x1F7C, }, { 0x1F7D, }, { 0x1F7E, }, { 0x1F7F, }, { 0x1F80, }, { 0x1F81, }, { 0x1F82, }, { 0x1F83, }, { 0x1F84, }, { 0x1F85, }, { 0x1F86, }, { 0x1F87, }, { 0x1F88, }, { 0x1F89, }, { 0x1F8A, }, { 0x1F8B, }, { 0x1F8C, }, { 0x1F8D, }, { 0x1F8E, }, { 0x1F8F, }, { 0x1F90, }, { 0x1F91, }, { 0x1F92, }, { 0x1F93, }, { 0x1F94, }, { 0x1F95, }, { 0x1F96, }, { 0x1F97, }, { 0x1F98, }, { 0x1F99, }, { 0x1F9A, }, { 0x1F9B, }, { 0x1F9C, }, { 0x1F9D, }, { 0x1F9E, }, { 0x1F9F, }, { 0x1FA0, }, { 0x1FA1, }, { 0x1FA2, }, { 0x1FA3, }, { 0x1FA4, }, { 0x1FA5, }, { 0x1FA6, }, { 0x1FA7, }, { 0x1FA8, }, { 0x1FA9, }, { 0x1FAA, }, { 0x1FAB, }, { 0x1FAC, }, { 0x1FAD, }, { 0x1FAE, }, { 0x1FAF, }, { 0x1FB0, }, { 0x1FB1, }, { 0x1FB2, }, { 0x1FB3, }, { 0x1FB4, }, { 0x1FB5, }, { 0x1FB6, }, { 0x1FB7, }, { 0x1FB8, }, { 0x1FB9, }, { 0x1FBA, }, { 0x1FBB, }, { 0x1FBC, }, { 0x1FBD, }, { 0x1FBE, }, { 0x1FBF, }, { 0x1FC0, }, { 0x1FC1, }, { 0x1FC2, }, { 0x1FC3, }, { 0x1FC4, }, { 0x1FC5, }, { 0x1FC6, }, { 0x1FC7, }, { 0x1FC8, }, { 0x1FC9, }, { 0x1FCA, }, { 0x1FCB, }, { 0x1FCC, }, { 0x1FCD, }, { 0x1FCE, }, { 0x1FCF, }, { 0x1FD0, }, { 0x1FD1, }, { 0x1FD2, }, { 0x1FD3, }, { 0x1FD4, }, { 0x1FD5, }, { 0x1FD6, }, { 0x1FD7, }, { 0x1FD8, }, { 0x1FD9, }, { 0x1FDA, }, { 0x1FDB, }, { 0x1FDC, }, { 0x1FDD, }, { 0x1FDE, }, { 0x1FDF, }, { 0x1FE0, }, { 0x1FE1, }, { 0x1FE2, }, { 0x1FE3, }, { 0x1FE4, }, { 0x1FE5, }, { 0x1FE6, }, { 0x1FE7, }, { 0x1FE8, }, { 0x1FE9, }, { 0x1FEA, }, { 0x1FEB, }, { 0x1FEC, }, { 0x1FED, }, { 0x1FEE, }, { 0x1FEF, }, { 0x1FF0, }, { 0x1FF1, }, { 0x1FF2, }, { 0x1FF3, }, { 0x1FF4, }, { 0x1FF5, }, { 0x1FF6, }, { 0x1FF7, }, { 0x1FF8, }, { 0x1FF9, }, { 0x1FFA, }, { 0x1FFB, }, { 0x1FFC, }, { 0x1FFD, }, { 0x1FFE, }, { 0x1FFF, } }; static struct char_info_st * elchr_info(unsigned ch) { if (ch >= 0x0300 && ch <= 0x03FF) return el_basic_ctype + ch - 0x0300; else if (ch >= 0x1F00 && ch <= 0x1FFF) return el_extended_ctype + ch - 0x1F00; return NULL; } int elchr_flags(unsigned ch) { struct char_info_st *ci = elchr_info(ch); return ci ? ci->flags : 0; } int elchr_isupper(unsigned ch) { return elchr_flags(ch) & CHF_UPPER; } int elchr_islower(unsigned ch) { return elchr_flags(ch) & CHF_LOWER; } int elchr_getaccent(unsigned ch) { return elchr_flags(ch) & CHF_ACCENT_MASK; } int elchr_istrema(unsigned ch) { return elchr_flags(ch) & CHF_TREMA; } int elchr_isvowel(unsigned ch) { return elchr_flags(ch) & CHF_VOWEL; } int elchr_isconsonant(unsigned ch) { return elchr_flags(ch) & CHF_CONSONANT; } int elchr_issemivowel(unsigned ch) { return elchr_flags(ch) & CHF_SEMIVOWEL; } int elchr_ispunct(unsigned ch) { return elchr_flags(ch) & CHF_PUNCT; } int elchr_issymbol(unsigned ch) { return elchr_flags(ch) & CHF_SYMBOL; } int elchr_ismodifier(unsigned ch) { return elchr_flags(ch) & CHF_MODIFIER; } int elchr_isarchaic(unsigned ch) { return elchr_flags(ch) & CHF_ARCHAIC; } int elchr_isnumeric(unsigned ch) { return elchr_flags(ch) & CHF_NUMERIC; } unsigned elchr_numeric_value(unsigned ch) { struct char_info_st *ci = elchr_info(ch); return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0; } unsigned elchr_toupper(unsigned ch) { struct char_info_st *ci = elchr_info(ch); return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch; } unsigned elchr_tolower(unsigned ch) { struct char_info_st *ci = elchr_info(ch); return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch; } unsigned elchr_base(unsigned ch) { struct char_info_st *ci = elchr_info(ch); return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch; } unsigned elchr_deaccent(unsigned ch) { struct char_info_st *ci = elchr_info(ch); if (ci && (ci->flags & CHF_ACCENT_MASK)) return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch; return ch; } unsigned elchr_accent(unsigned ch, int acc) { struct char_info_st *ci = elchr_info(ch); return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch; } int elchr_diphthong(unsigned ch, int state) { struct char_info_st *ci = elchr_info(ch); if (!ci || !(ci->flags & CHF_VOWEL)) return 0; switch (state) { case 0: if (ci->flags & CHF_DIPH1) state = 1; break; case 1: if (ci->flags & CHF_DIPH2) state = 2; break; default: state = 0; } return state; }