diff options
Diffstat (limited to 'src/ellinika/elmorph.h')
-rw-r--r-- | src/ellinika/elmorph.h | 82 |
1 files changed, 79 insertions, 3 deletions
diff --git a/src/ellinika/elmorph.h b/src/ellinika/elmorph.h
index eacbde5..2399b8a 100644
--- a/src/ellinika/elmorph.h
+++ b/src/ellinika/elmorph.h
@@ -33,10 +33,82 @@
 #define CHF_UPPER 0x02000
 #define CHF_NUMERIC 0x04000
-#define CHF_DIPH1 0x10000
-#define CHF_DIPH2 0x20000
+#define CHF_DIPHTHONG 0x08000
+/* Phoneme codes.  The comment on each line lists the Greek letter(s) given for that code. */
+#define PHON_A 1 /* α */
+#define PHON_E 2 /* ε αι */
+#define PHON_I 3 /* ι η υ ει οι υι */
+#define PHON_O 4 /* ο ω */
+#define PHON_U 5 /* ου */
+
+#define PHON_BH 6 /* β */
+#define PHON_GH 7 /* γ */
+#define PHON_DH 8 /* δ */
+#define PHON_Z 9 /* ζ */
+#define PHON_TH 10 /* θ */
+#define PHON_K 11 /* κ */
+#define PHON_L 12 /* λ */
+#define PHON_M 13 /* μ */
+#define PHON_N 14 /* ν */
+#define PHON_X 15 /* ξ */
+#define PHON_P 16 /* π */
+#define PHON_R 17 /* ρ */
+#define PHON_S 18 /* σ */
+#define PHON_T 19 /* τ */
+#define PHON_F 20 /* φ */
+#define PHON_H 21 /* χ */
+#define PHON_PS 22 /* ψ */
+
+#define PHON_B 23 /* μπ */
+#define PHON_D 24 /* ντ */
+#define PHON_G 25 /* γγ γκ γχ */
+#define PHON_ZM 26 /* σμ */
+#define PHON_TS 27 /* τσ */
+#define PHON_DZ 28 /* τζ */
+#define PHON_NGZ 29 /* νγζ */
+
+#define PHON_AV 30 /* αυ */
+#define PHON_EV 31 /* ευ */
+
+#define _PHON_MAX 32 /* One more than the largest PHON_* code (PHON_EV is 31).  NOTE(review): an identifier starting with underscore + uppercase letter is reserved in C. */
+
+struct phoneme {
+    int code; /* Phoneme code (presumably one of the PHON_* values - TODO confirm) */
+    unsigned start; /* Start of phoneme (presumably a character index into the word - TODO confirm) */
+    unsigned count; /* Number of characters in it */
+    int flags; /* NOTE(review): meaning not documented here; possibly CHF_* bits - confirm */
+};
+
+struct syllable {
+    unsigned char_start; /* Start of syllable (presumably a character index - TODO confirm) */
+    unsigned char_count; /* Number of characters in it */
+    unsigned phoneme_start; /* presumably the index of the syllable's first phoneme - TODO confirm */
+    unsigned phoneme_count; /* presumably the number of phonemes in the syllable - TODO confirm */
+    int flags; /* NOTE(review): meaning not documented here - confirm */
+};
+
+struct char_info_st {
+    unsigned ch; /* Character code */
+    int flags; /* CHF_* flags (see above) */
+    unsigned base; /* For vowels - the corresponding vowel with
+                      all diacritics removed */
+    unsigned trans; /* The counter-case equivalent, i.e. the
+                       corresponding uppercase letter if
+                       flags & CHF_LOWER and the corresponding
+                       lowercase letter if flags & CHF_UPPER */
+    unsigned numval; /* Numeric value */
+    unsigned accented[3]; /* For vowels - corresponding accented variants (what each index selects is not shown here) */
+    unsigned deaccent; /* For accented vowels with diaeresis -
+                          corresponding non-accented character */
+    int letter; /* Letter code */
+    int phoneme; /* Phoneme code (presumably a PHON_* value - TODO confirm) */
+};
+
+struct char_info_st const *elchr_info(unsigned ch); /* NOTE(review): presumably returns the entry describing ch; result for unknown characters not visible here */
 int elchr_flags(unsigned ch);
+int elchr_letter(unsigned ch); /* presumably the letter code of ch - TODO confirm */
+int elchr_phoneme(unsigned ch); /* presumably the phoneme code of ch - TODO confirm */
 int elchr_isupper(unsigned ch);
 int elchr_islower(unsigned ch);
 int elchr_getaccent(unsigned ch);
@@ -55,8 +127,12 @@ unsigned elchr_tolower(unsigned ch);
 unsigned elchr_base(unsigned ch);
 unsigned elchr_deaccent(unsigned ch);
 unsigned elchr_accent(unsigned ch, int acc);
-int elchr_diphthong(unsigned ch, int state);
 int elmorph_thema_aoristoy(unsigned *word, size_t len,
     unsigned **thema, size_t *tlen);
+
+int phoneme_map(struct phoneme **pph, size_t *plen,
+    unsigned *word, size_t len); /* NOTE(review): presumably stores a phoneme array in *pph and its length in *plen for word[0..len); ownership and return convention not visible here; word could likely be const - confirm */
+int syllable_map(struct syllable **psyl, size_t *plen,
+    struct phoneme *phon, size_t nphon); /* NOTE(review): presumably stores a syllable array in *psyl and its length in *plen for phon[0..nphon); ownership and return convention not visible here - confirm */ |