1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
/* This file is part of Ellinika project.
Copyright (C) 2011 Sergey Poznyakoff
Ellinika is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Ellinika is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * Character flags (the CHF_* values referred to by the `flags' member of
 * struct char_info_st).
 *
 * Accent codes.  These are consecutive codes rather than independent bits
 * (3 == 1|2), so a flags word has to be masked with CHF_ACCENT_MASK before
 * it is compared with one of them.
 */
#define CHF_OXEIA        1        /* oxeia: acute accent */
#define CHF_PERISPWMENH  2        /* perispomeni: circumflex accent */
#define CHF_BAREIA       3        /* bareia: grave accent */
#define CHF_ACCENT_MASK  0x0000f  /* the four low bits: room for the accent code */

/*
 * Single-bit properties.  Every value below is a distinct bit located
 * above CHF_ACCENT_MASK, so they can be OR-ed with each other and with an
 * accent code.
 * NOTE(review): the per-bit remarks are taken from the macro names; the
 * code that sets and tests these bits is not part of this header.
 */
#define CHF_TREMA        0x00010  /* trema (diaeresis) */
#define CHF_VOWEL        0x00020  /* vowel */
#define CHF_CONSONANT    0x00040  /* consonant */
#define CHF_SEMIVOWEL    0x00080  /* semivowel */
#define CHF_PUNCT        0x00100  /* punctuation */
#define CHF_SYMBOL       0x00200  /* symbol */
#define CHF_MODIFIER     0x00400  /* modifier */
#define CHF_ARCHAIC      0x00800  /* archaic */
#define CHF_LOWER        0x01000  /* lowercase */
#define CHF_UPPER        0x02000  /* uppercase */
#define CHF_NUMERIC      0x04000  /* numeric */
#define CHF_DIPHTHONG    0x08000  /* diphthong */
#define CHF_IOTA         0x10000  /* iota-related; exact meaning to be confirmed */
/*
 * Phoneme codes.
 *
 * The codes run consecutively from 1.  The comment after each value lists
 * the Greek letter or letter combination given for that code.
 */
#define PHON_A     1   /* α */
#define PHON_E     2   /* ε αι */
#define PHON_I     3   /* ι η υ ει οι υι */
#define PHON_O     4   /* ο ω */
#define PHON_U     5   /* ου */
#define PHON_BH    6   /* β */
#define PHON_GH    7   /* γ */
#define PHON_DH    8   /* δ */
#define PHON_Z     9   /* ζ */
#define PHON_TH    10  /* θ */
#define PHON_K     11  /* κ */
#define PHON_L     12  /* λ */
#define PHON_M     13  /* μ */
#define PHON_N     14  /* ν */
#define PHON_X     15  /* ξ */
#define PHON_P     16  /* π */
#define PHON_R     17  /* ρ */
#define PHON_S     18  /* σ */
#define PHON_T     19  /* τ */
#define PHON_F     20  /* φ */
#define PHON_H     21  /* χ */
#define PHON_PS    22  /* ψ */
#define PHON_B     23  /* μπ */
#define PHON_D     24  /* ντ */
#define PHON_G     25  /* γγ γκ γχ */
#define PHON_ZM    26  /* σμ */
#define PHON_TS    27  /* τσ */
#define PHON_DZ    28  /* τζ */
#define PHON_NGZ   29  /* νγζ */
#define PHON_AV    30  /* αυ */
#define PHON_EV    31  /* ευ */

/*
 * One more than the highest phoneme code above.
 * NOTE(review): identifiers that begin with an underscore followed by an
 * uppercase letter are reserved by the C standard.  The name is kept
 * unchanged because other files may refer to it.
 */
#define _PHON_MAX  32
/*
 * A phoneme located within a sequence of characters.
 * NOTE(review): presumably the element type filled in by phoneme_map();
 * confirm against the implementation.
 */
struct phoneme {
    /* Phoneme code. */
    int code;
    /* Where the phoneme starts. */
    unsigned int start;
    /* How many characters the phoneme covers. */
    unsigned int count;
    /* NOTE(review): the meaning of these flags is not documented here. */
    int flags;
};
/*
 * A syllable, described both as a range of characters and as a range of
 * phonemes.
 * NOTE(review): presumably the element type filled in by syllable_map();
 * confirm against the implementation.
 */
struct syllable {
    /* Where the syllable starts. */
    unsigned int char_start;
    /* How many characters the syllable covers. */
    unsigned int char_count;
    /* NOTE(review): presumably the first phoneme of the syllable. */
    unsigned int phoneme_start;
    /* NOTE(review): presumably the number of phonemes in the syllable. */
    unsigned int phoneme_count;
    /* NOTE(review): the meaning of these flags is not documented here. */
    int flags;
};
/* Properties recorded for a single character. */
struct char_info_st {
    /* The character described by this entry. */
    unsigned int ch;
    /* CHF_* flags. */
    int flags;
    /* Vowels only: the same vowel with all diacritics removed. */
    unsigned int base;
    /*
     * Counter-case equivalent: the corresponding uppercase letter when
     * flags & CHF_LOWER, the corresponding lowercase letter when
     * flags & CHF_UPPER.
     */
    unsigned int trans;
    /* Numeric value. */
    unsigned int numval;
    /*
     * Vowels only: the corresponding accented variants.
     * NOTE(review): the three slots match the three accent codes
     * (CHF_OXEIA, CHF_PERISPWMENH, CHF_BAREIA); the indexing convention
     * is to be confirmed against the implementation.
     */
    unsigned int accented[3];
    /*
     * Accented vowels with diaeresis only: the corresponding
     * non-accented character.
     */
    unsigned int deaccent;
    /* Letter code. */
    int letter;
    /* Phoneme code. */
    int phoneme;
};
/*
 * Character-level interface.  Every function takes a character `ch'.
 *
 * NOTE(review): only the declarations are visible in this header.  The
 * grouping below follows the function names and the members of
 * struct char_info_st; return conventions (for instance, what is returned
 * for a character that has no table entry) must be confirmed against the
 * implementation.
 */

/* Table entry for `ch' and individual items of information about it. */
const struct char_info_st *elchr_info(unsigned int ch);
int elchr_flags(unsigned int ch);
int elchr_letter(unsigned int ch);
int elchr_phoneme(unsigned int ch);

/* Classification; the names parallel the CHF_* flags. */
int elchr_isupper(unsigned int ch);
int elchr_islower(unsigned int ch);
int elchr_getaccent(unsigned int ch);
int elchr_istrema(unsigned int ch);
int elchr_isvowel(unsigned int ch);
int elchr_isconsonant(unsigned int ch);
int elchr_issemivowel(unsigned int ch);
int elchr_ispunct(unsigned int ch);
int elchr_issymbol(unsigned int ch);
int elchr_ismodifier(unsigned int ch);
int elchr_isarchaic(unsigned int ch);
int elchr_isnumeric(unsigned int ch);

/* Functions that return a number or another character. */
unsigned int elchr_numeric_value(unsigned int ch);
unsigned int elchr_toupper(unsigned int ch);
unsigned int elchr_tolower(unsigned int ch);
unsigned int elchr_base(unsigned int ch);
unsigned int elchr_deaccent(unsigned int ch);
/* NOTE(review): `acc' is presumably one of the CHF_* accent codes. */
unsigned int elchr_accent(unsigned int ch, int acc);
/*
 * The prototypes below use size_t.  Include its definition here so that
 * this header compiles no matter what was included before it.
 */
#include <stddef.h>

/*
 * Word-level interface.
 *
 * NOTE(review): only the declarations are visible in this header.  The
 * parameter descriptions below are read off the parameter names and types;
 * return values and the ownership of the output arrays must be confirmed
 * against the implementation.
 */

/*
 * word, len   - input word and its length;
 * thema, tlen - presumably receive the resulting stem and its length.
 */
int elmorph_thema_aoristoy(unsigned *word, size_t len,
                           unsigned **thema, size_t *tlen);

/*
 * word, len   - input word and its length;
 * pph, plen   - presumably receive an array of struct phoneme and its
 *               element count.
 */
int phoneme_map(struct phoneme **pph, size_t *plen,
                unsigned *word, size_t len);

/*
 * phon, nphon - input array of phonemes and its element count;
 * psyl, plen  - presumably receive an array of struct syllable and its
 *               element count.
 */
int syllable_map(struct syllable **psyl, size_t *plen,
                 struct phoneme *phon, size_t nphon);

/* NOTE(review): purpose is not documented in this header. */
void elmorph_utf8scm_init(void);
|