summaryrefslogtreecommitdiffabout
path: root/src/ellinika/elmorph.h
blob: 83e1c83f22f73459d7561ca0a1397c29643069b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/* This file is part of Ellinika project.
   Copyright (C) 2011 Sergey Poznyakoff

   Ellinika is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Ellinika is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
 * Character flags (CHF_*).  struct char_info_st carries a `flags' member
 * that refers to these definitions.
 */

/* Accent codes (transliterated Greek names: οξεία, περισπωμένη, βαρεία).
   These are small enumerated values rather than independent bits; all
   three fit within CHF_ACCENT_MASK. */
#define CHF_OXEIA        1
#define CHF_PERISPWMENH  2
#define CHF_BAREIA       3

/* Low four bits of the flag word; presumably `flags & CHF_ACCENT_MASK'
   yields one of the accent codes above (or 0) -- confirm against users. */
#define CHF_ACCENT_MASK  0xf

/* Trema (diaeresis) mark: the first single-bit flag above the mask. */
#define CHF_TREMA        (1 << 4)

/* Character classes, one bit each. */
#define CHF_VOWEL        (1 << 5)
#define CHF_CONSONANT    (1 << 6)
#define CHF_SEMIVOWEL    (1 << 7)
#define CHF_PUNCT        (1 << 8)
#define CHF_SYMBOL       (1 << 9)
#define CHF_MODIFIER     (1 << 10)
#define CHF_ARCHAIC      (1 << 11)
#define CHF_LOWER        (1 << 12)
#define CHF_UPPER        (1 << 13)
#define CHF_NUMERIC      (1 << 14)

/* NOTE(review): these two are kept apart from the character classes; they
   may be meant for phoneme/syllable flags rather than for single
   characters -- confirm against the code that sets them. */
#define CHF_DIPHTHONG    (1 << 15)
#define CHF_IOTA         (1 << 16)

/*
 * Phoneme codes.  The comment after each code lists the Greek spelling(s)
 * associated with it.
 */

/* Vowels */
#define PHON_A      1   /* α */
#define PHON_E      2   /* ε αι */
#define PHON_I      3   /* ι η υ ει οι υι */
#define PHON_O      4   /* ο ω */
#define PHON_U      5   /* ου */

/* Consonants written with a single letter */
#define PHON_BH     6   /* β */
#define PHON_GH     7   /* γ */
#define PHON_DH     8   /* δ */
#define PHON_Z      9   /* ζ */
#define PHON_TH     10  /* θ */
#define PHON_K      11  /* κ */
#define PHON_L      12  /* λ */
#define PHON_M      13  /* μ */
#define PHON_N      14  /* ν */
#define PHON_X      15  /* ξ */
#define PHON_P      16  /* π */
#define PHON_R      17  /* ρ */
#define PHON_S      18  /* σ */
#define PHON_T      19  /* τ */
#define PHON_F      20  /* φ */
#define PHON_H      21  /* χ */
#define PHON_PS     22  /* ψ */

/* Consonants written with a letter combination */
#define PHON_B      23  /* μπ */
#define PHON_D      24  /* ντ */
#define PHON_G      25  /* γγ γκ γχ */
#define PHON_ZM     26  /* σμ */
#define PHON_TS     27  /* τσ */
#define PHON_DZ     28  /* τζ */
#define PHON_NGZ    29  /* νγζ */

/* Vowel + υ combinations */
#define PHON_AV     30  /* αυ */
#define PHON_EV     31  /* ευ */

/* One more than the highest code above (PHON_EV).
   NOTE(review): a leading underscore followed by an uppercase letter is in
   the namespace ISO C reserves for the implementation; the name is kept
   because existing code may refer to it. */
#define _PHON_MAX   32

/*
 * One phoneme located in a word.
 * NOTE(review): `start' and `count' are presumably expressed in characters
 * of the word the phoneme was found in -- confirm against phoneme_map().
 */
struct phoneme {
	/* Phoneme code */
	int code;
	/* Where the phoneme starts */
	unsigned int start;
	/* How many characters it spans */
	unsigned int count;
	/* Flag bits; not described here (presumably CHF_* values -- confirm) */
	int flags;
};

/*
 * One syllable, described both as a range of characters and as a range of
 * phonemes.
 * NOTE(review): the phoneme range presumably indexes the phoneme array
 * handed to syllable_map() -- confirm against its definition.
 */
struct syllable {
	/* Where the syllable starts, in characters */
	unsigned int char_start;
	/* How many characters it spans */
	unsigned int char_count;
	/* First phoneme of the syllable */
	unsigned int phoneme_start;
	/* How many phonemes it spans */
	unsigned int phoneme_count;
	/* Flag bits; not described here (presumably CHF_* values -- confirm) */
	int flags;
};

/*
 * Properties of a single character.
 */
struct char_info_st {
	/* The character this entry describes */
	unsigned int ch;
	/* CHF_* flags */
	int flags;
	/* Vowels only: the same vowel with all diacritics removed */
	unsigned int base;
	/* Counter-case equivalent: the uppercase letter when
	   flags & CHF_LOWER, the lowercase letter when flags & CHF_UPPER */
	unsigned int trans;
	/* Numeric value */
	unsigned int numval;
	/* Vowels only: the corresponding accented variants.
	   NOTE(review): three slots, presumably one per accent code
	   (CHF_OXEIA, CHF_PERISPWMENH, CHF_BAREIA) -- confirm the indexing. */
	unsigned int accented[3];
	/* Accented vowels with diaeresis: the corresponding non-accented
	   character */
	unsigned int deaccent;
	/* Letter code */
	int letter;
	/* Phoneme code */
	int phoneme;
};

/*
 * Per-character queries.  Every function takes the character as an
 * unsigned integer.
 *
 * NOTE(review): the definitions are not part of this header.  The remarks
 * on each group are inferred from the function names and from the members
 * of struct char_info_st; confirm them against the implementation.
 */

/* Lookup of the character's table entry and of individual members
   (presumably `flags', `letter' and `phoneme'). */
const struct char_info_st *elchr_info(unsigned int ch);
int elchr_flags(unsigned int ch);
int elchr_letter(unsigned int ch);
int elchr_phoneme(unsigned int ch);

/* Case tests (presumably CHF_UPPER / CHF_LOWER). */
int elchr_isupper(unsigned int ch);
int elchr_islower(unsigned int ch);

/* Diacritics (presumably the accent code under CHF_ACCENT_MASK, and
   CHF_TREMA). */
int elchr_getaccent(unsigned int ch);
int elchr_istrema(unsigned int ch);

/* Character class tests (presumably the matching CHF_* class bits). */
int elchr_isvowel(unsigned int ch);
int elchr_isconsonant(unsigned int ch);
int elchr_issemivowel(unsigned int ch);
int elchr_ispunct(unsigned int ch);
int elchr_issymbol(unsigned int ch);
int elchr_ismodifier(unsigned int ch);
int elchr_isarchaic(unsigned int ch);
int elchr_isnumeric(unsigned int ch);

/* Conversions; each returns a character or value as an unsigned integer
   (presumably taken from `numval', `trans', `base', `deaccent' and
   `accented').  `acc' is presumably one of the accent codes. */
unsigned int elchr_numeric_value(unsigned int ch);
unsigned int elchr_toupper(unsigned int ch);
unsigned int elchr_tolower(unsigned int ch);
unsigned int elchr_base(unsigned int ch);
unsigned int elchr_deaccent(unsigned int ch);
unsigned int elchr_accent(unsigned int ch, int acc);


/* The prototypes below use size_t.  Include its definition here so that
   this header compiles no matter what the including file pulled in first. */
#include <stddef.h>

/*
 * Aorist stem ("thema aoristoy") of a word.
 *   word, len   - the input word and its length
 *   thema, tlen - receive the result and its length
 * NOTE(review): the definition is not part of this header.  The meaning of
 * the return value, the unit of `len' (presumably characters) and who owns
 * the storage returned in *thema must be confirmed against it.
 */
int elmorph_thema_aoristoy(unsigned *word, size_t len,
			   unsigned **thema, size_t *tlen);

/*
 * Build the phoneme map of a word.
 *   pph, plen - receive the array of struct phoneme and its length
 *   word, len - the input word and its length
 * NOTE(review): return-value convention and ownership of *pph are not
 * visible here -- confirm against the definition.
 */
int phoneme_map(struct phoneme **pph, size_t *plen,
		unsigned *word, size_t len);

/*
 * Build the syllable map from a phoneme map.
 *   psyl, plen  - receive the array of struct syllable and its length
 *   phon, nphon - the phoneme array and the number of elements in it
 * NOTE(review): return-value convention and ownership of *psyl are not
 * visible here -- confirm against the definition.
 */
int syllable_map(struct syllable **psyl, size_t *plen,
		 struct phoneme *phon, size_t nphon);

/* Initialization entry point; takes no arguments and returns nothing.
   NOTE(review): what it sets up is not visible in this header. */
void elmorph_utf8scm_init(void);

Return to:

Send suggestions and report system problems to the System administrator.