diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-10 23:04:53 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2011-06-11 00:34:20 +0300 |
commit | a1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch) | |
tree | f86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika | |
parent | 2bae7da012e2125762855ce014e63345ecbbbb18 (diff) | |
download | ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2 |
Fix syllabification.
* configure.ac: Add AC_PROG_YACC.
* src/ellinika/phoneme.y: New file.
* src/ellinika/yyrename: New file.
* src/ellinika/syllabificator.c: New file.
* src/ellinika/.gitignore: Update.
* src/ellinika/elchr.c (char_info_st): Move to header.
(el_basic_ctype):
(elchr_info): Remove static qualifier.
Return a pointer to const.
(elchr_letter,elchr_phoneme): New functions.
(elchr_diphthong): Remove.
* src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members.
(_elstr_syllabize): Rewrite.
(invalidate_maps): New static function.
(_elstr_alloc): Initialize new fields, take function name
as argument, for diagnostic purposes.
(_elstr_print): Rewrite.
(deftab): Update.
(elstr-syllable-prop,elstr-syllable)
(_elstr_set_accent,_elstr_set_accent_on_char): Rewrite.
(elstr-char-phoneme,elstr->phonetic-map): New functions.
* src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove.
(CHF_DIPHTHONG): New flag.
(PHON_.*): New constants.
(phoneme,syllable): New structures.
(char_info_st)<letter,phoneme>: New members.
(elchr_info,elchr_letter)
(elchr_phoneme,phoneme_map)
(syllable_map): New protos.
(elchr_diphthong): Remove protos.
* src/ellinika/elmorph.scm4: Move public definitions
to elmorph-public.scm; include it here.
* src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a
wrapper over elstr->soundslike.
Describe Milesian numbers.
* style.css (img.ellinika-img): New class.
* xml/lingua.conf.in (IMAGE): New tag.
* xml/pl/alfabhta.xml: Describe Milesian numbers.
Various fixes.
* data/dbverb.struct: Fix a typo in flection.
Use 'sub' theme for pas/sub/aor.
* data/irregular-verbs.xml: Add more verbs.
* scm/conjugator.scm: Various fixes.
* scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/.gitignore | 2 | ||||
-rw-r--r-- | src/ellinika/Makefile.am | 23 | ||||
-rw-r--r-- | src/ellinika/elchr.c | 273 | ||||
-rw-r--r-- | src/ellinika/elmorph-public.scm | 106 | ||||
-rw-r--r-- | src/ellinika/elmorph.c | 308 | ||||
-rw-r--r-- | src/ellinika/elmorph.h | 82 | ||||
-rw-r--r-- | src/ellinika/elmorph.scm4 | 25 | ||||
-rw-r--r-- | src/ellinika/phoneme.y | 353 | ||||
-rw-r--r-- | src/ellinika/syllabificator.c | 152 | ||||
-rw-r--r-- | src/ellinika/tenses.scm | 38 | ||||
-rw-r--r-- | src/ellinika/xlat.scm | 113 | ||||
-rwxr-xr-x | src/ellinika/yyrename | 97 |
12 files changed, 1206 insertions, 366 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore index 9422f9a..11bf478 100644 --- a/src/ellinika/.gitignore +++ b/src/ellinika/.gitignore | |||
@@ -3,3 +3,5 @@ cgi.scm | |||
3 | config.scm | 3 | config.scm |
4 | elmorph.scm | 4 | elmorph.scm |
5 | elmorph.x | 5 | elmorph.x |
6 | phoneme.c | ||
7 | phoneme.h | ||
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am index 274eea8..b8988d4 100644 --- a/src/ellinika/Makefile.am +++ b/src/ellinika/Makefile.am | |||
@@ -1,5 +1,5 @@ | |||
1 | # This file is part of Ellinika project. | 1 | # This file is part of Ellinika project. |
2 | # Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff | 2 | # Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff |
3 | # | 3 | # |
4 | # Ellinika is free software; you can redistribute it and/or modify | 4 | # Ellinika is free software; you can redistribute it and/or modify |
5 | # it under the terms of the GNU General Public License as published by | 5 | # it under the terms of the GNU General Public License as published by |
@@ -15,7 +15,14 @@ | |||
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 | ||
17 | guiledir=$(GUILE_SITE)/$(PACKAGE) | 17 | guiledir=$(GUILE_SITE)/$(PACKAGE) |
18 | guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm | 18 | guile_DATA=\ |
19 | xlat.scm\ | ||
20 | cgi.scm\ | ||
21 | i18n.scm\ | ||
22 | config.scm\ | ||
23 | dico.scm\ | ||
24 | elmorph.scm\ | ||
25 | tenses.scm | ||
19 | 26 | ||
20 | cgi.m4: Makefile | 27 | cgi.m4: Makefile |
21 | echo 'divert(-1)' > $@ | 28 | echo 'divert(-1)' > $@ |
@@ -39,11 +46,11 @@ cgi.m4: Makefile | |||
39 | SUFFIXES = .scm4 .scm .x | 46 | SUFFIXES = .scm4 .scm .x |
40 | 47 | ||
41 | .scm4.scm: | 48 | .scm4.scm: |
42 | m4 cgi.m4 $< > $@ | 49 | m4 -I$(srcdir) cgi.m4 $< > $@ |
43 | 50 | ||
44 | cgi.scm: cgi.scm4 cgi.m4 | 51 | cgi.scm: cgi.scm4 cgi.m4 |
45 | config.scm: config.scm4 cgi.m4 | 52 | config.scm: config.scm4 cgi.m4 |
46 | elmorph.scm: elmorph.scm4 cgi.m4 | 53 | elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4 |
47 | 54 | ||
48 | pkglib_LTLIBRARIES=libelmorph.la | 55 | pkglib_LTLIBRARIES=libelmorph.la |
49 | 56 | ||
@@ -52,7 +59,9 @@ libelmorph_la_SOURCES = \ | |||
52 | utf8.c\ | 59 | utf8.c\ |
53 | elchr.c\ | 60 | elchr.c\ |
54 | elmorph.c\ | 61 | elmorph.c\ |
55 | elmorph.h | 62 | elmorph.h\ |
63 | phoneme.y\ | ||
64 | syllabificator.c | ||
56 | 65 | ||
57 | DOT_X_FILES = elmorph.x | 66 | DOT_X_FILES = elmorph.x |
58 | 67 | ||
@@ -80,4 +89,6 @@ install-data-hook: | |||
80 | done; \ | 89 | done; \ |
81 | cd $$here | 90 | cd $$here |
82 | 91 | ||
83 | 92 | AM_YFLAGS = -d | |
93 | YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)' | ||
94 | EXTRA_DIST = yyrename elmorph-public.scm \ No newline at end of file | ||
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c index 3142b6f..621ac03 100644 --- a/src/ellinika/elchr.c +++ b/src/ellinika/elchr.c | |||
@@ -23,20 +23,7 @@ | |||
23 | #include <libguile.h> | 23 | #include <libguile.h> |
24 | #include "utf8.h" | 24 | #include "utf8.h" |
25 | #include "elmorph.h" | 25 | #include "elmorph.h" |
26 | 26 | #include "phoneme.h" | |
27 | struct char_info_st { | ||
28 | unsigned ch; /* Characters */ | ||
29 | int flags; /* Flags (see above) */ | ||
30 | unsigned base; /* for vowels - a corresponding vowel with all diacritics | ||
31 | removed */ | ||
32 | unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase | ||
33 | letter if flags & CHF_LOWER and a corresponding lowerrcase | ||
34 | letter if flags & CHF_UPPER */ | ||
35 | unsigned numval; /* Numeric value */ | ||
36 | unsigned accented[3]; /* For vowels - corresponding accented variant */ | ||
37 | unsigned deaccent; /* For accented vowels with diaeresis - corresponding | ||
38 | non-accented character */ | ||
39 | }; | ||
40 | 27 | ||
41 | /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ | 28 | /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ |
42 | struct char_info_st el_basic_ctype[] = { | 29 | struct char_info_st el_basic_ctype[] = { |
@@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = { | |||
174 | { 0x0383, }, | 161 | { 0x0383, }, |
175 | { 0x0384, CHF_MODIFIER }, /* Oxeia */ | 162 | { 0x0384, CHF_MODIFIER }, /* Oxeia */ |
176 | { 0x0385, CHF_MODIFIER }, /* dialytika */ | 163 | { 0x0385, CHF_MODIFIER }, /* dialytika */ |
177 | { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ | 164 | { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC, |
165 | 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */ | ||
178 | { 0x0387, CHF_PUNCT }, /* ano teleia */ | 166 | { 0x0387, CHF_PUNCT }, /* ano teleia */ |
179 | { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ | 167 | { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD, |
180 | { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ | 168 | 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */ |
181 | { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ | 169 | { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE, |
170 | 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */ | ||
171 | { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF, | ||
172 | 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */ | ||
182 | { 0x038B, }, | 173 | { 0x038B, }, |
183 | { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ | 174 | { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC, |
175 | 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */ | ||
184 | { 0x038D, }, | 176 | { 0x038D, }, |
185 | { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ | 177 | { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD, |
186 | { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ | 178 | 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */ |
187 | { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ | 179 | { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE, |
188 | { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ | 180 | 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */ |
189 | { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ | 181 | { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, |
190 | { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ | 182 | 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */ |
191 | { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ | 183 | { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1, |
192 | { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ | 184 | 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */ |
193 | { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ | 185 | { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, |
194 | { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ | 186 | 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */ |
195 | { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ | 187 | { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, |
196 | { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ | 188 | 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */ |
197 | { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ | 189 | { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, |
198 | { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ | 190 | 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */ |
199 | { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ | 191 | { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5, |
200 | { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ | 192 | 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */ |
201 | { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ | 193 | { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, |
202 | { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ | 194 | 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */ |
203 | { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ | 195 | { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, |
204 | { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ | 196 | 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */ |
197 | { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, | ||
198 | 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */ | ||
199 | { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9, | ||
200 | 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */ | ||
201 | { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, | ||
202 | 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */ | ||
203 | { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, | ||
204 | 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */ | ||
205 | { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, | ||
206 | 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */ | ||
207 | { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, | ||
208 | 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */ | ||
209 | { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, | ||
210 | 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */ | ||
211 | { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, | ||
212 | 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */ | ||
213 | { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, | ||
214 | 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */ | ||
215 | { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, | ||
216 | 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */ | ||
205 | { 0x03A2, }, | 217 | { 0x03A2, }, |
206 | { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ | 218 | { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, |
207 | { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ | 219 | 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */ |
208 | { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ | 220 | { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, |
209 | { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ | 221 | 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */ |
210 | { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ | 222 | { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5, |
211 | { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ | 223 | 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */ |
212 | { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ | 224 | { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, |
213 | { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ | 225 | 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */ |
214 | { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */ |