aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-10 23:04:53 +0300
committerSergey Poznyakoff <gray@gnu.org.ua>2011-06-11 00:34:20 +0300
commita1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch)
treef86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika
parent2bae7da012e2125762855ce014e63345ecbbbb18 (diff)
downloadellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz
ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2
Fix syllabification.
* configure.ac: Add AC_PROG_YACC. * src/ellinika/phoneme.y: New file. * src/ellinika/yyrename: New file. * src/ellinika/syllabificator.c: New file. * src/ellinika/.gitignore: Update. * src/ellinika/elchr.c (char_info_st): Move to header. (el_basic_ctype): (elchr_info): Remove static qualifier. Return a pointer to const. (elchr_letter,elchr_phoneme): New functions. (elchr_diphthong): Remove. * src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members. (_elstr_syllabize): Rewrite. (invalidate_maps): New static function. (_elstr_alloc): Initialize new fields, take function name as argument, for diagnostic purposes. (_elstr_print): Rewrite. (deftab): Update. (elstr-syllable-prop,elstr-syllable) (_elstr_set_accent,_elstr_set_accent_on_char): Rewrite. (elstr-char-phoneme,elstr->phonetic-map): New functions. * src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove. (CHF_DIPHTHONG): New flag. (PHON_.*): New constants. (phoneme,syllable): New structures. (char_info_st)<letter,phoneme>: New members. (elchr_info,elchr_letter) (elchr_phoneme,phoneme_map) (syllable_map): New protos. (elchr_diphthong): Remove protos. * src/ellinika/elmorph.scm4: Move public definitions to elmorph-public.scm; include it here. * src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a wrapper over elstr->soundslike. Describe Milesian numbers. * style.css (img.ellinika-img): New class. * xml/lingua.conf.in (IMAGE): New tag. * xml/pl/alfabhta.xml: Describe Milesian numbers. Various fixes. * data/dbverb.struct: Fix a typo in flection. Use 'sub' theme for pas/sub/aor. * data/irregular-verbs.xml: Add more verbs. * scm/conjugator.scm: Various fixes. * scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika')
-rw-r--r--src/ellinika/.gitignore2
-rw-r--r--src/ellinika/Makefile.am23
-rw-r--r--src/ellinika/elchr.c273
-rw-r--r--src/ellinika/elmorph-public.scm106
-rw-r--r--src/ellinika/elmorph.c308
-rw-r--r--src/ellinika/elmorph.h82
-rw-r--r--src/ellinika/elmorph.scm425
-rw-r--r--src/ellinika/phoneme.y353
-rw-r--r--src/ellinika/syllabificator.c152
-rw-r--r--src/ellinika/tenses.scm38
-rw-r--r--src/ellinika/xlat.scm113
-rwxr-xr-xsrc/ellinika/yyrename97
12 files changed, 1206 insertions, 366 deletions
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore
index 9422f9a..11bf478 100644
--- a/src/ellinika/.gitignore
+++ b/src/ellinika/.gitignore
@@ -3,3 +3,5 @@ cgi.scm
3config.scm 3config.scm
4elmorph.scm 4elmorph.scm
5elmorph.x 5elmorph.x
6phoneme.c
7phoneme.h
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
index 274eea8..b8988d4 100644
--- a/src/ellinika/Makefile.am
+++ b/src/ellinika/Makefile.am
@@ -1,5 +1,5 @@
1# This file is part of Ellinika project. 1# This file is part of Ellinika project.
2# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff 2# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff
3# 3#
4# Ellinika is free software; you can redistribute it and/or modify 4# Ellinika is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by 5# it under the terms of the GNU General Public License as published by
@@ -15,7 +15,14 @@
15# along with this program. If not, see <http://www.gnu.org/licenses/>. 15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16
17guiledir=$(GUILE_SITE)/$(PACKAGE) 17guiledir=$(GUILE_SITE)/$(PACKAGE)
18guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm 18guile_DATA=\
19 xlat.scm\
20 cgi.scm\
21 i18n.scm\
22 config.scm\
23 dico.scm\
24 elmorph.scm\
25 tenses.scm
19 26
20cgi.m4: Makefile 27cgi.m4: Makefile
21 echo 'divert(-1)' > $@ 28 echo 'divert(-1)' > $@
@@ -39,11 +46,11 @@ cgi.m4: Makefile
39SUFFIXES = .scm4 .scm .x 46SUFFIXES = .scm4 .scm .x
40 47
41.scm4.scm: 48.scm4.scm:
42 m4 cgi.m4 $< > $@ 49 m4 -I$(srcdir) cgi.m4 $< > $@
43 50
44cgi.scm: cgi.scm4 cgi.m4 51cgi.scm: cgi.scm4 cgi.m4
45config.scm: config.scm4 cgi.m4 52config.scm: config.scm4 cgi.m4
46elmorph.scm: elmorph.scm4 cgi.m4 53elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4
47 54
48pkglib_LTLIBRARIES=libelmorph.la 55pkglib_LTLIBRARIES=libelmorph.la
49 56
@@ -52,7 +59,9 @@ libelmorph_la_SOURCES = \
52 utf8.c\ 59 utf8.c\
53 elchr.c\ 60 elchr.c\
54 elmorph.c\ 61 elmorph.c\
55 elmorph.h 62 elmorph.h\
63 phoneme.y\
64 syllabificator.c
56 65
57DOT_X_FILES = elmorph.x 66DOT_X_FILES = elmorph.x
58 67
@@ -80,4 +89,6 @@ install-data-hook:
80 done; \ 89 done; \
81 cd $$here 90 cd $$here
82 91
83 92AM_YFLAGS = -d
93YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
94EXTRA_DIST = yyrename elmorph-public.scm \ No newline at end of file
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c
index 3142b6f..621ac03 100644
--- a/src/ellinika/elchr.c
+++ b/src/ellinika/elchr.c
@@ -23,20 +23,7 @@
23#include <libguile.h> 23#include <libguile.h>
24#include "utf8.h" 24#include "utf8.h"
25#include "elmorph.h" 25#include "elmorph.h"
26 26#include "phoneme.h"
27struct char_info_st {
28 unsigned ch; /* Characters */
29 int flags; /* Flags (see above) */
30 unsigned base; /* for vowels - a corresponding vowel with all diacritics
31 removed */
32 unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase
33 letter if flags & CHF_LOWER and a corresponding lowerrcase
34 letter if flags & CHF_UPPER */
35 unsigned numval; /* Numeric value */
36 unsigned accented[3]; /* For vowels - corresponding accented variant */
37 unsigned deaccent; /* For accented vowels with diaeresis - corresponding
38 non-accented character */
39};
40 27
41/* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */ 28/* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */
42struct char_info_st el_basic_ctype[] = { 29struct char_info_st el_basic_ctype[] = {
@@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = {
174 { 0x0383, }, 161 { 0x0383, },
175 { 0x0384, CHF_MODIFIER }, /* Oxeia */ 162 { 0x0384, CHF_MODIFIER }, /* Oxeia */
176 { 0x0385, CHF_MODIFIER }, /* dialytika */ 163 { 0x0385, CHF_MODIFIER }, /* dialytika */
177 { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */ 164 { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC,
165 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */
178 { 0x0387, CHF_PUNCT }, /* ano teleia */ 166 { 0x0387, CHF_PUNCT }, /* ano teleia */
179 { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */ 167 { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD,
180 { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */ 168 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */
181 { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */ 169 { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE,
170 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */
171 { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF,
172 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */
182 { 0x038B, }, 173 { 0x038B, },
183 { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */ 174 { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC,
175 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */
184 { 0x038D, }, 176 { 0x038D, },
185 { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */ 177 { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD,
186 { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */ 178 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */
187 { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */ 179 { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE,
188 { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */ 180 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */
189 { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */ 181 { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0,
190 { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */ 182 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */
191 { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */ 183 { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1,
192 { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */ 184 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */
193 { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */ 185 { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2,
194 { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */ 186 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */
195 { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */ 187 { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3,
196 { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */ 188 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */
197 { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */ 189 { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4,
198 { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */ 190 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */
199 { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */ 191 { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5,
200 { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */ 192 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */
201 { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */ 193 { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6,
202 { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */ 194 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */
203 { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */ 195 { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7,
204 { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */ 196 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */
197 { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8,
198 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */
199 { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9,
200 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */
201 { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA,
202 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */
203 { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB,
204 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */
205 { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC,
206 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */
207 { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD,
208 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */
209 { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE,
210 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */
211 { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF,
212 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */
213 { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0,
214 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */
215 { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1,
216 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */
205 { 0x03A2, }, 217 { 0x03A2, },
206 { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */ 218 { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3,
207 { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */ 219 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */
208 { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */ 220 { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4,
209 { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */ 221 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */
210 { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */ 222 { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5,
211 { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */ 223 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */
212 { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */ 224 { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6,
213 { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */ 225 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */
214 { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */