summaryrefslogtreecommitdiffabout
path: root/src/ellinika/phoneme.y
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-10 20:04:53 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2011-06-10 21:34:20 (GMT)
commita1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch) (side-by-side diff)
treef86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika/phoneme.y
parent2bae7da012e2125762855ce014e63345ecbbbb18 (diff)
downloadellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz
ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2
Fix syllabification.
* configure.ac: Add AC_PROG_YACC * src/ellinika/phoneme.y: New file. * src/ellinika/yyrename: New file. * src/ellinika/syllabificator.c: New file. * src/ellinika/.gitignore: Update. * src/ellinika/elchr.c (char_info_st): Move to header. (el_basic_ctype): (elchr_info): Remove static qualifier. Return a pointer to const. (elchr_letter,elchr_phoneme): New functions. (elchr_diphthong): Remove. * src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members. (_elstr_syllabize): Rewrite. (invalidate_maps): New static function. (_elstr_alloc): Initialize new fields, take function name as argument, for diagnostic purposes. (_elstr_print): Rewrite. (deftab): Update. (elstr-syllable-prop,elstr-syllable) (_elstr_set_accent,_elstr_set_accent_on_char): Rewrite. (elstr-char-phoneme,elstr->phonetic-map): New functions. * src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove. (CHF_DIPHTHONG): New flag. (PHON_.*): New constants. (phoneme,syllable): New structures. (char_info_st)<letter,phoneme>: New members. (elchr_info,elchr_letter) (elchr_phoneme,phoneme_map) (syllable_map): New protos. (elchr_diphthong): Remove protos. * src/ellinika/elmorph.scm4: Move public definitions to elmorph-public.scm; include it here. * src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a wrapper over elstr->soundslike. Describe Milesian numbers. * style.css (img.ellinika-img): New class. * xml/lingua.conf.in (IMAGE): New tag. * xml/pl/alfabhta.xml: Describe Milesian numbers. Various fixes. * data/dbverb.struct: Fix a typo in flection. Use 'sub' theme for pas/sub/aor. * data/irregular-verbs.xml: Add more verbs. * scm/conjugator.scm: Various fixes. * scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika/phoneme.y') (more/less context) (ignore whitespace changes)
-rw-r--r--src/ellinika/phoneme.y353
1 files changed, 353 insertions, 0 deletions
diff --git a/src/ellinika/phoneme.y b/src/ellinika/phoneme.y
new file mode 100644
index 0000000..353d175
--- a/dev/null
+++ b/src/ellinika/phoneme.y
@@ -0,0 +1,353 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utf8.h"
+#include "elmorph.h"
+
+/* Output array filled by phoneme_append() while the parser runs.
+   phoneme_map() resets these four variables before each parse. */
+static struct phoneme *phoneme_base;
+/* Number of elements phoneme_base currently has room for. */
+static size_t phoneme_max;
+/* Number of elements of phoneme_base actually in use. */
+static size_t phoneme_count;
+/* Zero while everything is fine; set to an errno value (ENOMEM) when
+   phoneme_append() cannot grow the array.  Once set, further appends
+   are silently ignored. */
+static int error_state;
+
+/* Element count used for the first allocation of phoneme_base;
+   later growth doubles the capacity. */
+#define PHONEME_MAP_INITIAL_ALLOC 16
+
+/* Append a copy of *PHONEME to the phoneme_base array, growing the
+   array when it is full (first to PHONEME_MAP_INITIAL_ALLOC elements,
+   then by doubling).
+
+   Nothing is returned: on allocation failure, or if the new size cannot
+   be represented, error_state is set to ENOMEM and the array is left
+   as it was.  Once error_state is non-zero every call is a no-op. */
+static void
+phoneme_append(struct phoneme *phoneme)
+{
+	if (error_state)
+		return;
+
+	if (phoneme_max == phoneme_count) {
+		/* Largest element count whose size in bytes still fits in
+		   a size_t. */
+		const size_t limit = (size_t)-1 / sizeof(phoneme_base[0]);
+		struct phoneme *np;
+		size_t nsize;
+
+		if (!phoneme_max)
+			nsize = PHONEME_MAP_INITIAL_ALLOC;
+		else if (phoneme_max > limit / 2) {
+			/* Doubling would wrap either the element count or
+			   the byte count handed to realloc. */
+			error_state = ENOMEM;
+			return;
+		} else
+			nsize = 2 * phoneme_max;
+
+		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
+		if (!np) {
+			/* phoneme_base is still valid; the caller frees it. */
+			error_state = ENOMEM;
+			return;
+		}
+		phoneme_max = nsize;
+		phoneme_base = np;
+	}
+	phoneme_base[phoneme_count++] = *phoneme;
+}
+
+/* Turn phoneme A, in place, into a two-character phoneme made of A
+   followed by B:
+     - count becomes 2;
+     - code becomes PC;
+     - flags become FL | CHF_DIPHTHONG, plus the accent bits
+       (CHF_ACCENT_MASK) found in either A or B.
+   The start field of A is not touched.  A is evaluated several times,
+   so it must be free of side effects. */
+#define DIPHTHONG(a,b,pc,fl) do {					\
+		(a).count = 2;						\
+		(a).code = (pc);					\
+		(a).flags = (fl) | CHF_DIPHTHONG |			\
+			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
+	} while (0)
+
+%}
+/* The only semantic value type: all tokens and the monophthong,
+   diphthong and phoneme nonterminals carry a struct phoneme. */
+%union {
+ struct phoneme phoneme;
+};
+
+/* Terminal symbols.  Token numbers are assigned explicitly because
+   yylex() returns the `letter' member of the character's
+   char_info_st entry as the token number.
+   NOTE(review): these numbers presumably have to agree with the letter
+   codes stored in the character table, which is defined outside this
+   file -- confirm before changing either side. */
+%token <phoneme> LETTER_A 1
+%token <phoneme> LETTER_A_ACC 2
+%token <phoneme> LETTER_B 3
+%token <phoneme> LETTER_G 4
+%token <phoneme> LETTER_D 5
+%token <phoneme> LETTER_E 6
+%token <phoneme> LETTER_E_ACC 7
+%token <phoneme> LETTER_Z 8
+%token <phoneme> LETTER_H 9
+%token <phoneme> LETTER_H_ACC 10
+%token <phoneme> LETTER_TH 11
+%token <phoneme> LETTER_I 12
+%token <phoneme> LETTER_I_ACC 13
+%token <phoneme> LETTER_I_TREMA 14
+%token <phoneme> LETTER_I_TREMA_ACC 15
+%token <phoneme> LETTER_K 16
+%token <phoneme> LETTER_L 17
+%token <phoneme> LETTER_M 18
+%token <phoneme> LETTER_N 19
+%token <phoneme> LETTER_KS 20
+%token <phoneme> LETTER_OMICRON 21
+%token <phoneme> LETTER_OMICRON_ACC 22
+%token <phoneme> LETTER_P 23
+%token <phoneme> LETTER_R 24
+%token <phoneme> LETTER_S 25
+%token <phoneme> LETTER_T 26
+%token <phoneme> LETTER_Y 27
+%token <phoneme> LETTER_Y_ACC 28
+%token <phoneme> LETTER_Y_TREMA 29
+%token <phoneme> LETTER_Y_TREMA_ACC 30
+%token <phoneme> LETTER_F 31
+%token <phoneme> LETTER_X 32
+%token <phoneme> LETTER_PS 33
+%token <phoneme> LETTER_OMEGA 34
+%token <phoneme> LETTER_OMEGA_ACC 35
+
+/* Nonterminals that carry a struct phoneme value. */
+%type <phoneme> monophthong diphthong phoneme
+
+%%
+/* Start symbol: a non-empty sequence of phonemes.  Each phoneme is
+   appended to the output array as soon as it has been reduced, so the
+   array ends up in input order. */
+input : phoneme
+ {
+ phoneme_append(&$1);
+ }
+ | input phoneme
+ {
+ phoneme_append(&$2);
+ }
+ ;
+
+/* A phoneme is either a single letter or a two-letter combination.
+   No action is given, so the value of the right-hand side is passed
+   through by yacc's default action ($$ = $1). */
+phoneme : monophthong
+ | diphthong
+ ;
+
+/* A single letter taken as a phoneme of its own.  There are no
+   actions: the default action ($$ = $1) hands on the value yylex()
+   stored for the token unchanged.
+   Several of these letters can also begin a rule of `diphthong'; in
+   that case the parser has a shift/reduce conflict, and yacc's default
+   of preferring the shift makes the two-letter reading win. */
+monophthong:
+ LETTER_A
+ | LETTER_A_ACC
+ | LETTER_B
+ | LETTER_G
+ | LETTER_D
+ | LETTER_E
+ | LETTER_E_ACC
+ | LETTER_Z
+ | LETTER_H
+ | LETTER_H_ACC
+ | LETTER_TH
+ | LETTER_I
+ | LETTER_I_ACC
+ | LETTER_I_TREMA
+ | LETTER_I_TREMA_ACC
+ | LETTER_K
+ | LETTER_L
+ | LETTER_M
+ | LETTER_N
+ | LETTER_KS
+ | LETTER_OMICRON
+ | LETTER_OMICRON_ACC
+ | LETTER_P
+ | LETTER_R
+ | LETTER_S
+ | LETTER_T
+ | LETTER_Y
+ | LETTER_Y_ACC
+ | LETTER_Y_TREMA
+ | LETTER_Y_TREMA_ACC
+ | LETTER_F
+ | LETTER_X
+ | LETTER_PS
+ | LETTER_OMEGA
+ | LETTER_OMEGA_ACC
+ ;
+
+/* Two-letter combinations treated as one phoneme.  Every action
+   rewrites $1 in place with DIPHTHONG -- count 2, the given phoneme
+   code, the given class flag plus CHF_DIPHTHONG and the accent bits of
+   either letter -- and returns it, so the start position stays that of
+   the first letter.
+   These rules win over `monophthong' only because yacc resolves the
+   resulting shift/reduce conflicts in favour of the shift. */
+diphthong:
+ /* Vowel pairs: the result is flagged CHF_VOWEL. */
+ LETTER_A LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_A LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_Y LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_Y LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+ $$ = $1;
+ }
+ /* Consonant pairs: the result is flagged CHF_CONSONANT. */
+ | LETTER_M LETTER_P
+ {
+ DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_N LETTER_T
+ {
+ DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_G
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_K
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_X
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_S LETTER_M
+ {
+ DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_T LETTER_S
+ {
+ DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_T LETTER_Z
+ {
+ DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_KS
+ {
+ DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
+ $$ = $1;
+ }
+ /* Pairs coded PHON_AV / PHON_EV: the class argument is 0, so
+ neither CHF_VOWEL nor CHF_CONSONANT is set on the result. */
+ | LETTER_A LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_AV, 0);
+ $$ = $1;
+ }
+ | LETTER_A LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_AV, 0);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_EV, 0);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_EV, 0);
+ $$ = $1;
+ }
+ ;
+
+%%
+
+/* The word being scanned by yylex(): an array of INPUT_LEN character
+   codes starting at INPUT_BASE.  INPUT_POS is the index of the next
+   character to read.  All three are set up by phoneme_map(). */
+static unsigned *input_base;
+static size_t input_len;
+static size_t input_pos;
+
+/* True if CI is a non-null character info pointer whose letter code is
+   non-zero.  The argument is parenthesized so that any pointer
+   expression may be passed, not just a plain identifier. */
+#define ISALPHA(ci) ((ci) && (ci)->letter)
+
+/* Character flag bits that yylex() copies into the phoneme flags. */
+#define PHONEME_FLAG_MASK \
+	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)
+
+/* Lexical analyzer for the phoneme grammar.
+
+   Reads character codes from input_base, skipping every character for
+   which elchr_info() yields no entry or an entry with a zero letter
+   code.  Returns 0 when the input is exhausted.  Otherwise returns the
+   letter code of the character found as the token number, after
+   filling yylval.phoneme with:
+     code  - the phoneme code from the character table entry,
+     start - the index of the character in the input,
+     count - 1,
+     flags - the entry's flags restricted to PHONEME_FLAG_MASK. */
+int
+yylex()
+{
+	struct char_info_st const *info;
+	size_t at;
+
+	for (;;) {
+		if (input_pos == input_len)
+			return 0;
+		at = input_pos++;
+		info = elchr_info(input_base[at]);
+		if (ISALPHA(info))
+			break;
+	}
+
+	yylval.phoneme.start = at;
+	yylval.phoneme.count = 1;
+	yylval.phoneme.code = info->phoneme;
+	yylval.phoneme.flags = info->flags & PHONEME_FLAG_MASK;
+	return info->letter;
+}
+
+/* Error callback invoked by the generated parser with the diagnostic
+   text S.  A parse error is treated as an internal error: the message
+   is written to stderr and the program is aborted, so this function
+   never returns.
+
+   The stream argument was missing from the fprintf call, which passed
+   the format string where a FILE pointer is required. */
+int
+yyerror(const char *s)
+{
+	fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
+	abort();
+}
+
+/* Build the phoneme map of a word.
+
+   WORD  - array of LEN character codes to analyze.
+   PPH   - on success, receives a pointer to a newly allocated array of
+           phonemes (NOTE(review): presumably to be released by the
+           caller with free() -- nothing in this file frees it).
+   PLEN  - on success, receives the number of elements in *PPH.
+
+   Returns 0 on success.  On failure returns an errno value, which is
+   also stored in errno: EINVAL if yyparse() reports failure, or the
+   value recorded in error_state (ENOMEM) if the array could not be
+   grown.  *PPH and *PLEN are not modified on failure.
+
+   The function works on file-scope state and so is not reentrant. */
+int
+phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
+{
+	int rc;
+
+	input_base = word;
+	input_len = len;
+	input_pos = 0;
+	phoneme_base = NULL;
+	phoneme_max = 0;
+	phoneme_count = 0;
+	error_state = 0;
+
+	if (yyparse())
+		rc = EINVAL;
+	else
+		rc = error_state;
+
+	if (rc) {
+		free(phoneme_base);
+		/* Do not leave a dangling pointer in the static state. */
+		phoneme_base = NULL;
+		phoneme_max = 0;
+		phoneme_count = 0;
+		errno = rc;
+		return rc;
+	}
+
+	if (phoneme_count < phoneme_max) {
+		/* Give back the unused tail.  If the shrinking realloc
+		   fails, the original block is still valid and big enough,
+		   so keep it instead of losing the result. */
+		struct phoneme *np =
+			realloc(phoneme_base,
+				phoneme_count * sizeof(phoneme_base[0]));
+		if (np)
+			phoneme_base = np;
+	}
+	*pph = phoneme_base;
+	*plen = phoneme_count;
+
+	/* Ownership of the array has passed to the caller. */
+	phoneme_base = NULL;
+	phoneme_max = 0;
+	phoneme_count = 0;
+	return 0;
+}

Return to:

Send suggestions and report system problems to the System administrator.