diff options
Diffstat (limited to 'src/ellinika/phoneme.y')
-rw-r--r-- | src/ellinika/phoneme.y | 353 |
1 files changed, 353 insertions, 0 deletions
/* This file is part of Ellinika project.
   Copyright (C) 2011 Sergey Poznyakoff

   Ellinika is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Ellinika is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
%{
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "utf8.h"
#include "elmorph.h"

/* The generated parser calls these before their definitions in the
   epilogue, so they must be declared here. */
int yylex(void);
int yyerror(const char *s);

/* Growable output array filled by the grammar actions.  All parser state
   is kept in file-scope statics, so phoneme_map() is not reentrant. */
static struct phoneme *phoneme_base;	/* array storage, or NULL */
static size_t phoneme_max;		/* number of elements allocated */
static size_t phoneme_count;		/* number of elements in use */
static int error_state;			/* errno value of the first failure, or 0 */

#define PHONEME_MAP_INITIAL_ALLOC 16

/* Append a copy of *PHONEME to the output array, doubling the storage
   when it is full.  On allocation failure (or size overflow) record
   ENOMEM in error_state; once error_state is set, further calls are
   no-ops so that the parse can run to completion. */
static void
phoneme_append(const struct phoneme *phoneme)
{
	if (error_state)
		return;

	if (phoneme_count == phoneme_max) {
		struct phoneme *np;
		size_t nsize;

		if (phoneme_max == 0)
			nsize = PHONEME_MAP_INITIAL_ALLOC;
		else if (phoneme_max > SIZE_MAX / 2 / sizeof(phoneme_base[0])) {
			/* Doubling would overflow either the element count
			   or the byte size passed to realloc. */
			error_state = ENOMEM;
			return;
		} else
			nsize = 2 * phoneme_max;

		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
		if (!np) {
			/* phoneme_base is still valid; phoneme_map()
			   releases it. */
			error_state = ENOMEM;
			return;
		}
		phoneme_base = np;
		phoneme_max = nsize;
	}
	phoneme_base[phoneme_count++] = *phoneme;
}

/* Turn A into the phoneme for the two-letter sequence A B: it spans two
   input characters, gets the code PC and the flags FL plus CHF_DIPHTHONG,
   and inherits the accent bits of either letter.  A's start index is
   left untouched. */
#define DIPHTHONG(a,b,pc,fl) do {					\
		(a).count = 2;						\
		(a).code = (pc);					\
		(a).flags = (fl) | CHF_DIPHTHONG |			\
			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
	} while (0)

%}
%union {
	struct phoneme phoneme;
};

/* yylex() returns the `letter' member of the character info record as
   the token code, so the explicit numbers below have to agree with the
   letter codes defined outside this file. */
%token <phoneme> LETTER_A 1
%token <phoneme> LETTER_A_ACC 2
%token <phoneme> LETTER_B 3
%token <phoneme> LETTER_G 4
%token <phoneme> LETTER_D 5
%token <phoneme> LETTER_E 6
%token <phoneme> LETTER_E_ACC 7
%token <phoneme> LETTER_Z 8
%token <phoneme> LETTER_H 9
%token <phoneme> LETTER_H_ACC 10
%token <phoneme> LETTER_TH 11
%token <phoneme> LETTER_I 12
%token <phoneme> LETTER_I_ACC 13
%token <phoneme> LETTER_I_TREMA 14
%token <phoneme> LETTER_I_TREMA_ACC 15
%token <phoneme> LETTER_K 16
%token <phoneme> LETTER_L 17
%token <phoneme> LETTER_M 18
%token <phoneme> LETTER_N 19
%token <phoneme> LETTER_KS 20
%token <phoneme> LETTER_OMICRON 21
%token <phoneme> LETTER_OMICRON_ACC 22
%token <phoneme> LETTER_P 23
%token <phoneme> LETTER_R 24
%token <phoneme> LETTER_S 25
%token <phoneme> LETTER_T 26
%token <phoneme> LETTER_Y 27
%token <phoneme> LETTER_Y_ACC 28
%token <phoneme> LETTER_Y_TREMA 29
%token <phoneme> LETTER_Y_TREMA_ACC 30
%token <phoneme> LETTER_F 31
%token <phoneme> LETTER_X 32
%token <phoneme> LETTER_PS 33
%token <phoneme> LETTER_OMEGA 34
%token <phoneme> LETTER_OMEGA_ACC 35

%type <phoneme> monophthong diphthong phoneme

%%
/* A word is a possibly empty sequence of phonemes.  The empty alternative
   keeps an input without any letters from being a syntax error, which
   yyerror() would turn into abort(). */
input	: /* empty */
	| input phoneme
		{
			phoneme_append(&$2);
		}
	;

phoneme	: monophthong
	| diphthong
	;

/* A single letter; the phoneme data prepared by yylex() is used as is. */
monophthong:
	  LETTER_A
	| LETTER_A_ACC
	| LETTER_B
	| LETTER_G
	| LETTER_D
	| LETTER_E
	| LETTER_E_ACC
	| LETTER_Z
	| LETTER_H
	| LETTER_H_ACC
	| LETTER_TH
	| LETTER_I
	| LETTER_I_ACC
	| LETTER_I_TREMA
	| LETTER_I_TREMA_ACC
	| LETTER_K
	| LETTER_L
	| LETTER_M
	| LETTER_N
	| LETTER_KS
	| LETTER_OMICRON
	| LETTER_OMICRON_ACC
	| LETTER_P
	| LETTER_R
	| LETTER_S
	| LETTER_T
	| LETTER_Y
	| LETTER_Y_ACC
	| LETTER_Y_TREMA
	| LETTER_Y_TREMA_ACC
	| LETTER_F
	| LETTER_X
	| LETTER_PS
	| LETTER_OMEGA
	| LETTER_OMEGA_ACC
	;

/* Two-letter sequences that are mapped to a single phoneme.  Despite the
   rule name this covers both vowel and consonant pairs. */
diphthong:
	  /* Vowel pairs */
	  LETTER_A LETTER_I
		{
			DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_A LETTER_I_ACC
		{
			DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_E LETTER_I
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_E LETTER_I_ACC
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_OMICRON LETTER_I
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_OMICRON LETTER_I_ACC
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_Y LETTER_I
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_Y LETTER_I_ACC
		{
			DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_OMICRON LETTER_Y
		{
			DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
			$$ = $1;
		}
	| LETTER_OMICRON LETTER_Y_ACC
		{
			DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
			$$ = $1;
		}
	  /* Consonant pairs */
	| LETTER_M LETTER_P
		{
			DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_N LETTER_T
		{
			DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_G LETTER_G
		{
			DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_G LETTER_K
		{
			DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_G LETTER_X
		{
			DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_S LETTER_M
		{
			DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_T LETTER_S
		{
			DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_T LETTER_Z
		{
			DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
			$$ = $1;
		}
	| LETTER_G LETTER_KS
		{
			DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
			$$ = $1;
		}
	  /* These pairs are given neither CHF_VOWEL nor CHF_CONSONANT */
	| LETTER_A LETTER_Y
		{
			DIPHTHONG($1, $2, PHON_AV, 0);
			$$ = $1;
		}
	| LETTER_A LETTER_Y_ACC
		{
			DIPHTHONG($1, $2, PHON_AV, 0);
			$$ = $1;
		}
	| LETTER_E LETTER_Y
		{
			DIPHTHONG($1, $2, PHON_EV, 0);
			$$ = $1;
		}
	| LETTER_E LETTER_Y_ACC
		{
			DIPHTHONG($1, $2, PHON_EV, 0);
			$$ = $1;
		}
	;

%%

/* Lexer input: the word being scanned, set up by phoneme_map(). */
static unsigned *input_base;	/* array of character codes */
static size_t input_len;	/* number of elements in input_base */
static size_t input_pos;	/* index of the next element to read */

/* True if CI is a non-null character info record with a letter code. */
#define ISALPHA(ci) ((ci) && (ci)->letter)

/* Flag bits copied from the character info into the phoneme. */
#define PHONEME_FLAG_MASK \
	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)

/* Return the token for the next letter of the input word, or 0 at its
   end.  Characters for which elchr_info() yields no record or a zero
   letter code are skipped.  The semantic value describes a one-character
   phoneme starting at the letter's index in the input. */
int
yylex(void)
{
	struct char_info_st const *ci;

	do {
		if (input_pos == input_len)
			return 0;
		ci = elchr_info(input_base[input_pos++]);
	} while (!ISALPHA(ci));

	yylval.phoneme.code = ci->phoneme;
	/* input_pos has already been advanced past this character */
	yylval.phoneme.start = input_pos - 1;
	yylval.phoneme.count = 1;
	yylval.phoneme.flags = ci->flags & PHONEME_FLAG_MASK;
	return ci->letter;
}

/* Parser error callback.  Reports the message S on standard error and
   aborts the process, so it never returns.  The reported location is
   that of this function in the generated source file. */
int
yyerror(const char *s)
{
	fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
	abort();
}

/* Convert WORD, an array of LEN character codes, into an array of
   phonemes.

   On success return 0, store the array in *PPH and its element count in
   *PLEN.  The array is obtained from realloc() and is not released by
   this module, so the caller is responsible for freeing it.  If WORD
   contains no letters, *PPH is NULL and *PLEN is 0.

   On failure return an errno value, which is also stored in errno:
   ENOMEM if memory was exhausted, EINVAL if the parser failed otherwise.
   *PPH and *PLEN are left unchanged in that case.

   The function uses file-scope state and is not reentrant. */
int
phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
{
	int rc;

	input_base = word;
	input_len = len;
	input_pos = 0;
	phoneme_base = NULL;
	phoneme_max = 0;
	phoneme_count = 0;
	error_state = 0;

	rc = yyparse();
	if (rc)
		/* yyparse() returns 2 when it runs out of memory */
		rc = (rc == 2) ? ENOMEM : EINVAL;
	else
		rc = error_state;

	if (rc) {
		free(phoneme_base);
		phoneme_base = NULL;
		phoneme_max = 0;
		phoneme_count = 0;
		errno = rc;
		return rc;
	}

	if (phoneme_count && phoneme_count < phoneme_max) {
		/* Trim the unused tail.  If shrinking fails the original,
		   larger buffer is still valid, so keep it. */
		struct phoneme *np = realloc(phoneme_base,
					     phoneme_count *
					     sizeof(phoneme_base[0]));
		if (np)
			phoneme_base = np;
	}

	*pph = phoneme_base;
	*plen = phoneme_count;
	/* Ownership has passed to the caller. */
	phoneme_base = NULL;
	phoneme_max = 0;
	phoneme_count = 0;
	return 0;
}