/* This file is part of Ellinika project.
   Copyright (C) 2011 Sergey Poznyakoff

   Ellinika is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Ellinika is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

%{
/* NOTE(review): the system header names below were missing from the
   reviewed copy of this file.  They are reconstructed from what the code
   uses (errno/ENOMEM/EINVAL, fprintf, realloc/free/abort) -- confirm
   against the repository. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "utf8.h"
#include "elmorph.h"

/* The generated parser calls these before their definitions in the
   epilogue; declare them so that no implicit declaration is needed. */
int yylex(void);
int yyerror(const char *s);

/* Output array being built by the current phoneme_map() call. */
static struct phoneme *phoneme_base;
/* Number of elements allocated in phoneme_base. */
static size_t phoneme_max;
/* Number of elements of phoneme_base that are in use. */
static size_t phoneme_count;
/* Zero, or the errno value of the first failure in phoneme_append(). */
static int error_state;

#define PHONEME_MAP_INITIAL_ALLOC 16

/* Append a copy of *PHONEME to the output array, growing it as needed.

   The array starts at PHONEME_MAP_INITIAL_ALLOC elements and doubles
   whenever it is full.  On failure error_state is set to ENOMEM and the
   array is left untouched; once error_state is set, further calls do
   nothing. */
static void
phoneme_append(struct phoneme *phoneme)
{
	if (error_state)
		return;
	if (phoneme_max == phoneme_count) {
		struct phoneme *np;
		size_t nsize;

		if (!phoneme_max)
			nsize = PHONEME_MAP_INITIAL_ALLOC;
		else {
			/* Refuse to grow if either the doubled element
			   count or the resulting byte size would not fit
			   in size_t. */
			if (phoneme_max >
			    ((size_t)-1 / sizeof(phoneme_base[0])) / 2) {
				error_state = ENOMEM;
				return;
			}
			nsize = 2 * phoneme_max;
		}

		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
		if (!np) {
			error_state = ENOMEM;
			return;
		}
		phoneme_max = nsize;
		phoneme_base = np;
	}
	phoneme_base[phoneme_count++] = *phoneme;
}

/* Turn A, the semantic value of the first letter of a two-letter
   combination, into the value of the combination: it spans two input
   characters, gets the phoneme code PC and the flags FL plus
   CHF_DIPHTHONG, and inherits the accent bits (CHF_ACCENT_MASK) of both
   A and B.  A's start position is kept. */
#define DIPHTHONG(a,b,pc,fl) do {					\
		(a).count = 2;						\
		(a).code = pc;						\
		(a).flags = (fl) | CHF_DIPHTHONG |			\
			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
	} while (0)
%}

%union {
	struct phoneme phoneme;
};

/* Letter tokens.  yylex() returns the `letter' member of the character
   info as the token number, so the explicit numbers below presumably
   mirror the letter codes used by elchr_info() -- keep them in sync.
   NOTE(review): the <phoneme> type tags were missing from the reviewed
   copy and are reconstructed from the %union and the use of $1/$2. */
%token <phoneme> LETTER_A 1
%token <phoneme> LETTER_A_ACC 2
%token <phoneme> LETTER_B 3
%token <phoneme> LETTER_G 4
%token <phoneme> LETTER_D 5
%token <phoneme> LETTER_E 6
%token <phoneme> LETTER_E_ACC 7
%token <phoneme> LETTER_Z 8
%token <phoneme> LETTER_H 9
%token <phoneme> LETTER_H_ACC 10
%token <phoneme> LETTER_TH 11
%token <phoneme> LETTER_I 12
%token <phoneme> LETTER_I_ACC 13
%token <phoneme> LETTER_I_TREMA 14
%token <phoneme> LETTER_I_TREMA_ACC 15
%token <phoneme> LETTER_K 16
%token <phoneme> LETTER_L 17
%token <phoneme> LETTER_M 18
%token <phoneme> LETTER_N 19
%token <phoneme> LETTER_KS 20
%token <phoneme> LETTER_OMICRON 21
%token <phoneme> LETTER_OMICRON_ACC 22
%token <phoneme> LETTER_P 23
%token <phoneme> LETTER_R 24
%token <phoneme> LETTER_S 25
%token <phoneme> LETTER_T 26
%token <phoneme> LETTER_Y 27
%token <phoneme> LETTER_Y_ACC 28
%token <phoneme> LETTER_Y_TREMA 29
%token <phoneme> LETTER_Y_TREMA_ACC 30
%token <phoneme> LETTER_F 31
%token <phoneme> LETTER_X 32
%token <phoneme> LETTER_PS 33
%token <phoneme> LETTER_OMEGA 34
%token <phoneme> LETTER_OMEGA_ACC 35

%type <phoneme> monophthong diphthong phoneme

%%

/* A word is a non-empty sequence of phonemes; each one is appended to
   the output array as soon as it is recognized. */
input     : phoneme
            {
		    phoneme_append(&$1);
	    }
          | input phoneme
            {
		    phoneme_append(&$2);
	    }
          ;

phoneme   : monophthong
          | diphthong
          ;

/* A single letter; its value is passed through unchanged. */
monophthong: LETTER_A
          | LETTER_A_ACC
          | LETTER_B
          | LETTER_G
          | LETTER_D
          | LETTER_E
          | LETTER_E_ACC
          | LETTER_Z
          | LETTER_H
          | LETTER_H_ACC
          | LETTER_TH
          | LETTER_I
          | LETTER_I_ACC
          | LETTER_I_TREMA
          | LETTER_I_TREMA_ACC
          | LETTER_K
          | LETTER_L
          | LETTER_M
          | LETTER_N
          | LETTER_KS
          | LETTER_OMICRON
          | LETTER_OMICRON_ACC
          | LETTER_P
          | LETTER_R
          | LETTER_S
          | LETTER_T
          | LETTER_Y
          | LETTER_Y_ACC
          | LETTER_Y_TREMA
          | LETTER_Y_TREMA_ACC
          | LETTER_F
          | LETTER_X
          | LETTER_PS
          | LETTER_OMEGA
          | LETTER_OMEGA_ACC
          ;

/* Two-letter combinations that map to one phoneme.  Every first letter
   here is also a monophthong, so the grammar has shift/reduce conflicts
   on these pairs; the rules rely on the parser generator's default of
   preferring the shift, which selects the two-letter reading. */
diphthong : LETTER_A LETTER_I
            {
		    DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_A LETTER_I_ACC
            {
		    DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_E LETTER_I
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_E LETTER_I_ACC
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_OMICRON LETTER_I
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_OMICRON LETTER_I_ACC
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_Y LETTER_I
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_Y LETTER_I_ACC
            {
		    DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_OMICRON LETTER_Y
            {
		    DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_OMICRON LETTER_Y_ACC
            {
		    DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
		    $$ = $1;
	    }
          | LETTER_M LETTER_P
            {
		    DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_N LETTER_T
            {
		    DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_G LETTER_G
            {
		    DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_G LETTER_K
            {
		    DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_G LETTER_X
            {
		    DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_S LETTER_M
            {
		    DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_T LETTER_S
            {
		    DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_T LETTER_Z
            {
		    DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_G LETTER_KS
            {
		    DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
		    $$ = $1;
	    }
          | LETTER_A LETTER_Y
            {
		    DIPHTHONG($1, $2, PHON_AV, 0);
		    $$ = $1;
	    }
          | LETTER_A LETTER_Y_ACC
            {
		    DIPHTHONG($1, $2, PHON_AV, 0);
		    $$ = $1;
	    }
          | LETTER_E LETTER_Y
            {
		    DIPHTHONG($1, $2, PHON_EV, 0);
		    $$ = $1;
	    }
          | LETTER_E LETTER_Y_ACC
            {
		    DIPHTHONG($1, $2, PHON_EV, 0);
		    $$ = $1;
	    }
          ;

%%

/* Input word of the current phoneme_map() call: the array of character
   codes, its length and the index of the next character to read. */
static unsigned *input_base;
static size_t input_len;
static size_t input_pos;

/* True if CI is non-NULL and describes a letter. */
#define ISALPHA(ci) ((ci) && (ci)->letter)

/* Character flags that are copied into a phoneme. */
#define PHONEME_FLAG_MASK \
	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)

/* Lexer: return the token for the next letter of the input word, or 0
   at its end.  Characters for which elchr_info() returns NULL or a zero
   `letter' are skipped.  The semantic value describes a one-character
   phoneme starting at the index of the character just read. */
int
yylex(void)
{
	unsigned c;
	struct char_info_st const *ci;

	do {
		if (input_pos == input_len)
			return 0;
		c = input_base[input_pos++];
		ci = elchr_info(c);
	} while (!ISALPHA(ci));

	yylval.phoneme.code = ci->phoneme;
	yylval.phoneme.start = input_pos - 1;
	yylval.phoneme.count = 1;
	yylval.phoneme.flags = ci->flags & PHONEME_FLAG_MASK;
	return ci->letter;
}

/* Parser error callback: report S on stderr and abort the process.
   It does not return. */
int
yyerror(const char *s)
{
	fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n",
		__FILE__, __LINE__, s);
	abort();
}

/* Split WORD, an array of LEN character codes, into phonemes.

   On success return 0, store in *PPH a newly allocated array of
   phonemes and in *PLEN the number of its elements; the caller owns the
   array and releases it with free().  On failure return an errno value
   (also stored in errno): ENOMEM if the array could not be grown, EINVAL
   if the parser returned an error.  *PPH and *PLEN are not modified on
   failure.

   The function works on file-scope state, so calls must not overlap.

   NOTE(review): a word that contains no letters produces no tokens; the
   grammar requires at least one phoneme, so the parser reports a syntax
   error through yyerror(), which aborts.  Callers must pass at least
   one letter until that is changed. */
int
phoneme_map(struct phoneme **pph, size_t *plen,
	    unsigned *word, size_t len)
{
	int rc;

	input_base = word;
	input_len = len;
	input_pos = 0;

	phoneme_base = NULL;
	phoneme_max = 0;
	phoneme_count = 0;
	error_state = 0;

	rc = yyparse();
	if (rc) {
		free(phoneme_base);
		phoneme_base = NULL;
		errno = EINVAL;
		return errno;
	}
	if (error_state) {
		free(phoneme_base);
		phoneme_base = NULL;
		errno = error_state;
		return errno;
	}

	if (phoneme_count < phoneme_max) {
		/* Give back the unused tail.  If shrinking fails, the
		   original, larger block is still valid, so keep it
		   instead of losing the result. */
		struct phoneme *np = realloc(phoneme_base,
					     phoneme_count *
					     sizeof(phoneme_base[0]));
		if (np)
			phoneme_base = np;
	}

	*pph = phoneme_base;
	*plen = phoneme_count;
	/* Ownership has passed to the caller. */
	phoneme_base = NULL;
	return 0;
}