/* This file is part of Ellinika project.
Copyright (C) 2011 Sergey Poznyakoff
Ellinika is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Ellinika is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
%{
#ifdef HAVE_CONFIG_H
# include
#endif
#include
#include
#include
#include "utf8.h"
#include "elmorph.h"
/* Output array of phonemes, grown by phoneme_append as the parser runs. */
static struct phoneme *phoneme_base;
/* Number of elements currently allocated in phoneme_base. */
static size_t phoneme_max;
/* Number of elements of phoneme_base currently in use. */
static size_t phoneme_count;
/* Zero, or an errno-style code recorded by phoneme_append on failure. */
static int error_state;
/* Element count used for the first allocation of phoneme_base. */
#define PHONEME_MAP_INITIAL_ALLOC 16
/*
 * Append a copy of *phoneme to the phoneme_base array, growing the array
 * when it is full (initially to PHONEME_MAP_INITIAL_ALLOC elements, then
 * by doubling).
 *
 * On failure nothing is appended and error_state is set to ENOMEM; once
 * error_state is non-zero every subsequent call is a no-op, so the parser
 * can keep running and the caller inspects error_state afterwards.
 */
static void
phoneme_append(struct phoneme *phoneme)
{
	if (error_state)
		return;

	if (phoneme_max == phoneme_count) {
		struct phoneme *np;
		size_t nsize;

		if (!phoneme_max)
			nsize = PHONEME_MAP_INITIAL_ALLOC;
		else if (phoneme_max >
			 ((size_t)-1) / sizeof(phoneme_base[0]) / 2) {
			/* Doubling would overflow either the element count
			   or the byte size handed to realloc. */
			error_state = ENOMEM;
			return;
		} else
			nsize = 2 * phoneme_max;

		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
		if (!np) {
			/* phoneme_base is still valid here; the caller
			   frees it when it sees error_state. */
			error_state = ENOMEM;
			return;
		}
		phoneme_max = nsize;
		phoneme_base = np;
	}
	phoneme_base[phoneme_count++] = *phoneme;
}
/*
 * Turn phoneme A into a two-letter phoneme: set its letter count to 2,
 * its code to PC, and its flags to FL plus CHF_DIPHTHONG plus whatever
 * accent bits (CHF_ACCENT_MASK) were set on either A or B.
 * A is modified in place; B is only read.  Every argument is
 * parenthesized so that arbitrary expressions can be passed.
 */
#define DIPHTHONG(a,b,pc,fl) do {				\
		(a).count = 2;					\
		(a).code = (pc);				\
		(a).flags = (fl) | CHF_DIPHTHONG |		\
			(((a).flags | (b).flags) & CHF_ACCENT_MASK); \
	} while (0)
%}
/* The only semantic value type is struct phoneme; yylex fills it in
   through yylval.phoneme. */
%union {
struct phoneme phoneme;
};
/* Letter tokens.  Each carries a struct phoneme as its semantic value
   (the <phoneme> tag selects that %union member), which the grammar
   actions read through $1/$2.
   The token numbers are assigned explicitly because yylex returns
   ci->letter as the token code; presumably they must match the letter
   codes stored in the character table -- confirm against elchr_info. */
%token <phoneme> LETTER_A 1
%token <phoneme> LETTER_A_ACC 2
%token <phoneme> LETTER_B 3
%token <phoneme> LETTER_G 4
%token <phoneme> LETTER_D 5
%token <phoneme> LETTER_E 6
%token <phoneme> LETTER_E_ACC 7
%token <phoneme> LETTER_Z 8
%token <phoneme> LETTER_H 9
%token <phoneme> LETTER_H_ACC 10
%token <phoneme> LETTER_TH 11
%token <phoneme> LETTER_I 12
%token <phoneme> LETTER_I_ACC 13
%token <phoneme> LETTER_I_TREMA 14
%token <phoneme> LETTER_I_TREMA_ACC 15
%token <phoneme> LETTER_K 16
%token <phoneme> LETTER_L 17
%token <phoneme> LETTER_M 18
%token <phoneme> LETTER_N 19
%token <phoneme> LETTER_KS 20
%token <phoneme> LETTER_OMICRON 21
%token <phoneme> LETTER_OMICRON_ACC 22
%token <phoneme> LETTER_P 23
%token <phoneme> LETTER_R 24
%token <phoneme> LETTER_S 25
%token <phoneme> LETTER_T 26
%token <phoneme> LETTER_Y 27
%token <phoneme> LETTER_Y_ACC 28
%token <phoneme> LETTER_Y_TREMA 29
%token <phoneme> LETTER_Y_TREMA_ACC 30
%token <phoneme> LETTER_F 31
%token <phoneme> LETTER_X 32
%token <phoneme> LETTER_PS 33
%token <phoneme> LETTER_OMEGA 34
%token <phoneme> LETTER_OMEGA_ACC 35

/* Nonterminals that yield a struct phoneme. */
%type <phoneme> monophthong diphthong phoneme
%%
/* A word is a possibly empty sequence of phonemes; each one is appended
   to the output array as soon as it is reduced.
   The empty alternative lets input that contains no letters at all parse
   successfully (producing zero phonemes) instead of raising a syntax
   error, which yyerror would turn into abort(). */
input     : /* empty */
          | input phoneme
            {
		    phoneme_append(&$2);
	    }
          ;
/* A phoneme is either a single letter or a recognized two-letter
   combination.  No explicit actions: the default $$ = $1 applies. */
phoneme : monophthong
| diphthong
;
/* Any single letter token.  There are no explicit actions, so the
   default $$ = $1 passes the value prepared by yylex through unchanged. */
monophthong:
LETTER_A
| LETTER_A_ACC
| LETTER_B
| LETTER_G
| LETTER_D
| LETTER_E
| LETTER_E_ACC
| LETTER_Z
| LETTER_H
| LETTER_H_ACC
| LETTER_TH
| LETTER_I
| LETTER_I_ACC
| LETTER_I_TREMA
| LETTER_I_TREMA_ACC
| LETTER_K
| LETTER_L
| LETTER_M
| LETTER_N
| LETTER_KS
| LETTER_OMICRON
| LETTER_OMICRON_ACC
| LETTER_P
| LETTER_R
| LETTER_S
| LETTER_T
| LETTER_Y
| LETTER_Y_ACC
| LETTER_Y_TREMA
| LETTER_Y_TREMA_ACC
| LETTER_F
| LETTER_X
| LETTER_PS
| LETTER_OMEGA
| LETTER_OMEGA_ACC
;
/* Two-letter sequences that map to a single phoneme.  Every action
   rewrites $1 in place with DIPHTHONG (count becomes 2, code and flags
   are replaced, accent bits of both letters are kept) and returns it, so
   the result keeps the `start' value of the first letter.
   Only the listed pairs are combined; any other pair (for instance one
   whose first letter is an accented token) is parsed as two
   monophthongs.
   NOTE(review): each first letter here is also a monophthong, so this
   relies on yacc resolving the resulting shift/reduce conflicts in
   favour of shift -- confirm against the generator's conflict report. */
diphthong:
/* Vowel pairs mapped to a single phoneme code flagged CHF_VOWEL. */
LETTER_A LETTER_I
{
DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
$$ = $1;
}
| LETTER_A LETTER_I_ACC
{
DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
$$ = $1;
}
| LETTER_E LETTER_I
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_E LETTER_I_ACC
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_OMICRON LETTER_I
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_OMICRON LETTER_I_ACC
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_Y LETTER_I
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_Y LETTER_I_ACC
{
DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
$$ = $1;
}
| LETTER_OMICRON LETTER_Y
{
DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
$$ = $1;
}
| LETTER_OMICRON LETTER_Y_ACC
{
DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
$$ = $1;
}
/* Consonant pairs mapped to a single phoneme code flagged
   CHF_CONSONANT. */
| LETTER_M LETTER_P
{
DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
$$ = $1;
}
| LETTER_N LETTER_T
{
DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
$$ = $1;
}
| LETTER_G LETTER_G
{
DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
$$ = $1;
}
| LETTER_G LETTER_K
{
DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
$$ = $1;
}
| LETTER_G LETTER_X
{
DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
$$ = $1;
}
| LETTER_S LETTER_M
{
DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
$$ = $1;
}
| LETTER_T LETTER_S
{
DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
$$ = $1;
}
| LETTER_T LETTER_Z
{
DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
$$ = $1;
}
| LETTER_G LETTER_KS
{
DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
$$ = $1;
}
/* Pairs mapped to PHON_AV / PHON_EV.  Neither CHF_VOWEL nor
   CHF_CONSONANT is passed, so only CHF_DIPHTHONG and the accent bits
   end up in the flags. */
| LETTER_A LETTER_Y
{
DIPHTHONG($1, $2, PHON_AV, 0);
$$ = $1;
}
| LETTER_A LETTER_Y_ACC
{
DIPHTHONG($1, $2, PHON_AV, 0);
$$ = $1;
}
| LETTER_E LETTER_Y
{
DIPHTHONG($1, $2, PHON_EV, 0);
$$ = $1;
}
| LETTER_E LETTER_Y_ACC
{
DIPHTHONG($1, $2, PHON_EV, 0);
$$ = $1;
}
;
%%
/* Lexer input: the word being scanned, its length in elements, and the
   index of the next element yylex will read. */
static unsigned *input_base;
static size_t input_len;
static size_t input_pos;

/* True if CI is a non-null character-info pointer whose `letter' field
   is non-zero.  The argument is parenthesized at every use so that any
   pointer expression can be passed. */
#define ISALPHA(ci) ((ci) && (ci)->letter)

/* Bits of a character's flags that are copied into a phoneme's flags. */
#define PHONEME_FLAG_MASK \
	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)
/*
 * Lexer for the phoneme grammar.
 *
 * Advances through input_base, skipping every element for which
 * elchr_info() yields no info or a zero `letter', and returns the
 * `letter' code of the first remaining element as the token.  The
 * token's semantic value (yylval.phoneme) describes a one-letter phoneme
 * located at that element's index.  Returns 0 once the input is used up.
 */
int
yylex()
{
	struct char_info_st const *info;
	size_t at;

	for (;;) {
		if (input_pos == input_len)
			return 0;
		at = input_pos++;
		info = elchr_info(input_base[at]);
		if (ISALPHA(info))
			break;
	}

	yylval.phoneme.start = at;
	yylval.phoneme.count = 1;
	yylval.phoneme.code = info->phoneme;
	yylval.phoneme.flags = info->flags & PHONEME_FLAG_MASK;
	return info->letter;
}
/* Parser error callback: print message S to stderr, prefixed with a
   source location, and abort the process.  Does not return, hence no
   return statement despite the int return type.
   NOTE(review): __FILE__/__LINE__ identify this fprintf call, not the
   position in the input that failed to parse. */
int
yyerror(const char *s)
{
fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
abort();
}
/*
 * Convert WORD, an array of LEN character codes, into an array of
 * phonemes.
 *
 * On success returns 0, stores the malloc-allocated array in *PPH and
 * its element count in *PLEN; the caller owns the array and releases it
 * with free().
 *
 * On failure returns a non-zero errno value (also stored in errno) and
 * leaves *PPH and *PLEN untouched: EINVAL if the parser reported an
 * error, otherwise the code recorded by phoneme_append (ENOMEM).
 *
 * Works on file-scope state, so it is not reentrant.
 */
int
phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
{
	int rc;

	input_base = word;
	input_len = len;
	input_pos = 0;

	phoneme_base = NULL;
	phoneme_max = 0;
	phoneme_count = 0;
	error_state = 0;

	rc = yyparse();
	if (rc || error_state) {
		int ec = rc ? EINVAL : error_state;

		free(phoneme_base);
		/* Do not leave a dangling pointer in the static state. */
		phoneme_base = NULL;
		phoneme_max = 0;
		phoneme_count = 0;
		errno = ec;
		return ec;
	}

	if (phoneme_count < phoneme_max) {
		/* Trim the unused tail.  If the shrinking realloc fails the
		   original block is still valid, so keep it rather than
		   losing the result. */
		struct phoneme *np =
			realloc(phoneme_base,
				phoneme_count * sizeof(phoneme_base[0]));
		if (np)
			phoneme_base = np;
	}

	*pph = phoneme_base;
	*plen = phoneme_count;
	return 0;
}