diff options
Diffstat (limited to 'src/ellinika/syllabificator.c')
-rw-r--r-- | src/ellinika/syllabificator.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/src/ellinika/syllabificator.c b/src/ellinika/syllabificator.c new file mode 100644 index 0000000..c4105ec --- /dev/null +++ b/src/ellinika/syllabificator.c @@ -0,0 +1,152 @@ +/* This file is part of Ellinika project. + Copyright (C) 2011 Sergey Poznyakoff + + Ellinika is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Ellinika is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif +#include <errno.h> +#include <stdlib.h> +#include "utf8.h" +#include "elmorph.h" + +struct syllabificator { + struct syllable *syl; + size_t syl_count; + size_t syl_max; + struct phoneme *phon; + size_t phon_cur; + size_t phon_max; + int err; +}; + +#define SYL_FLAG_MASK (CHF_ACCENT_MASK) + +#define ISIOTA(ph) \ + ((ph).code == PHON_I && (ph).count == 1 && \ + !((ph).flags & (CHF_ACCENT_MASK|CHF_TREMA))) + +int +next_syllable(struct syllabificator *sp) +{ + struct syllable *syl; + + if (sp->phon_cur == sp->phon_max) + return 1; + + if (sp->syl_count == sp->syl_max) { + struct syllable *newsyl; + size_t newmax = sp->syl_max + 16; + + newsyl = realloc(sp->syl, sizeof(newsyl[0]) * newmax); + if (!newsyl) { + sp->err = errno; + return 1; + } + + sp->syl = newsyl; + sp->syl_max = newmax; + } + syl = sp->syl + sp->syl_count++; + syl->char_start = sp->phon[sp->phon_cur].start; + syl->char_count = sp->phon[sp->phon_cur].count; + syl->phoneme_start = sp->phon_cur; + syl->phoneme_count = 1; + syl->flags = sp->phon[sp->phon_cur].flags; + + sp->phon_cur++; + + /* A diphthong forms a single syllable. */ + if ((syl->flags & CHF_DIPHTHONG) && !(syl->flags & CHF_CONSONANT)) + return 0; + + /* If the syllable begins with a consonant, it includes all + subsequent consonants up to the first vowel. */ + if (syl->flags & CHF_CONSONANT) { + for (; sp->phon_cur < sp->phon_max && + (sp->phon[sp->phon_cur].flags & CHF_CONSONANT); + sp->phon_cur++) { + syl->char_count += sp->phon[sp->phon_cur].count; + syl->phoneme_count++; + } + } else if ((sp->phon[sp->phon_cur].flags & CHF_VOWEL) && + !ISIOTA(sp->phon[sp->phon_cur-1])) + /* V-V boundary */ + return 0; + + if (sp->phon_cur == sp->phon_max) + return 0; + + if (ISIOTA(sp->phon[sp->phon_cur])) { + /* incorporate iota */; + syl->char_count += sp->phon[sp->phon_cur].count; + syl->phoneme_count++; + sp->phon_cur++; + } + + if (sp->phon[sp->phon_cur].flags & CHF_VOWEL) + syl->flags |= sp->phon[sp->phon_cur].flags & CHF_ACCENT_MASK; + + syl->char_count += sp->phon[sp->phon_cur].count; + syl->phoneme_count++; + sp->phon_cur++; + + if (sp->phon_cur == sp->phon_max) + return 0; + + if (sp->phon[sp->phon_cur - 1].flags & CHF_VOWEL) { + /* If next phoneme is a consonant, incorporate it into the + current syllable */ + if ((sp->phon[sp->phon_cur].flags & CHF_CONSONANT) && + (sp->phon_cur + 1 == sp->phon_max || + (sp->phon[sp->phon_cur + 1].flags & CHF_CONSONANT))) { + syl->char_count += sp->phon[sp->phon_cur].count; + syl->phoneme_count++; + sp->phon_cur++; + } + } + + return 0; +} + + +int +syllable_map(struct syllable **psyl, size_t *plen, + struct phoneme *phon, size_t nphon) +{ + struct syllabificator sd; + + + sd.syl = NULL; + sd.syl_count = 0; + sd.syl_max = 0; + sd.phon = phon; + sd.phon_cur = 0; + sd.phon_max = nphon; + sd.err = 0; + + while (next_syllable(&sd) == 0) + sd.syl[sd.syl_count-1].flags &= SYL_FLAG_MASK; + + if (sd.err) { + free(sd.syl); + return sd.err; + } + + *psyl = sd.syl; + *plen = sd.syl_count; + + return 0; +} |