aboutsummaryrefslogtreecommitdiff
path: root/src/ellinika/syllabificator.c
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2011-06-10 23:04:53 +0300
committerSergey Poznyakoff <gray@gnu.org.ua>2011-06-11 00:34:20 +0300
commita1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch)
treef86f3572c77dc986bb2dfb65619ac4bc35c83847 /src/ellinika/syllabificator.c
parent2bae7da012e2125762855ce014e63345ecbbbb18 (diff)
downloadellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz
ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2
Fix syllabification.
* configure.ac: Add AC_PROG_YACC. * src/ellinika/phoneme.y: New file. * src/ellinika/yyrename: New file. * src/ellinika/syllabificator.c: New file. * src/ellinika/.gitignore: Update. * src/ellinika/elchr.c (char_info_st): Move to header. (el_basic_ctype): (elchr_info): Remove static qualifier. Return a pointer to const. (elchr_letter,elchr_phoneme): New functions. (elchr_diphthong): Remove. * src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members. (_elstr_syllabize): Rewrite. (invalidate_maps): New static function. (_elstr_alloc): Initialize new fields, take function name as argument, for diagnostic purposes. (_elstr_print): Rewrite. (deftab): Update. (elstr-syllable-prop,elstr-syllable) (_elstr_set_accent,_elstr_set_accent_on_char): Rewrite. (elstr-char-phoneme,elstr->phonetic-map): New functions. * src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove. (CHF_DIPHTHONG): New flag. (PHON_.*): New constants. (phoneme,syllable): New structures. (char_info_st)<letter,phoneme>: New members. (elchr_info,elchr_letter) (elchr_phoneme,phoneme_map) (syllable_map): New protos. (elchr_diphthong): Remove protos. * src/ellinika/elmorph.scm4: Move public definitions to elmorph-public.scm; include it here. * src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a wrapper over elstr->soundslike. Describe Milesian numbers. * style.css (img.ellinika-img): New class. * xml/lingua.conf.in (IMAGE): New tag. * xml/pl/alfabhta.xml: Describe Milesian numbers. Various fixes. * data/dbverb.struct: fix a typo in flection. Use 'sub' theme for pas/sub/aor. * data/irregular-verbs.xml: Add more verbs. * scm/conjugator.scm: Various fixes. * scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (limited to 'src/ellinika/syllabificator.c')
-rw-r--r--src/ellinika/syllabificator.c152
1 files changed, 152 insertions, 0 deletions
diff --git a/src/ellinika/syllabificator.c b/src/ellinika/syllabificator.c
new file mode 100644
index 0000000..c4105ec
--- /dev/null
+++ b/src/ellinika/syllabificator.c
@@ -0,0 +1,152 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include "utf8.h"
+#include "elmorph.h"
+
+struct syllabificator { /* Working state shared by next_syllable() and syllable_map(). */
+ struct syllable *syl; /* Syllable array built so far; grown with realloc(). */
+ size_t syl_count; /* Number of entries of syl in use. */
+ size_t syl_max; /* Number of entries allocated for syl. */
+ struct phoneme *phon; /* Input phoneme array. */
+ size_t phon_cur; /* Index of the next phoneme to consume. */
+ size_t phon_max; /* Number of phonemes in phon. */
+ int err; /* errno saved when growing syl failed; 0 otherwise. */
+};
+
+#define SYL_FLAG_MASK (CHF_ACCENT_MASK) /* Flag bits syllable_map() keeps on each finished syllable. */
+
+#define ISIOTA(ph) /* True if PH is a PHON_I phoneme one character long with no accent or trema flag; PH is evaluated several times. */ \
+ ((ph).code == PHON_I && (ph).count == 1 && \
+ !((ph).flags & (CHF_ACCENT_MASK|CHF_TREMA)))
+
+/* Carve the next syllable out of the phoneme array.
+
+   Appends one entry to SP->syl (growing the array in steps of 16
+   entries) that starts at phoneme SP->phon_cur, and advances
+   SP->phon_cur past every phoneme placed in that syllable.  The new
+   entry's flags start as the flags of its first phoneme; the accent
+   bits of a vowel absorbed later are OR-ed in.
+
+   Returns 0 if a syllable was produced.  Returns 1 if none was
+   produced: either every phoneme has been consumed (SP->err is left
+   untouched), or the syllable array could not be grown (SP->err is
+   set to a non-zero error code).  */
+int
+next_syllable(struct syllabificator *sp)
+{
+	struct syllable *syl;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 1;
+
+	if (sp->syl_count == sp->syl_max) {
+		struct syllable *newsyl;
+		size_t newmax = sp->syl_max + 16;
+
+		newsyl = realloc(sp->syl, sizeof(newsyl[0]) * newmax);
+		if (!newsyl) {
+			/* ISO C does not oblige realloc to set errno.  Never
+			   record the failure as 0: syllable_map treats a
+			   zero err as success. */
+			sp->err = errno ? errno : ENOMEM;
+			return 1;
+		}
+
+		sp->syl = newsyl;
+		sp->syl_max = newmax;
+	}
+	syl = sp->syl + sp->syl_count++;
+	syl->char_start = sp->phon[sp->phon_cur].start;
+	syl->char_count = sp->phon[sp->phon_cur].count;
+	syl->phoneme_start = sp->phon_cur;
+	syl->phoneme_count = 1;
+	syl->flags = sp->phon[sp->phon_cur].flags;
+
+	sp->phon_cur++;
+
+	/* A diphthong forms a single syllable. */
+	if ((syl->flags & CHF_DIPHTHONG) && !(syl->flags & CHF_CONSONANT))
+		return 0;
+
+	/* If the syllable begins with a consonant, it includes all
+	   subsequent consonants up to the first vowel. */
+	if (syl->flags & CHF_CONSONANT) {
+		for (; sp->phon_cur < sp->phon_max &&
+			     (sp->phon[sp->phon_cur].flags & CHF_CONSONANT);
+		     sp->phon_cur++) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+		}
+	} else if (sp->phon_cur < sp->phon_max &&
+		   (sp->phon[sp->phon_cur].flags & CHF_VOWEL) &&
+		   !ISIOTA(sp->phon[sp->phon_cur - 1]))
+		/* V-V boundary.  The bounds test comes first: the opening
+		   vowel may have been the last phoneme of the input. */
+		return 0;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (ISIOTA(sp->phon[sp->phon_cur])) {
+		/* incorporate iota */
+		syl->char_count += sp->phon[sp->phon_cur].count;
+		syl->phoneme_count++;
+		sp->phon_cur++;
+
+		/* The iota may have been the final phoneme.  Stop here, or
+		   the code below would read past the end of the array and
+		   push phon_cur beyond phon_max. */
+		if (sp->phon_cur == sp->phon_max)
+			return 0;
+	}
+
+	/* Absorb one more phoneme; if it is a vowel, the syllable
+	   inherits its accent bits. */
+	if (sp->phon[sp->phon_cur].flags & CHF_VOWEL)
+		syl->flags |= sp->phon[sp->phon_cur].flags & CHF_ACCENT_MASK;
+
+	syl->char_count += sp->phon[sp->phon_cur].count;
+	syl->phoneme_count++;
+	sp->phon_cur++;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (sp->phon[sp->phon_cur - 1].flags & CHF_VOWEL) {
+		/* If the next phoneme is a consonant that is either the
+		   last phoneme or is itself followed by a consonant,
+		   incorporate it into the current syllable. */
+		if ((sp->phon[sp->phon_cur].flags & CHF_CONSONANT) &&
+		    (sp->phon_cur + 1 == sp->phon_max ||
+		     (sp->phon[sp->phon_cur + 1].flags & CHF_CONSONANT))) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+			sp->phon_cur++;
+		}
+	}
+
+	return 0;
+}
+
+
+/* Split the NPHON phonemes in PHON into syllables.
+
+   On success returns 0, stores the syllable array in *PSYL and its
+   length in *PLEN.  The array is obtained from realloc(); for
+   NPHON == 0 it is NULL and the length is 0.  Each returned
+   syllable's flags are reduced to the SYL_FLAG_MASK bits.
+
+   If the array cannot be grown, returns the error code recorded by
+   next_syllable, frees whatever was built, and leaves *PSYL and
+   *PLEN untouched.  */
+int
+syllable_map(struct syllable **psyl, size_t *plen,
+	     struct phoneme *phon, size_t nphon)
+{
+	struct syllabificator state;
+
+	/* Input side: the phoneme array to walk. */
+	state.phon = phon;
+	state.phon_max = nphon;
+	state.phon_cur = 0;
+
+	/* Output side: starts empty, grown on demand. */
+	state.syl = NULL;
+	state.syl_max = 0;
+	state.syl_count = 0;
+	state.err = 0;
+
+	for (;;) {
+		struct syllable *last;
+
+		if (next_syllable(&state) != 0)
+			break;
+
+		/* Keep only the syllable-level flag bits. */
+		last = &state.syl[state.syl_count - 1];
+		last->flags &= SYL_FLAG_MASK;
+	}
+
+	if (state.err != 0) {
+		free(state.syl);
+		return state.err;
+	}
+
+	*plen = state.syl_count;
+	*psyl = state.syl;
+
+	return 0;
+}

Return to:

Send suggestions and report system problems to the System administrator.