From a1a5b7ddd6c3c0532c37551b24fd573a554ac181 Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <gray@gnu.org.ua>
Date: Fri, 10 Jun 2011 23:04:53 +0300
Subject: Fix syllabification.

* configure.ac: Add AC_PROG_YACC
* src/ellinika/phoneme.y: New file.
* src/ellinika/yyrename: New file.
* src/ellinika/syllabificator.c: New file.
* src/ellinika/.gitignore: Update.
* src/ellinika/elchr.c (char_info_st): Move to header.
(el_basic_ctype):
(elchr_info): Remove static qualifier.
Return a pointer to const.
(elchr_letter,elchr_phoneme): New functions.
(elchr_diphthong): Remove.
* src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members.
(_elstr_syllabize): Rewrite.
(invalidate_maps): New static function.
(_elstr_alloc): Initialize new fields, take function name
as argument, for diagnostic purposes.
(_elstr_print): Rewrite
(deftab): Update.
(elstr-syllable-prop,elstr-syllable)
(_elstr_set_accent,_elstr_set_accent_on_char): Rewrite.
(elstr-char-phoneme,elstr->phonetic-map): New functions.
* src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove.
(CHF_DIPHTHONG): New flag.
(PHON_.*): New constants.
(phoneme,syllable): New structures.
(char_info_st)<letter,phoneme>: New members.
(elchr_info,elchr_letter)
(elchr_phoneme,phoneme_map)
(syllable_map): New protos.
(elchr_diphthong): Remove protos.
* src/ellinika/elmorph.scm4: Move public definitions
to elmorph-public.scm; include it here.
* src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a
wrapper over elstr->soundslike.

Describe Milesian numbers.

* style.css (img.ellinika-img): New class.
* xml/lingua.conf.in (IMAGE): New tag.
* xml/pl/alfabhta.xml: Describe Milesian numbers.

Various fixes.

* data/dbverb.struct: fix a typo in flection.
Use 'sub' theme for pas/sub/aor.
* data/irregular-verbs.xml: Add more verbs.
* scm/conjugator.scm: Various fixes.
* scm/verbop.scm: Accept empty mood and voice declarations.
---
 src/ellinika/phoneme.y | 353 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 353 insertions(+)
 create mode 100644 src/ellinika/phoneme.y

(limited to 'src/ellinika/phoneme.y')

diff --git a/src/ellinika/phoneme.y b/src/ellinika/phoneme.y
new file mode 100644
index 0000000..353d175
--- /dev/null
+++ b/src/ellinika/phoneme.y
@@ -0,0 +1,353 @@
+/* This file is part of Ellinika project.
+   Copyright (C) 2011 Sergey Poznyakoff
+
+   Ellinika is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Ellinika is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>	
+#include "utf8.h"
+#include "elmorph.h"
+
+static struct phoneme *phoneme_base;	/* phonemes collected by the parser */
+static size_t phoneme_max, phoneme_count; /* allocated / used slots in phoneme_base */
+static int error_state;			/* errno-style code set by phoneme_append, 0 if none */
+int yylex(void);			/* defined after the grammar; declared here because */
+int yyerror(const char *s);		/* the generated parser calls them before that point */
+#define PHONEME_MAP_INITIAL_ALLOC 16
+
+/* Append a copy of *PHONEME to the phoneme_base array, doubling the
+   array (or allocating PHONEME_MAP_INITIAL_ALLOC slots) when it is full.
+   On failure set error_state to ENOMEM; once error_state is set, this
+   and all later calls return without storing anything. */
+static void
+phoneme_append(struct phoneme *phoneme)
+{
+	if (error_state)
+		return;
+
+	if (phoneme_max == phoneme_count) {
+		size_t nsize = phoneme_max ? 2 * phoneme_max
+					   : PHONEME_MAP_INITIAL_ALLOC;
+		struct phoneme *np = NULL;
+
+		/* Refuse a wrapped element count or a byte size beyond size_t. */
+		if (nsize > phoneme_max &&
+		    nsize <= (size_t)-1 / sizeof(phoneme_base[0]))
+			np = realloc(phoneme_base,
+				     nsize * sizeof(phoneme_base[0]));
+		if (!np) {
+			error_state = ENOMEM;
+			return;
+		}
+		phoneme_max = nsize;
+		phoneme_base = np;
+	}
+	phoneme_base[phoneme_count++] = *phoneme;
+}
+	
+#define DIPHTHONG(a,b,pc,fl) do { /* a := 2-letter phoneme pc, accent bits of a or b */ \
+		(a).count = 2;						\
+		(a).code = (pc);					\
+		(a).flags = (fl) | CHF_DIPHTHONG |			\
+			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
+	} while (0)
+
+%}
+%union {	/* semantic value of all tokens and of the typed nonterminals */
+	struct phoneme phoneme;
+};
+/* yylex returns ci->letter as the token code, so these numbers must match the letter codes (defined outside this file). */
+%token <phoneme> LETTER_A            1
+%token <phoneme> LETTER_A_ACC        2
+%token <phoneme> LETTER_B            3
+%token <phoneme> LETTER_G            4
+%token <phoneme> LETTER_D            5
+%token <phoneme> LETTER_E            6
+%token <phoneme> LETTER_E_ACC        7
+%token <phoneme> LETTER_Z            8
+%token <phoneme> LETTER_H            9
+%token <phoneme> LETTER_H_ACC       10
+%token <phoneme> LETTER_TH          11
+%token <phoneme> LETTER_I           12 
+%token <phoneme> LETTER_I_ACC       13
+%token <phoneme> LETTER_I_TREMA     14
+%token <phoneme> LETTER_I_TREMA_ACC 15
+%token <phoneme> LETTER_K           16
+%token <phoneme> LETTER_L           17
+%token <phoneme> LETTER_M           18
+%token <phoneme> LETTER_N           19
+%token <phoneme> LETTER_KS          20
+%token <phoneme> LETTER_OMICRON     21
+%token <phoneme> LETTER_OMICRON_ACC 22
+%token <phoneme> LETTER_P           23
+%token <phoneme> LETTER_R           24
+%token <phoneme> LETTER_S           25
+%token <phoneme> LETTER_T           26
+%token <phoneme> LETTER_Y           27
+%token <phoneme> LETTER_Y_ACC       28
+%token <phoneme> LETTER_Y_TREMA     29
+%token <phoneme> LETTER_Y_TREMA_ACC 30
+%token <phoneme> LETTER_F           31
+%token <phoneme> LETTER_X           32 
+%token <phoneme> LETTER_PS          33
+%token <phoneme> LETTER_OMEGA       34
+%token <phoneme> LETTER_OMEGA_ACC   35
+
+%type <phoneme> monophthong diphthong phoneme
+
+%%
+input   : phoneme		/* at least one phoneme is required: empty input is a syntax error */
+          {
+		  phoneme_append(&$1);
+	  }
+        | input phoneme		/* left recursion: phonemes are appended in input order */
+          {
+		  phoneme_append(&$2);
+	  }
+        ;
+
+phoneme : monophthong		/* a single letter */
+        | diphthong		/* two letters merged into one phoneme */
+        ;
+
+monophthong:	/* any single letter token; no action, so the default $$ = $1 passes its value through */
+          LETTER_A
+	| LETTER_A_ACC
+        | LETTER_B
+        | LETTER_G
+        | LETTER_D
+        | LETTER_E
+	| LETTER_E_ACC
+        | LETTER_Z
+        | LETTER_H
+	| LETTER_H_ACC
+        | LETTER_TH
+        | LETTER_I
+	| LETTER_I_ACC
+	| LETTER_I_TREMA
+	| LETTER_I_TREMA_ACC
+        | LETTER_K
+        | LETTER_L
+        | LETTER_M
+        | LETTER_N
+        | LETTER_KS
+        | LETTER_OMICRON
+	| LETTER_OMICRON_ACC
+        | LETTER_P
+        | LETTER_R
+        | LETTER_S
+        | LETTER_T
+        | LETTER_Y
+	| LETTER_Y_ACC
+	| LETTER_Y_TREMA
+	| LETTER_Y_TREMA_ACC
+        | LETTER_F
+        | LETTER_X
+        | LETTER_PS
+        | LETTER_OMEGA
+        | LETTER_OMEGA_ACC
+	;
+	
+diphthong:	/* letter pairs read as one phoneme; the result is $1 rewritten by DIPHTHONG (count 2, new code and flags) */
+          LETTER_A LETTER_I	/* NOTE(review): each first letter is also a monophthong, so pairing presumably relies on yacc preferring shift in the resulting shift/reduce conflicts -- confirm */
+          {
+		  DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_A LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_E LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_E LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_Y LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_Y LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_Y
+          {
+		  DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_M LETTER_P	/* from here on: consonant pairs, flagged CHF_CONSONANT */
+          {
+		  DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_N LETTER_T
+          {
+		  DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_G
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_K
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_X
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_S LETTER_M
+          {
+		  DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_T LETTER_S
+          {
+		  DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_T LETTER_Z
+          {
+		  DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_KS
+          {
+		  DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+        | LETTER_A LETTER_Y	/* last four rules pass flags 0: neither CHF_VOWEL nor CHF_CONSONANT is set */
+          {
+		  DIPHTHONG($1, $2, PHON_AV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_A LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_AV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_E LETTER_Y
+          {
+		  DIPHTHONG($1, $2, PHON_EV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_E LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_EV, 0);
+		  $$ = $1;
+	  }		  
+        ;
+
+%%
+
+static unsigned *input_base;	/* word being scanned by yylex */
+static size_t input_len;	/* number of elements in input_base */
+static size_t input_pos;	/* index of the next element yylex will read */
+/* True if CI is a non-null pointer whose letter member is non-zero. */
+#define ISALPHA(ci) ((ci) && (ci)->letter)
+/* Bits of char_info_st flags that yylex copies into a phoneme. */
+#define PHONEME_FLAG_MASK \
+	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)
+
+int
+yylex(void)
+{
+	struct char_info_st const *ci;
+
+	/* Skip elements for which elchr_info gives a null pointer or a zero
+	   letter code; return 0 (end of input) once the word is used up. */
+	do {
+		if (input_pos == input_len)
+			return 0;
+		ci = elchr_info(input_base[input_pos++]);
+	} while (!ISALPHA(ci));
+
+	yylval.phoneme.code = ci->phoneme;
+	yylval.phoneme.start = input_pos - 1;	/* index of this letter in the word */
+	yylval.phoneme.count = 1;
+	yylval.phoneme.flags = ci->flags & PHONEME_FLAG_MASK;
+	return ci->letter;	/* the letter code is used as the token number */
+}
+
+int
+yyerror(const char *s)
+{	/* Parser error callback: print S to stderr and abort; never returns. */
+	fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
+	abort();
+}
+
+/* Parse WORD (LEN elements) into phonemes.  On success return 0 and store
+   the array (to be released with free) in *PPH and its length in *PLEN.
+   On failure return, and leave in errno, EINVAL (parse error) or ENOMEM. */
+int
+phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
+{
+	input_base = word;
+	input_len = len;
+	input_pos = 0;
+	phoneme_base = NULL;
+	phoneme_max = 0;
+	phoneme_count = 0;
+	error_state = 0;
+	if (yyparse())
+		error_state = EINVAL;
+	if (error_state) {
+		free(phoneme_base);
+		errno = error_state;
+		return errno;
+	}
+	if (phoneme_count < phoneme_max) {
+		/* Trim the slack; on failure the larger array is still valid. */
+		struct phoneme *np = realloc(phoneme_base,
+					     phoneme_count * sizeof(phoneme_base[0]));
+		if (np)
+			phoneme_base = np;
+	}
+	*pph = phoneme_base;
+	*plen = phoneme_count;
+	return 0;
+}
-- 
cgit v1.2.1