From a1a5b7ddd6c3c0532c37551b24fd573a554ac181 Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <gray@gnu.org.ua>
Date: Fri, 10 Jun 2011 23:04:53 +0300
Subject: Fix syllabification.

* configure.ac: Add AC_PROG_YACC
* src/ellinika/phoneme.y: New file.
* src/ellinika/yyrename: New file.
* src/ellinika/syllabificator.c: New file.
* src/ellinika/.gitignore: Update.
* src/ellinika/elchr.c (char_info_st): Move to header.
(el_basic_ctype):
(elchr_info): Remove static qualifier.
Return a pointer to const.
(elchr_letter,elchr_phoneme): New functions.
(elchr_diphthong): Remove.
* src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members.
(_elstr_syllabize): Rewrite.
(invalidate_maps): New static function.
(_elstr_alloc): Initialize new fields, take function name
as argument, for diagnostic purposes.
(_elstr_print): Rewrite.
(deftab): Update.
(elstr-syllable-prop,elstr-syllable)
(_elstr_set_accent,_elstr_set_accent_on_char): Rewrite.
(elstr-char-phoneme,elstr->phonetic-map): New functions.
* src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove.
(CHF_DIPHTHONG): New flag.
(PHON_.*): New constants.
(phoneme,syllable): New structures.
(char_info_st)<letter,phoneme>: New members.
(elchr_info,elchr_letter)
(elchr_phoneme,phoneme_map)
(syllable_map): New protos.
(elchr_diphthong): Remove protos.
* src/ellinika/elmorph.scm4: Move public definitions
to elmorph-public.scm; include it here.
* src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a
wrapper over elstr->soundslike.

Describe Milesian numbers.

* style.css (img.ellinika-img): New class.
* xml/lingua.conf.in (IMAGE): New tag.
* xml/pl/alfabhta.xml: Describe Milesian numbers.

Various fixes.

* data/dbverb.struct: fix a typo in flection.
Use 'sub' theme for pas/sub/aor.
* data/irregular-verbs.xml: Add more verbs.
* scm/conjugator.scm: Various fixes.
* scm/verbop.scm: Accept empty mood and voice declarations.
---
 src/ellinika/.gitignore         |   2 +
 src/ellinika/Makefile.am        |  23 ++-
 src/ellinika/elchr.c            | 273 ++++++++++++++++++-------------
 src/ellinika/elmorph-public.scm | 106 ++++++++++++
 src/ellinika/elmorph.c          | 308 ++++++++++++++++++++++-------------
 src/ellinika/elmorph.h          |  82 +++++++++-
 src/ellinika/elmorph.scm4       |  25 +--
 src/ellinika/phoneme.y          | 353 ++++++++++++++++++++++++++++++++++++++++
 src/ellinika/syllabificator.c   | 152 +++++++++++++++++
 src/ellinika/tenses.scm         |  38 +++++
 src/ellinika/xlat.scm           | 113 +------------
 src/ellinika/yyrename           |  97 +++++++++++
 12 files changed, 1206 insertions(+), 366 deletions(-)
 create mode 100644 src/ellinika/elmorph-public.scm
 create mode 100644 src/ellinika/phoneme.y
 create mode 100644 src/ellinika/syllabificator.c
 create mode 100644 src/ellinika/tenses.scm
 create mode 100755 src/ellinika/yyrename

(limited to 'src/ellinika')

diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore
index 9422f9a..11bf478 100644
--- a/src/ellinika/.gitignore
+++ b/src/ellinika/.gitignore
@@ -3,3 +3,5 @@ cgi.scm
 config.scm
 elmorph.scm
 elmorph.x
+phoneme.c
+phoneme.h
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
index 274eea8..b8988d4 100644
--- a/src/ellinika/Makefile.am
+++ b/src/ellinika/Makefile.am
@@ -1,5 +1,5 @@
 # This file is part of Ellinika project.
-# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff
+# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff
 #
 # Ellinika is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -15,7 +15,14 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 guiledir=$(GUILE_SITE)/$(PACKAGE)
-guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm
+guile_DATA=\
+ xlat.scm\
+ cgi.scm\
+ i18n.scm\
+ config.scm\
+ dico.scm\
+ elmorph.scm\
+ tenses.scm
 
 cgi.m4: Makefile
 	echo 'divert(-1)' > $@
@@ -39,11 +46,11 @@ cgi.m4: Makefile
 SUFFIXES = .scm4 .scm .x
 
 .scm4.scm:
-	m4 cgi.m4 $< > $@ 
+	m4 -I$(srcdir) cgi.m4 $< > $@ 
 
 cgi.scm: cgi.scm4 cgi.m4
 config.scm: config.scm4 cgi.m4
-elmorph.scm: elmorph.scm4 cgi.m4
+elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4
 
 pkglib_LTLIBRARIES=libelmorph.la
 
@@ -52,7 +59,9 @@ libelmorph_la_SOURCES = \
  utf8.c\
  elchr.c\
  elmorph.c\
- elmorph.h
+ elmorph.h\
+ phoneme.y\
+ syllabificator.c
 
 DOT_X_FILES = elmorph.x
 
@@ -80,4 +89,6 @@ install-data-hook:
 	 done; \
 	 cd $$here
 
-
+AM_YFLAGS = -d
+YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
+EXTRA_DIST = yyrename elmorph-public.scm
\ No newline at end of file
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c
index 3142b6f..621ac03 100644
--- a/src/ellinika/elchr.c
+++ b/src/ellinika/elchr.c
@@ -23,20 +23,7 @@
 #include <libguile.h>
 #include "utf8.h"
 #include "elmorph.h"
-
-struct char_info_st {
-	unsigned ch;           /* Characters */
-	int flags;             /* Flags (see above) */
-	unsigned base;         /* for vowels - a corresponding vowel with all diacritics
-				  removed */
-	unsigned trans;        /* a counter-case equivalent, i.e. a corresponding uppercase
-				  letter if flags & CHF_LOWER and a corresponding lowerrcase
-				  letter if flags & CHF_UPPER */
-	unsigned numval;       /* Numeric value */
-	unsigned accented[3];  /* For vowels - corresponding accented variant */
-	unsigned deaccent;     /* For accented vowels with diaeresis - corresponding
-				  non-accented character */
-};
+#include "phoneme.h"
 
 /* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */
 struct char_info_st el_basic_ctype[] = {
@@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = {
         { 0x0383, },
         { 0x0384, CHF_MODIFIER }, /* Oxeia */
         { 0x0385, CHF_MODIFIER }, /* dialytika */
-        { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */
+        { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC,
+	  0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */
         { 0x0387, CHF_PUNCT }, /* ano teleia */
-        { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */
-        { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */
-        { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */
+        { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD,
+	  0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */
+        { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE,
+	  0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */
+        { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF,
+	  0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */
         { 0x038B, },
-        { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */
+        { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC,
+	  0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */
         { 0x038D, },
-        { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */
-        { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */
-        { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */
-        { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */
-        { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */
-        { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */
-        { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */
-        { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */
-        { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */
-        { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */
-        { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */
-        { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */
-        { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */
-        { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */
-        { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */
-        { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */
-        { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */
-        { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */
-        { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */
-        { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */
+        { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD,
+	  0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */
+        { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE,
+	  0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */
+        { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0,
+	  0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */
+        { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1,
+	  1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */
+        { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2,
+	  2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */
+        { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3,
+	  3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */
+        { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4,
+	  4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */
+        { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5,
+	  5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */
+        { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6,
+	  7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */
+        { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7,
+	  8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */
+        { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8,
+	  9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */
+        { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9,
+	  10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */
+        { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA,
+	  20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */
+        { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB,
+	  30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */
+        { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC,
+	  40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */
+        { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD,
+	  50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */
+        { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE,
+	  60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */
+        { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF,
+	  70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */
+        { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0,
+	  80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */
+        { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1,
+	  100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */
         { 0x03A2, },
-        { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */
-        { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */
-        { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */
-        { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */
-        { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */
-        { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */
-        { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */
-        { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */
-        { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */
-        { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */
-        { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */
-        { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */
-        { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */
-        { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */
-        { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */
-        { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */
-        { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */
-        { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */
-        { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */
-        { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */
-        { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */
-        { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */
-        { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */
-        { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */
-        { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */
-        { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */
-        { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */
-        { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */ 
+        { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3,
+	  200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */
+        { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4,
+	  300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */
+        { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5,
+	  400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */
+        { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6,
+	  500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */
+        { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7,
+	  600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */
+        { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8,
+	  700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */
+        { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9,
+	  800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */
+        { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA,
+	  0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */
+        { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB,
+	  0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */
+        { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386,
+	  0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */
+        { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388,
+	  0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */
+        { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389,
+	  0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */
+        { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A,
+	  0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */
+        { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0,
+	  0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */
+        { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391,
+	  1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */
+        { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392,
+	  2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */
+        { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393,
+	  3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */
+        { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394,
+	  4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */
+        { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395,
+	  5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */
+        { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396,
+	  7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */
+        { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397,
+	  8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */
+        { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398,
+	  9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */
+        { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399,
+	  10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */
+        { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A,
+	  20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */
+        { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B,
+	  30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */
+        { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C,
+	  40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */
+        { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D,
+	  50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */
+        { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E,
+	  60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */ 
 
-	{ 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */
-        { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */
-        { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */
-        { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */
-        { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */
-        { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */
-        { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */
-        { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */
-        { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */
-        { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */
-        { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */
-        { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */
-        { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */
-        { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */
-        { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */
-        { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */
+	{ 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F,
+	  70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */
+        { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0,
+	  80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */
+        { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1,
+	  100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */
+        { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3,
+	  0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */
+        { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3,
+	  200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */
+        { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4,
+	  300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */
+        { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5,
+	  400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */
+        { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6,
+	  500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */
+        { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7,
+	  600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */
+        { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8,
+	  700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */
+        { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9,
+	  800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */
+        { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA,
+	  0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */
+        { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB,
+	  0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */
+        { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C,
+	  0, {0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* ό */
+        { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E,
+	  0, {0, 0, 0}, 0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */
+        { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F,
+	  0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */
         { 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */
         { 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */
         { 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */
@@ -561,7 +617,7 @@ struct char_info_st el_extended_ctype[] = {
         { 0x1FFF, }
 };
 
-static struct char_info_st *
+struct char_info_st const *
 elchr_info(unsigned ch)
 {
 	if (ch >= 0x0300 && ch <= 0x03FF)
@@ -574,10 +630,24 @@ elchr_info(unsigned ch)
 int
 elchr_flags(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return ci ? ci->flags : 0;
 }
 
+int
+elchr_letter(unsigned ch)
+{	/* Return the `letter' member of CH's character-info entry. */
+	struct char_info_st const *ci = elchr_info(ch);
+	return ci ? ci->letter : 0;	/* 0 when elchr_info() has no entry for CH */
+}	
+	
+int
+elchr_phoneme(unsigned ch)
+{	/* Return the `phoneme' member of CH's character-info entry. */
+	struct char_info_st const *ci = elchr_info(ch);
+	return ci ? ci->phoneme : 0;	/* 0 when elchr_info() has no entry for CH */
+}
+
 int
 elchr_isupper(unsigned ch)
 {
@@ -654,35 +724,35 @@ elchr_isnumeric(unsigned ch)
 unsigned
 elchr_numeric_value(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0;
 }
 
 unsigned
 elchr_toupper(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch;
 }
 	
 unsigned
 elchr_tolower(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch;
 }
 
 unsigned
 elchr_base(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch;
 }
 
 unsigned
 elchr_deaccent(unsigned ch)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	if (ci && (ci->flags & CHF_ACCENT_MASK))
 		return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch;
 	return ch;
@@ -691,28 +761,7 @@ elchr_deaccent(unsigned ch)
 unsigned
 elchr_accent(unsigned ch, int acc)
 {
-	struct char_info_st *ci = elchr_info(ch);
+	struct char_info_st const *ci = elchr_info(ch);
 	return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch;
 }
 
-int
-elchr_diphthong(unsigned ch, int state)
-{
-	struct char_info_st *ci = elchr_info(ch);
-
-	if (!ci || !(ci->flags & CHF_VOWEL))
-		return 0;
-	switch (state) {
-	case 0:
-		if (ci->flags & CHF_DIPH1)
-			state = 1;
-		break;
-	case 1:
-		if (ci->flags & CHF_DIPH2)
-			state = 2;
-		break;
-	default:
-		state = 0;
-	}
-	return state;
-}
diff --git a/src/ellinika/elmorph-public.scm b/src/ellinika/elmorph-public.scm
new file mode 100644
index 0000000..329fe4a
--- /dev/null
+++ b/src/ellinika/elmorph-public.scm
@@ -0,0 +1,106 @@
+;;;; This file is part of Ellinika project.
+;;;; Copyright (C) 2011 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+(use-modules ((srfi srfi-1)))
+
+(define-public (elstr-trim word n)	; WORD: string or elstr; N: signed count
+  (let ((word (if (string? word)	; plain strings are converted to elstr first
+		  (string->elstr word)
+		  word)))
+    (cond
+     ((> n 0)				; N > 0: elstr-slice with N and (length - N)
+      (elstr-slice word n (- (elstr-length word) n)))
+     ((< n 0)				; N < 0: elstr-slice with 0 and (length + N)
+      (elstr-slice word 0 (+ (elstr-length word) n)))
+     (else				; N = 0: WORD (converted if it was a string)
+      word))))
+
+(define-public (elstr-trim! word n)	; like elstr-trim, but calls elstr-slice!
+  (let ((word (if (string? word)	; NOTE(review): a string argument is converted, so the slice acts on the fresh elstr
+		  (string->elstr word)
+		  word)))
+    (cond				; no clause for N = 0: nothing is done and the value is unspecified
+     ((> n 0)
+      (elstr-slice! word n (- (elstr-length word) n)))
+     ((< n 0)
+      (elstr-slice! word 0 (+ (elstr-length word) n))))))
+
+(define-public (phoneme:code ph)	; element 0 of the phoneme descriptor list PH
+  (list-ref ph 0))
+
+(define-public (phoneme:start ph)	; element 1 of the phoneme descriptor list PH
+  (list-ref ph 1))
+
+(define-public (phoneme:count ph)	; element 2 of the phoneme descriptor list PH
+  (list-ref ph 2))
+
+(define-public (phoneme:flags ph)	; element 3 of PH; the predicates below test it with logand
+  (list-ref ph 3))
+
+(define-public (phoneme:accented? ph)	; #t iff any elmorph:accent-mask bit is set (a bare 0 would count as true)
+  (not (zero? (logand (phoneme:flags ph) elmorph:accent-mask))))
+
+(define-public (phoneme:vowel? ph)	; #t iff the elmorph:vowel bit is set (one-argument `=' is always #t)
+  (not (zero? (logand (phoneme:flags ph) elmorph:vowel))))
+
+(define-public (phoneme:consonant? ph)	; #t iff the elmorph:consonant bit is set (one-argument `=' is always #t)
+  (not (zero? (logand (phoneme:flags ph) elmorph:consonant))))
+
+(define-public (phoneme:diphthong? ph)	; #t iff the elmorph:diphthong bit is set (one-argument `=' is always #t)
+  (not (zero? (logand (phoneme:flags ph) elmorph:diphthong))))
+
+
+(define soundslike-transcription-list	; alist: phoneme code (see phoneme:code) -> transcription string
+  '((1 . "a")				; NOTE(review): codes presumably mirror the PHON_* constants -- confirm against elmorph.h
+    (2 . "e")
+    (3 . "i")
+    (4 . "o")
+    (5 . "u")
+    (6 . "b")
+    (7 . "g")
+    (8 . "d")
+    (9 . "z")
+    (10 . "t")
+    (11 . "k")
+    (12 . "l")
+    (13 . "m")
+    (14 . "n")
+    (15 . "x")
+    (16 . "p")
+    (17 . "r")
+    (18 . "s")
+    (19 . "t")
+    (20 . "f")
+    (21 . "h")
+    (22 . "P")
+    (23 . "b")
+    (24 . "d")
+    (25 . "g")
+    (26 . "sm")
+    (27 . "ts")
+    (28 . "tz")
+    (29 . "ngz")
+    (30 . "au")
+    (31 . "eu")))
+
+(define-public (elstr->soundslike word)	; concatenate the transcriptions of WORD's phonemes into one string
+  (let ((phon-map (elstr->phonetic-map word)))
+    (apply string-append
+	   (filter-map			; drops phonemes whose code has no entry (assoc-ref yields #f)
+	    (lambda (elt)
+	      (assoc-ref soundslike-transcription-list (phoneme:code elt)))
+	    phon-map))))
+
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 1831610..5a8acdf 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -26,55 +26,63 @@
 struct elstr {
 	unsigned *str;          /* UTF-8 string */
 	size_t len;             /* Its length */
+	/* The two maps below are NULL until _elstr_syllabize builds them. */
+	struct phoneme *phoneme;   /* Phonetic map (phoneme_count elements) */
+	unsigned phoneme_count;    /* Number of elements in phoneme */ 
+
+	struct syllable *sylmap;   /* Syllable map (nsyl elements) */
 	unsigned nsyl;          /* Number of syllables. */
-	unsigned *sylmap;       /* Syllable map (nsyl elements) */
 	unsigned acc_syl;       /* Number of the accented syllable
 				   (1-based, from the last syllable) */ 
         unsigned acc_pos;       /* Number of the accented character
 				   (0-based, from str[0]) */
+	
 };
 
 scm_t_bits _elstr_tag;
 
 static void
-_elstr_syllabize(struct elstr *elstr)
-{
-	unsigned *sylmap;
-	unsigned i, nsyl = 0, accchr = 0;
-	int accsyl = -1;
-	int dstate = 0;
-	int acc = 0;
-
-	if (!elstr->sylmap) {
-		elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0]));
-		if (!elstr->sylmap)
-			scm_memory_error("_elstr_syllabize");
-	}
-	sylmap = elstr->sylmap;
+_elstr_syllabize(struct elstr *elstr, const char *func_name)
+{
+	unsigned i;
+	/* Rebuild the phoneme and syllable maps; FUNC_NAME is reported on failure. */
+	free(elstr->phoneme); elstr->phoneme = NULL; /* reset at once: the error   */
+	free(elstr->sylmap); elstr->sylmap = NULL;   /* calls below do not return  */
 
-	for (i = 0; i < elstr->len; i++) {
-		int nstate;
-		
-		if (elchr_getaccent(elstr->str[i])) {
-			accsyl = nsyl;
-			accchr = i;
-		}
-		nstate = elchr_diphthong(elstr->str[i], dstate);
-		if (nstate)
-			/* skip */;
-		else if (dstate) 
-			sylmap[nsyl++] = i - 1;
-		else if (elchr_isvowel(elstr->str[i]))
-			sylmap[nsyl++] = i;
-		dstate = nstate;
+	if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count,
+			elstr->str, elstr->len))
+		scm_misc_error(func_name,
+			       "cannot create phonetic map: ~S",
+			       scm_list_1(scm_from_int(errno))); /* args must be a list */
+
+	if (syllable_map(&elstr->sylmap, &elstr->nsyl,
+			 elstr->phoneme, elstr->phoneme_count))
+		scm_misc_error(func_name,
+			       "cannot create syllable map: ~S",
+			       scm_list_1(scm_from_int(errno))); /* args must be a list */
+
+	for (i = elstr->nsyl; i > 0; i--) { /* i = distance from the end, 1-based */
+		if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK)
+			break;
 	}
-	if (dstate)
-		sylmap[nsyl++] = i - 1;
-	else if (nsyl)
-		sylmap[nsyl-1] = i - 1;
-	elstr->nsyl = nsyl;
-	elstr->acc_pos = accchr;
-	elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0;
+	elstr->acc_syl = i;		/* 0 if no syllable carries an accent */
+	for (i = 0; i < elstr->len; i++)
+		if (elchr_getaccent(elstr->str[i]))
+			break;
+	elstr->acc_pos = i;		/* first accented char; len if there is none */
+}
+
+static void
+invalidate_maps(struct elstr *elstr)
+{	/* Discard ELSTR's syllable and phoneme maps and reset its accent info. */
+	free(elstr->sylmap);
+	elstr->sylmap = NULL;	/* _elstr_print shows a NULL sylmap as "[NS]" */
+	elstr->nsyl = 0;
+	free(elstr->phoneme);
+	elstr->phoneme = NULL;
+	elstr->phoneme_count = 0;
+	elstr->acc_pos = 0;
+	elstr->acc_syl = 0;
 }
 
 static SCM
@@ -89,7 +97,7 @@ _elstr_alloc_empty(struct elstr **pelstr)
 }
 
 static SCM
-_elstr_alloc(const char *instr, int syl)
+_elstr_alloc(const char *instr, int syl, const char *func_name)
 {
 	struct elstr *elstr;
 	unsigned *wptr;
@@ -105,8 +113,10 @@ _elstr_alloc(const char *instr, int syl)
 	elstr->nsyl = 0;
 	elstr->acc_syl = 0;
 	elstr->acc_pos = 0;
+	elstr->phoneme = 0;
+	elstr->phoneme_count = 0;
 	if (syl)
-		_elstr_syllabize(elstr);
+		_elstr_syllabize(elstr, func_name);
 		
 	SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
 }
@@ -120,19 +130,34 @@ _elstr_dup(struct elstr *elstr)
 	elnew->str = calloc(elstr->len, sizeof(elnew->str[0]));
 	if (!elnew->str)
 		scm_memory_error("_elstr_dup");
+	memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
+	elnew->len = elstr->len;
+	
+	if (elstr->phoneme) {
+		elnew->phoneme = calloc(elstr->phoneme_count,
+					sizeof(elnew->phoneme[0]));
+		if (!elnew->phoneme) {
+			free(elnew->str);
+			scm_memory_error("_elstr_dup");
+		}
+		memcpy(elnew->phoneme, elstr->phoneme,
+		       sizeof(elstr->phoneme[0]) * elstr->phoneme_count);
+	} else
+		elnew->phoneme = NULL;
+	elnew->phoneme_count = elstr->phoneme_count;
+	
 	if (elstr->sylmap) {
 		elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0]));
 		if (!elnew->sylmap) {
 			free(elnew->str);
 			scm_memory_error("_elstr_dup");
 		}
+		memcpy(elnew->sylmap, elstr->sylmap,
+		       sizeof(elstr->sylmap[0]) * elstr->nsyl);
 	} else
 		elnew->sylmap = NULL;
-	memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
-	elnew->len = elstr->len;
 	elnew->nsyl = elstr->nsyl;
-	memcpy(elnew->sylmap, elstr->sylmap,
-	       sizeof(elstr->sylmap[0]) * elstr->nsyl);
+	
 	elnew->acc_syl = elstr->acc_syl;
 	elnew->acc_pos = elstr->acc_pos;
 	SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
@@ -162,6 +187,7 @@ _elstr_free(SCM smob)
 	struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
 	free(elstr->str);
 	free(elstr->sylmap);
+	free(elstr->phoneme);
 	scm_gc_free(elstr, sizeof(struct elstr), "elstr");
 	return 0;
 }
@@ -170,34 +196,31 @@ static int
 _elstr_print(SCM smob, SCM port, scm_print_state *pstate)
 {
 	struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
-	int i, j, an;
+	int i, j;
 	char *s;
 	
 	scm_puts("#<elstr ", port);
 	if (elstr->sylmap) {
 		scm_puts("``", port);
-		an = elstr->nsyl - elstr->acc_syl;
-		if (an == 0)
-			scm_puts("[", port);
-		for (i = j = 0; i < elstr->len; i++) {
-			char r[6];
-			int n;
-			
-			if (i == elstr->sylmap[j] + 1) {
-				if (j == an)
-					scm_puts("]", port);
+		for (i = 0; i < elstr->nsyl; i++) {
+			size_t start = elstr->sylmap[i].char_start;
+			if (i)
 				scm_puts("-", port);
-				if (++j == an)
-					scm_puts("[", port);
+			if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+				scm_puts("[", port);
+			for (j = 0; j < elstr->sylmap[i].char_count; j++) {
+				char r[6];
+				int n;
+
+				n = utf8_wctomb(r, elstr->str[start+j]);
+				if (n == -1)
+					continue;
+				r[n] = 0;
+				scm_puts(r, port);
 			}
-			n = utf8_wctomb(r, elstr->str[i]);
-			if (n == -1)
-				continue;
-			r[n] = 0;
-			scm_puts(r, port);
+			if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+				scm_puts("]", port);
 		}
-		if (j == an)
-			scm_puts("]", port);
 	} else {
 		scm_puts("[NS] ``", port);
 		for (i = j = 0; i < elstr->len; i++) {
@@ -238,7 +261,7 @@ force_elstr(struct elstr **ep, SCM scm, int sylopt,
 		
 		SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
 		str = scm_to_locale_string(scm);
-		newscm = _elstr_alloc(str, sylopt);
+		newscm = _elstr_alloc(str, sylopt, func_name);
 		free(str);
 		if (newscm == SCM_EOL)
 			scm_misc_error(func_name,
@@ -336,13 +359,10 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
 			       "cannot get syllable #~S: not enough syllables: ~S",
 			       scm_list_2(el, n));
 	num = elstr->nsyl - num;
-	if (num == 0)
-		start = 0;
-	else
-		start = elstr->sylmap[num - 1] + 1;
 	
-	return scm_cons(scm_from_uint(start),
-			scm_from_uint(elstr->sylmap[num]));
+	return scm_list_3(scm_from_uint(elstr->sylmap[num].char_start),
+			  scm_from_uint(elstr->sylmap[num].char_count),
+			  scm_from_int(elstr->sylmap[num].flags));
 }
 #undef FUNC_NAME
 
@@ -388,12 +408,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
 			       "cannot get syllable #~S: not enough syllables: ~S",
 			       scm_list_2(el, n));
 	num = elstr->nsyl - num;
-	if (num == 0)
-		start = 0;
-	else
-		start = elstr->sylmap[num - 1] + 1;
-	if (utf8_wc_to_mbstr(elstr->str + start,
-			     elstr->sylmap[num] - start + 1,
+	if (utf8_wc_to_mbstr(elstr->str + elstr->sylmap[num].char_start,
+			     elstr->sylmap[num].char_count,
 			     &s))
 		scm_misc_error(FUNC_NAME,
 			       "cannot convert elstr to Scheme",
@@ -514,8 +530,7 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
 	}
 	for (i = 0; i < elstr->len; i++)
 		elstr->str[i] = elchr_deaccent(elstr->str[i]);
-	elstr->acc_pos = 0;
-	elstr->acc_syl = 0;
+	invalidate_maps(elstr);
 	return scm;
 }
 
@@ -544,9 +559,10 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
 {
 	struct elstr *elstr;
 	unsigned i;
-	unsigned acc_num, num, len, start;
+	unsigned acc_num, num, start;
 	SCM scm;
-	int dstate;
+	unsigned pos;
+	struct phoneme *phoneme = NULL;
 	
 	if (destructive) {
 		SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
@@ -556,15 +572,11 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
 
 	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
 	num = scm_to_uint(n);
-	if (num > elstr->nsyl)
+	if (num == 0 || num > elstr->nsyl) /* valid numbers are 1..nsyl */
 		scm_misc_error(func_name,
 			       "cannot set accent on syllable #~S: not enough syllables: ~S",
 			       scm_list_2(n, el));
 	acc_num = elstr->nsyl - num;
-	if (acc_num == 0)
-		start = 0;
-	else
-		start = elstr->sylmap[acc_num - 1] + 1;
 
 	if (destructive)
 		scm = SCM_UNSPECIFIED;
@@ -576,25 +588,38 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
         /* Clear all accents */
 	for (i = 0; i < elstr->len; i++)
 		elstr->str[i] = elchr_deaccent(elstr->str[i]);
-	len = elstr->sylmap[acc_num] - start + 1;
-	dstate = 0;
-	for (i = start; i <= start + len; i++) {
-		int nstate;
-		
-		if (!elchr_isvowel(elstr->str[i])) {
-			if (dstate) {
-				--i;
-				break;
-			}
-			continue;
-		}
-		nstate = elchr_diphthong(elstr->str[i], dstate);
-		if (!nstate)
+	for (i = 0; i < elstr->nsyl; i++)
+		elstr->sylmap[i].flags &= ~CHF_ACCENT_MASK;
+	for (i = 0; i < elstr->phoneme_count; i++)
+		elstr->phoneme[i].flags &= ~CHF_ACCENT_MASK;
+	
+	start = elstr->sylmap[acc_num].phoneme_start;
+	pos = 0;
+	for (i = 0; i < elstr->sylmap[acc_num].phoneme_count; i++) {
+		struct phoneme *ph = elstr->phoneme + start + i;
+		if (ph->flags & CHF_CONSONANT)
+			/* skip */ ;
+		else if (ph->flags & CHF_DIPHTHONG) {
+			phoneme = ph;
+			pos = ph->start + 1;
+			break;
+		} else if (ph->flags & CHF_VOWEL) {
+			phoneme = ph;
+			pos = ph->start;
 			break;
-		dstate = nstate;
+		}
 	}
-	elstr->str[i] = elchr_accent(elstr->str[i], CHF_OXEIA);
+	if (!phoneme)
+		scm_misc_error(func_name,
+			       "cannot set accent on syllable #~S of ~S: "
+			       "INTERNAL ERROR",
+			       scm_list_2(n, el));
+	phoneme->flags |= CHF_OXEIA;
+	elstr->sylmap[acc_num].flags |= CHF_OXEIA;
+	elstr->str[pos] = elchr_accent(elstr->str[pos], CHF_OXEIA);
+	
 	elstr->acc_syl = num;
+	elstr->acc_pos = pos;
 	return scm;
 }
 
@@ -652,7 +677,8 @@ _elstr_set_accent_on_char(SCM el, SCM n, int destructive, const char *func_name)
 		elstr->str[i] = elchr_deaccent(elstr->str[i]);
 	
 	elstr->str[num] = elchr_accent(elstr->str[num], CHF_OXEIA);
-	_elstr_syllabize(elstr);
+	invalidate_maps(elstr);
+	_elstr_syllabize(elstr, func_name);
 	return scm;
 }
 
@@ -716,11 +742,31 @@ static struct deftab {
 	{ CHF_LOWER,       "elmorph:lower" },
 	{ CHF_UPPER,       "elmorph:upper" },
 	{ CHF_NUMERIC,     "elmorph:numeric" },
-
-	{ CHF_DIPH1,       "elmorph:diph1" },
-	{ CHF_DIPH2,       "elmorph:diph2" }
+	{ CHF_DIPHTHONG,   "elmorph:diphthong" },
 };
-
+
+SCM_DEFINE_PUBLIC(scm_elstr_char_phoneme, "elstr-char-phoneme",
+		  2, 0, 0,
+		  (SCM el, SCM n),
+"Returns a phoneme code of the Nth char in EL\n")
+#define FUNC_NAME s_scm_elstr_char_phoneme
+{
+	struct elstr *elstr;
+	int num;
+	
+	force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
+	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
+	num = scm_to_int(n);
+	if (num < 0) /* a negative N counts back from the end of EL */
+		num += elstr->len;
+	if (num < 0 || num >= elstr->len) /* still negative: too far back */
+		scm_misc_error(FUNC_NAME,
+			       "cannot get character #~S: not enough characters: ~S",
+			       scm_list_2(n, el)); /* ~S order: index, string */
+	return scm_from_uint(elchr_phoneme(elstr->str[num]));
+}
+#undef FUNC_NAME
+
 SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
 		  (SCM string),
 "Convert STRING to uppercase\n")
@@ -818,7 +864,8 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
 		SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
 		elstr = (struct elstr*) SCM_CDR(el);
 	} else
-		scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+		scm = force_elstr(&elstr, el, 0, SCM_ARG1, func_name);
+	invalidate_maps(elstr);
 	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
 	SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
 	num = scm_to_int(n);
@@ -842,7 +889,7 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
 		memmove(elstr->str, elstr->str + num,
 			sizeof(elstr->str[0]) * len);
 	elstr->len = len;
-	_elstr_syllabize(elstr);
+	_elstr_syllabize(elstr, func_name);
 	return scm;
 }
 
@@ -869,7 +916,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
 SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
 		  2, 0, 0,
 		  (SCM word, SCM needle),
-"")
+"Returns position of NEEDLE in the WORD")
 #define FUNC_NAME s_scm_elstr_index
 {
 	struct elstr *elstr, *ep;
@@ -920,10 +967,10 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
 SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
 		  0, 0, 1,
 		  (SCM rest),
-"")
+"Concatenates arguments.\n")
 #define FUNC_NAME s_scm_elstr_append
 {
-	SCM ret = _elstr_alloc("", 0);
+	SCM ret = _elstr_alloc("", 0, FUNC_NAME);
 	struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
 
 	for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
@@ -933,11 +980,48 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
 		force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
 		_elstr_concat(elstr, elt, FUNC_NAME);
 	}
-	_elstr_syllabize(elstr);
+	_elstr_syllabize(elstr, FUNC_NAME);
 	return ret;
 }
 #undef FUNC_NAME
+
+static SCM	/* Scheme list (CODE START COUNT FLAGS) describing PHONEME */
+elmorph_scm_from_phoneme(struct phoneme *phoneme)
+{
+	return scm_list_4(scm_from_int(phoneme->code),
+			  scm_from_uint(phoneme->start),
+			  scm_from_uint(phoneme->count),
+			  scm_from_int(phoneme->flags)); /* CHF_ bitmask */
+}
 
+SCM_DEFINE_PUBLIC(scm_elstr__phonetic_map, "elstr->phonetic-map",
+		  1, 0, 0,
+		  (SCM word),
+"Converts WORD to a phonetic map.\n")
+#define FUNC_NAME s_scm_elstr__phonetic_map
+{
+	struct elstr *elstr;
+	struct phoneme *phmap;
+	size_t phlen, i;
+	SCM head = SCM_EOL, tail = SCM_EOL;
+		
+	force_elstr(&elstr, word, 1, SCM_ARG1, FUNC_NAME);
+	phmap = elstr->phoneme;
+	phlen = elstr->phoneme_count;
+	for (i = 0; i < phlen; i++) { /* append one 4-element list per phoneme */
+		SCM elt = scm_cons(elmorph_scm_from_phoneme(phmap + i),
+				   SCM_EOL);
+		if (scm_is_null(head))
+			head = tail = elt;
+		else {
+			SCM_SETCDR(tail, elt);
+			tail = elt;
+		}
+	}
+	/* phmap still belongs to elstr; _elstr_free releases it. */
+	return head;
+}
+#undef FUNC_NAME
 
 void
 scm_init_ellinika_elmorph_module()
diff --git a/src/ellinika/elmorph.h b/src/ellinika/elmorph.h
index eacbde5..2399b8a 100644
--- a/src/ellinika/elmorph.h
+++ b/src/ellinika/elmorph.h
@@ -33,10 +33,82 @@
 #define CHF_UPPER         0x02000
 #define CHF_NUMERIC       0x04000
 
-#define CHF_DIPH1         0x10000
-#define CHF_DIPH2         0x20000
+#define CHF_DIPHTHONG     0x08000
 
+/* Phonemes */
+#define PHON_A     1 /* α */ 
+#define PHON_E     2 /* ε αι */ 
+#define PHON_I     3 /* ι η υ ει οι υι */ 
+#define PHON_O     4 /* ο ω */ 
+#define PHON_U     5 /* ου */ 
+
+#define PHON_BH    6 /* β */
+#define PHON_GH    7 /* γ */
+#define PHON_DH    8 /* δ */
+#define PHON_Z     9 /* ζ */
+#define PHON_TH   10 /* θ */ 
+#define PHON_K    11 /* κ */ 
+#define PHON_L    12 /* λ */ 
+#define PHON_M    13 /* μ */ 
+#define PHON_N    14 /* ν */ 
+#define PHON_X    15 /* ξ */ 
+#define PHON_P    16 /* π */ 
+#define PHON_R    17 /* ρ */ 
+#define PHON_S    18 /* σ */ 
+#define PHON_T    19 /* τ */ 
+#define PHON_F    20 /* φ */ 
+#define PHON_H    21 /* χ */
+#define PHON_PS   22 /* ψ */
+
+#define PHON_B    23 /* μπ */
+#define PHON_D    24 /* ντ */
+#define PHON_G    25 /* γγ γκ γχ */
+#define PHON_ZM   26 /* σμ */
+#define PHON_TS   27 /* τσ */
+#define PHON_DZ   28 /* τζ */
+#define PHON_NGZ  29 /* νγζ */
+
+#define PHON_AV   30 /* αυ */
+#define PHON_EV   31 /* ευ */
+
+#define _PHON_MAX 32
+
+struct phoneme {
+	int code;               /* Phoneme code (a PHON_ constant) */
+	unsigned start;         /* Index of its first character in the word */
+	unsigned count;         /* Number of characters in it */
+	int flags;              /* CHF_ bits: accent, vowel/consonant, diphthong */
+};
+
+struct syllable {
+	unsigned char_start;         /* Index of its first character */
+	unsigned char_count;         /* Number of characters in it */
+	unsigned phoneme_start;      /* Index of its first phoneme */
+	unsigned phoneme_count;      /* Number of phonemes in it */
+	int flags;                   /* Accent bits (CHF_ACCENT_MASK) */
+};
+
+struct char_info_st {
+	unsigned ch;           /* Characters */
+	int flags;             /* Flags (see above) */
+	unsigned base;         /* for vowels - a corresponding vowel with
+				  all diacritics removed */
+	unsigned trans;        /* a counter-case equivalent, i.e. a
+				  corresponding uppercase letter if
+				  flags & CHF_LOWER and a corresponding
+				  lowercase letter if flags & CHF_UPPER */
+	unsigned numval;       /* Numeric value */
+	unsigned accented[3];  /* For vowels - corresponding accented variant */
+	unsigned deaccent;     /* For accented vowels with diaeresis -
+				  corresponding non-accented character */
+	int letter;            /* Letter code */
+	int phoneme;           /* Phoneme code */
+};
+
+struct char_info_st const *elchr_info(unsigned ch);
 int elchr_flags(unsigned ch);
+int elchr_letter(unsigned ch);
+int elchr_phoneme(unsigned ch);
 int elchr_isupper(unsigned ch);
 int elchr_islower(unsigned ch);
 int elchr_getaccent(unsigned ch);
@@ -55,8 +127,12 @@ unsigned elchr_tolower(unsigned ch);
 unsigned elchr_base(unsigned ch);
 unsigned elchr_deaccent(unsigned ch);
 unsigned elchr_accent(unsigned ch, int acc);
-int elchr_diphthong(unsigned ch, int state);
 
 
 int elmorph_thema_aoristoy(unsigned *word, size_t len,
 			   unsigned **thema, size_t *tlen);
+
+int phoneme_map(struct phoneme **pph, size_t *plen,
+		unsigned *word, size_t len);
+int syllable_map(struct syllable **psyl, size_t *plen,
+		 struct phoneme *phon, size_t nphon);
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index f916d1c..ede4d50 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -20,27 +20,4 @@
  "LIBDIR/libguile-elmorph-v-VERSION"
  "scm_init_ellinika_elmorph_module")
 
-(define-public (elstr-trim word n)
-  (let ((word (if (string? word)
-		  (string->elstr word)
-		  word)))
-    (cond
-     ((> n 0)
-      (elstr-slice word n (- (elstr-length word) n)))
-     ((< n 0)
-      (elstr-slice word 0 (+ (elstr-length word) n)))
-     (else
-      word))))
-
-(define-public (elstr-trim! word n)
-  (let ((word (if (string? word)
-		  (string->elstr word)
-		  word)))
-    (cond
-     ((> n 0)
-      (elstr-slice! word n (- (elstr-length word) n)))
-     ((< n 0)
-      (elstr-slice! word 0 (+ (elstr-length word) n))))))
-
-
-
+include([elmorph-public.scm])
diff --git a/src/ellinika/phoneme.y b/src/ellinika/phoneme.y
new file mode 100644
index 0000000..353d175
--- /dev/null
+++ b/src/ellinika/phoneme.y
@@ -0,0 +1,353 @@
+/* This file is part of Ellinika project.
+   Copyright (C) 2011 Sergey Poznyakoff
+
+   Ellinika is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Ellinika is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>	
+#include "utf8.h"
+#include "elmorph.h"
+
+static struct phoneme *phoneme_base;
+static size_t phoneme_max;
+static size_t phoneme_count;
+static int error_state;
+
+#define PHONEME_MAP_INITIAL_ALLOC 16
+
+static void
+phoneme_append(struct phoneme *phoneme) /* copy *PHONEME onto phoneme_base[] */
+{
+	if (error_state) /* an earlier append failed; ignore further input */
+		return;
+	
+	if (phoneme_max == phoneme_count) { /* array full or not yet allocated */
+		struct phoneme *np;
+		size_t nsize;
+
+		if (!phoneme_max) 
+			nsize = PHONEME_MAP_INITIAL_ALLOC;
+		else {
+			nsize = 2 * phoneme_max;
+			if (nsize < phoneme_max) { /* size_t wrapped around */
+				error_state = ENOMEM;
+				return;
+			}
+		}
+		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
+		if (!np) { /* phoneme_base stays valid; phoneme_map frees it */
+			error_state = ENOMEM;
+			return;
+		}
+		phoneme_max = nsize;
+		phoneme_base = np;
+	}
+	phoneme_base[phoneme_count++] = *phoneme;
+}
+	
+#define DIPHTHONG(a,b,pc,fl) do { /* fold digraph A B into phoneme A */	\
+		(a).count = 2;						\
+		(a).code = (pc);					\
+		(a).flags = (fl) | CHF_DIPHTHONG |			\
+			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
+	} while (0)
+
+%}
+%union {
+	struct phoneme phoneme;
+};
+
+%token <phoneme> LETTER_A            1
+%token <phoneme> LETTER_A_ACC        2
+%token <phoneme> LETTER_B            3
+%token <phoneme> LETTER_G            4
+%token <phoneme> LETTER_D            5
+%token <phoneme> LETTER_E            6
+%token <phoneme> LETTER_E_ACC        7
+%token <phoneme> LETTER_Z            8
+%token <phoneme> LETTER_H            9
+%token <phoneme> LETTER_H_ACC       10
+%token <phoneme> LETTER_TH          11
+%token <phoneme> LETTER_I           12 
+%token <phoneme> LETTER_I_ACC       13
+%token <phoneme> LETTER_I_TREMA     14
+%token <phoneme> LETTER_I_TREMA_ACC 15
+%token <phoneme> LETTER_K           16
+%token <phoneme> LETTER_L           17
+%token <phoneme> LETTER_M           18
+%token <phoneme> LETTER_N           19
+%token <phoneme> LETTER_KS          20
+%token <phoneme> LETTER_OMICRON     21
+%token <phoneme> LETTER_OMICRON_ACC 22
+%token <phoneme> LETTER_P           23
+%token <phoneme> LETTER_R           24
+%token <phoneme> LETTER_S           25
+%token <phoneme> LETTER_T           26
+%token <phoneme> LETTER_Y           27
+%token <phoneme> LETTER_Y_ACC       28
+%token <phoneme> LETTER_Y_TREMA     29
+%token <phoneme> LETTER_Y_TREMA_ACC 30
+%token <phoneme> LETTER_F           31
+%token <phoneme> LETTER_X           32 
+%token <phoneme> LETTER_PS          33
+%token <phoneme> LETTER_OMEGA       34
+%token <phoneme> LETTER_OMEGA_ACC   35
+
+%type <phoneme> monophthong diphthong phoneme
+
+%%
+input   : phoneme
+          {
+		  phoneme_append(&$1);
+	  }
+        | input phoneme
+          {
+		  phoneme_append(&$2);
+	  }
+        ;
+
+phoneme : monophthong
+        | diphthong
+        ;
+
+monophthong:
+          LETTER_A
+	| LETTER_A_ACC
+        | LETTER_B
+        | LETTER_G
+        | LETTER_D
+        | LETTER_E
+	| LETTER_E_ACC
+        | LETTER_Z
+        | LETTER_H
+	| LETTER_H_ACC
+        | LETTER_TH
+        | LETTER_I
+	| LETTER_I_ACC
+	| LETTER_I_TREMA
+	| LETTER_I_TREMA_ACC
+        | LETTER_K
+        | LETTER_L
+        | LETTER_M
+        | LETTER_N
+        | LETTER_KS
+        | LETTER_OMICRON
+	| LETTER_OMICRON_ACC
+        | LETTER_P
+        | LETTER_R
+        | LETTER_S
+        | LETTER_T
+        | LETTER_Y
+	| LETTER_Y_ACC
+	| LETTER_Y_TREMA
+	| LETTER_Y_TREMA_ACC
+        | LETTER_F
+        | LETTER_X
+        | LETTER_PS
+        | LETTER_OMEGA
+        | LETTER_OMEGA_ACC
+	;
+	
+diphthong:
+          LETTER_A LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_A LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_E LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_E LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_Y LETTER_I
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_Y LETTER_I_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_Y
+          {
+		  DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_OMICRON LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+		  $$ = $1;
+	  }
+	| LETTER_M LETTER_P
+          {
+		  DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_N LETTER_T
+          {
+		  DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_G
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_K
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_X
+          {
+		  DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_S LETTER_M
+          {
+		  DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_T LETTER_S
+          {
+		  DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_T LETTER_Z
+          {
+		  DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+	| LETTER_G LETTER_KS
+          {
+		  DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
+		  $$ = $1;
+	  }
+        | LETTER_A LETTER_Y
+          {
+		  DIPHTHONG($1, $2, PHON_AV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_A LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_AV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_E LETTER_Y
+          {
+		  DIPHTHONG($1, $2, PHON_EV, 0);
+		  $$ = $1;
+	  }		  
+        | LETTER_E LETTER_Y_ACC
+          {
+		  DIPHTHONG($1, $2, PHON_EV, 0);
+		  $$ = $1;
+	  }		  
+        ;
+
+%%
+
+static unsigned *input_base;
+static size_t input_len;
+static size_t input_pos;
+
+#define ISALPHA(ci) ((ci) && ci->letter)
+
+#define PHONEME_FLAG_MASK \
+	(CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)
+
+int
+yylex()	/* lexer for the phoneme grammar: one token per letter of the word */
+{
+	unsigned c;
+	struct char_info_st const *ci;
+
+	do { /* skip characters that have no letter code */
+		if (input_pos == input_len)
+			return 0; /* end of word: tell the parser input is over */
+		c = input_base[input_pos++];
+		ci = elchr_info(c);
+	} while (!ISALPHA(ci));
+
+	yylval.phoneme.code = ci->phoneme;
+	yylval.phoneme.start = input_pos - 1; /* index of C in the word */
+	yylval.phoneme.count = 1;
+	yylval.phoneme.flags = ci->flags & PHONEME_FLAG_MASK;
+	return ci->letter; /* presumably equals a LETTER_ token number */
+}
+
+int
+yyerror(const char *s)	/* parser error hook: report S on stderr and abort */
+{
+	fprintf(stderr, "\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
+	abort(); /* never returns */
+}
+
+/* Build the phoneme map of WORD (LEN chars): on success return 0 and store
+   a malloc'ed array in *PPH and its size in *PLEN, else return errno. */
+int
+phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
+{
+	int rc;
+
+	input_base = word;
+	input_len = len;
+	input_pos = 0;
+	phoneme_base = NULL;
+	phoneme_max = 0;
+	phoneme_count = 0;
+	error_state = 0;
+	rc = yyparse();
+	if (rc || error_state) {
+		free(phoneme_base);
+		errno = rc ? EINVAL : error_state;
+		return errno;
+	}
+	if (phoneme_count < phoneme_max) {
+		/* Trim the array; if that fails, keep the larger one. */
+		struct phoneme *np = realloc(phoneme_base, phoneme_count *
+					     sizeof(phoneme_base[0]));
+		if (np)
+			phoneme_base = np;
+	}
+	*pph = phoneme_base;
+	*plen = phoneme_count;
+	return 0;
+}
diff --git a/src/ellinika/syllabificator.c b/src/ellinika/syllabificator.c
new file mode 100644
index 0000000..c4105ec
--- /dev/null
+++ b/src/ellinika/syllabificator.c
@@ -0,0 +1,152 @@
+/* This file is part of Ellinika project.
+   Copyright (C) 2011 Sergey Poznyakoff
+
+   Ellinika is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Ellinika is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include "utf8.h"
+#include "elmorph.h"
+
+struct syllabificator {
+	struct syllable *syl;
+	size_t syl_count;
+	size_t syl_max;
+	struct phoneme *phon;
+	size_t phon_cur;
+	size_t phon_max;
+	int err;
+};
+
+#define SYL_FLAG_MASK (CHF_ACCENT_MASK)
+
+#define ISIOTA(ph)							\
+	((ph).code == PHON_I && (ph).count == 1 &&			\
+	 !((ph).flags & (CHF_ACCENT_MASK|CHF_TREMA)))
+
+/* Store the syllable starting at phoneme sp->phon_cur in sp->syl[].  Returns
+   0 on success, 1 at end of input or on allocation failure (sets sp->err). */
+int
+next_syllable(struct syllabificator *sp)
+{
+	struct syllable *syl;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 1;
+
+	if (sp->syl_count == sp->syl_max) {
+		struct syllable *newsyl;
+		size_t newmax = sp->syl_max + 16;
+
+		newsyl = realloc(sp->syl, sizeof(newsyl[0]) * newmax);
+		if (!newsyl) {
+			sp->err = ENOMEM; /* realloc need not set errno */
+			return 1;
+		}
+		sp->syl = newsyl;
+		sp->syl_max = newmax;
+	}
+	syl = sp->syl + sp->syl_count++;
+	syl->char_start = sp->phon[sp->phon_cur].start;
+	syl->char_count = sp->phon[sp->phon_cur].count;
+	syl->phoneme_start = sp->phon_cur;
+	syl->phoneme_count = 1;
+	syl->flags = sp->phon[sp->phon_cur].flags;
+	sp->phon_cur++;
+
+	/* A diphthong forms a single syllable. */
+	if ((syl->flags & CHF_DIPHTHONG) && !(syl->flags & CHF_CONSONANT))
+		return 0;
+
+	/* If the syllable begins with a consonant, it includes all
+	   subsequent consonants up to the first vowel. */
+	if (syl->flags & CHF_CONSONANT) {
+		for (; sp->phon_cur < sp->phon_max &&
+			     (sp->phon[sp->phon_cur].flags & CHF_CONSONANT);
+		     sp->phon_cur++) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+		}
+	} else if (sp->phon_cur < sp->phon_max &&
+		   (sp->phon[sp->phon_cur].flags & CHF_VOWEL) &&
+		   !ISIOTA(sp->phon[sp->phon_cur-1]))
+		return 0; /* V-V boundary */
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (ISIOTA(sp->phon[sp->phon_cur])) {
+		/* incorporate iota */
+		syl->char_count += sp->phon[sp->phon_cur].count;
+		syl->phoneme_count++;
+		if (++sp->phon_cur == sp->phon_max)
+			return 0; /* the iota was the last phoneme */
+	}
+
+	if (sp->phon[sp->phon_cur].flags & CHF_VOWEL)
+		syl->flags |= sp->phon[sp->phon_cur].flags & CHF_ACCENT_MASK;
+
+	syl->char_count += sp->phon[sp->phon_cur].count;
+	syl->phoneme_count++;
+	sp->phon_cur++;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (sp->phon[sp->phon_cur - 1].flags & CHF_VOWEL) {
+		/* If next phoneme is a consonant, incorporate it into the
+		   current syllable */
+		if ((sp->phon[sp->phon_cur].flags & CHF_CONSONANT) &&
+		    (sp->phon_cur + 1 == sp->phon_max ||
+		     (sp->phon[sp->phon_cur + 1].flags & CHF_CONSONANT))) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+			sp->phon_cur++;
+		}
+	}
+	return 0;
+}
+
+	
+/* Split the NPHON phonemes in PHON into syllables.  Returns 0 and stores a
+   malloc'ed table in *PSYL and its length in *PLEN, or an errno code. */
+int
+syllable_map(struct syllable **psyl, size_t *plen,
+	     struct phoneme *phon, size_t nphon)
+{
+	struct syllabificator sd;
+
+	sd.syl = NULL;
+	sd.syl_count = 0;
+	sd.syl_max = 0;
+	sd.phon = phon;
+	sd.phon_cur = 0;
+	sd.phon_max = nphon;
+	sd.err = 0;
+
+	while (next_syllable(&sd) == 0) /* reduce flags to the accent bits */
+		sd.syl[sd.syl_count-1].flags &= SYL_FLAG_MASK;
+	
+	if (sd.err) {
+		free(sd.syl);
+		return sd.err;
+	}
+
+	*psyl = sd.syl;
+	*plen = sd.syl_count;
+	return 0;
+}
diff --git a/src/ellinika/tenses.scm b/src/ellinika/tenses.scm
new file mode 100644
index 0000000..f830870
--- /dev/null
+++ b/src/ellinika/tenses.scm
@@ -0,0 +1,38 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2011 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(define-module (ellinika tenses))
+
+(define-public ellinika-tense-list	; alist: key -> list of Greek tense names
+  (list
+   (cons "ind"				; presumably the indicative mood
+	 (list "Ενεστώτας"		; present
+	       "Παρατατικός"		; imperfect
+	       "Μέλλοντας διαρκείας"	; future continuous
+	       "Αόριστος"		; aorist (simple past)
+	       "Παρακείμενος"		; present perfect
+	       "Υπερσυντέλικος"		; pluperfect
+	       "Συντελεσμένος μέλλοντας" ; future perfect
+	       "Μέλλοντας στιγμιαίος"))	; simple future
+   (cons "sub"				; presumably the subjunctive mood
+	 (list "Ενεστώτας"
+	       "Αόριστος"
+	       "Παρακείμενος"))
+   (cons "imp"				; presumably the imperative mood
+	 (list "Ενεστώτας"
+	       "Αόριστος"
+	       "Παρακείμενος"))))
+	  
\ No newline at end of file
diff --git a/src/ellinika/xlat.scm b/src/ellinika/xlat.scm
index c51edaa..63af468 100644
--- a/src/ellinika/xlat.scm
+++ b/src/ellinika/xlat.scm
@@ -16,6 +16,8 @@
 ;;;;
 (define-module (ellinika xlat))
 
+(use-modules (ellinika elmorph))
+
 (define greek-postfix-map
   (list
    (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ")
@@ -107,7 +109,7 @@
 ;;;		    +-----------------------------+
 ;;;
 ;;;
-;;; The followin escape sequences are recognized:
+;;; The following escape sequences are recognized:
 ;;;
 ;;;   '\ks' -> 'ξ' 
 ;;;   '\ps' -> 'ψ'
@@ -195,115 +197,8 @@
 
 
 
-(define transcription-list
-  (list
-   (cons  "μπ" "b" )
-   (cons  "γγ" "g" )
-   (cons  "γκ" "g" )
-   (cons  "γχ" "g" )
-   (cons  "ντ" "d" )
-   (cons  "αι" "e" )
-   (cons  "αί" "e" )
-   (cons  "αυ" "au")
-   (cons  "αύ" "au")
-   (cons  "ου" "ou")
-   (cons  "ού" "ou")
-   (cons  "ευ" "eu")
-   (cons  "εύ" "eu")
-   (cons  "οι" "i" )
-   (cons  "ει" "i" )
-   (cons  "εί" "i" )
-   (cons  "υι" "i" )
-   
-   (cons  "α" "a" )
-   (cons  "Α" "a" )
-   (cons  "Ά" "a" )
-   (cons  "ά" "a" )
-   (cons  "β" "b" )
-   (cons  "Β" "b" )
-   (cons  "γ" "g" )
-   (cons  "Γ" "g" )
-   (cons  "δ" "d" )
-   (cons  "Δ" "d" )
-   (cons  "ε" "e" )
-   (cons  "Ε" "e" )
-   (cons  "Έ" "e" )
-   (cons  "έ" "e" )
-   (cons  "ζ" "z" )
-   (cons  "Ζ" "z" )
-   (cons  "η" "i" )
-   (cons  "Η" "i" )
-   (cons  "Ή" "i" )
-   (cons  "ή" "i" )
-   (cons  "θ" "t" )
-   (cons  "Θ" "t" )
-   (cons  "ι" "i" )
-   (cons  "Ι" "i" )
-   (cons  "Ί" "i" )
-   (cons  "ί" "i" )
-   (cons  "κ" "k" )
-   (cons  "Κ" "k" )
-   (cons  "λ" "l" )
-   (cons  "Λ" "l" )
-   (cons  "μ" "m" )
-   (cons  "Μ" "m" )
-   (cons  "ν" "n" )
-   (cons  "Ν" "n" )
-   (cons  "ξ" "x" )
-   (cons  "Ξ" "x" )
-   (cons  "ο" "o" )
-   (cons  "Ο" "o" )
-   (cons  "Ό" "o" )
-   (cons  "ό" "o" )
-   (cons  "π" "p" )
-   (cons  "Π" "p" )
-   (cons  "ρ" "r" )
-   (cons  "Ρ" "r" )
-   (cons  "σ" "s" )
-   (cons  "Σ" "s" )
-   (cons  "ς" "s" )
-   (cons  "τ" "t" )
-   (cons  "Τ" "t" )
-   (cons  "υ" "i" )
-   (cons  "Υ" "i" )
-   (cons  "Ύ" "i" )
-   (cons  "ύ" "i" )
-   (cons  "φ" "f" )
-   (cons  "Φ" "f" )
-   (cons  "χ" "h" )
-   (cons  "Χ" "h" )
-   (cons  "ψ" "P" )
-   (cons  "Ψ" "P" )
-   (cons  "ω" "o" )
-   (cons  "Ω" "o" )
-   (cons  "Ώ" "o" )
-   (cons  "ώ" "o" )
-   (cons  "Ϊ" "i" )
-   (cons  "ΐ" "i" )
-   (cons  "Ϋ" "i" )
-   (cons  "ΰ" "i" )))
-
 (define-public (ellinika:sounds-like str)
-  (let ((len (string-length str)))
-    (do ((i 0)
-	 (sl '()))
-	((= i len) (apply string-append (reverse sl)))
-      (set! sl (cons 
-		(cond
-		 ((and (<= (+ i 4) len)
-		       (assoc (substring str i (+ i 4)) transcription-list)) =>
-		       (lambda (x) 
-			 (set! i (+ i 4))
-			 (cdr x)))
-		 ((and (<= (+ i 2) len)
-                       (assoc (substring str i (+ i 2)) transcription-list)) =>
-		  (lambda (x)
-		    (set! i (+ i 2))
-		    (cdr x)))
-		 (else
-		  (set! i (1+ i))
-		  (substring str (- i 1) i)))
-		sl)))))
+  (elstr->soundslike str))
   
 ;;;; End of ellinika.scm
 
diff --git a/src/ellinika/yyrename b/src/ellinika/yyrename
new file mode 100755
index 0000000..996abf2
--- /dev/null
+++ b/src/ellinika/yyrename
@@ -0,0 +1,97 @@
+#! /bin/sh
+# Rename yy.* identifiers to avoid name clashes.  This file is part of Grecs.
+# Copyright (C) 2011 Sergey Poznyakoff
+#
+# Grecs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Grecs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Grecs.  If not, see <http://www.gnu.org/licenses/>.
+
+# Usage: yyrename [-f "OUTFILE [OUTFILE...]"] COMMAND INFILE
+# Makefile.am:
+#  LEXCOMPILE = yyrename -f $(LEX_OUTPUT_ROOT).c \
+#                        '$(LEX) $(LFLAGS) $(AM_LFLAGS)'
+#  YACCCOMPILE = yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
+#
+# This script runs COMMAND with INFILE as its argument and scans
+# OUTFILEs for identifiers starting with 'yy'.  It then renames these
+# identifiers by replacing 'yy' with the selected prefix.
+#
+# The prefix is looked up in the file yytrans, located in the INFILE's
+# directory.  If this file does not exist, the prefix is constructed
+# by concatenating the string 'ellinika_' and the ``root name''.  The root
+# name is built by removing '-lex.l' or '-gram.y' from the base name.
+# If the latter does not end in any of these, the root name
+# is constructed by removing the suffix from the base name.
+#
+# The yytrans file is a line-oriented lookup table.  Empty lines are
+# ignored, usual UNIX comment lines are honored.  The remaining lines
+# shall consist of two words separated by any amount of whitespace.
+# The first word is a look-up key, the second one provides a translation
+# (yy replacement) for that key.
+#
+# Two look-ups are tried: first the base name and then the root name.
+# If both result in a non-empty replacement, the former is preferred
+# over the latter.
+#
+# The -f option supplies a list of output file names generated by COMMAND.
+# If not supplied, the following defaults are used: y.tab.c and y.tab.h, if
+# INFILE ends in '.y', and lex.yy.c, if it ends in '.l'.  If INFILE does not
+# end in any of these suffixes, error is reported.
+#
+# BUGS: Any occurrence of 'yy' is replaced, not only 'yy' prefixes.
+#
+# Parse the optional -f option; exactly COMMAND and INFILE must remain.
+case $1 in
+-f) files=$2; shift; shift;;
+esac
+
+if test $# -ne 2; then
+  # Single quotes keep the literal double quotes in the usage text.
+  echo >&2 'usage: yyrename [-f "OUTFILE [OUTFILE...]"] COMMAND INFILE'
+  exit 1
+fi
+
+# Work on the file name proper so an INFILE without a directory part still
+# yields a base/root name (the old '.*/' patterns required a slash).
+name=`basename "$2"`
+base=`expr "$name" : '\(.*\)\.[ly]'`
+dir=`dirname "$2"`
+case $2 in
+*.y) test -z "$files" && files="y.tab.c y.tab.h"
+     root=`expr "$name" : '\(.*\)-gram\.y'`;;
+*.l) test -z "$files" && files=lex.yy.c
+     root=`expr "$name" : '\(.*\)-lex\.l'`;;
+*)   test -n "$files" || { echo >&2 "$0: suffix unknown, files must be given (use -f)"; exit 1; }
+     root=$base;;
+esac
+
+# Look up a custom prefix in yytrans (base name wins over root name).  The
+# keys are passed with -v rather than spliced into the awk program text, and
+# the path is quoted, so unusual file or directory names cannot break it.
+pfx=
+if test -f "$dir/yytrans"; then
+  pfx=`awk -v base="$base" -v root="$root" '
+{ sub(/#.*$/,"") }
+NF == 2 && ($1 "") == base { exact=$2 }
+NF == 2 && ($1 "") == root { other=$2 }
+END { print exact ? exact : other ? other : "" }' "$dir/yytrans"`
+fi
+# No translation found: fall back to ellinika_ROOT, mapping '.' and '-' to '_'.
+test -n "$pfx" || pfx=ellinika_`echo "$root" | tr .- __`
+
+# Run COMMAND on INFILE: COMMAND is re-split by eval, INFILE stays one word.
+eval "$1 \"\$2\"" || exit $?
+
+for file in $files; do
+  test -f "$file" || continue   # e.g. no y.tab.h when yacc runs without -d
+  mv "$file" "$file.tmp" && sed "/^#line/b;/^#  *[0-9]/b;s/yy/$pfx/g;s/YY/$pfx/g" "$file.tmp" > "$file" && rm -f "$file.tmp" || exit 1
+done
-- 
cgit v1.2.1