summary refs log tree commit diff about
author Sergey Poznyakoff <gray@gnu.org.ua> 2011-06-10 20:04:53 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua> 2011-06-10 21:34:20 (GMT)
commit a1a5b7ddd6c3c0532c37551b24fd573a554ac181 (patch) (side-by-side diff)
tree f86f3572c77dc986bb2dfb65619ac4bc35c83847
parent 2bae7da012e2125762855ce014e63345ecbbbb18 (diff)
download ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.gz
ellinika-a1a5b7ddd6c3c0532c37551b24fd573a554ac181.tar.bz2
Fix syllabification.
* configure.ac: Add AC_PROG_YACC. * src/ellinika/phoneme.y: New file. * src/ellinika/yyrename: New file. * src/ellinika/syllabificator.c: New file. * src/ellinika/.gitignore: Update. * src/ellinika/elchr.c (char_info_st): Move to header. (el_basic_ctype): (elchr_info): Remove static qualifier. Return a pointer to const. (elchr_letter,elchr_phoneme): New functions. (elchr_diphthong): Remove. * src/ellinika/elmorph.c (elstr)<phoneme,phoneme_count>: New members. (_elstr_syllabize): Rewrite. (invalidate_maps): New static function. (_elstr_alloc): Initialize new fields, take function name as argument, for diagnostic purposes. (_elstr_print): Rewrite. (deftab): Update. (elstr-syllable-prop,elstr-syllable) (_elstr_set_accent,_elstr_set_accent_on_char): Rewrite. (elstr-char-phoneme,elstr->phonetic-map): New functions. * src/ellinika/elmorph.h (CHF_DIPH1,CHF_DIPH2): Remove. (CHF_DIPHTHONG): New flag. (PHON_.*): New constants. (phoneme,syllable): New structures. (char_info_st)<letter,phoneme>: New members. (elchr_info,elchr_letter) (elchr_phoneme,phoneme_map) (syllable_map): New protos. (elchr_diphthong): Remove protos. * src/ellinika/elmorph.scm4: Move public definitions to elmorph-public.scm; include it here. * src/ellinika/xlat.scm (ellinika:sounds-like): Rewrite as a wrapper over elstr->soundslike. Describe Milesian numbers. * style.css (img.ellinika-img): New class. * xml/lingua.conf.in (IMAGE): New tag. * xml/pl/alfabhta.xml: Describe Milesian numbers. Various fixes. * data/dbverb.struct: Fix a typo in flection. Use 'sub' theme for pas/sub/aor. * data/irregular-verbs.xml: Add more verbs. * scm/conjugator.scm: Various fixes. * scm/verbop.scm: Accept empty mood and voice declarations.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- configure.ac 1
-rw-r--r-- data/dbverb.struct 4
-rw-r--r-- data/irregular-verbs.xml 81
-rw-r--r-- scm/conj.scm 335
-rw-r--r-- scm/conjugator.scm 129
-rw-r--r-- scm/elmorph.c 10
-rw-r--r-- scm/elmorph.h 39
-rw-r--r-- scm/verbop.scm 95
-rw-r--r-- scm/xlat.scm 280
-rw-r--r-- src/ellinika/.gitignore 2
-rw-r--r-- src/ellinika/Makefile.am 23
-rw-r--r-- src/ellinika/elchr.c 273
-rw-r--r-- src/ellinika/elmorph-public.scm 106
-rw-r--r-- src/ellinika/elmorph.c 308
-rw-r--r-- src/ellinika/elmorph.h 82
-rw-r--r-- src/ellinika/elmorph.scm4 25
-rw-r--r-- src/ellinika/phoneme.y 353
-rw-r--r-- src/ellinika/syllabificator.c 152
-rw-r--r-- src/ellinika/tenses.scm 38
-rw-r--r-- src/ellinika/xlat.scm 113
-rwxr-xr-x src/ellinika/yyrename 97
-rw-r--r-- style.css 6
-rw-r--r-- xml/lingua.conf.in 9
-rw-r--r-- xml/pl/alfabhta.xml 204
24 files changed, 1588 insertions, 1177 deletions
diff --git a/configure.ac b/configure.ac
index 233ec46..01b27a2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -28,6 +28,7 @@ AC_CONFIG_MACRO_DIR([m4])
## * Checks for programs.
AC_PROG_CC
+AC_PROG_YACC
AM_PROG_LIBTOOL
M4='`cd $(top_srcdir); pwd`/scripts/missing --run m4'
diff --git a/data/dbverb.struct b/data/dbverb.struct
index 06745c8..d51633b 100644
--- a/data/dbverb.struct
+++ b/data/dbverb.struct
@@ -65,7 +65,7 @@ INSERT INTO verbflect VALUES
(8, NULL, "ε", NULL, NULL, "ετε", NULL),
(9, NULL, "ε", NULL, NULL, "τε", NULL),
(11, "ομαι", "εσαι", "εται", "όμαστε", "εστε", "ονται"),
-(12, "όμουν", "όσουν", "όταν", "όμαστε", "όμαστε", "ονταν"),
+(12, "όμουν", "όσουν", "όταν", "όμαστε", "όσαστε", "ονταν"),
(15, "ώ", "είς", "εί", "ούμε", "είτε", "ούν(ε)"),
(17, NULL, "ου", NULL, NULL, NULL, NULL),
(18, NULL, NULL, NULL, NULL, "είτε", NULL),
@@ -123,7 +123,7 @@ INSERT INTO conjugation VALUES
("A", "pas", "ind", "Μέλλοντας στιγμιαίος", 'aor', NULL, 15, "111111", "θα", NULL, NULL),
-- Υποτακτική
("A", "pas", "sub", "Ενεστώτας", 'pres', NULL, 11, "333333", "να", NULL, NULL),
-("A", "pas", "sub", "Αόριστος", 'aor', NULL, 15, "111221", "να", NULL, NULL),
+("A", "pas", "sub", "Αόριστος", 'sub', NULL, 15, "111221", "να", NULL, NULL),
("A", "pas", "sub", "Παρακείμενος", 'synt', NULL, 0, NULL, "να", "έχω", "Παρατατικός"),
-- Προστακτική
("A", "pas", "imp", "Ενεστώτας", 'pres', NULL, 11, "-3--3-", "να", NULL, NULL),
diff --git a/data/irregular-verbs.xml b/data/irregular-verbs.xml
index ca2c7d1..d056f7d 100644
--- a/data/irregular-verbs.xml
+++ b/data/irregular-verbs.xml
@@ -56,28 +56,7 @@
</imp>
</act>
- <pas>
- <ind>
- <t name="Ενεστώτας"/>
- <t name="Παρατατικός"/>
- <t name="Μέλλοντας διαρκείας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- <t name="Υπερσυντέλικος"/>
- <t name="Συντελεσμένος μέλλοντας"/>
- <t name="Μέλλοντας στιγμιαίος"/>
- </ind>
- <sub>
- <t name="Ενεστώτας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- </sub>
- <imp>
- <t name="Ενεστώτας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- </imp>
- </pas>
+ <pas/>
</v>
<v>
@@ -109,28 +88,8 @@
<t name="Παρακείμενος"/>
</imp>
</act>
- <pas>
- <ind>
- <t name="Ενεστώτας"/>
- <t name="Παρατατικός"/>
- <t name="Μέλλοντας διαρκείας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- <t name="Υπερσυντέλικος"/>
- <t name="Συντελεσμένος μέλλοντας"/>
- <t name="Μέλλοντας στιγμιαίος"/>
- </ind>
- <sub>
- <t name="Ενεστώτας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- </sub>
- <imp>
- <t name="Ενεστώτας"/>
- <t name="Αόριστος" />
- <t name="Παρακείμενος"/>
- </imp>
- </pas>
+
+ <pas/>
</v>
<v>
@@ -337,6 +296,31 @@
</act>
</v>
+ <v>
+ <a>έρχομαι</a>
+ <c>A</c>
+ <suffix></suffix>
+ <accmap>000000</accmap>
+ <act>
+ <root theme="sub">ερθ</root>
+ <ind/>
+ <sub>
+ <t name="Ενεστώτας"/>
+ <t name="Παρατατικός"/>
+ <t name="Μέλλοντας διαρκείας"/>
+ <t name="Παρακείμενος"/>
+ <t name="Υπερσυντέλικος"/>
+ <t name="Συντελεσμένος μέλλοντας"/>
+ <t name="Μέλλοντας στιγμιαίος"/>
+ </sub>
+ <imp/>
+ </act>
+ <pas>
+ <root theme="aor">ήρθ</root>
+ <root theme="sub">ερθ</root>
+ </pas>
+ </v>
+
<!-- FIXME
έρχομαι
κάθομαι
@@ -414,15 +398,6 @@
</v>
<v>
- <a></a>
- <c>A</c>
- <act>
- <root theme="aor"></root>
- <root theme="sub"></root>
- </act>
- </v>
-
- <v>
<a>ξέρω</a>
<c>A</c>
<augment>η</augment>
diff --git a/scm/conj.scm b/scm/conj.scm
deleted file mode 100644
index 3c2e96a..0000000
--- a/scm/conj.scm
+++ b/dev/null
@@ -1,335 +0,0 @@
-(use-modules (xlat))
-
-(define (active-aorist-root present-root)
- (let ((last-syllable (car present-root)))
- (case (car last-syllable)
- ((#\@)
- (set-car! last-syllable #\s))
- ((#\z)
- (set-car! last-syllable #\s)) ;; FIXME: not always
- ((#\k)
- (cond
- ((and (not (null? (cdr last-syllable)))
- (char=? (cadr last-syllable) #\s))
- (set! last-syllable (cons #\x (cddr last-syllable))))
- (else
- (set-car! last-syllable #\x))))
- ((#\n)
- (cond
- ((and (not (null? (cdr last-syllable)))
- (char=? (cadr last-syllable) #\h))
- (set! last-syllable (cons #\x (cddr last-syllable))))
- (else
- (set-car! last-syllable #\s))))
- ((#\g #\h)
- (set-car! last-syllable #\x))
- ((#\p #\b #\f)
- (set-car! last-syllable #\*))
- ((#\y)
- (cond
- ((and (not (null? (cdr last-syllable)))
- (or (char=? (cadr last-syllable) #\a)
- (char=? (cadr last-syllable) #\e)))
- (set! last-syllable (cons #\* (cdr last-syllable))))))
- (else
- (throw 'grammar "Dont't know how to handle " present-root)))
- (cons last-syllable (cdr present-root))))
-
-
-(define (aor str)
- (active-aorist-root (cdr (greek-normalize str))))
-
-
-(define (add-flection word flect)
- (let* ((syl-list (append (cdr flect) (cdr word)))
- (acc-pos (or (car flect)
- (and (car word)
- (+ (car word) (length (cdr flect)))))))
- (cons
- (if (and acc-pos (> acc-pos 3))
- 3
- acc-pos)
- syl-list)))
-
-; FIXME: Should return real augment!
-(define (get-augment root)
- #\e)
-
-; FIXME: Does not handle verbs with internal augment
-(define (create-paratatikos-A root flect)
- (let ((word (add-flection root flect)))
- (cons 3
- (if (< (length (cdr word)) 3)
- (append (cdr word) (list (list (get-augment root))))
- (cdr word)))))
-
-(define (create-paratatikos-B root flect)
- (add-flection root (add-flection (greek->xlat "ούσ")
- (cons
- #f
- (cdr flect)))))
-
-(define (create-prostaktiki-enestota root flect)
- (let ((w (add-flection root flect)))
- (cond
- ((car flect)
- (cons (car flect) (cdr w)))
- ((>= (length (cdr w)) 3)
- (cons 3 (cdr w)))
- ((not (car w))
- (cons (length (cdr w)) (cdr w)))
- (else
- w))))
-
-;; Verbal form Accessors
-(define (vtab-root tab)
- (car tab))
-
-(define (vtab-analizer tab)
- (list-ref tab 2))
-
-(define (vtab-composer tab)
- (or (list-ref tab 1)
- add-flection))
-
-(define (vtab-flection tab person)
- (list-ref tab (+ person 2)))
-
-;;
-(define (flect-list root composer analizer . p)
- (append
- (list root composer analizer)
- (map
- (lambda (pers)
- (map
- greek->xlat
- pers))
- p)))
-
-;;
-(define verbal-flect-table
- (list
- (cons "Α"
- (list
- (cons "ενεργητηκή"
- (list
- (cons "οριστική"
- (list
- (cons "ενεστώτας"
- (flect-list
- #:present-root
- #f
- #f
- (list "ω")
- (list "εις")
- (list "ει")
- (list "ουμε")
- (list "ετε")
- (list "ουν" "ουνε")))
- (cons "παρατατικός"
- (flect-list
- #:present-root
- create-paratatikos-A
- #f
- (list "α")
- (list "ες")
- (list "ε")
- (list "αμε")
- (list "ατε")
- (list "αν" "ανε")))
- (cons "μέλλοντας διαρκείας"
- (flect-list
- #:present-root
- #f ;; FIXME: "θα"
- #f
- (list "ω")
- (list "εις")
- (list "ει")
- (list "ουμε")
- (list "ετε")
- (list "ουν" "ουνε")))))
-
- (cons "προστακτική"
- (list
- (cons "ενεστώτας"
- (flect-list
- #:present-root
- create-prostaktiki-enestota
- #f
- '()
- (list "ε")
- '()
- '()
- (list "ετε")
- '())))) ))))
- (cons "Β1"
- (list
- (cons "ενεργητηκή"
- (list
- (cons "οριστική"
- (list
- (cons "ενεστώτας"
- (flect-list
- #:present-root
- #f
- #f
- (list "άω")
- (list "άς")
- (list "ά" "άει")
- (list "άμε")
- (list "άτε")
- (list "ούν" "ούνε")))
- (cons "παρατατικός"
- (flect-list
- #:present-root
- create-paratatikos-B
- #f
- (list "α")
- (list "ες")
- (list "ε")
- (list "αμε")
- (list "ατε")
- (list "αν" "ανε")))
- (cons "μέλλοντας διαρκείας"
- (flect-list
- #:present-root
- #f ;; FIXME: "θα"
- #f
- (list "άω")
- (list "άς")
- (list "ά" "άει")
- (list "άμε")
- (list "άτε")
- (list "ούν" "ούνε"))) ))
-
- (cons "προστακτική"
- (list
- (cons "ενεστώτας"
- (flect-list
- #:present-root
- create-prostaktiki-enestota
- #f
- '()
- (list "α")
- '()
- '()
- (list "άτε")
- '())))) ))))
- (cons "Β2"
- (list
- (cons "ενεργητηκή"
- (list
- (cons "οριστική"
- (list
- (cons "ενεστώτας"
- (flect-list
- #:present-root
- #f
- #f
- (list "ώ")
- (list "είς")
- (list "εί")
- (list "ούμε")
- (list "είτε")
- (list "ούν" "ούνε")))
- (cons "παρατατικός"
- (flect-list
- #:present-root
- create-paratatikos-B
- #f
- (list "α")
- (list "ες")
- (list "ε")
- (list "αμε")
- (list "ατε")
- (list "αν" "ανε")))
- (cons "μέλλοντας διαρκείας"
- (flect-list
- #:present-root
- #f ;; FIXME: "θα"
- #f
- (list "ώ")
- (list "είς")
- (list "εί")
- (list "ούμε")
- (list "είτε")
- (list "ούν" "ούνε"))) ))))))))
-
-(define (verbal-flect-table-lookup table form-list)
- (if (null? form-list)
- table
- (let ((entry (assoc (car form-list) table)))
- (if entry
- (verbal-flect-table-lookup (cdr entry) (cdr form-list))
- #f))))
-
-(define (find-verbal-form . rest)
- (verbal-flect-table-lookup verbal-flect-table rest))
-
-
-(define (conjugate root pers forms)
- (let ((tab (verbal-flect-table-lookup verbal-flect-table forms)))
- (if (not tab)
- (throw 'grammar "Verbal form not found " forms))
- (let ((func (vtab-composer tab))
- (root-selector (vtab-root tab)))
-
- (map
- (lambda (x)
- (func root x))
- (vtab-flection tab pers)))))
-
-(define (conjugate-v root pers . forms)
- (conjugate root pers forms))
-
-;; Test
-
-(define (conj-all root . rest)
- (map (lambda (x)
- (display x)(display "/"))
- rest)
- (newline)
- (do ((i 1 (1+ i)))
- ((> i 6) #f)
- (map
- (lambda (x)
- (display (xlat->greek x))(display ","))
- (conjugate (greek->xlat root) i rest))
- (newline)))
-
-
-;(conj-all "βεβαίων" "Α" "ενεργητηκή" "οριστική" "ενεστώτας")
-;(newline)
-
-;(conj-all "βεβαίων" "Α" "ενεργητηκή" "οριστική" "παρατατικός")
-;(newline)
-
-;(conj-all "ντυν" "Α" "ενεργητηκή" "προστακτική" "ενεστώτας")
-;(newline)
-;(conj-all "βεβαίων" "Α" "ενεργητηκή" "προστακτική" "ενεστώτας")
-;(newline)
-;(conj-all "διαβάζ" "Α" "ενεργητηκή" "προστακτική" "ενεστώτας")
-;(newline)
-
-;(conj-all "λύν" "Α" "ενεργητηκή" "οριστική" "παρατατικός")
-;(newline)
-
-;(conj-all "νικ" "Β1" "ενεργητηκή" "οριστική" "ενεστώτας")
-;(newline)
-
-;(conj-all "νικ" "Β1" "ενεργητηκή" "οριστική" "παρατατικός")
-;(newline)
-
-;(conj-all "νικ" "Β1" "ενεργητηκή" "προστακτική" "ενεστώτας")
-;(newline)
-
-;(conj-all "θεωρ" "Β2" "ενεργητηκή" "οριστική" "ενεστώτας")
-;(newline)
-
-;(conj-all "θεωρ" "Β2" "ενεργητηκή" "οριστική" "παρατατικός")
-;(newline)
-
-;(display (xlat->greek (cons #f (active-aorist-root (cdr (greek->xlat "ιατρευ"))))))
-;(newline)
-
-
diff --git a/scm/conjugator.scm b/scm/conjugator.scm
index 7b2a4a6..c2c2171 100644
--- a/scm/conjugator.scm
+++ b/scm/conjugator.scm
@@ -64,6 +64,7 @@
"ησ"
#f
#f))
+ ;; FIXME: deponentia?
(else
(list "A"
#f
@@ -136,6 +137,15 @@ WHERE verb='" (force-string verb) "'"
"SELECT root FROM irregular_root \
WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'")))
+(define (verb-A-root verb)
+ (cond
+ ((elstr-suffix? verb "ω")
+ (elstr-trim verb -1))
+ ((elstr-suffix? verb "ομαι")
+ (elstr-trim verb -4))
+ (else
+ (error "cannot handle ~A~%" verb))))
+
(define (complement-verb-info vinfo verb voice thema)
; (format #t "COMPLEMENT ~S~%" thema)
(let ((elverb (string->elstr verb))
@@ -149,7 +159,7 @@ WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'")))
(verb-info-set! #:attested vinfo 'root)
(caar result))
((string=? (verb-info #:conj vinfo) "A")
- (let ((root (elstr-trim elverb -1)))
+ (let ((root (verb-A-root elverb)))
(cond
((string=? thema "pres")
(verb-info-set! #:attested vinfo 'root)
@@ -284,9 +294,6 @@ WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mood='" mood
syl
(+ (- len syl) 1))))
-(define (set-accented-syllable-0! str nsyl)
- (elstr-set-accent! str (+ (- (elstr-number-of-syllables str) nsyl) 1)))
-
(define (apply-flect conj vinfo verb)
(let ((root (verb-info #:root vinfo))
(suffix (let ((s (conj-info #:suffix conj)))
@@ -311,28 +318,19 @@ WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mood='" mood
(let* ((rs (force-elstr root))
(suf (elstr-deaccent (elstr-append suffix flect)))
(result (elstr-append rs suf))
- (acc-syl (let ((n (accented-syllable-0 rs)))
- (if (= 0 n)
- (accented-syllable-0 verb)
- n))))
- (if (> (elstr-number-of-syllables result) 1)
- (set-accented-syllable-0! result acc-syl))
- (let ((acc-syl (elstr-accented-syllable result)))
- (cond
- ((and (= acc-syl 1)
- (= (elstr-number-of-syllables result) 1))
- (elstr-deaccent result))
- ((> acc-syl 3)
- (let ((nsyl (elstr-number-of-syllables suf)))
- (cond
- ((= nsyl 1)
- result)
- ((= nsyl 3)
- (elstr-set-accent result 3))
- (else
- (elstr-set-accent result 2)))))
- (else
- result)))))
+ (nsyl (elstr-number-of-syllables result))
+ (acc-syl (+ (- nsyl
+ (let ((n (accented-syllable-0 rs)))
+ (if (= 0 n)
+ (accented-syllable-0 verb)
+ n))) 1)))
+ (cond
+ ((= nsyl 1)
+ (elstr-deaccent result))
+ ((> acc-syl 3)
+ (elstr-set-accent result 3)) ; FIXME
+ (else
+ (elstr-set-accent result acc-syl)))))
((char=? acc #\f)
(elstr-append
(elstr-deaccent (elstr-append root suffix))
@@ -397,17 +395,22 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mood='" mood
(let* ((verb-conj (conjugate verb "act" "sub" "Αόριστος" #:nopart))
(form (list-ref verb-conj 2))
(part (conj-info #:particle conj)))
- (append
- (map
- (lambda (aux)
- (elstr->string
- (if part
- (elstr-append part " " aux " " form)
- (elstr-append aux " " form))))
- (conjugation:table (conjugate (conj-info #:aux conj) "act" "ind"
- (conj-info #:auxtense conj))))
- (list (verb-info #:conj vinfo)
- (conjugation:attested verb-conj)))))
+ (cond
+ (form
+; (format #t "FORM ~A FROM ~A~%" form verb-conj);;FIXME
+ (append
+ (map
+ (lambda (aux)
+ (elstr->string
+ (if part
+ (elstr-append part " " aux " " form)
+ (elstr-append aux " " form))))
+ (conjugation:table (conjugate (conj-info #:aux conj) "act" "ind"
+ (conj-info #:auxtense conj))))
+ (list (verb-info #:conj vinfo)
+ (conjugation:attested verb-conj))))
+ (else
+ #f))))
(else
; (format #t "CONJ ~S~%" conj)
(complement-verb-info vinfo verb voice (conj-info #:thema conj))
@@ -416,21 +419,34 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mood='" mood
(verb-info #:attested vinfo)))))))))
(define (conjugation:table conj)
- (list-head conj 6))
+ (cond
+ ((not conj)
+ #f)
+ (else
+ (list-head conj 6))))
(define (conjugation:class conj)
- (list-ref conj 6))
+ (cond
+ ((not conj)
+ #f)
+ (else
+ (list-ref conj 6))))
(define (conjugation:attested conj)
- (list-ref conj 7))
+ (cond
+ ((not conj)
+ #f)
+ (else (list-ref conj 7))))
(define (empty-conjugation? conj)
- (call-with-current-continuation
- (lambda (return)
- (for-each
- (lambda (x)
- (if x
- (return #f)))
- conj)
- (return #t))))
+ (or
+ (not conj)
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (x)
+ (if x
+ (return #f)))
+ conj)
+ (return #t)))))
;;
;(display (verb-info "βρίσκω"))
@@ -525,8 +541,17 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mood='" mood
;; (test-conjugation "βλέπω" "act" "ind" "Αόριστος")
;; (test-conjugation "βλέπω" "act" "sub" "Αόριστος")
;; (test-conjugation "βλέπω" "act" "imp" "Αόριστος")
-(test-conjugation "πηγαίνω" "act" "ind" "Μέλλοντας στιγμιαίος")
-(test-conjugation "πίνω" "act" "ind" "Αόριστος")
-(test-conjugation "πίνω" "act" "sub" "Αόριστος")
-(test-conjugation "πίνω" "act" "imp" "Αόριστος")
+;; (test-conjugation "πηγαίνω" "act" "ind" "Μέλλοντας στιγμιαίος")
+;; (test-conjugation "πίνω" "act" "ind" "Αόριστος")
+;; (test-conjugation "πίνω" "act" "sub" "Αόριστος")
+;; (test-conjugation "πίνω" "act" "imp" "Αόριστος")
+
+(test-conjugation "έρχομαι" "pas" "ind" "Παρατατικός")
+(test-conjugation "έρχομαι" "pas" "ind" "Αόριστος")
+(test-conjugation "έρχομαι" "pas" "sub" "Αόριστος")
+;; (test-conjugation "έρχομαι" "pas" "ind" "Συντελεσμένος Μέλλοντας")
+;; (test-conjugation "έρχομαι" "act" "sub" "Αόριστος" )
+;; (test-conjugation "έρχομαι" "pas" "sub" "Αόριστος" )
+;; (test-conjugation "έρχομαι" "pas" "ind" "Ενεστώτας");FIXME!
+;; (test-conjugation "έρχομαι" "pas" "ind" "Υπερσυντέλικος")
(newline)
diff --git a/scm/elmorph.c b/scm/elmorph.c
deleted file mode 100644
index 87598d9..0000000
--- a/scm/elmorph.c
+++ b/dev/null
@@ -1,10 +0,0 @@
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-#include <errno.h>
-#include <stdlib.h>
-#include <libguile.h>
-#include "utf8.h"
-#include "elchr.h"
-
-
diff --git a/scm/elmorph.h b/scm/elmorph.h
deleted file mode 100644
index 6bc19ca..0000000
--- a/scm/elmorph.h
+++ b/dev/null
@@ -1,39 +0,0 @@
-#define CHF_OXEIA 1
-#define CHF_PERISPWMENH 2
-#define CHF_BAREIA 3
-
-#define CHF_ACCENT_MASK 0x000f
-
-#define CHF_TREMA 0x0010
-
-#define CHF_VOWEL 0x0020
-#define CHF_CONSONANT 0x0040
-#define CHF_SEMIVOWEL 0x0080
-#define CHF_PUNCT 0x0100
-#define CHF_SYMBOL 0x0200
-#define CHF_MODIFIER 0x0400
-#define CHF_ARCHAIC 0x0800
-#define CHF_LOWER 0x1000
-#define CHF_UPPER 0x2000
-#define CHF_NUMERIC 0x4000
-
-int elchr_flags(unsigned ch);
-int elchr_isupper(unsigned ch);
-int elchr_islower(unsigned ch);
-int elchr_getaccent(unsigned ch);
-int elchr_istrema(unsigned ch);
-int elchr_isvowel(unsigned ch);
-int elchr_isconsonant(unsigned ch);
-int elchr_issemivowel(unsigned ch);
-int elchr_ispunct(unsigned ch);
-int elchr_issymbol(unsigned ch);
-int elchr_ismodifier(unsigned ch);
-int elchr_isarchaic(unsigned ch);
-int elchr_isnumeric(unsigned ch);
-unsigned elchr_numeric_value(unsigned ch);
-unsigned elchr_toupper(unsigned ch);
-unsigned elchr_tolower(unsigned ch);
-unsigned elchr_base(unsigned ch);
-unsigned elchr_deaccent(unsigned ch);
-unsigned elchr_accent(unsigned ch, int acc);
-
diff --git a/scm/verbop.scm b/scm/verbop.scm
index bb54126..f3ecc33 100644
--- a/scm/verbop.scm
+++ b/scm/verbop.scm
@@ -2,7 +2,7 @@
(xmltools xmltrans)
(ellinika elmorph)
(gamma sql)
- (ellinika xlat)
+ (ellinika tenses)
(ice-9 getopt-long))
(define cleanup-option #f)
@@ -51,12 +51,12 @@
(debug 100 rest)
(let ((q (apply format (cons #f rest))))
(if verbose-option
- (format #t "QUERY: ~S\n" q))
+ (format #t "QUERY: ~A\n" q))
(cond
(connection
(let ((res (sql-query connection q)))
(if verbose-option
- (format #t "RESULT: ~S\n" res))
+ (format #t "RESULT: ~A\n" res))
res))
(else
#f))))
@@ -197,6 +197,15 @@
conj)
(return #t))))
+(define (insert-individual-verb voice mood tense ident)
+ (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \
+VALUES (~A,~A,~A,~A,~A);~%"
+ (verb-get-sql #:verb)
+ (sql-val voice)
+ (sql-val mood)
+ (sql-val tense)
+ (number->string ident)))
+
(define (flush-mood mood vstr)
(if (eq? (car mood) #:root)
(let ((val (cdr mood)))
@@ -207,45 +216,51 @@ VALUES (~A,~A,~A,~A);~%"
(sql-val (car val))
(sql-val (cdr val))))
(let ((mood-str (mood-key->string (car mood))))
- (for-each
- (lambda (p)
- (let ((key (car p)))
- (debug 1 "flush-mood: " p)
- (cond
- ((empty-conjugation? (cdr p))
- (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \
-VALUES (~A,~A,~A,~A,~A);~%"
- (verb-get-sql #:verb)
- (sql-val vstr)
- (sql-val mood-str)
- (sql-val key)
- "0"))
- (else
- (let ((num (next-flect-ident))
- (l (cdr p)))
- (run-query "INSERT INTO verbflect VALUES (~A,~A,~A,~A,~A,~A,~A);~%"
- num
- (sql-val (list-ref l 0))
- (sql-val (list-ref l 1))
- (sql-val (list-ref l 2))
- (sql-val (list-ref l 3))
- (sql-val (list-ref l 4))
- (sql-val (list-ref l 5)))
- (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \
-VALUES (~A,~A,~A,~A,~A);~%"
- (verb-get-sql #:verb)
- (sql-val vstr)
- (sql-val mood-str)
- (sql-val key)
- num) )))))
- (cdr mood)))))
+ (let ((lst (cdr mood)))
+ (if (null? lst)
+ (for-each
+ (lambda (tense)
+ (insert-individual-verb vstr mood-str tense 0))
+ (assoc-ref ellinika-tense-list mood-str))
+
+ (for-each
+ (lambda (p)
+ (let ((key (car p)))
+ (debug 1 "flush-mood: " p)
+ (cond
+ ((empty-conjugation? (cdr p))
+ (insert-individual-verb vstr mood-str key 0))
+ (else
+ (let ((num (next-flect-ident))
+ (l (cdr p)))
+ (run-query
+ "INSERT INTO verbflect VALUES (~A,~A,~A,~A,~A,~A,~A);~%"
+ num
+ (sql-val (list-ref l 0))
+ (sql-val (list-ref l 1))
+ (sql-val (list-ref l 2))
+ (sql-val (list-ref l 3))
+ (sql-val (list-ref l 4))
+ (sql-val (list-ref l 5)))
+ (insert-individual-verb vstr mood-str key num) )))))
+ lst))))))
(define (flush-voice vstr conj-list)
- (if conj-list
- (for-each
- (lambda (mood)
- (flush-mood mood vstr))
- conj-list)))
+ (cond
+ ((null? conj-list)
+ (for-each
+ (lambda (vp)
+ (let ((mood (car vp)))
+ (for-each
+ (lambda (tense)
+ (insert-individual-verb vstr mood tense 0))
+ (cdr vp))))
+ ellinika-tense-list))
+ (conj-list
+ (for-each
+ (lambda (mood)
+ (flush-mood mood vstr))
+ conj-list))))
;;; Flush verb definition to the database
(define (verb-flush)
diff --git a/scm/xlat.scm b/scm/xlat.scm
deleted file mode 100644
index 37028b3..0000000
--- a/scm/xlat.scm
+++ b/dev/null
@@ -1,280 +0,0 @@
-(define-module (xlat))
-
-(define xlist-latin
- (list
- (list #\a "α" "Α" "ά" "Ά")
- (list #\b "β" "Β")
- (list #\g "γ" "Γ")
- (list #\d "δ" "Δ")
- (list #\e "ε" "Ε" "έ" "Έ")
- (list #\z "ζ" "Ζ")
- (list #\% "η" "Η" "ή" "Ή")
- (list #\@ "θ" "Θ")
- (list #\i "ι" "Ι" "ί" "Ί")
- (list #\k "κ" "Κ")
- (list #\l "λ" "Λ")
- (list #\m "μ" "Μ")
- (list #\n "ν" "Ν")
- (list #\x "ξ" "Ξ")
- (list #\o "ο" "Ο" "ό" "Ό")
- (list #\p "π" "Π")
- (list #\r "ρ" "Ρ")
- (list #\s "σ" "Σ" "ς") ; FIXME: Special case.
- (list #\c "ς" "Σ")
- (list #\t "τ" "Τ")
- (list #\y "υ" "Υ" "ύ" "Ύ")
- (list #\f "φ" "Φ")
- (list #\h "χ" "Χ")
- (list #\* "ψ" "Ψ")
- (list #\w "ω" "Ω" "ώ" "Ώ")
- (list #\I "ϊ" "Ϊ" "ΐ" "ΐ")
- (list #\Y "ϋ" "Ϋ" "ΰ" "ΰ")))
-
-
-(define xlist-greek
- (list
- (cons "α" #\a )
- (cons "Α" #\a )
- (cons "Ά" (cons #\a #t))
- (cons "ά" (cons #\a #t))
- (cons "β" #\b )
- (cons "Β" #\b )
- (cons "γ" #\g )
- (cons "Γ" #\g )
- (cons "δ" #\d )
- (cons "Δ" #\d )
- (cons "ε" #\e )
- (cons "Ε" #\e )
- (cons "Έ" (cons #\e #t))
- (cons "έ" (cons #\e #t))
- (cons "ζ" #\z )
- (cons "Ζ" #\z )
- (cons "η" #\% )
- (cons "Η" #\% )
- (cons "Ή" (cons #\% #t))
- (cons "ή" (cons #\% #t))
- (cons "θ" #\@ )
- (cons "Θ" #\@ )
- (cons "ι" #\i )
- (cons "Ι" #\i )
- (cons "Ί" (cons #\i #t))
- (cons "ί" (cons #\i #t))
- (cons "κ" #\k )
- (cons "Κ" #\k )
- (cons "λ" #\l )
- (cons "Λ" #\l )
- (cons "μ" #\m )
- (cons "Μ" #\m )
- (cons "ν" #\n )
- (cons "Ν" #\n )
- (cons "ξ" #\x )
- (cons "Ξ" #\x )
- (cons "ο" #\o )
- (cons "Ο" #\o )
- (cons "Ό" (cons #\o #t))
- (cons "ό" (cons #\o #t))
- (cons "π" #\p )
- (cons "Π" #\p )
- (cons "ρ" #\r )
- (cons "Ρ" #\r )
- (cons "σ" #\s )
- (cons "Σ" #\s )
- (cons "ς" #\s )
- (cons "τ" #\t )
- (cons "Τ" #\t )
- (cons "υ" #\y )
- (cons "Υ" #\y )
- (cons "Ύ" (cons #\y #t))
- (cons "ύ" (cons #\y #t))
- (cons "φ" #\f )
- (cons "Φ" #\f )
- (cons "χ" #\h )
- (cons "Χ" #\h )
- (cons "ψ" #\* )
- (cons "Ψ" #\* )
- (cons "ω" #\w )
- (cons "Ω" #\w )
- (cons "Ώ" (cons #\w #t))
- (cons "ώ" (cons #\w #t))
- (cons "Ϊ" #\I )
- (cons "ΐ" (cons #\I #t))
- (cons "Ϋ" #\Y )
- (cons "ΰ" (cons #\Y #t))))
-
-(define-public (greek->xlat0 str)
- "Convert the greek STRing into its latin transliteration. Returns
-
- (list AP XLAT)
-
-where AP is the number of accented letter (not syllable!), counted from 0;
- XLAT is the list of transliterated letters
-Secondary accents are ignored"
- (let ((accent-pos #f)
- (syllable 0)
- (len (string-length str)))
- (do ((i 0)
- (lcnt 0 (1+ lcnt))
- (sl '()))
- ((= i len) (cons
- (and accent-pos
- (- (length sl) accent-pos 1))
- (reverse sl)))
- (letrec ((get-trans (lambda (x)
- (let ((y (cdr x)))
- (cond
- ((pair? y)
- (if (not accent-pos)
- (set! accent-pos lcnt))
- (car y))
- (else
- y))))))
- (set! sl (cons (cond
- ((and (<= (+ i 4) len)
- (assoc (substring str i (+ i 4)) xlist-greek)) =>
- (lambda (x)
- (set! i (+ i 4))
- (get-trans x)))
- ((and (<= (+ i 2) len)
- (assoc (substring str i (+ i 2)) xlist-greek)) =>
- (lambda (x)
- (set! i (+ i 2))
- (get-trans x)))
- (else
- (set! i (1+ i))
- (substring str (- i 1) i)))
- sl))))))
-
-(define-public (xlat->greek w)
- (apply
- string-append
- (apply append
- (let ((acc (car w))
- (n 0))
-
- (reverse
- (map
- (lambda (syllable)
- (set! n (1+ n))
- (reverse
- (map
- (lambda (x)
- (let ((entry (assoc x xlist-latin)))
- (cond
- ((and acc (= n acc) (= (length entry) 5))
- (set! acc #f)
- (list-ref entry 3))
- (else
- (list-ref entry 1)))))
- syllable)))
- (let ((slist (cdr w)))
- (if (char=? (caar slist) #\s)
- ;; Special handling for terminal sigma
- (cons (cons #\c (cdar slist)) (cdr slist))
- slist))))))))
-
-;; α a
-;;
-;; ε e
-;; αι ai
-;;
-;; ο o
-;; ω w
-;;
-;; ι i
-;; η %
-;; υ y
-;; οι oi
-;; ει ei
-;; υι ui
-;;
-;; ου oy
-;;
-;; αυ ay
-;;
-;; ευ ey
-;;
-;; a -> a, ai, au
-;; e -> e, ei, ey
-;; o -> o, oi, oy
-;; y -> y, yi
-;; i -> i, ia, ie, io, ioy
-;; %
-;; I
-;; Y
-
-(define-public (vowel? x)
- (member x (list #\a #\e #\o #\y #\i #\% #\I #\Y #\w)))
-
-(define (prosodia ap wl accented sl)
- (let ((syl '()))
- ;; Collect consonants
- (do ()
- ((or (null? wl)
- (vowel? (car wl))))
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl)))
- (cond
- ((null? wl)
- (cons accented
- (if (null? sl)
- (list syl)
- (cons (append syl (car sl)) (cdr sl)))))
- (else
- (let ((a (car wl)))
- (set! wl (cdr wl))
- (set! syl (cons a syl))
- (cond
- ((and ap (= ap (length wl)))
- (set! accented (length sl)))
- (else
- (case a
- ((#\a #\e #\o)
- (cond
- ((and (not (null? wl))
- (or (char=? (car wl) #\i) (char=? (car wl) #\y)))
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl))
- (if (and ap (= ap (length wl)))
- (set! accented (length sl))))))
- ((#\i)
- (if (not (null? wl))
- (cond
- ((or (char=? (car wl) #\e)
- (char=? (car wl) #\a))
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl))
- (if (and ap (= ap (length wl)))
- (set! accented (length sl))))
- ((char=? (car wl) #\o)
- ;; "ιο" ή "ιου"
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl))
- (cond
- ((and ap (= ap (length wl)))
- (set! accented (length sl)))
- ((and (not (null? wl)) (char=? (car wl) #\y))
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl))
- (if (and ap (= ap (length wl)))
- (set! accented (length sl)))))))))
- ((#\y)
- (cond
- ((and (not (null? wl))
- (char=? (car wl) #\i))
- (set! syl (cons (car wl) syl))
- (set! wl (cdr wl))
- (if (and ap (= ap (length wl)))
- (set! accented (length sl))))))
- ((#\Y #\I)
- (set! accented (length sl))))))
- (prosodia ap wl accented (cons syl sl)))))))
-
-
-(define-public (greek->xlat str)
- (let* ((wl (greek->xlat0 str))
- (sl (prosodia (car wl) (cdr wl) #f '())))
- (cons
- (and (car sl) (- (length (cdr sl)) (car sl)))
- (cdr sl))))
-
-;;;; End of file \ No newline at end of file
diff --git a/src/ellinika/.gitignore b/src/ellinika/.gitignore
index 9422f9a..11bf478 100644
--- a/src/ellinika/.gitignore
+++ b/src/ellinika/.gitignore
@@ -3,3 +3,5 @@ cgi.scm
config.scm
elmorph.scm
elmorph.x
+phoneme.c
+phoneme.h
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
index 274eea8..b8988d4 100644
--- a/src/ellinika/Makefile.am
+++ b/src/ellinika/Makefile.am
@@ -1,5 +1,5 @@
# This file is part of Ellinika project.
-# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff
+# Copyright (C) 2004,2006,2007,2008,2011 Sergey Poznyakoff
#
# Ellinika is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -15,7 +15,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
guiledir=$(GUILE_SITE)/$(PACKAGE)
-guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm elmorph.scm
+guile_DATA=\
+ xlat.scm\
+ cgi.scm\
+ i18n.scm\
+ config.scm\
+ dico.scm\
+ elmorph.scm\
+ tenses.scm
cgi.m4: Makefile
echo 'divert(-1)' > $@
@@ -39,11 +46,11 @@ cgi.m4: Makefile
SUFFIXES = .scm4 .scm .x
.scm4.scm:
- m4 cgi.m4 $< > $@
+ m4 -I$(srcdir) cgi.m4 $< > $@
cgi.scm: cgi.scm4 cgi.m4
config.scm: config.scm4 cgi.m4
-elmorph.scm: elmorph.scm4 cgi.m4
+elmorph.scm: elmorph.scm4 elmorph-public.scm cgi.m4
pkglib_LTLIBRARIES=libelmorph.la
@@ -52,7 +59,9 @@ libelmorph_la_SOURCES = \
utf8.c\
elchr.c\
elmorph.c\
- elmorph.h
+ elmorph.h\
+ phoneme.y\
+ syllabificator.c
DOT_X_FILES = elmorph.x
@@ -80,4 +89,6 @@ install-data-hook:
done; \
cd $$here
-
+AM_YFLAGS = -d
+YACCCOMPILE = $(srcdir)/yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
+EXTRA_DIST = yyrename elmorph-public.scm \ No newline at end of file
diff --git a/src/ellinika/elchr.c b/src/ellinika/elchr.c
index 3142b6f..621ac03 100644
--- a/src/ellinika/elchr.c
+++ b/src/ellinika/elchr.c
@@ -23,20 +23,7 @@
#include <libguile.h>
#include "utf8.h"
#include "elmorph.h"
-
-struct char_info_st {
- unsigned ch; /* Characters */
- int flags; /* Flags (see above) */
- unsigned base; /* for vowels - a corresponding vowel with all diacritics
- removed */
- unsigned trans; /* a counter-case equivalent, i.e. a corresponding uppercase
- letter if flags & CHF_LOWER and a corresponding lowerrcase
- letter if flags & CHF_UPPER */
- unsigned numval; /* Numeric value */
- unsigned accented[3]; /* For vowels - corresponding accented variant */
- unsigned deaccent; /* For accented vowels with diaeresis - corresponding
- non-accented character */
-};
+#include "phoneme.h"
/* See http://www.unicode.org/charts/PDF/Unicode-5.1/U51-0370.pdf */
struct char_info_st el_basic_ctype[] = {
@@ -174,80 +161,149 @@ struct char_info_st el_basic_ctype[] = {
{ 0x0383, },
{ 0x0384, CHF_MODIFIER }, /* Oxeia */
{ 0x0385, CHF_MODIFIER }, /* dialytika */
- { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC }, /* Ά */
+ { 0x0386, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0391, 0x03AC,
+ 0, { 0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* Ά */
{ 0x0387, CHF_PUNCT }, /* ano teleia */
- { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD }, /* Έ */
- { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0397, 0x03AE }, /* Ή */
- { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x0399, 0x03AF }, /* Ί */
+ { 0x0388, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0395, 0x03AD,
+ 0, { 0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* Έ */
+ { 0x0389, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0397, 0x03AE,
+ 0, { 0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* Ή */
+ { 0x038A, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x0399, 0x03AF,
+ 0, { 0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* Ί */
{ 0x038B, },
- { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC }, /* Ό */
+ { 0x038C, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x039F, 0x03CC,
+ 0, { 0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* Ό */
{ 0x038D, },
- { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA|CHF_DIPH2, 0x03A5, 0x03CD }, /* Ύ */
- { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE }, /* Ώ */
- { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0, 0, 0, 0, 0x03CA }, /* ΐ */
- { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B1, 1, 0x0386 }, /* Α */
- { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2, 2 }, /* Β */
- { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3, 3 }, /* Γ */
- { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4, 4 }, /* Δ */
- { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1, 0, 0x03B5, 5, 0x0388 }, /* Ε */
- { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6, 7 }, /* Ζ */
- { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7, 8, 0x0389 }, /* Η */
- { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8, 9 }, /* Θ */
- { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03B9, 10, 0x038A }, /* Ι */
- { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA, 20 }, /* Κ */
- { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB, 30 }, /* Λ */
- { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC, 40 }, /* Μ */
- { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD, 50 }, /* Ν */
- { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE, 60 }, /* Ξ */
- { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF, 70, 0x038C }, /* Ο */
- { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0, 80 }, /* Π */
- { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1, 100 }, /* Ρ */
+ { 0x038E, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A5, 0x03CD,
+ 0, { 0, 0, 0}, 0, LETTER_Y_ACC, PHON_I }, /* Ύ */
+ { 0x038F, CHF_VOWEL|CHF_UPPER|CHF_OXEIA, 0x03A9, 0x03CE,
+ 0, { 0, 0, 0}, 0, LETTER_OMEGA_ACC, PHON_O }, /* Ώ */
+ { 0x0390, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_OXEIA, 0x03B9, 0,
+ 0, { 0, 0, 0}, 0x03CA, LETTER_I_TREMA_ACC, PHON_I }, /* ΐ */
+ { 0x0391, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B1,
+ 1, { 0x0386, 0, 0}, 0, LETTER_A, PHON_A }, /* Α */
+ { 0x0392, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B2,
+ 2, {0, 0, 0}, 0, LETTER_B, PHON_BH },/* Β */
+ { 0x0393, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B3,
+ 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* Γ */
+ { 0x0394, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B4,
+ 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* Δ */
+ { 0x0395, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B5,
+ 5, { 0x0388, 0, 0}, 0, LETTER_E, PHON_E }, /* Ε */
+ { 0x0396, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B6,
+ 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* Ζ */
+ { 0x0397, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B7,
+ 8, {0x0389, 0, 0}, 0, LETTER_H, PHON_I }, /* Η */
+ { 0x0398, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03B8,
+ 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* Θ */
+ { 0x0399, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03B9,
+ 10, { 0x038A, 0, 0}, 0, LETTER_I, PHON_I }, /* Ι */
+ { 0x039A, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BA,
+ 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* Κ */
+ { 0x039B, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BB,
+ 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* Λ */
+ { 0x039C, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BC,
+ 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* Μ */
+ { 0x039D, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BD,
+ 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* Ν */
+ { 0x039E, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03BE,
+ 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* Ξ */
+ { 0x039F, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03BF,
+ 70, { 0x038C, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* Ο */
+ { 0x03A0, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C0,
+ 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* Π */
+ { 0x03A1, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C1,
+ 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* Ρ */
{ 0x03A2, },
- { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3, 200 }, /* Σ */
- { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4, 300 }, /* Τ */
- { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x03C5, 400, 0x038E }, /* Υ */
- { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6, 500 }, /* Φ */
- { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7, 600 }, /* Χ */
- { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8, 700 }, /* Ψ */
- { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9, 800, 0x038F }, /* Ω */
- { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA|CHF_DIPH2, 0x0399, 0x03CA }, /* Ϊ */
- { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB }, /* Ϋ */
- { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386 }, /* ά */
- { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388 }, /* έ */
- { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B7, 0x0389 }, /* ή */
- { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03B9, 0x038A }, /* ί */
- { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0, 0, 0, 0, 0x03CB }, /* ΰ */
- { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0391, 1, 0x03AC }, /* α */
- { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392, 2 }, /* β */
- { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393, 3 }, /* γ */
- { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394, 4 }, /* δ */
- { 0x03B5, CHF_CONSONANT|CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x0395, 5, 0x03AD }, /* ε */
- { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396, 7 }, /* ζ */
- { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1|CHF_DIPH2, 0, 0x0397, 8, 0x03AE }, /* η */
- { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398, 9 }, /* θ */
- { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399, 10, 0x03AF }, /* ι */
- { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A, 20 }, /* κ */
- { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B, 30 }, /* λ */
- { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C, 40 }, /* μ */
- { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D, 50 }, /* ν */
- { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E, 60 }, /* ξ */
+ { 0x03A3, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C3,
+ 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* Σ */
+ { 0x03A4, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C4,
+ 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* Τ */
+ { 0x03A5, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C5,
+ 400, { 0x038E, 0, 0}, 0, LETTER_Y, PHON_I }, /* Υ */
+ { 0x03A6, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C6,
+ 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* Φ */
+ { 0x03A7, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C7,
+ 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* Χ */
+ { 0x03A8, CHF_CONSONANT|CHF_UPPER|CHF_NUMERIC, 0, 0x03C8,
+ 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* Ψ */
+ { 0x03A9, CHF_VOWEL|CHF_UPPER|CHF_NUMERIC, 0, 0x03C9,
+ 800, { 0x038F, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* Ω */
+ { 0x03AA, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x0399, 0x03CA,
+ 0, {0, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* Ϊ */
+ { 0x03AB, CHF_VOWEL|CHF_UPPER|CHF_TREMA, 0x03A5, 0x03CB,
+ 0, {0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* Ϋ */
+ { 0x03AC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B1, 0x0386,
+ 0, {0, 0, 0}, 0, LETTER_A_ACC, PHON_A }, /* ά */
+ { 0x03AD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B5, 0x0388,
+ 0, {0, 0, 0}, 0, LETTER_E_ACC, PHON_E }, /* έ */
+ { 0x03AE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B7, 0x0389,
+ 0, {0, 0, 0}, 0, LETTER_H_ACC, PHON_I }, /* ή */
+ { 0x03AF, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03B9, 0x038A,
+ 0, {0, 0, 0}, 0, LETTER_I_ACC, PHON_I }, /* ί */
+ { 0x03B0, CHF_VOWEL|CHF_OXEIA|CHF_TREMA, 0x03C5, 0,
+ 0, { 0, 0, 0 }, 0x03CB, LETTER_Y_TREMA_ACC, PHON_I }, /* ΰ */
+ { 0x03B1, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0391,
+ 1, {0x03AC, 0, 0}, 0, LETTER_A, PHON_A }, /* α */
+ { 0x03B2, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0392,
+ 2, {0, 0, 0}, 0, LETTER_B, PHON_BH }, /* β */
+ { 0x03B3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0393,
+ 3, {0, 0, 0}, 0, LETTER_G, PHON_GH }, /* γ */
+ { 0x03B4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0394,
+ 4, {0, 0, 0}, 0, LETTER_D, PHON_DH }, /* δ */
+ { 0x03B5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0395,
+ 5, { 0x03AD, 0, 0}, 0, LETTER_E, PHON_E }, /* ε */
+ { 0x03B6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0396,
+ 7, {0, 0, 0}, 0, LETTER_Z, PHON_Z }, /* ζ */
+ { 0x03B7, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0397,
+ 8, { 0x03AE, 0, 0}, 0, LETTER_H, PHON_I }, /* η */
+ { 0x03B8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x0398,
+ 9, {0, 0, 0}, 0, LETTER_TH, PHON_TH }, /* θ */
+ { 0x03B9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x0399,
+ 10, {0x03AF, 0, 0}, 0, LETTER_I, PHON_I }, /* ι */
+ { 0x03BA, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039A,
+ 20, {0, 0, 0}, 0, LETTER_K, PHON_K }, /* κ */
+ { 0x03BB, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039B,
+ 30, {0, 0, 0}, 0, LETTER_L, PHON_L }, /* λ */
+ { 0x03BC, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039C,
+ 40, {0, 0, 0}, 0, LETTER_M, PHON_M }, /* μ */
+ { 0x03BD, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039D,
+ 50, {0, 0, 0}, 0, LETTER_N, PHON_N }, /* ν */
+ { 0x03BE, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x039E,
+ 60, {0, 0, 0}, 0, LETTER_KS, PHON_X }, /* ξ */
- { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH1, 0, 0x039F, 70, 0x03CC }, /* ο */
- { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0, 80 }, /* π */
- { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1, 100 }, /* ρ */
- { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3 }, /* ς */
- { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3, 200 }, /* σ */
- { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4, 300 }, /* τ */
- { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC|CHF_DIPH2, 0, 0x03A5, 400, 0x03CD }, /* υ */
- { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6, 500 }, /* φ */
- { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7, 600 }, /* χ */
- { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8, 700 }, /* ψ */
- { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9, 800, 0x03CE }, /* ω */
- { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA|CHF_DIPH2, 0x03B9, 0x03AA, 0, 0x0390 }, /* ϊ */
- { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB, 0, 0x03B0 }, /* ϋ */
- { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C }, /* ό */
- { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA|CHF_DIPH2, 0x03C5, 0x038E }, /* ύ */
- { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03CE, 0x038F }, /* ώ */
+ { 0x03BF, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x039F,
+ 70, {0x03CC, 0, 0}, 0, LETTER_OMICRON, PHON_O }, /* ο */
+ { 0x03C0, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A0,
+ 80, {0, 0, 0}, 0, LETTER_P, PHON_P }, /* π */
+ { 0x03C1, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A1,
+ 100, {0, 0, 0}, 0, LETTER_R, PHON_R }, /* ρ */
+ { 0x03C2, CHF_CONSONANT|CHF_LOWER, 0, 0x03A3,
+ 0, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* ς */
+ { 0x03C3, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A3,
+ 200, {0, 0, 0}, 0, LETTER_S, PHON_S }, /* σ */
+ { 0x03C4, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A4,
+ 300, {0, 0, 0}, 0, LETTER_T, PHON_T }, /* τ */
+ { 0x03C5, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A5,
+ 400, { 0x03CD, 0, 0}, 0, LETTER_Y, PHON_I }, /* υ */
+ { 0x03C6, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A6,
+ 500, {0, 0, 0}, 0, LETTER_F, PHON_F }, /* φ */
+ { 0x03C7, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A7,
+ 600, {0, 0, 0}, 0, LETTER_X, PHON_H }, /* χ */
+ { 0x03C8, CHF_CONSONANT|CHF_LOWER|CHF_NUMERIC, 0, 0x03A8,
+ 700, {0, 0, 0}, 0, LETTER_PS, PHON_PS }, /* ψ */
+ { 0x03C9, CHF_VOWEL|CHF_LOWER|CHF_NUMERIC, 0, 0x03A9,
+ 800, {0x03CE, 0, 0}, 0, LETTER_OMEGA, PHON_O }, /* ω */
+ { 0x03CA, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03B9, 0x03AA,
+ 0, {0x0390, 0, 0}, 0, LETTER_I_TREMA, PHON_I }, /* ϊ */
+ { 0x03CB, CHF_VOWEL|CHF_LOWER|CHF_TREMA, 0x03C5, 0x03AB,
+ 0, {0x03B0, 0, 0}, 0, LETTER_Y_TREMA, PHON_I }, /* ϋ */
+ { 0x03CC, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03BF, 0x038C,
+ 0, {0, 0, 0}, 0, LETTER_OMICRON_ACC, PHON_O }, /* ό */
+ { 0x03CD, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C5, 0x038E,
+ 0, {0, 0, 0}, 0x03C5, LETTER_Y_ACC, PHON_I }, /* ύ */
+ { 0x03CE, CHF_VOWEL|CHF_LOWER|CHF_OXEIA, 0x03C9, 0x038F,
+ 0, {0, 0, 0}, 0x03C9, LETTER_OMEGA_ACC, PHON_O }, /* ώ */
{ 0x03CF, CHF_SYMBOL|CHF_UPPER, 0x03D7 }, /* KAI */
{ 0x03D0, CHF_CONSONANT|CHF_LOWER, 0, 0x0392 }, /* curled beta */
{ 0x03D1, CHF_CONSONANT|CHF_LOWER, 0, 0x0398 }, /* script theta */
@@ -561,7 +617,7 @@ struct char_info_st el_extended_ctype[] = {
{ 0x1FFF, }
};
-static struct char_info_st *
+struct char_info_st const *
elchr_info(unsigned ch)
{
if (ch >= 0x0300 && ch <= 0x03FF)
@@ -574,11 +630,25 @@ elchr_info(unsigned ch)
int
elchr_flags(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return ci ? ci->flags : 0;
}
int
+elchr_letter(unsigned ch)
+{
+	/* Letter code stored in the character table for CH;
+	   0 when elchr_info() has no entry for it. */
+	struct char_info_st const *entry = elchr_info(ch);
+
+	if (!entry)
+		return 0;
+	return entry->letter;
+}
+
+int
+elchr_phoneme(unsigned ch)
+{
+	/* Phoneme code stored in the character table for CH;
+	   0 when elchr_info() has no entry for it. */
+	struct char_info_st const *entry = elchr_info(ch);
+
+	if (!entry)
+		return 0;
+	return entry->phoneme;
+}
+
+int
elchr_isupper(unsigned ch)
{
return elchr_flags(ch) & CHF_UPPER;
@@ -654,35 +724,35 @@ elchr_isnumeric(unsigned ch)
unsigned
elchr_numeric_value(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_NUMERIC)) ? ci->numval: 0;
}
unsigned
elchr_toupper(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_LOWER)) ? ci->trans: ch;
}
unsigned
elchr_tolower(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_UPPER)) ? ci->trans : ch;
}
unsigned
elchr_base(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && (ci->flags & CHF_ACCENT_MASK) && ci->base) ? ci->base : ch;
}
unsigned
elchr_deaccent(unsigned ch)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
if (ci && (ci->flags & CHF_ACCENT_MASK))
return ci->deaccent ? ci->deaccent : ci->base ? ci->base : ch;
return ch;
@@ -691,28 +761,7 @@ elchr_deaccent(unsigned ch)
unsigned
elchr_accent(unsigned ch, int acc)
{
- struct char_info_st *ci = elchr_info(ch);
+ struct char_info_st const *ci = elchr_info(ch);
return (ci && ci->accented[acc-1]) ? ci->accented[acc-1] : ch;
}
-int
-elchr_diphthong(unsigned ch, int state)
-{
- struct char_info_st *ci = elchr_info(ch);
-
- if (!ci || !(ci->flags & CHF_VOWEL))
- return 0;
- switch (state) {
- case 0:
- if (ci->flags & CHF_DIPH1)
- state = 1;
- break;
- case 1:
- if (ci->flags & CHF_DIPH2)
- state = 2;
- break;
- default:
- state = 0;
- }
- return state;
-}
diff --git a/src/ellinika/elmorph-public.scm b/src/ellinika/elmorph-public.scm
new file mode 100644
index 0000000..329fe4a
--- a/dev/null
+++ b/src/ellinika/elmorph-public.scm
@@ -0,0 +1,106 @@
+;;;; This file is part of Ellinika project.
+;;;; Copyright (C) 2011 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(use-modules ((srfi srfi-1)))
+
+(define-public (elstr-trim word n)
+  ;; WORD may be a Scheme string or an elstr; a string is converted first.
+  ;; N > 0: slice starting at offset N, i.e. without the first N characters.
+  ;; N < 0: slice from offset 0 that is |N| characters shorter.
+  ;; N = 0: the (possibly converted) word is returned as is.
+  (let ((el (if (string? word) (string->elstr word) word)))
+    (cond
+     ((positive? n)
+      (elstr-slice el n (- (elstr-length el) n)))
+     ((negative? n)
+      (elstr-slice el 0 (+ (elstr-length el) n)))
+     (else
+      el))))
+
+(define-public (elstr-trim! word n)
+  ;; Same slicing rules as elstr-trim, but performed with elstr-slice!.
+  ;; Nothing is done (and the result is unspecified) when N is 0.
+  (let ((el (if (string? word) (string->elstr word) word)))
+    (if (positive? n)
+        (elstr-slice! el n (- (elstr-length el) n))
+        (if (negative? n)
+            (elstr-slice! el 0 (+ (elstr-length el) n))))))
+
+(define-public (phoneme:code ph)
+  ;; First element of a phoneme list: the phoneme code.
+  (car ph))
+
+(define-public (phoneme:start ph)
+  ;; Second element of a phoneme list: the start position.
+  (cadr ph))
+
+(define-public (phoneme:count ph)
+  ;; Third element of a phoneme list: the character count.
+  (caddr ph))
+
+(define-public (phoneme:flags ph)
+  ;; Fourth element of a phoneme list: the flags.
+  (cadddr ph))
+
+(define-public (phoneme:accented? ph)
+  ;; #t when any bit of elmorph:accent-mask is set in the flags of PH.
+  ;; The raw `logand' result cannot serve as the answer: in Scheme every
+  ;; integer, 0 included, counts as true, so the predicate would never
+  ;; be false.
+  (not (zero? (logand (phoneme:flags ph) elmorph:accent-mask))))
+
+(define-public (phoneme:vowel? ph)
+  ;; #t when the elmorph:vowel bit is set in the flags of PH.
+  ;; (`=' applied to a single argument is always #t, so the masked
+  ;; value has to be tested against zero explicitly.)
+  (not (zero? (logand (phoneme:flags ph) elmorph:vowel))))
+
+(define-public (phoneme:consonant? ph)
+  ;; #t when the elmorph:consonant bit is set in the flags of PH.
+  ;; (`=' applied to a single argument is always #t, so the masked
+  ;; value has to be tested against zero explicitly.)
+  (not (zero? (logand (phoneme:flags ph) elmorph:consonant))))
+
+(define-public (phoneme:diphthong? ph)
+  ;; #t when the elmorph:diphthong bit is set in the flags of PH.
+  ;; (`=' applied to a single argument is always #t, so the masked
+  ;; value has to be tested against zero explicitly.)
+  (not (zero? (logand (phoneme:flags ph) elmorph:diphthong))))
+
+
+(define soundslike-transcription-list ; alist: phoneme code -> Latin "sounds like" spelling
+ '((1 . "a") ; keys appear to follow the PHON_* numbering of elmorph.h; 1 = PHON_A (α)
+ (2 . "e") ; PHON_E (ε αι)
+ (3 . "i") ; PHON_I (ι η υ ει οι υι)
+ (4 . "o") ; PHON_O (ο ω)
+ (5 . "u") ; PHON_U (ου)
+ (6 . "b") ; PHON_BH (β)
+ (7 . "g") ; PHON_GH (γ)
+ (8 . "d") ; PHON_DH (δ)
+ (9 . "z") ; PHON_Z (ζ)
+ (10 . "t") ; PHON_TH (θ) - same spelling as PHON_T
+ (11 . "k") ; PHON_K (κ)
+ (12 . "l") ; PHON_L (λ)
+ (13 . "m") ; PHON_M (μ)
+ (14 . "n") ; PHON_N (ν)
+ (15 . "x") ; PHON_X (ξ)
+ (16 . "p") ; PHON_P (π)
+ (17 . "r") ; PHON_R (ρ)
+ (18 . "s") ; PHON_S (σ)
+ (19 . "t") ; PHON_T (τ)
+ (20 . "f") ; PHON_F (φ)
+ (21 . "h") ; PHON_H (χ)
+ (22 . "P") ; PHON_PS (ψ) - capital P keeps it distinct from PHON_P
+ (23 . "b") ; PHON_B (μπ) - same spelling as PHON_BH
+ (24 . "d") ; PHON_D (ντ) - same spelling as PHON_DH
+ (25 . "g") ; PHON_G (γγ γκ γχ) - same spelling as PHON_GH
+ (26 . "sm") ; PHON_ZM (σμ)
+ (27 . "ts") ; PHON_TS (τσ)
+ (28 . "tz") ; PHON_DZ (τζ)
+ (29 . "ngz") ; PHON_NGZ (νγζ)
+ (30 . "au") ; PHON_AV (αυ)
+ (31 . "eu"))) ; PHON_EV (ευ)
+
+(define-public (elstr->soundslike word)
+  ;; Walk the phonetic map of WORD, look every phoneme code up in
+  ;; soundslike-transcription-list and concatenate the spellings found.
+  ;; Codes without an entry in the list contribute nothing.
+  (let loop ((rest (elstr->phonetic-map word))
+             (pieces '()))
+    (if (null? rest)
+        (apply string-append (reverse pieces))
+        (let ((spelling (assoc-ref soundslike-transcription-list
+                                   (phoneme:code (car rest)))))
+          (loop (cdr rest)
+                (if spelling (cons spelling pieces) pieces))))))
+
diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c
index 1831610..5a8acdf 100644
--- a/src/ellinika/elmorph.c
+++ b/src/ellinika/elmorph.c
@@ -26,55 +26,63 @@
struct elstr {
unsigned *str; /* UTF-8 string */
size_t len; /* Its length */
+
+ struct phoneme *phoneme; /* Phonetical map*/
+ unsigned phoneme_count; /* Number of phonemes */
+
+ struct syllable *sylmap; /* Syllable map (nsyl elements) */
unsigned nsyl; /* Number of syllables. */
- unsigned *sylmap; /* Syllable map (nsyl elements) */
unsigned acc_syl; /* Number of the accented syllable
(1-based, from the last syllable) */
unsigned acc_pos; /* Number of the accented character
(0-based, from str[0]) */
+
};
scm_t_bits _elstr_tag;
static void
-_elstr_syllabize(struct elstr *elstr)
-{
- unsigned *sylmap;
- unsigned i, nsyl = 0, accchr = 0;
- int accsyl = -1;
- int dstate = 0;
- int acc = 0;
-
- if (!elstr->sylmap) {
- elstr->sylmap = calloc(elstr->len, sizeof(sylmap[0]));
- if (!elstr->sylmap)
- scm_memory_error("_elstr_syllabize");
- }
- sylmap = elstr->sylmap;
+_elstr_syllabize(struct elstr *elstr, const char *func_name)
+{
+ unsigned i;
+
+ free(elstr->phoneme);
+ free(elstr->sylmap);
- for (i = 0; i < elstr->len; i++) {
- int nstate;
-
- if (elchr_getaccent(elstr->str[i])) {
- accsyl = nsyl;
- accchr = i;
- }
- nstate = elchr_diphthong(elstr->str[i], dstate);
- if (nstate)
- /* skip */;
- else if (dstate)
- sylmap[nsyl++] = i - 1;
- else if (elchr_isvowel(elstr->str[i]))
- sylmap[nsyl++] = i;
- dstate = nstate;
+ if (phoneme_map(&elstr->phoneme, &elstr->phoneme_count,
+ elstr->str, elstr->len))
+ scm_misc_error(func_name,
+ "cannot create phonetic map: ~S",
+ scm_from_int(errno));
+
+ if (syllable_map(&elstr->sylmap, &elstr->nsyl,
+ elstr->phoneme, elstr->phoneme_count))
+ scm_misc_error(func_name,
+ "cannot create syllable map: ~S",
+ scm_from_int(errno));
+
+ for (i = elstr->nsyl; i > 0; i--) {
+ if (elstr->sylmap[elstr->nsyl - i].flags & CHF_ACCENT_MASK)
+ break;
}
- if (dstate)
- sylmap[nsyl++] = i - 1;
- else if (nsyl)
- sylmap[nsyl-1] = i - 1;
- elstr->nsyl = nsyl;
- elstr->acc_pos = accchr;
- elstr->acc_syl = (accsyl >= 0) ? nsyl - accsyl : 0;
+ elstr->acc_syl = i;
+ for (i = 0; i < elstr->len; i++)
+ if (elchr_getaccent(elstr->str[i]))
+ break;
+ elstr->acc_pos = i;
+}
+
+/* Release the phonetic and syllable maps of ELSTR and reset every field
+   derived from them (counts and accent positions) to zero. */
+static void
+invalidate_maps(struct elstr *elstr)
+{
+	free(elstr->phoneme);
+	elstr->phoneme = NULL;
+	elstr->phoneme_count = 0;
+
+	free(elstr->sylmap);
+	elstr->sylmap = NULL;
+	elstr->nsyl = 0;
+
+	elstr->acc_syl = 0;
+	elstr->acc_pos = 0;
}
static SCM
@@ -89,7 +97,7 @@ _elstr_alloc_empty(struct elstr **pelstr)
}
static SCM
-_elstr_alloc(const char *instr, int syl)
+_elstr_alloc(const char *instr, int syl, const char *func_name)
{
struct elstr *elstr;
unsigned *wptr;
@@ -105,8 +113,10 @@ _elstr_alloc(const char *instr, int syl)
elstr->nsyl = 0;
elstr->acc_syl = 0;
elstr->acc_pos = 0;
+ elstr->phoneme = 0;
+ elstr->phoneme_count = 0;
if (syl)
- _elstr_syllabize(elstr);
+ _elstr_syllabize(elstr, func_name);
SCM_RETURN_NEWSMOB(_elstr_tag, elstr);
}
@@ -120,19 +130,34 @@ _elstr_dup(struct elstr *elstr)
elnew->str = calloc(elstr->len, sizeof(elnew->str[0]));
if (!elnew->str)
scm_memory_error("_elstr_dup");
+ memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
+ elnew->len = elstr->len;
+
+ if (elstr->phoneme) {
+ elnew->phoneme = calloc(elstr->phoneme_count,
+ sizeof(elnew->phoneme[0]));
+ if (!elnew->phoneme) {
+ free(elnew->str);
+ scm_memory_error("_elstr_dup");
+ }
+ memcpy(elnew->phoneme, elstr->phoneme,
+ sizeof(elstr->phoneme[0]) * elstr->phoneme_count);
+ } else
+ elnew->phoneme = NULL;
+ elnew->phoneme_count = elstr->phoneme_count;
+
if (elstr->sylmap) {
elnew->sylmap = calloc(elstr->nsyl, sizeof(elnew->sylmap[0]));
if (!elnew->sylmap) {
free(elnew->str);
scm_memory_error("_elstr_dup");
}
+ memcpy(elnew->sylmap, elstr->sylmap,
+ sizeof(elstr->sylmap[0]) * elstr->nsyl);
} else
elnew->sylmap = NULL;
- memcpy(elnew->str, elstr->str, sizeof(elstr->str[0]) * elstr->len);
- elnew->len = elstr->len;
elnew->nsyl = elstr->nsyl;
- memcpy(elnew->sylmap, elstr->sylmap,
- sizeof(elstr->sylmap[0]) * elstr->nsyl);
+
elnew->acc_syl = elstr->acc_syl;
elnew->acc_pos = elstr->acc_pos;
SCM_RETURN_NEWSMOB(_elstr_tag, elnew);
@@ -162,6 +187,7 @@ _elstr_free(SCM smob)
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
free(elstr->str);
free(elstr->sylmap);
+ free(elstr->phoneme);
scm_gc_free(elstr, sizeof(struct elstr), "elstr");
return 0;
}
@@ -170,34 +196,31 @@ static int
_elstr_print(SCM smob, SCM port, scm_print_state *pstate)
{
struct elstr *elstr = (struct elstr *) SCM_CDR(smob);
- int i, j, an;
+ int i, j;
char *s;
scm_puts("#<elstr ", port);
if (elstr->sylmap) {
scm_puts("``", port);
- an = elstr->nsyl - elstr->acc_syl;
- if (an == 0)
- scm_puts("[", port);
- for (i = j = 0; i < elstr->len; i++) {
- char r[6];
- int n;
-
- if (i == elstr->sylmap[j] + 1) {
- if (j == an)
- scm_puts("]", port);
+ for (i = 0; i < elstr->nsyl; i++) {
+ size_t start = elstr->sylmap[i].char_start;
+ if (i)
scm_puts("-", port);
- if (++j == an)
- scm_puts("[", port);
+ if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+ scm_puts("[", port);
+ for (j = 0; j < elstr->sylmap[i].char_count; j++) {
+ char r[6];
+ int n;
+
+ n = utf8_wctomb(r, elstr->str[start+j]);
+ if (n == -1)
+ continue;
+ r[n] = 0;
+ scm_puts(r, port);
}
- n = utf8_wctomb(r, elstr->str[i]);
- if (n == -1)
- continue;
- r[n] = 0;
- scm_puts(r, port);
+ if (elstr->sylmap[i].flags & CHF_ACCENT_MASK)
+ scm_puts("]", port);
}
- if (j == an)
- scm_puts("]", port);
} else {
scm_puts("[NS] ``", port);
for (i = j = 0; i < elstr->len; i++) {
@@ -238,7 +261,7 @@ force_elstr(struct elstr **ep, SCM scm, int sylopt,
SCM_ASSERT(scm_is_string(scm), scm, arg, func_name);
str = scm_to_locale_string(scm);
- newscm = _elstr_alloc(str, sylopt);
+ newscm = _elstr_alloc(str, sylopt, func_name);
free(str);
if (newscm == SCM_EOL)
scm_misc_error(func_name,
@@ -336,13 +359,10 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable_prop, "elstr-syllable-prop",
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
- if (num == 0)
- start = 0;
- else
- start = elstr->sylmap[num - 1] + 1;
- return scm_cons(scm_from_uint(start),
- scm_from_uint(elstr->sylmap[num]));
+ return scm_list_3(scm_from_uint(elstr->sylmap[num].char_start),
+ scm_from_uint(elstr->sylmap[num].char_count),
+ scm_from_int(elstr->sylmap[num].flags));
}
#undef FUNC_NAME
@@ -388,12 +408,8 @@ SCM_DEFINE_PUBLIC(scm_elstr_syllable, "elstr-syllable",
"cannot get syllable #~S: not enough syllables: ~S",
scm_list_2(el, n));
num = elstr->nsyl - num;
- if (num == 0)
- start = 0;
- else
- start = elstr->sylmap[num - 1] + 1;
- if (utf8_wc_to_mbstr(elstr->str + start,
- elstr->sylmap[num] - start + 1,
+ if (utf8_wc_to_mbstr(elstr->str + elstr->sylmap[num].char_start,
+ elstr->sylmap[num].char_count,
&s))
scm_misc_error(FUNC_NAME,
"cannot convert elstr to Scheme",
@@ -514,8 +530,7 @@ _elstr_deaccent(SCM el, int destructive, const char *func_name)
}
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
- elstr->acc_pos = 0;
- elstr->acc_syl = 0;
+ invalidate_maps(elstr);
return scm;
}
@@ -544,9 +559,10 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
{
struct elstr *elstr;
unsigned i;
- unsigned acc_num, num, len, start;
+ unsigned acc_num, num, start;
SCM scm;
- int dstate;
+ unsigned pos;
+ struct phoneme *phoneme = NULL;
if (destructive) {
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
@@ -556,15 +572,11 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
num = scm_to_uint(n);
- if (num > elstr->nsyl)
+ if (num == 0 | num > elstr->nsyl)
scm_misc_error(func_name,
"cannot set accent on syllable #~S: not enough syllables: ~S",
scm_list_2(n, el));
acc_num = elstr->nsyl - num;
- if (acc_num == 0)
- start = 0;
- else
- start = elstr->sylmap[acc_num - 1] + 1;
if (destructive)
scm = SCM_UNSPECIFIED;
@@ -576,25 +588,38 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name)
/* Clear all accents */
for (i = 0; i < elstr->len; i++)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
- len = elstr->sylmap[acc_num] - start + 1;
- dstate = 0;
- for (i = start; i <= start + len; i++) {
- int nstate;
-
- if (!elchr_isvowel(elstr->str[i])) {
- if (dstate) {
- --i;
- break;
- }
- continue;
- }
- nstate = elchr_diphthong(elstr->str[i], dstate);
- if (!nstate)
+ for (i = 0; i < elstr->nsyl; i++)
+ elstr->sylmap[i].flags &= ~CHF_ACCENT_MASK;
+ for (i = 0; i < elstr->phoneme_count; i++)
+ elstr->phoneme[i].flags &= ~CHF_ACCENT_MASK;
+
+ start = elstr->sylmap[acc_num].phoneme_start;
+ pos = 0;
+ for (i = 0; i < elstr->sylmap[acc_num].phoneme_count; i++) {
+ struct phoneme *ph = elstr->phoneme + start + i;
+ if (ph->flags & CHF_CONSONANT)
+ /* skip */ ;
+ else if (ph->flags & CHF_DIPHTHONG) {
+ phoneme = ph;
+ pos = ph->start + 1;
+ break;
+ } else if (ph->flags & CHF_VOWEL) {
+ phoneme = ph;
+ pos = ph->start;
break;
- dstate = nstate;
+ }
}
- elstr->str[i] = elchr_accent(elstr->str[i], CHF_OXEIA);
+ if (!phoneme)
+ scm_misc_error(func_name,
+ "cannot set accent on syllable #~S of ~S: "
+ "INTERNAL ERROR",
+ scm_list_2(n, el));
+ phoneme->flags |= CHF_OXEIA;
+ elstr->sylmap[acc_num].flags |= CHF_OXEIA;
+ elstr->str[pos] = elchr_accent(elstr->str[pos], CHF_OXEIA);
+
elstr->acc_syl = num;
+ elstr->acc_pos = pos;
return scm;
}
@@ -652,7 +677,8 @@ _elstr_set_accent_on_char(SCM el, SCM n, int destructive, const char *func_name)
elstr->str[i] = elchr_deaccent(elstr->str[i]);
elstr->str[num] = elchr_accent(elstr->str[num], CHF_OXEIA);
- _elstr_syllabize(elstr);
+ invalidate_maps(elstr);
+ _elstr_syllabize(elstr, func_name);
return scm;
}
@@ -716,11 +742,31 @@ static struct deftab {
{ CHF_LOWER, "elmorph:lower" },
{ CHF_UPPER, "elmorph:upper" },
{ CHF_NUMERIC, "elmorph:numeric" },
-
- { CHF_DIPH1, "elmorph:diph1" },
- { CHF_DIPH2, "elmorph:diph2" }
+ { CHF_DIPHTHONG, "elmorph:diphthong" },
};
-
+
+SCM_DEFINE_PUBLIC(scm_elstr_char_phoneme, "elstr-char-phoneme",
+		  2, 0, 0,
+		  (SCM el, SCM n),
+"Returns a phoneme code of the Nth char in EL\n")
+#define FUNC_NAME s_scm_elstr_char_phoneme
+{
+	struct elstr *elstr;
+	int num;
+
+	/* EL may be a string or an elstr; no syllabification is needed. */
+	force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME);
+	SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, FUNC_NAME);
+	num = scm_to_int(n);
+	/* A negative N is taken relative to the end of the string. */
+	if (num < 0)
+		num += (int) elstr->len;
+	/* Check the sign explicitly rather than relying on the implicit
+	   int -> size_t conversion to reject still-negative indices. */
+	if (num < 0 || (size_t) num >= elstr->len)
+		/* Argument order follows the format string:
+		   character number first, then the word. */
+		scm_misc_error(FUNC_NAME,
+			       "cannot get character #~S: not enough characters: ~S",
+			       scm_list_2(n, el));
+	return scm_from_uint(elchr_phoneme(elstr->str[num]));
+}
+#undef FUNC_NAME
+
SCM_DEFINE_PUBLIC(scm_utf8_toupper, "utf8-toupper", 1, 0, 0,
(SCM string),
"Convert STRING to uppercase\n")
@@ -818,7 +864,8 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name);
elstr = (struct elstr*) SCM_CDR(el);
} else
- scm = force_elstr(&elstr, el, 1, SCM_ARG1, func_name);
+ scm = force_elstr(&elstr, el, 0, SCM_ARG1, func_name);
+ invalidate_maps(elstr);
SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name);
SCM_ASSERT(scm_is_integer(l), l, SCM_ARG3, func_name);
num = scm_to_int(n);
@@ -842,7 +889,7 @@ _elstr_slice(SCM el, SCM n, SCM l, int destructive, const char *func_name)
memmove(elstr->str, elstr->str + num,
sizeof(elstr->str[0]) * len);
elstr->len = len;
- _elstr_syllabize(elstr);
+ _elstr_syllabize(elstr, func_name);
return scm;
}
@@ -869,7 +916,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_slice_x, "elstr-slice!",
SCM_DEFINE_PUBLIC(scm_elstr_index, "elstr-index",
2, 0, 0,
(SCM word, SCM needle),
-"")
+"Returns position of NEEDLE in the WORD")
#define FUNC_NAME s_scm_elstr_index
{
struct elstr *elstr, *ep;
@@ -920,10 +967,10 @@ SCM_DEFINE_PUBLIC(scm_elstr_suffix_p, "elstr-suffix?",
SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
0, 0, 1,
(SCM rest),
-"")
+"Concatenates arguments.\n")
#define FUNC_NAME s_scm_elstr_append
{
- SCM ret = _elstr_alloc("", 0);
+ SCM ret = _elstr_alloc("", 0, FUNC_NAME);
struct elstr *elstr = (struct elstr*) SCM_CDR(ret);
for (; !scm_is_null(rest); rest = SCM_CDR(rest)) {
@@ -933,11 +980,48 @@ SCM_DEFINE_PUBLIC(scm_elstr_append, "elstr-append",
force_elstr(&elt, val, 0, SCM_ARGn, FUNC_NAME);
_elstr_concat(elstr, elt, FUNC_NAME);
}
- _elstr_syllabize(elstr);
+ _elstr_syllabize(elstr, FUNC_NAME);
return ret;
}
#undef FUNC_NAME
+
+/* Convert PHONEME to the Scheme list (CODE START COUNT FLAGS), the layout
+   read by the phoneme:code, phoneme:start, phoneme:count and phoneme:flags
+   accessors in elmorph-public.scm. */
+static SCM
+elmorph_scm_from_phoneme(struct phoneme *phoneme)
+{
+	return scm_list_4(scm_from_int(phoneme->code),
+			  scm_from_uint(phoneme->start),
+			  scm_from_uint(phoneme->count),
+			  /* flags is a CHF_* bit mask which the Scheme
+			     side tests with logand, so it must be passed
+			     as an integer, not collapsed to a boolean. */
+			  scm_from_int(phoneme->flags));
+}
+SCM_DEFINE_PUBLIC(scm_elstr__phonetic_map, "elstr->phonetic-map",
+		  1, 0, 0,
+		  (SCM word),
+"Converts WORD to a phonetic map.\n")
+#define FUNC_NAME s_scm_elstr__phonetic_map
+{
+	struct elstr *elstr;
+	struct phoneme *phmap;
+	size_t phlen, i;
+	SCM head = SCM_EOL, tail = SCM_EOL;
+
+	/* Syllabification is requested (third argument 1) so that the
+	   phonetic map of WORD gets built. */
+	force_elstr(&elstr, word, 1, SCM_ARG1, FUNC_NAME);
+	phmap = elstr->phoneme;
+	phlen = elstr->phoneme_count;
+	/* Append one (CODE START COUNT FLAGS) list per phoneme, keeping
+	   the original order. */
+	for (i = 0; i < phlen; i++) {
+		SCM elt = scm_cons(elmorph_scm_from_phoneme(phmap + i),
+				   SCM_EOL);
+		if (scm_is_null(head))
+			head = tail = elt;
+		else {
+			SCM_SETCDR(tail, elt);
+			tail = elt;
+		}
+	}
+	/* phmap aliases elstr->phoneme, which remains owned by the elstr
+	   and is released by _elstr_free or on the next re-syllabification.
+	   Freeing it here would leave a dangling pointer and cause a double
+	   free later, so it is deliberately left alone. */
+	return head;
+}
+#undef FUNC_NAME
void
scm_init_ellinika_elmorph_module()
diff --git a/src/ellinika/elmorph.h b/src/ellinika/elmorph.h
index eacbde5..2399b8a 100644
--- a/src/ellinika/elmorph.h
+++ b/src/ellinika/elmorph.h
@@ -33,10 +33,82 @@
#define CHF_UPPER 0x02000
#define CHF_NUMERIC 0x04000
-#define CHF_DIPH1 0x10000
-#define CHF_DIPH2 0x20000
+#define CHF_DIPHTHONG 0x08000
+/* Phonemes */
+#define PHON_A 1 /* α */
+#define PHON_E 2 /* ε αι */
+#define PHON_I 3 /* ι η υ ει οι υι */
+#define PHON_O 4 /* ο ω */
+#define PHON_U 5 /* ου */
+
+#define PHON_BH 6 /* β */
+#define PHON_GH 7 /* γ */
+#define PHON_DH 8 /* δ */
+#define PHON_Z 9 /* ζ */
+#define PHON_TH 10 /* θ */
+#define PHON_K 11 /* κ */
+#define PHON_L 12 /* λ */
+#define PHON_M 13 /* μ */
+#define PHON_N 14 /* ν */
+#define PHON_X 15 /* ξ */
+#define PHON_P 16 /* π */
+#define PHON_R 17 /* ρ */
+#define PHON_S 18 /* σ */
+#define PHON_T 19 /* τ */
+#define PHON_F 20 /* φ */
+#define PHON_H 21 /* χ */
+#define PHON_PS 22 /* ψ */
+
+/* Consonant digraphs that are pronounced as a single sound */
+#define PHON_B 23 /* μπ */
+#define PHON_D 24 /* ντ */
+#define PHON_G 25 /* γγ γκ γχ */
+#define PHON_ZM 26 /* σμ */
+#define PHON_TS 27 /* τσ */
+#define PHON_DZ 28 /* τζ */
+#define PHON_NGZ 29 /* γξ (LETTER_G LETTER_KS in phoneme.y) */
+
+#define PHON_AV 30 /* αυ */
+#define PHON_EV 31 /* ευ */
+
+#define _PHON_MAX 32
+
+/* One phoneme of a word: `count' consecutive characters, beginning at
+   index `start' of the word, that are pronounced as the sound `code'. */
+struct phoneme {
+ int code; /* Phoneme code */
+ unsigned start; /* Start of phoneme */
+ unsigned count; /* Number of characters in it */
+ int flags; /* CHF_ bits: vowel/consonant class and accent taken
+ from the character table, plus CHF_DIPHTHONG for
+ two-letter phonemes (see phoneme.y) */
+};
+
+/* One syllable of a word, described both as a character range and as a
+   range of entries in the word's phoneme map. */
+struct syllable {
+ unsigned char_start; /* Start of syllable */
+ unsigned char_count; /* Number of characters in it */
+ unsigned phoneme_start; /* Index of its first phoneme */
+ unsigned phoneme_count; /* Number of phonemes in it */
+ int flags; /* Accent bits; syllable_map() masks everything
+ else out (SYL_FLAG_MASK) */
+};
+
+/* Properties of a single character, as returned by elchr_info(). */
+struct char_info_st {
+ unsigned ch; /* Characters */
+ int flags; /* Flags (see above) */
+ unsigned base; /* for vowels - a corresponding vowel with
+ all diacritics removed */
+ unsigned trans; /* a counter-case equivalent, i.e. a
+ corresponding uppercase letter if
+ flags & CHF_LOWER and a corresponding
+ lowercase letter if flags & CHF_UPPER */
+ unsigned numval; /* Numeric value */
+ unsigned accented[3]; /* For vowels - corresponding accented variant */
+ unsigned deaccent; /* For accented vowels with diaeresis -
+ corresponding non-accented character */
+ int letter; /* Letter code; used as the token number by
+ yylex() in phoneme.y, which skips characters
+ whose letter code is 0 */
+ int phoneme; /* Phoneme code (presumably one of PHON_ --
+ TODO confirm against the table in elchr.c) */
+};
+
+struct char_info_st const *elchr_info(unsigned ch);
int elchr_flags(unsigned ch);
+int elchr_letter(unsigned ch);
+int elchr_phoneme(unsigned ch);
int elchr_isupper(unsigned ch);
int elchr_islower(unsigned ch);
int elchr_getaccent(unsigned ch);
@@ -55,8 +127,12 @@ unsigned elchr_tolower(unsigned ch);
unsigned elchr_base(unsigned ch);
unsigned elchr_deaccent(unsigned ch);
unsigned elchr_accent(unsigned ch, int acc);
-int elchr_diphthong(unsigned ch, int state);
int elmorph_thema_aoristoy(unsigned *word, size_t len,
unsigned **thema, size_t *tlen);
+
+int phoneme_map(struct phoneme **pph, size_t *plen,
+ unsigned *word, size_t len);
+int syllable_map(struct syllable **psyl, size_t *plen,
+ struct phoneme *phon, size_t nphon);
diff --git a/src/ellinika/elmorph.scm4 b/src/ellinika/elmorph.scm4
index f916d1c..ede4d50 100644
--- a/src/ellinika/elmorph.scm4
+++ b/src/ellinika/elmorph.scm4
@@ -20,27 +20,4 @@
"LIBDIR/libguile-elmorph-v-VERSION"
"scm_init_ellinika_elmorph_module")
-(define-public (elstr-trim word n)
- (let ((word (if (string? word)
- (string->elstr word)
- word)))
- (cond
- ((> n 0)
- (elstr-slice word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice word 0 (+ (elstr-length word) n)))
- (else
- word))))
-
-(define-public (elstr-trim! word n)
- (let ((word (if (string? word)
- (string->elstr word)
- word)))
- (cond
- ((> n 0)
- (elstr-slice! word n (- (elstr-length word) n)))
- ((< n 0)
- (elstr-slice! word 0 (+ (elstr-length word) n))))))
-
-
-
+include([elmorph-public.scm])
diff --git a/src/ellinika/phoneme.y b/src/ellinika/phoneme.y
new file mode 100644
index 0000000..353d175
--- a/dev/null
+++ b/src/ellinika/phoneme.y
@@ -0,0 +1,353 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utf8.h"
+#include "elmorph.h"
+
+static struct phoneme *phoneme_base;
+static size_t phoneme_max;
+static size_t phoneme_count;
+static int error_state;
+
+#define PHONEME_MAP_INITIAL_ALLOC 16
+
+/* Append a copy of PHONEME to the phoneme map being built, growing the
+   array geometrically when it is full.
+
+   Failures are not reported directly: the function records ENOMEM in
+   error_state and turns into a no-op for all subsequent calls, so that
+   phoneme_map() can check the state once, after parsing. */
+static void
+phoneme_append(struct phoneme *phoneme)
+{
+	if (error_state)
+		return;
+
+	if (phoneme_max == phoneme_count) {
+		struct phoneme *np;
+		size_t nsize;
+
+		if (!phoneme_max)
+			nsize = PHONEME_MAP_INITIAL_ALLOC;
+		else {
+			/* Refuse to grow if either the doubled element count
+			   or the resulting byte count would overflow size_t.
+			   Checking only 2 * phoneme_max is not enough,
+			   because the multiplication by the element size
+			   below wraps around much earlier. */
+			if (phoneme_max >
+			    (size_t)-1 / 2 / sizeof(phoneme_base[0])) {
+				error_state = ENOMEM;
+				return;
+			}
+			nsize = 2 * phoneme_max;
+		}
+		np = realloc(phoneme_base, nsize * sizeof(phoneme_base[0]));
+		if (!np) {
+			/* phoneme_base is still valid here; phoneme_map()
+			   frees it when it sees error_state. */
+			error_state = ENOMEM;
+			return;
+		}
+		phoneme_max = nsize;
+		phoneme_base = np;
+	}
+	phoneme_base[phoneme_count++] = *phoneme;
+}
+
+/* Turn phoneme A (the first letter of a two-letter combination A B) into
+   the phoneme for the whole combination: it spans two characters, gets
+   the phoneme code PC, and its flags become FL | CHF_DIPHTHONG plus the
+   accent bits of either letter.  B is only read.  Every macro argument
+   is parenthesized so that arbitrary lvalue expressions can be passed. */
+#define DIPHTHONG(a,b,pc,fl) do {					\
+		(a).count = 2;						\
+		(a).code = (pc);					\
+		(a).flags = (fl) | CHF_DIPHTHONG |			\
+			(((a).flags | (b).flags) & CHF_ACCENT_MASK);	\
+	} while (0)
+
+%}
+%union {
+ struct phoneme phoneme;
+};
+
+%token <phoneme> LETTER_A 1
+%token <phoneme> LETTER_A_ACC 2
+%token <phoneme> LETTER_B 3
+%token <phoneme> LETTER_G 4
+%token <phoneme> LETTER_D 5
+%token <phoneme> LETTER_E 6
+%token <phoneme> LETTER_E_ACC 7
+%token <phoneme> LETTER_Z 8
+%token <phoneme> LETTER_H 9
+%token <phoneme> LETTER_H_ACC 10
+%token <phoneme> LETTER_TH 11
+%token <phoneme> LETTER_I 12
+%token <phoneme> LETTER_I_ACC 13
+%token <phoneme> LETTER_I_TREMA 14
+%token <phoneme> LETTER_I_TREMA_ACC 15
+%token <phoneme> LETTER_K 16
+%token <phoneme> LETTER_L 17
+%token <phoneme> LETTER_M 18
+%token <phoneme> LETTER_N 19
+%token <phoneme> LETTER_KS 20
+%token <phoneme> LETTER_OMICRON 21
+%token <phoneme> LETTER_OMICRON_ACC 22
+%token <phoneme> LETTER_P 23
+%token <phoneme> LETTER_R 24
+%token <phoneme> LETTER_S 25
+%token <phoneme> LETTER_T 26
+%token <phoneme> LETTER_Y 27
+%token <phoneme> LETTER_Y_ACC 28
+%token <phoneme> LETTER_Y_TREMA 29
+%token <phoneme> LETTER_Y_TREMA_ACC 30
+%token <phoneme> LETTER_F 31
+%token <phoneme> LETTER_X 32
+%token <phoneme> LETTER_PS 33
+%token <phoneme> LETTER_OMEGA 34
+%token <phoneme> LETTER_OMEGA_ACC 35
+
+%type <phoneme> monophthong diphthong phoneme
+
+%%
+/* A word is a non-empty sequence of phonemes.  Each phoneme is appended
+   to the output map as soon as it is reduced, i.e. in input order. */
+input : phoneme
+ {
+ phoneme_append(&$1);
+ }
+ | input phoneme
+ {
+ phoneme_append(&$2);
+ }
+ ;
+
+/* A phoneme is either a single letter or a two-letter combination. */
+phoneme : monophthong
+ | diphthong
+ ;
+
+/* A single letter standing for itself.  No action is given, so the
+   default one ($$ = $1) passes on the phoneme filled in by yylex(). */
+monophthong:
+ LETTER_A
+ | LETTER_A_ACC
+ | LETTER_B
+ | LETTER_G
+ | LETTER_D
+ | LETTER_E
+ | LETTER_E_ACC
+ | LETTER_Z
+ | LETTER_H
+ | LETTER_H_ACC
+ | LETTER_TH
+ | LETTER_I
+ | LETTER_I_ACC
+ | LETTER_I_TREMA
+ | LETTER_I_TREMA_ACC
+ | LETTER_K
+ | LETTER_L
+ | LETTER_M
+ | LETTER_N
+ | LETTER_KS
+ | LETTER_OMICRON
+ | LETTER_OMICRON_ACC
+ | LETTER_P
+ | LETTER_R
+ | LETTER_S
+ | LETTER_T
+ | LETTER_Y
+ | LETTER_Y_ACC
+ | LETTER_Y_TREMA
+ | LETTER_Y_TREMA_ACC
+ | LETTER_F
+ | LETTER_X
+ | LETTER_PS
+ | LETTER_OMEGA
+ | LETTER_OMEGA_ACC
+ ;
+
+/* Two-letter combinations that form one phoneme.  In every alternative
+   DIPHTHONG() rewrites $1 in place (count 2, new code, merged accent
+   bits) and the result becomes the value of the rule.  Only unaccented
+   first letters take part; the second vowel may be accented. */
+diphthong:
+ /* Vowel digraphs: marked CHF_VOWEL */
+ LETTER_A LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_A LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_E, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_Y LETTER_I
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_Y LETTER_I_ACC
+ {
+ DIPHTHONG($1, $2, PHON_I, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+ $$ = $1;
+ }
+ | LETTER_OMICRON LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_U, CHF_VOWEL);
+ $$ = $1;
+ }
+ /* Consonant digraphs: marked CHF_CONSONANT */
+ | LETTER_M LETTER_P
+ {
+ DIPHTHONG($1, $2, PHON_B, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_N LETTER_T
+ {
+ DIPHTHONG($1, $2, PHON_D, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_G
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_K
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_X
+ {
+ DIPHTHONG($1, $2, PHON_G, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_S LETTER_M
+ {
+ DIPHTHONG($1, $2, PHON_ZM, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_T LETTER_S
+ {
+ DIPHTHONG($1, $2, PHON_TS, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_T LETTER_Z
+ {
+ DIPHTHONG($1, $2, PHON_DZ, CHF_CONSONANT);
+ $$ = $1;
+ }
+ | LETTER_G LETTER_KS
+ {
+ DIPHTHONG($1, $2, PHON_NGZ, CHF_CONSONANT);
+ $$ = $1;
+ }
+ /* Vowel + upsilon combinations (PHON_AV, PHON_EV): passed with no
+ class flag, so they carry neither CHF_VOWEL nor CHF_CONSONANT */
+ | LETTER_A LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_AV, 0);
+ $$ = $1;
+ }
+ | LETTER_A LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_AV, 0);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_Y
+ {
+ DIPHTHONG($1, $2, PHON_EV, 0);
+ $$ = $1;
+ }
+ | LETTER_E LETTER_Y_ACC
+ {
+ DIPHTHONG($1, $2, PHON_EV, 0);
+ $$ = $1;
+ }
+ ;
+
+%%
+
+static unsigned *input_base;
+static size_t input_len;
+static size_t input_pos;
+
+/* True if CI (a possibly NULL character info pointer) describes a letter,
+   i.e. has a non-zero letter code.  The argument is parenthesized so
+   that any pointer expression can be passed safely. */
+#define ISALPHA(ci) ((ci) && (ci)->letter)
+
+#define PHONEME_FLAG_MASK \
+ (CHF_ACCENT_MASK|CHF_VOWEL|CHF_CONSONANT)
+
+/* Lexer for the phoneme grammar.
+
+   Scans the input word from the current position, silently skipping
+   every character that is not a letter, and returns the letter code of
+   the next letter as the token number.  yylval.phoneme is filled with a
+   one-character phoneme describing that letter.  Returns 0 (end of
+   input) when the word is exhausted. */
+int
+yylex()
+{
+	struct char_info_st const *info;
+	size_t pos;
+
+	for (;;) {
+		if (input_pos == input_len)
+			return 0;
+		pos = input_pos++;
+		info = elchr_info(input_base[pos]);
+		if (ISALPHA(info))
+			break;
+	}
+
+	/* POS is the index of the letter just consumed */
+	yylval.phoneme.code = info->phoneme;
+	yylval.phoneme.start = pos;
+	yylval.phoneme.count = 1;
+	yylval.phoneme.flags = info->flags & PHONEME_FLAG_MASK;
+	return info->letter;
+}
+
+int
+yyerror(const char *s)
+{
+ fprintf("\n%s:%d: INTERNAL ERROR: %s\n", __FILE__, __LINE__, s);
+ abort();
+}
+
+/* Build the phoneme map of WORD (LEN characters).
+
+   On success returns 0 and stores a malloc'ed array of phonemes in *PPH
+   and its length in *PLEN; the caller owns the array and must free it.
+   On failure returns an errno-style code (also stored in errno) and
+   leaves *PPH and *PLEN untouched: EINVAL if the parser failed, or the
+   error recorded while collecting phonemes (ENOMEM).
+
+   The function keeps its state in file-scope variables and is therefore
+   not reentrant. */
+int
+phoneme_map(struct phoneme **pph, size_t *plen, unsigned *word, size_t len)
+{
+	int rc;
+
+	input_base = word;
+	input_len = len;
+	input_pos = 0;
+	phoneme_base = NULL;
+	phoneme_max = 0;
+	phoneme_count = 0;
+	error_state = 0;
+	rc = yyparse();
+	if (rc || error_state) {
+		free(phoneme_base);
+		/* Do not keep a dangling pointer in the static state */
+		phoneme_base = NULL;
+		errno = rc ? EINVAL : error_state;
+		return errno;
+	}
+	/* Give back the unused tail of the array.  This is only an
+	   optimization: if realloc fails, the original (larger) block is
+	   still valid and is returned as is, instead of being leaked and
+	   replaced by NULL. */
+	if (phoneme_count > 0 && phoneme_count < phoneme_max) {
+		struct phoneme *np =
+			realloc(phoneme_base,
+				phoneme_count * sizeof(phoneme_base[0]));
+		if (np)
+			phoneme_base = np;
+	}
+	*pph = phoneme_base;
+	*plen = phoneme_count;
+	return 0;
+}
diff --git a/src/ellinika/syllabificator.c b/src/ellinika/syllabificator.c
new file mode 100644
index 0000000..c4105ec
--- a/dev/null
+++ b/src/ellinika/syllabificator.c
@@ -0,0 +1,152 @@
+/* This file is part of Ellinika project.
+ Copyright (C) 2011 Sergey Poznyakoff
+
+ Ellinika is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Ellinika is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+#include "utf8.h"
+#include "elmorph.h"
+
+/* Working state of one syllabification run (see syllable_map). */
+struct syllabificator {
+ struct syllable *syl; /* Syllables produced so far (realloc'ed) */
+ size_t syl_count; /* Number of syllables stored in syl */
+ size_t syl_max; /* Number of syllables syl has room for */
+ struct phoneme *phon; /* Input phoneme map */
+ size_t phon_cur; /* Index of the next phoneme to consume */
+ size_t phon_max; /* Number of phonemes in phon */
+ int err; /* Non-zero error code if allocation failed */
+};
+
+/* Flags that are kept in a finished syllable; syllable_map() clears all
+   other bits. */
+#define SYL_FLAG_MASK (CHF_ACCENT_MASK)
+
+/* True if phoneme PH is a single character with the sound PHON_I that
+   carries neither an accent nor a trema.  next_syllable() attaches such
+   a phoneme to the neighbouring vowel instead of starting a new
+   syllable at it. */
+#define ISIOTA(ph) \
+ ((ph).code == PHON_I && (ph).count == 1 && \
+ !((ph).flags & (CHF_ACCENT_MASK|CHF_TREMA)))
+
+/* Extract the next syllable from the phoneme map in SP and append it to
+   SP->syl.
+
+   Returns 0 if a syllable was produced.  Returns 1 when there is nothing
+   more to do: either all phonemes have been consumed, or the syllable
+   array could not be grown, in which case SP->err is set to a non-zero
+   error code.
+
+   Every access to SP->phon[SP->phon_cur] is preceded by a check that
+   phon_cur is still below phon_max, so the function never reads past the
+   end of the phoneme map. */
+int
+next_syllable(struct syllabificator *sp)
+{
+	struct syllable *syl;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 1;
+
+	if (sp->syl_count == sp->syl_max) {
+		struct syllable *newsyl;
+		size_t newmax = sp->syl_max + 16;
+
+		newsyl = realloc(sp->syl, sizeof(newsyl[0]) * newmax);
+		if (!newsyl) {
+			/* Make sure the failure is never recorded as 0,
+			   which the caller would take for success. */
+			sp->err = errno ? errno : ENOMEM;
+			return 1;
+		}
+
+		sp->syl = newsyl;
+		sp->syl_max = newmax;
+	}
+
+	/* Start the syllable with the current phoneme */
+	syl = sp->syl + sp->syl_count++;
+	syl->char_start = sp->phon[sp->phon_cur].start;
+	syl->char_count = sp->phon[sp->phon_cur].count;
+	syl->phoneme_start = sp->phon_cur;
+	syl->phoneme_count = 1;
+	syl->flags = sp->phon[sp->phon_cur].flags;
+
+	sp->phon_cur++;
+
+	/* A diphthong forms a single syllable. */
+	if ((syl->flags & CHF_DIPHTHONG) && !(syl->flags & CHF_CONSONANT))
+		return 0;
+
+	/* If the syllable begins with a consonant, it includes all
+	   subsequent consonants up to the first vowel. */
+	if (syl->flags & CHF_CONSONANT) {
+		for (; sp->phon_cur < sp->phon_max &&
+			     (sp->phon[sp->phon_cur].flags & CHF_CONSONANT);
+		     sp->phon_cur++) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+		}
+	} else if (sp->phon_cur < sp->phon_max &&
+		   (sp->phon[sp->phon_cur].flags & CHF_VOWEL) &&
+		   !ISIOTA(sp->phon[sp->phon_cur-1]))
+		/* V-V boundary */
+		return 0;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (ISIOTA(sp->phon[sp->phon_cur])) {
+		/* incorporate iota */
+		syl->char_count += sp->phon[sp->phon_cur].count;
+		syl->phoneme_count++;
+		sp->phon_cur++;
+		/* The iota may have been the last phoneme of the word */
+		if (sp->phon_cur == sp->phon_max)
+			return 0;
+	}
+
+	/* Take the next phoneme; if it is a vowel, the syllable inherits
+	   its accent. */
+	if (sp->phon[sp->phon_cur].flags & CHF_VOWEL)
+		syl->flags |= sp->phon[sp->phon_cur].flags & CHF_ACCENT_MASK;
+
+	syl->char_count += sp->phon[sp->phon_cur].count;
+	syl->phoneme_count++;
+	sp->phon_cur++;
+
+	if (sp->phon_cur == sp->phon_max)
+		return 0;
+
+	if (sp->phon[sp->phon_cur - 1].flags & CHF_VOWEL) {
+		/* If next phoneme is a consonant, incorporate it into the
+		   current syllable, provided that it is the last phoneme
+		   or is itself followed by a consonant. */
+		if ((sp->phon[sp->phon_cur].flags & CHF_CONSONANT) &&
+		    (sp->phon_cur + 1 == sp->phon_max ||
+		     (sp->phon[sp->phon_cur + 1].flags & CHF_CONSONANT))) {
+			syl->char_count += sp->phon[sp->phon_cur].count;
+			syl->phoneme_count++;
+			sp->phon_cur++;
+		}
+	}
+
+	return 0;
+}
+
+
+/* Split the phoneme map PHON (NPHON entries) into syllables.
+
+   On success returns 0 and stores the malloc'ed syllable array in *PSYL
+   and its length in *PLEN (a NULL array and 0 if no syllable was
+   produced).  On failure returns the error code recorded by
+   next_syllable() and leaves *PSYL and *PLEN untouched. */
+int
+syllable_map(struct syllable **psyl, size_t *plen,
+	     struct phoneme *phon, size_t nphon)
+{
+	struct syllabificator state;
+
+	state.phon = phon;
+	state.phon_cur = 0;
+	state.phon_max = nphon;
+	state.syl = NULL;
+	state.syl_count = 0;
+	state.syl_max = 0;
+	state.err = 0;
+
+	for (;;) {
+		if (next_syllable(&state) != 0)
+			break;
+		/* Keep only the accent bits of the syllable just added */
+		state.syl[state.syl_count - 1].flags &= SYL_FLAG_MASK;
+	}
+
+	if (state.err != 0) {
+		free(state.syl);
+		return state.err;
+	}
+
+	*plen = state.syl_count;
+	*psyl = state.syl;
+	return 0;
+}
diff --git a/src/ellinika/tenses.scm b/src/ellinika/tenses.scm
new file mode 100644
index 0000000..f830870
--- a/dev/null
+++ b/src/ellinika/tenses.scm
@@ -0,0 +1,38 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2011 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(define-module (ellinika tenses))
+
+;;; Association list mapping a key string ("ind", "sub" or "imp" --
+;;; presumably the mood abbreviations used elsewhere in the project;
+;;; TODO confirm) to the list of Greek tense names available for it.
+(define-public ellinika-tense-list
+ (list
+ (cons "ind"
+ (list "Ενεστώτας"
+ "Παρατατικός"
+ "Μέλλοντας διαρκείας"
+ "Αόριστος"
+ "Παρακείμενος"
+ "Υπερσυντέλικος"
+ "Συντελεσμένος μέλλοντας"
+ "Μέλλοντας στιγμιαίος"))
+ (cons "sub"
+ (list "Ενεστώτας"
+ "Αόριστος"
+ "Παρακείμενος"))
+ (cons "imp"
+ (list "Ενεστώτας"
+ "Αόριστος"
+ "Παρακείμενος"))))
+ \ No newline at end of file
diff --git a/src/ellinika/xlat.scm b/src/ellinika/xlat.scm
index c51edaa..63af468 100644
--- a/src/ellinika/xlat.scm
+++ b/src/ellinika/xlat.scm
@@ -16,6 +16,8 @@
;;;;
(define-module (ellinika xlat))
+(use-modules (ellinika elmorph))
+
(define greek-postfix-map
(list
(cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ")
@@ -107,7 +109,7 @@
;;; +-----------------------------+
;;;
;;;
-;;; The followin escape sequences are recognized:
+;;; The following escape sequences are recognized:
;;;
;;; '\ks' -> 'ξ'
;;; '\ps' -> 'ψ'
@@ -195,115 +197,8 @@
-(define transcription-list
- (list
- (cons "μπ" "b" )
- (cons "γγ" "g" )
- (cons "γκ" "g" )
- (cons "γχ" "g" )
- (cons "ντ" "d" )
- (cons "αι" "e" )
- (cons "αί" "e" )
- (cons "αυ" "au")
- (cons "αύ" "au")
- (cons "ου" "ou")
- (cons "ού" "ou")
- (cons "ευ" "eu")
- (cons "εύ" "eu")
- (cons "οι" "i" )
- (cons "ει" "i" )
- (cons "εί" "i" )
- (cons "υι" "i" )
-
- (cons "α" "a" )
- (cons "Α" "a" )
- (cons "Ά" "a" )
- (cons "ά" "a" )
- (cons "β" "b" )
- (cons "Β" "b" )
- (cons "γ" "g" )
- (cons "Γ" "g" )
- (cons "δ" "d" )
- (cons "Δ" "d" )
- (cons "ε" "e" )
- (cons "Ε" "e" )
- (cons "Έ" "e" )
- (cons "έ" "e" )
- (cons "ζ" "z" )
- (cons "Ζ" "z" )
- (cons "η" "i" )
- (cons "Η" "i" )
- (cons "Ή" "i" )
- (cons "ή" "i" )
- (cons "θ" "t" )
- (cons "Θ" "t" )
- (cons "ι" "i" )
- (cons "Ι" "i" )
- (cons "Ί" "i" )
- (cons "ί" "i" )
- (cons "κ" "k" )
- (cons "Κ" "k" )
- (cons "λ" "l" )
- (cons "Λ" "l" )
- (cons "μ" "m" )
- (cons "Μ" "m" )
- (cons "ν" "n" )
- (cons "Ν" "n" )
- (cons "ξ" "x" )
- (cons "Ξ" "x" )
- (cons "ο" "o" )
- (cons "Ο" "o" )
- (cons "Ό" "o" )
- (cons "ό" "o" )
- (cons "π" "p" )
- (cons "Π" "p" )
- (cons "ρ" "r" )
- (cons "Ρ" "r" )
- (cons "σ" "s" )
- (cons "Σ" "s" )
- (cons "ς" "s" )
- (cons "τ" "t" )
- (cons "Τ" "t" )
- (cons "υ" "i" )
- (cons "Υ" "i" )
- (cons "Ύ" "i" )
- (cons "ύ" "i" )
- (cons "φ" "f" )
- (cons "Φ" "f" )
- (cons "χ" "h" )
- (cons "Χ" "h" )
- (cons "ψ" "P" )
- (cons "Ψ" "P" )
- (cons "ω" "o" )
- (cons "Ω" "o" )
- (cons "Ώ" "o" )
- (cons "ώ" "o" )
- (cons "Ϊ" "i" )
- (cons "ΐ" "i" )
- (cons "Ϋ" "i" )
- (cons "ΰ" "i" )))
-
(define-public (ellinika:sounds-like str)
- (let ((len (string-length str)))
- (do ((i 0)
- (sl '()))
- ((= i len) (apply string-append (reverse sl)))
- (set! sl (cons
- (cond
- ((and (<= (+ i 4) len)
- (assoc (substring str i (+ i 4)) transcription-list)) =>
- (lambda (x)
- (set! i (+ i 4))
- (cdr x)))
- ((and (<= (+ i 2) len)
- (assoc (substring str i (+ i 2)) transcription-list)) =>
- (lambda (x)
- (set! i (+ i 2))
- (cdr x)))
- (else
- (set! i (1+ i))
- (substring str (- i 1) i)))
- sl)))))
+ (elstr->soundslike str))
;;;; End of ellinika.scm
diff --git a/src/ellinika/yyrename b/src/ellinika/yyrename
new file mode 100755
index 0000000..996abf2
--- a/dev/null
+++ b/src/ellinika/yyrename
@@ -0,0 +1,97 @@
+#! /bin/sh
+# Rename yy.* identifiers to avoid name clashes. This file is part of Grecs.
+# Copyright (C) 2011 Sergey Poznyakoff
+#
+# Grecs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# Grecs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Grecs. If not, see <http://www.gnu.org/licenses/>.
+
+# Usage: yyrename [-f "OUTFILE [OUTFILE...]"] COMMAND INFILE
+# Makefile.am:
+# LEXCOMPILE = yyrename -f $(LEX_OUTPUT_ROOT).c \
+# '$(LEX) $(LFLAGS) $(AM_LFLAGS)'
+# YACCCOMPILE = yyrename '$(YACC) $(YFLAGS) $(AM_YFLAGS)'
+#
+# This script runs COMMAND with INFILE as its argument and scans
+# OUTFILEs for identifiers starting with 'yy'. It then renames these
+# identifiers by replacing 'yy' with the selected prefix.
+#
+# The prefix is looked up in the file yytrans, located in the INFILE's
+# directory. If this file does not exist, the prefix is constructed
+# by concatenating the string 'ellinika_' and the ``root name''. The root
+# name is built by removing '-lex.l' or '-gram.y' from the base name.
+# If the latter does not end in any of these, the root name
+# is constructed by removing the suffix from the base name.
+#
+# The yytrans file is a line-oriented lookup table. Empty lines are
+# ignored, usual UNIX comment lines are honored. The remaining lines
+# shall consist of two words separated by any amount of whitespace.
+# The first word is a look-up key, the second one provides a translation
+# (yy replacement) for that key.
+#
+# Two look-ups are tried: first the base name and then the root name.
+# If both result in a non-empty replacement, the former is preferred
+# over the latter.
+#
+# The -f option supplies a list of output file names generated by COMMAND.
+# If not supplied, the following defaults are used: y.tab.c and y.tab.h, if
+# INFILE ends in '.y', and lex.yy.c, if it ends in '.l'. If INFILE does not
+# end in any of these suffixes, error is reported.
+#
+# BUGS: Any occurrence of 'yy' is replaced, not only 'yy' prefixes.
+#
+# Optional leading "-f FILES": explicit list of files generated by COMMAND.
+case "$1" in
+-f) files=$2
+    shift
+    shift
+    ;;
+esac
+
+if test $# -ne 2; then
+    # Single quotes keep the double quotes of the synopsis literal; with
+    # nested double quotes the shell would drop them and glob-expand
+    # the bracketed word.
+    echo >&2 'usage: yyrename [-f "OUTFILE [OUTFILE...]"] COMMAND INFILE'
+    exit 1
+fi
+
+# base: INFILE without directory and without the .l/.y suffix.
+# root: base with a trailing -gram/-lex removed (empty if there is none).
+# NOTE: both expr patterns require a '/' in INFILE.
+base=`expr "$2" : '.*/\(.*\)\.[ly]'`
+dir=`dirname "$2"`
+case "$2" in
+*.y) test -z "$files" && files="y.tab.c y.tab.h"
+     root=`expr "$2" : '.*/\(.*\)-gram\.y'`;;
+*.l) test -z "$files" && files=lex.yy.c
+     root=`expr "$2" : '.*/\(.*\)-lex\.l'`;;
+*)   if test -z "$files"; then
+         echo >&2 "$0: suffix unknown, files must be given (use -f)"
+         exit 1
+     fi
+     root=$base
+     ;;
+esac
+
+# Look the prefix up in the yytrans table next to INFILE: an entry for
+# the base name wins over an entry for the root name.
+if test -f "$dir/yytrans"; then
+    pfx=`awk '
+{ sub(/#.*$/,"") }
+NF == 2 && $1=="'$base'" { exact=$2 }
+NF == 2 && $1=="'$root'" { root=$2 }
+{ next }
+END { print exact ? exact : root ? root : "" }' "$dir/yytrans"`
+else
+    pfx=
+fi
+# No table entry: derive the prefix from the root name, mapping the
+# characters '.' and '-' to '_'.
+if test -z "$pfx"; then
+    pfx=ellinika_`echo $root | tr .- __`
+fi
+
+# Run COMMAND on INFILE; give up with its exit status if it fails.
+eval $* || exit $?
+
+# $files is deliberately left unquoted: it is a blank-separated list.
+for file in $files
+do
+    # Stop at the first failure instead of silently producing a missing
+    # or truncated output file.
+    mv "$file" "${file}.tmp" || exit 1
+    sed "/^#line/b;/^# *[0-9]/b;s/yy/$pfx/g;s/YY/$pfx/g" "${file}.tmp" > "$file" || exit 1
+done
diff --git a/style.css b/style.css
index ae0c13e..b09f8b7 100644
--- a/style.css
+++ b/style.css
@@ -463,3 +463,9 @@ div.warning p {
div.warning a {
color: red;
}
+
+/* Images carrying the ellinika-img class: shown as a block and centred
+   horizontally by the automatic left and right margins. */
+img.ellinika-img {
+ display: block;
+ margin-left: auto;
+ margin-right: auto;
+}
diff --git a/xml/lingua.conf.in b/xml/lingua.conf.in
index 098d063..22570b8 100644
--- a/xml/lingua.conf.in
+++ b/xml/lingua.conf.in
@@ -134,6 +134,15 @@ this notice is preserved.</p>\n"
("ru" . "Русский")
("uk" . "Українська"))))
(display "</ul></li></ul>")))
+
+;; IMAGE tag: build the pieces of an <img class="ellinika-img"> element.
+;; The ALT attribute is optional and defaults to the empty string; the
+;; SRC attribute is appended to the "/graphics/" path.
+;; NOTE(review): SRC is not checked for presence -- confirm what
+;; xmltrans:attr returns for a missing attribute.
+(xmltrans:end-tag
+ "IMAGE"
+ (tag attr text)
+ (let ((alt (or (xmltrans:attr attr "ALT") ""))
+ (src (xmltrans:attr attr "SRC")))
+ (list "<img class=\"ellinika-img\" ALT=\"[ " alt
+ " ]\" src=\"/graphics/" src "\" />")))
+
#f
]]>
</GUILE>
diff --git a/xml/pl/alfabhta.xml b/xml/pl/alfabhta.xml
index 198590f..8a1c1e1 100644
--- a/xml/pl/alfabhta.xml
+++ b/xml/pl/alfabhta.xml
@@ -176,7 +176,211 @@ połączeń omawiają się w następnym rozdziale.
</TABULAR>
</SECTION>
+<SECTION>
+<HEADER>Zapis liczb</HEADER>
+<PARA>Każda litera greckiego alfabetu posiada również znaczenie
+liczbowe. Ten system zapisu liczb nazywany jest <DFN>miletskim</DFN>
+(<DFN>μιλησιακό σύστημα αρίθμησης</DFN>). Używa się go obecnie do
+reprezentacji liczebników porządkowych, numeracji rozdziałów w
+książkach i w sytuacjach analogicznych do stosowania rzymskiego
+zapisu w kulturze zachodniej.</PARA>
+
+<PARA>Zachodni system liczbowy jest systemem pozycyjnym, tzn.
+liczbowe znaczenie cyfry zależy od jej pozycji w liczbie, tak że
+<SAMP>1</SAMP> to jeden, <SAMP>10</SAMP> to dziesięć itd. W odróżnieniu
+od niego grecki system liczbowy jest systemem addytywnym, w którym
+każda litera posiada stałe znaczenie liczbowe, znaczenie zaś liczby
+otrzymuje się przez zsumowanie wartości odpowiadającej każdej literze.
+</PARA>
+<PARA>System ten powstał w IV w. p.n.e., kiedy to alfabet posiadał
+więcej liter niż obecnie. Cztery z tych archaicznych liter są wciąż
+używane dla zapisywania liczb. Są to:
+</PARA>
+
+<TABULAR COLHEADING="std">
+ <ROW>
+ <ITEM>Litera</ITEM>
+ <ITEM>Nazwa</ITEM>
+ </ROW>
+ <ROW>
+ <ITEM>ϝ Ϝ</ITEM>
+ <ITEM>Δίγαμμα</ITEM>
+ </ROW>
+ <ROW>
+ <ITEM>ϛ Ϛ</ITEM>
+ <ITEM>Στίγμα</ITEM>
+ </ROW>
+ <ROW>
+ <ITEM>Ϟ ϟ</ITEM>
+ <ITEM>Κόππα</ITEM>
+ </ROW>
+ <ROW>
+ <ITEM>ϡ Ϡ</ITEM>
+ <ITEM>Σαμπί</ITEM>
+ </ROW>
+</TABULAR>
+
+<PARA>Litera <SAMP>ϛ</SAMP> (<DFN>stigma</DFN>) mimo podobieństwa do
+<SAMP>ς</SAMP> (sigma końcowa) nie jest z nią tożsama.</PARA>
+
+<PARA>Znaczenia liczbowe liter greckiego alfabetu są:</PARA>
+
+<TABULAR COLHEADING="std">
+<ROW>
+ <ITEM>Znak</ITEM>
+ <ITEM>Znaczenie</ITEM>
+
+ <ITEM>Znak</ITEM>
+ <ITEM>Znaczenie</ITEM>
+
+ <ITEM>Znak</ITEM>
+ <ITEM>Znaczenie</ITEM>
+</ROW>
+<ROW>
+ <ITEM>α Α</ITEM>
+ <ITEM>1</ITEM>
+ <ITEM>ι Ι</ITEM>
+ <ITEM>10</ITEM>
+ <ITEM>ρ Ρ</ITEM>
+ <ITEM>100</ITEM>
+</ROW>
+<ROW>
+ <ITEM>β Β</ITEM>
+ <ITEM>2</ITEM>
+ <ITEM>κ Κ</ITEM>
+ <ITEM>20</ITEM>
+ <ITEM>σ Σ</ITEM>
+ <ITEM>200</ITEM>
+</ROW>
+<ROW>
+ <ITEM>γ Γ</ITEM>
+ <ITEM>3</ITEM>
+ <ITEM>λ Λ</ITEM>
+ <ITEM>30</ITEM>
+ <ITEM>τ Τ</ITEM>
+ <ITEM>300</ITEM>
+</ROW>
+<ROW>
+ <ITEM>δ Δ</ITEM>
+ <ITEM>4</ITEM>
+ <ITEM>μ Μ</ITEM>
+ <ITEM>40</ITEM>
+ <ITEM>υ Υ</ITEM>
+ <ITEM>400</ITEM>
+</ROW>
+<ROW>
+ <ITEM>ε Ε</ITEM>
+ <ITEM>5</ITEM>
+ <ITEM>ν Ν</ITEM>
+ <ITEM>50</ITEM>
+ <ITEM>φ Φ</ITEM>
+ <ITEM>500</ITEM>
+</ROW>
+<ROW>
+ <ITEM>ϝ Ϝ, ϛ Ϛ, στ</ITEM>
+ <ITEM>6</ITEM>
+ <ITEM>ξ Ξ</ITEM>
+ <ITEM>60</ITEM>
+ <ITEM>χ Χ</ITEM>
+ <ITEM>600</ITEM>
+</ROW>
+<ROW>
+ <ITEM>ζ Ζ</ITEM>
+ <ITEM>7</ITEM>
+ <ITEM>ο Ο</ITEM>
+ <ITEM>70</ITEM>
+ <ITEM>ψ Ψ</ITEM>
+ <ITEM>700</ITEM>
+</ROW>
+<ROW>
+ <ITEM>η Η</ITEM>
+ <ITEM>8</ITEM>
+ <ITEM>π Π</ITEM>
+ <ITEM>80</ITEM>
+ <ITEM>ω Ω</ITEM>
+ <ITEM>800</ITEM>
+</ROW>
+<ROW>
+ <ITEM>θ Θ</ITEM>
+ <ITEM>9</ITEM>
+ <ITEM>Ϟ ϟ</ITEM>
+ <ITEM>90</ITEM>
+ <ITEM>ϡ Ϡ</ITEM>
+ <ITEM>900</ITEM>
+</ROW>
+</TABULAR>
+
+<PARA>Jak widać litery <SAMP>ϝ</SAMP> oraz <SAMP>ϛ</SAMP> mają
+jednakowe znaczenia liczbowe. Ponadto, zamiast nich może występować
+kombinacja <SAMP>στ</SAMP>.</PARA>
+<PARA>Żeby odróżnić liczbę od słowa lub kombinacji liter, liczba jest
+zawsze zakończona symbolem <SAMP>ʹ</SAMP>,
+nazywanym <SAMP>κεραία</SAMP>.</PARA>
+
+<PARA>Na przykład, liczbę 365 w systemie greckim zapisujemy jak
+następuje: <SAMP>τξεʹ</SAMP>.</PARA>
+
+<PARA>Litery od <SAMP>α</SAMP> do <SAMP>θ</SAMP> poprzedzone
+znakiem <SAMP>͵</SAMP> (<SAMP>αριστερή κεραία</SAMP>) przedstawiają
+znaczenia od 1000 do 9999:</PARA>
+
+<TABULAR COLHEADING="std">
+<ROW>
+ <ITEM>Znak</ITEM>
+ <ITEM>Znaczenie</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵α ͵Α</ITEM>
+ <ITEM>1000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵β ͵Β</ITEM>
+ <ITEM>2000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵γ ͵Γ</ITEM>
+ <ITEM>3000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵δ ͵Δ</ITEM>
+ <ITEM>4000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵ε ͵Ε</ITEM>
+ <ITEM>5000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵ϝ ͵Ϝ, ͵ϛ ͵Ϛ</ITEM>
+ <ITEM>6000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵ζ ͵Ζ</ITEM>
+ <ITEM>7000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵η ͵Η</ITEM>
+ <ITEM>8000</ITEM>
+</ROW>
+<ROW>
+ <ITEM>͵θ ͵Θ</ITEM>
+ <ITEM>9000</ITEM>
+</ROW>
+</TABULAR>
+
+<PARA>Na przykład <SAMP>2011</SAMP>
+zapisujemy jako <SAMP>͵βιαʹ</SAMP>.</PARA>
+
+<PARA>Liczby większe od 9999 zapisywano przy pomocy
+litery <SAMP>Μ</SAMP> oznaczającej <SAMP>μυριάς</SAMP>, czyli 10000.
+Nad tą literą zapisywano liczbę przez którą należało pomnożyć 10000
+aby otrzymać pożądaną liczbę. Na przykład:</PARA>
+
+<IMAGE SRC="arith.png" />
+
+
+
+</SECTION>
</PAGE>

Return to:

Send suggestions and report system problems to the System administrator.