From 2bae7da012e2125762855ce014e63345ecbbbb18 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Tue, 7 Jun 2011 22:15:26 +0300 Subject: Improve conjugator * data/dbverb.struct: Remove individual verb definitions. * data/irregular-verbs.xml: New file. * scm/verbop.scm: New file. * scm/Makefile.am: Add rules for verbop. * scm/conjugator.scm: Various fixes. * src/ellinika/elmorph.c (elstr-accent-position): Fix handling of string arguments. (_elstr_set_accent): Fix error message. (elstr-set-accent-character) (elstr-set-accent-character!): New functions. --- data/dbverb.struct | 101 +------ data/irregular-verbs.xml | 641 ++++++++++++++++++++++++++++++++++++++++++++ scm/.gitignore | 2 +- scm/Makefile.am | 7 +- scm/conjugator.scm | 189 ++++++++----- scm/verbop.scm | 676 +++++++++++++++++++++++++++++++++++++++++++++++ src/ellinika/elmorph.c | 66 ++++- 7 files changed, 1516 insertions(+), 166 deletions(-) create mode 100644 data/irregular-verbs.xml create mode 100644 scm/verbop.scm diff --git a/data/dbverb.struct b/data/dbverb.struct index af9d236..06745c8 100644 --- a/data/dbverb.struct +++ b/data/dbverb.struct @@ -32,7 +32,7 @@ DROP TABLE IF EXISTS conjugation; CREATE TABLE conjugation( conj char(2), -- REL 9 voice enum('act','pas'), -- Ενεργητηκή/Μεσοπαθητική - mode enum('ind','sub','imp'), + mood enum('ind','sub','imp'), tense varchar(128), thema enum('pres','aor','sub','synt'), -- Ενεστώτα, Αόριστου, υποτακτικής, syntethic suffix char(32), @@ -43,7 +43,7 @@ CREATE TABLE conjugation( auxtense char(32), KEY (conj), KEY (voice), - KEY (mode) + KEY (mood) ); DROP TABLE IF EXISTS participle; @@ -243,98 +243,21 @@ CREATE TABLE verb( INDEX(verb) ); -DROP TABLE IF EXISTS irregular_root; -CREATE TABLE irregular_root( - verb varchar(128), - voice enum('act','pas'), -- Ενεργητηκή/Μεσοπαθητική - thema enum('pres','aor','sub'), -- Αόριστος/Υποτακτική - root varchar(128) -); - -INSERT INTO verb VALUES -('βρίσκω', 'A', NULL, '000000', NULL), -('θέλω','A','η',NULL, NULL), -('έχω','A',NULL,'000000', NULL), -('ξέρω','A','η',NULL, NULL), -('κρεμάω','B1',NULL,NULL,"ασ"), -('κρεμώ','B1',NULL,NULL,"ασ"), -('κιτάω','B1',NULL,NULL,"αξ"), -('κιτώ','B1',NULL,NULL,"αξ"), -('τραβάω','B1',NULL,NULL,"ηξ"), -('τραβώ','B1',NULL,NULL,"ηξ"), -('νικώ','B1',NULL,NULL,NULL) -; - -INSERT INTO irregular_root VALUES -('βρίσκω','act','aor','βρήκ'), -('βρίσκω','act','sub','βρ'), -('βρίσκω','pas','aor','βρέθ'), -('βρίσκω','pas','sub','βρέθ'), -('θέλω','act','aor','θέλησ') -; - DROP TABLE IF EXISTS individual_verb; CREATE TABLE individual_verb( verb varchar(128), - voice enum('act','pas'), -- Ενεργητηκή/Μεσοπαθητική - mode enum('ind','sub','imp'), + voice enum('act','pas'), + mood enum('ind','sub','imp'), tense varchar(128), ident int(32), - INDEX(verb,voice,mode,tense) + INDEX(verb,voice,mood,tense) ); -INSERT INTO individual_verb VALUES -("είμαι", "act", "ind", "Ενεστώτας", 100), -("είμαι", "act", "ind", "Παρατατικός", 101), -("είμαι", "act", "ind", "Μέλλοντας διαρκείας", 102), -("είμαι", "act", "ind", "Αόριστος", 0), -("είμαι", "act", "ind", "Παρακείμενος", 0), -("είμαι", "act", "ind", "Υπερσυντέλικος", 0), -("είμαι", "act", "ind", "Συντελεσμένος μέλλοντας", 0), -("είμαι", "act", "ind", "Μέλλοντας στιγμιαίος", 0), -("είμαι", "act", "sub", "Ενεστώτας", 103), -("είμαι", "act", "sub", "Αόριστος", 0), -("είμαι", "act", "sub", "Παρακείμενος", 0), -("είμαι", "act", "imp", "Ενεστώτας", 104), -("είμαι", "act", "imp", "Αόριστος", 0), -("είμαι", "act", "imp", "Παρακείμενος", 0), -("είμαι", "pas", "ind", "Ενεστώτας", 0), -("είμαι", "pas", "ind", "Παρατατικός", 0), -("είμαι", "pas", "ind", "Μέλλοντας διαρκείας", 0), -("είμαι", "pas", "ind", "Αόριστος", 0), -("είμαι", "pas", "ind", "Παρακείμενος", 0), -("είμαι", "pas", "ind", "Υπερσυντέλικος", 0), -("είμαι", "pas", "ind", "Συντελεσμένος μέλλοντας", 0), -("είμαι", "pas", "ind", "Μέλλοντας στιγμιαίος", 0), -("είμαι", "pas", "sub", "Ενεστώτας", 0), -("είμαι", "pas", "sub", "Αόριστος", 0), -("είμαι", "pas", "sub", "Παρακείμενος", 0), -("είμαι", "pas", "imp", "Ενεστώτας", 0), -("είμαι", "pas", "imp", "Αόριστος", 0), -("είμαι", "pas", "imp", "Παρακείμενος", 0), +DROP TABLE IF EXISTS irregular_root; +CREATE TABLE irregular_root( + verb varchar(128), + voice enum('act','pas'), -- Ενεργητηκή/Μεσοπαθητική + thema enum('pres','aor','sub'), -- Αόριστος/Υποτακτική + root varchar(128) +); -("έχω", "act", "ind", "Παρατατικός", 105), -("έχω", "act", "ind", "Αόριστος", 0), -("έχω", "act", "ind", "Παρακείμενος", 0), -("έχω", "act", "ind", "Υπερσυντέλικος", 0), -("έχω", "act", "ind", "Συντελεσμένος μέλλοντας", 0), -("έχω", "act", "ind", "Μέλλοντας στιγμιαίος", 0), -("έχω", "act", "sub", "Αόριστος", 0), -("έχω", "act", "sub", "Παρακείμενος", 0), -("έχω", "act", "imp", "Αόριστος", 0), -("έχω", "act", "imp", "Παρακείμενος", 0), -("έχω", "pas", "ind", "Παρατατικός", 0), -("έχω", "pas", "ind", "Μέλλοντας διαρκείας", 0), -("έχω", "pas", "ind", "Αόριστος", 0), -("έχω", "pas", "ind", "Παρακείμενος", 0), -("έχω", "pas", "ind", "Υπερσυντέλικος", 0), -("έχω", "pas", "ind", "Συντελεσμένος μέλλοντας", 0), -("έχω", "pas", "ind", "Μέλλοντας στιγμιαίος", 0), -("έχω", "pas", "sub", "Ενεστώτας", 0), -("έχω", "pas", "sub", "Αόριστος", 0), -("έχω", "pas", "sub", "Παρακείμενος", 0), -("έχω", "pas", "imp", "Ενεστώτας", 0), -("έχω", "pas", "imp", "Αόριστος", 0), -("έχω", "pas", "imp", "Παρακείμενος", 0), -("βρίσκω", "act", "imp", "Αόριστος", 106) -; diff --git a/data/irregular-verbs.xml b/data/irregular-verbs.xml new file mode 100644 index 0000000..ca2c7d1 --- /dev/null +++ b/data/irregular-verbs.xml @@ -0,0 +1,641 @@ + + + είμαι + I + + + +

είμαι

+

είσαι

+

είναι

+

είμαστε

+

είστε,είσαστε

+

είναι

+
+ +

ήμουν(α)

+

ήσουν(α)

+

ήταν(ε)

+

ήμαστε,ήμασταν

+

ήσαστε,ήσασταν

+

ήταν(ε)

+
+ +

θα είμαι

+

θα είσαι

+

θα είναι

+

θα είμαστε

+

θα είστε,θα είσαστε

+

θα είναι

+
+ + + + + +
+ + +

να είμαι

+

να είσαι

+

να είναι

+

να είμαστε

+

να είστε,θα είσαστε

+

να είναι

+
+ + +
+ + +

να είσαι

+

να είστε

+
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + +
+ + + έχω + A + 000000 + + + +

είχα

+

είχες

+

είχα

+

είχαμε

+

είχατε

+

είχαν

+
+ + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + +
+ + + κρεμώ + B1 + ασ + + + + κιτώ + B1 + αξ + + + + τραβώ + B1 + ηξ + + + + νικώ + B1 + + + + θέλω + A + + θέλησ + + + + + αγγέλω + A + + αγγείλ + + + + + άγω + A + + ήγαγ + αγάγ + + + + + ανεβαίνω + A + + ανέβηκ + ανέβ + + + + + απονέμω + A + + απένειμ + απονείμ + + + + + αρέσω + A + + αρεσ + + + + + βάλλω + A + + βαλ + + + + + βαραίνω + A + + βαρυν + + + + + βαστώ + B2 + ηξ + + + + βγάζω + A + + βγαλ + + + + + βγαίνω + A + 000000 + + βγήκ + βγ + + + + + βλέπω + A + 000000 + + είδ + δ + + +

δες

+

δείτε,δέστε

+
+
+
+
+ + + βρίσκω + A + + βρήκ + βρ + + +

βρες

+

βρείτε,βρέστε

+
+
+
+ + βρέθ + +
+ + + γελώ + B2 + ασ + + + + γέρνω + A + + γειρ + + + + + γίνομαι + A + + γίν + + + + + διαβαίνω + A + + διάβηκ + + +

διαβώ

+

διαβείς

+

διαβεί

+

διαβούμε

+

διαβέτε

+

διαβούν

+
+
+
+
+ + + διδάσκω + A + + διδαξ + + + + + δίνω + A + + δωσ + + + + + + + καίω + A + + καψ + + + + + κάν + A + + + + κλαίω + A + + κλαψ + + + + + λαβαίνω + A + + λαβ + + + + + λέω + A + 000000 + + είπ + π + + +

πες

+

πείτε,πέστε

+
+
+
+
+ + + + + μαθαίνω + A + + μαθ + + + + + μπαίνω + A + 000000 + + μπηκ + μπ + + +

μπες

+

μπείτε

+
+
+
+
+ + + + A + + + + + + + + ξέρω + A + η + + + + παθαίνω + A + + παθ + + + + + περνώ + B1 + ασ + + + + πέφτω + A + + πεσ + + + + + πηγαίνω + A + 000000 + + πήγ + + +

θα πάω

+

θα πας

+

θα πάει

+

θα πάμε

+

θα πάτε

+

θα πάνε

+
+
+ + +

να πάω

+

να πας

+

να πάει

+

να πάμε

+

να πάτε

+

να πάνε

+
+
+ + +

πήγαινε

+

πηγαίνετε

+
+
+
+
+ + + πίνω + A + 000000 + + ήπι + πι + + +

πιες

+

πιείτε,πιέστε

+
+
+
+
+ + + πλέω + A + + πλευσ + + + + + πονώ + A + + πονεσ + + + + + + + σέρνω + A + + συρ + + + + + σπέρνω + A + + σπειρ + + + + + + + στέλνω + A + + στείλ + + + + + + + τείνω + A + + τειν + + + + + τραβώ + B1 + ηξ + + + + τρέφω + A + + θρεψ + + + + + τρώω + A + + φαγ + φα + + + + + τυχαίνω + A + + τυχ + + + + + υπάρχω + A + 000000 + + υπήρξ + υπάρξ + + + + + + + φέυγω + A + + φυγ + + + + + φορώ + B1 + εσ + + + + φταίω + A + + φταιξ + + + + + + + χορταίνω + A + + χορτασ + + + + + ψέλνω + A + + ψαλ + + + +
diff --git a/scm/.gitignore b/scm/.gitignore index d033647..0e10ba2 100644 --- a/scm/.gitignore +++ b/scm/.gitignore @@ -1,4 +1,4 @@ dictrans dictrans.sed neatrans - +verbop diff --git a/scm/Makefile.am b/scm/Makefile.am index f2669f4..481676f 100644 --- a/scm/Makefile.am +++ b/scm/Makefile.am @@ -15,8 +15,8 @@ # along with this program. If not, see . pkgmoddir=@GUILE_SITE@ -bin_SCRIPTS=dictrans neatrans -CLEANFILES=dictrans.sed dictrans +bin_SCRIPTS=dictrans neatrans verbop +CLEANFILES=dictrans.sed dictrans neatrans verbop EXTRA_DIST=dictrans.scm dictrans.sed: Makefile @@ -34,3 +34,6 @@ neatrans: $(srcdir)/neatrans.scm dictrans.sed sed -f dictrans.sed $(srcdir)/neatrans.scm > $@ chmod +x $@ +verbop: $(srcdir)/verbop.scm dictrans.sed + sed -f dictrans.sed $(srcdir)/verbop.scm > $@ + chmod +x $@ diff --git a/scm/conjugator.scm b/scm/conjugator.scm index ceda52a..7b2a4a6 100644 --- a/scm/conjugator.scm +++ b/scm/conjugator.scm @@ -128,15 +128,21 @@ WHERE verb='" (force-string verb) "'" (elstr-append root "θ")) (else #f))) - + +(define (lookup-verb-info verb voice thema) + (my-sql-query + (dict-connect) + (string-append + "SELECT root FROM irregular_root \ +WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'"))) + (define (complement-verb-info vinfo verb voice thema) ; (format #t "COMPLEMENT ~S~%" thema) (let ((elverb (string->elstr verb)) - (result (my-sql-query - (dict-connect) - (string-append - "SELECT root FROM irregular_root \ -WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'")))) + (result (let ((tmpres (lookup-verb-info verb voice thema))) + (if (and (null? tmpres) (string=? thema "sub")) + (lookup-verb-info verb voice "aor") + tmpres)))) (verb-info-set! #:root vinfo (cond ((not (null? result)) @@ -247,7 +253,7 @@ WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'")))) ((conj-info-set! #:particle v val) (list-set! v 3 val)))) -(define (get-conj-info conj voice mode tense) +(define (get-conj-info conj voice mood tense) (let ((conn (dict-connect))) (let ((answer (my-sql-query conn @@ -255,7 +261,7 @@ WHERE verb='" verb "' AND voice='" voice "' AND thema='" thema "'")))) "SELECT c.thema,c.suffix,c.accmap,c.particle,c.aux,c.auxtense,\ f.sing1,f.sing2,f.sing3,f.plur1,f.plur2,f.plur3 \ FROM conjugation c, verbflect f \ -WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mode='" mode +WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mood='" mood "' AND c.tense='" tense "' AND c.flect = f.ident")))) (if (null? answer) #f @@ -271,7 +277,17 @@ WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mode='" mode (string->elstr str) str)) -(define (apply-flect conj vinfo) +(define (accented-syllable-0 str) + (let ((syl (elstr-accented-syllable str)) + (len (elstr-number-of-syllables str))) + (if (= syl 0) + syl + (+ (- len syl) 1)))) + +(define (set-accented-syllable-0! str nsyl) + (elstr-set-accent! str (+ (- (elstr-number-of-syllables str) nsyl) 1))) + +(define (apply-flect conj vinfo verb) (let ((root (verb-info #:root vinfo)) (suffix (let ((s (conj-info #:suffix conj))) (if s @@ -294,20 +310,29 @@ WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mode='" mode ((char=? acc #\0) (let* ((rs (force-elstr root)) (suf (elstr-deaccent (elstr-append suffix flect))) - (result (elstr-append rs suf))) - (cond - ((or (= (elstr-accented-syllable rs) 0) - (> (elstr-number-of-syllables suf) 2)) - (let ((nsyl (elstr-number-of-syllables suf))) - (cond - ((= nsyl 1) - result) - ((= nsyl 3) - (elstr-set-accent result 3)) - (else - (elstr-set-accent result 2))))) - (else - result)))) + (result (elstr-append rs suf)) + (acc-syl (let ((n (accented-syllable-0 rs))) + (if (= 0 n) + (accented-syllable-0 verb) + n)))) + (if (> (elstr-number-of-syllables result) 1) + (set-accented-syllable-0! result acc-syl)) + (let ((acc-syl (elstr-accented-syllable result))) + (cond + ((and (= acc-syl 1) + (= (elstr-number-of-syllables result) 1)) + (elstr-deaccent result)) + ((> acc-syl 3) + (let ((nsyl (elstr-number-of-syllables suf))) + (cond + ((= nsyl 1) + result) + ((= nsyl 3) + (elstr-set-accent result 3)) + (else + (elstr-set-accent result 2))))) + (else + result))))) ((char=? acc #\f) (elstr-append (elstr-deaccent (elstr-append root suffix)) @@ -340,13 +365,13 @@ WHERE c.conj='" conj "' AND c.voice='" voice "' AND c.mode='" mode forms) (map force-string forms))))) -(define (individual-verb verb voice mode tense) +(define (individual-verb verb voice mood tense) (let ((res (my-sql-query (dict-connect) (string-append "SELECT f.sing1,f.sing2,f.sing3,f.plur1,f.plur2,f.plur3 \ FROM individual_verb i,verbflect f \ -WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mode='" mode +WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mood='" mood "' AND i.tense = '" tense "' AND i.ident=f.ident")))) (if (not (null? res)) (append (car res) @@ -354,17 +379,17 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mode='" mode '(class root))) #f))) -(define (conjugate verb voice mode tense . rest) +(define (conjugate verb voice mood tense . rest) (cond - ((individual-verb verb voice mode tense) => + ((individual-verb verb voice mood tense) => (lambda (res) res)) (else (let* ((vinfo (get-verb-info verb)) - (conj (get-conj-info (verb-info #:conj vinfo) voice mode tense))) + (conj (get-conj-info (verb-info #:conj vinfo) voice mood tense))) (if (not conj) (error "cannot obtain conjugation information for " - (verb-info #:conj vinfo) voice mode tense)) + (verb-info #:conj vinfo) voice mood tense)) (if (member #:nopart rest) (conj-info-set! #:particle conj #f)) (cond @@ -386,7 +411,7 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mode='" mode (else ; (format #t "CONJ ~S~%" conj) (complement-verb-info vinfo verb voice (conj-info #:thema conj)) - (append (apply-flect conj vinfo) + (append (apply-flect conj vinfo verb) (list (verb-info #:conj vinfo) (verb-info #:attested vinfo))))))))) @@ -423,9 +448,9 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mode='" mode (define (term x) (or (assoc-ref transtab x) x)) -(define (test-conjugation verb voice mode tense) - (format #t "~A ~A/~A/~A: " verb (term voice) (term mode) tense) - (let* ((result (conjugate verb voice mode tense)) +(define (test-conjugation verb voice mood tense) + (format #t "~A ~A/~A/~A: " verb (term voice) (term mood) tense) + (let* ((result (conjugate verb voice mood tense)) (conj (conjugation:table result))) (cond ((empty-conjugation? conj) @@ -444,42 +469,64 @@ WHERE i.verb='" verb "' AND i.voice='" voice "' AND i.mode='" mode (newline) (gc)) -(test-conjugation "είμαι" "act" "ind" "Ενεστώτας") -(test-conjugation "είμαι" "act" "ind" "Παρατατίκος") -(test-conjugation "είμαι" "act" "ind" "Μέλλοντας διαρκείας") -(test-conjugation "είμαι" "act" "sub" "Ενεστώτας") -(test-conjugation "είμαι" "act" "imp" "Ενεστώτας") -(test-conjugation "είμαι" "act" "ind" "Αόριστος") - -(test-conjugation "έχω" "act" "ind" "Ενεστώτας") -(test-conjugation "έχω" "act" "ind" "Παρατατίκος") -(test-conjugation "έχω" "act" "ind" "Μέλλοντας διαρκείας") -(test-conjugation "έχω" "act" "sub" "Ενεστώτας") -(test-conjugation "έχω" "act" "imp" "Ενεστώτας") -(test-conjugation "έχω" "act" "imp" "Αόριστος") - -(test-conjugation "ανοίγω" "act" "ind" "Ενεστώτας") -(test-conjugation "ανοίγω" "act" "ind" "Αόριστος") -(test-conjugation "ανοίγω" "pas" "ind" "Αόριστος") -(test-conjugation "δένω" "act" "ind" "Αόριστος") -(test-conjugation "θέλω" "act" "ind" "Αόριστος") -(test-conjugation "θέλω" "act" "ind" "Παρατατικός") -(test-conjugation "βρίσκω" "act" "ind" "Ενεστώτας") -(test-conjugation "βρίσκω" "act" "ind" "Αόριστος") -(test-conjugation "βρίσκω" "act" "ind" "Μέλλοντας στιγμιαίος") -(test-conjugation "βρίσκω" "pas" "ind" "Αόριστος") -(test-conjugation "βρίσκω" "act" "sub" "Ενεστώτας") -(test-conjugation "βρίσκω" "act" "sub" "Αόριστος") -(test-conjugation "βρίσκω" "act" "ind" "Παρακείμενος") -(test-conjugation "βρίσκω" "act" "sub" "Παρακείμενος") -(test-conjugation "βρίσκω" "act" "imp" "Αόριστος") - -(test-conjugation "νικάω" "act" "ind" "Ενεστώτας") -(test-conjugation "νικάω" "act" "ind" "Αόριστος") -(test-conjugation "νικώ" "act" "ind" "Ενεστώτας") -(test-conjugation "νικώ" "act" "ind" "Αόριστος") -(test-conjugation "νικώ" "pas" "ind" "Αόριστος") -(test-conjugation "κρεμάω" "act" "ind" "Αόριστος") -(test-conjugation "κιτάω" "act" "ind" "Αόριστος") -(test-conjugation "τραβάω" "act" "ind" "Αόριστος") +;; (test-conjugation "είμαι" "act" "ind" "Ενεστώτας") +;; (test-conjugation "είμαι" "act" "ind" "Παρατατίκος") +;; (test-conjugation "είμαι" "act" "ind" "Μέλλοντας διαρκείας") +;; (test-conjugation "είμαι" "act" "sub" "Ενεστώτας") +;; (test-conjugation "είμαι" "act" "imp" "Ενεστώτας") +;; (test-conjugation "είμαι" "act" "ind" "Αόριστος") + +;; (test-conjugation "έχω" "act" "ind" "Ενεστώτας") +;; (test-conjugation "έχω" "act" "ind" "Παρατατίκος") +;; (test-conjugation "έχω" "act" "ind" "Μέλλοντας διαρκείας") +;; (test-conjugation "έχω" "act" "sub" "Ενεστώτας") +;; (test-conjugation "έχω" "act" "imp" "Ενεστώτας") +;; (test-conjugation "έχω" "act" "imp" "Αόριστος") + +;; (test-conjugation "ανοίγω" "act" "ind" "Ενεστώτας") +;; (test-conjugation "ανοίγω" "act" "ind" "Αόριστος") +;; (test-conjugation "ανοίγω" "pas" "ind" "Αόριστος") +;; (test-conjugation "δένω" "act" "ind" "Αόριστος") +;; (test-conjugation "θέλω" "act" "ind" "Αόριστος") +;; (test-conjugation "θέλω" "act" "ind" "Παρατατικός") +;; (test-conjugation "βρίσκω" "act" "ind" "Ενεστώτας") +;; (test-conjugation "βρίσκω" "act" "ind" "Αόριστος") +;; (test-conjugation "βρίσκω" "act" "ind" "Μέλλοντας στιγμιαίος") +;; (test-conjugation "βρίσκω" "pas" "ind" "Αόριστος") +;; (test-conjugation "βρίσκω" "act" "sub" "Ενεστώτας") +;; (test-conjugation "βρίσκω" "act" "sub" "Αόριστος") +;; (test-conjugation "βρίσκω" "act" "ind" "Παρακείμενος") +;; (test-conjugation "βρίσκω" "act" "sub" "Παρακείμενος") +;; (test-conjugation "βρίσκω" "act" "imp" "Αόριστος") + +;; (test-conjugation "νικάω" "act" "ind" "Ενεστώτας") +;; (test-conjugation "νικάω" "act" "ind" "Αόριστος") +;; (test-conjugation "νικώ" "act" "ind" "Ενεστώτας") +;; (test-conjugation "νικώ" "act" "ind" "Αόριστος") +;; (test-conjugation "νικώ" "pas" "ind" "Αόριστος") +;; (test-conjugation "κρεμάω" "act" "ind" "Αόριστος") +;; (test-conjugation "κιτάω" "act" "ind" "Αόριστος") +;; (test-conjugation "τραβάω" "act" "ind" "Αόριστος") + +;; (test-conjugation "αγγέλω" "act" "ind" "Αόριστος") +;; (test-conjugation "αγγέλω" "act" "sub" "Αόριστος") +;; (test-conjugation "άγω" "act" "ind" "Αόριστος") +;; (test-conjugation "άγω" "act" "sub" "Αόριστος") +;; (test-conjugation "ανεβαίνω" "act" "ind" "Αόριστος") +;; (test-conjugation "ανεβαίνω" "act" "sub" "Αόριστος") +;; (test-conjugation "απονέμω" "act" "ind" "Αόριστος") +;; (test-conjugation "απονέμω" "act" "sub" "Αόριστος") +;; (test-conjugation "αρέσω" "act" "ind" "Αόριστος") +;; (test-conjugation "αρέσω" "act" "sub" "Αόριστος") +;; (test-conjugation "βάλλω" "act" "ind" "Αόριστος") +;; (test-conjugation "βάλλω" "act" "sub" "Αόριστος") +;; (test-conjugation "βγαίνω" "act" "ind" "Αόριστος") +;; (test-conjugation "βγαίνω" "act" "sub" "Αόριστος") +;; (test-conjugation "βλέπω" "act" "ind" "Αόριστος") +;; (test-conjugation "βλέπω" "act" "sub" "Αόριστος") +;; (test-conjugation "βλέπω" "act" "imp" "Αόριστος") +(test-conjugation "πηγαίνω" "act" "ind" "Μέλλοντας στιγμιαίος") +(test-conjugation "πίνω" "act" "ind" "Αόριστος") +(test-conjugation "πίνω" "act" "sub" "Αόριστος") +(test-conjugation "πίνω" "act" "imp" "Αόριστος") (newline) diff --git a/scm/verbop.scm b/scm/verbop.scm new file mode 100644 index 0000000..bb54126 --- /dev/null +++ b/scm/verbop.scm @@ -0,0 +1,676 @@ +(use-modules (srfi srfi-1) + (xmltools xmltrans) + (ellinika elmorph) + (gamma sql) + (ellinika xlat) + (ice-9 getopt-long)) + +(define cleanup-option #f) +(define force-option #f) +(define verbose-option #f) +(define dry-run-option #f) +(define debug-level 0) +(define input-files '()) +(define flect-ident 0) + +(define (next-flect-ident) + (set! flect-ident (1+ flect-ident)) + flect-ident) + +(define connection #f) ; SQL connection + +(define sysconf-dir "=SYSCONFDIR=") +(define config-file-name "ellinika.conf") + +(define ellinika-sql-connection '()) + +(define (add-conn-param key val) + (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) + +;;; Load the site defaults +(let ((rc-file (string-append sysconf-dir "/" config-file-name))) + (if (file-exists? rc-file) + (load rc-file))) + +(define (debug level . rest) + (if (>= debug-level level) + (begin + (for-each + (lambda (x) + (display x)) + rest) + (newline)))) + +(define (sql-val val) + ;; FIXME: quote + (if (not val) + "NULL" + (string-append "\"" val "\""))) + +(define (run-query . rest) + (debug 100 rest) + (let ((q (apply format (cons #f rest)))) + (if verbose-option + (format #t "QUERY: ~S\n" q)) + (cond + (connection + (let ((res (sql-query connection q))) + (if verbose-option + (format #t "RESULT: ~S\n" res)) + res)) + (else + #f)))) + +(define (query-number q) + (let ((res (run-query q))) + (if (null? res) + #f + (string->number (caar res))))) + +(define (check-parent elt . rest) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (parent) + (if (xmltrans:parent? parent) + (return #t))) + rest) + (xmltrans:parse-error #f elt " not a child of " rest) + (mark-invalid) + (return #f)))) + +;;;; Internal structures + +;;; Tense is a list of 6 elements or #f +(define tense #f) + +(define (tense-init) + (set! tense (make-list 6 #f))) + +(define (tense-set n val) + (if (not tense) (tense-init)) + (list-set! tense n val)) + +(define (get-tense) + (let ((ret tense)) + (set! tense #f) + ret)) + +;;; Mood is an associative list. Possible keys are: +;;; Tense +(define mood '()) + +(define (get-mood) + (let ((ret mood)) + (set! mood '()) + ret)) + +(define (mood-set key val) + (set! mood (append mood (list (cons key val))))) + +;;; Conjugation is an associative list of moods + +(define conjugation '()) + +(define (get-conjugation) + (let ((ret conjugation)) + (set! conjugation '()) + ret)) + +(define (conjugation-set key val) + (set! conjugation (append conjugation (list (cons key val))))) + +;;; Verb structure: +(define verbdef '()) + +(define (verbdef:index c) + (case c + ((#:verb) 0) + ((#:class) 1) + ((#:action) 2) + ((#:augment) 3) + ((#:suffix) 4) + ((#:accmap) 5) + ((#:act) 6) + ((#:pas) 7) + ((#:validity) 8) + (else + (error "Unknown index " c)))) + +(define (verb-get what) + (if (null? verbdef) + #f + (list-ref verbdef (verbdef:index what)))) + +(define (verb-get-sql what) + (sql-val (verb-get what))) + +(define (verb-set what val) + (if (null? verbdef) + (verb-init)) + (list-set! verbdef (verbdef:index what) val)) + +(define (verb-init) + (set! verbdef (make-list 9 #f)) + (verb-set #:validity #t) + (verb-set #:action 'insert)) + +(define (mark-invalid) + (verb-set #:validity #f)) + +(define (verbdef-validate) + (call-with-current-continuation + (lambda (return) + (if (verb-get #:validity) + (let ((dict-form (verb-get #:verb))) + (cond ((not dict-form) + (xmltrans:parse-error #f "Dictionary form missing") + (verb-set #:validity #f) + (return #f))) + (if (not (verb-get #:class)) + (cond + ((elstr-suffix? dict-form "άω") + (verb-set #:class "B1")) + ((elstr-suffix? dict-form "ώ") + (xmltrans:parse-warning #f "Class not set, assuming B2") + (verb-set #:class "B2")) + (else + (xmltrans:parse-warning #f "Class not set, assuming A") + (verb-set #:class "A")))))) + (return (verb-get #:validity))))) + +(define (mood-key->string key) + (case key + ((#:ind) "ind") + ((#:sub) "sub") + ((#:imp) "imp") + (else + (error "Unknown mood key" key)))) + +(define (empty-conjugation? conj) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (x) + (if x + (return #f))) + conj) + (return #t)))) + +(define (flush-mood mood vstr) + (if (eq? (car mood) #:root) + (let ((val (cdr mood))) + (run-query "INSERT INTO irregular_root (verb,voice,thema,root) \ +VALUES (~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val (car val)) + (sql-val (cdr val)))) + (let ((mood-str (mood-key->string (car mood)))) + (for-each + (lambda (p) + (let ((key (car p))) + (debug 1 "flush-mood: " p) + (cond + ((empty-conjugation? (cdr p)) + (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val mood-str) + (sql-val key) + "0")) + (else + (let ((num (next-flect-ident)) + (l (cdr p))) + (run-query "INSERT INTO verbflect VALUES (~A,~A,~A,~A,~A,~A,~A);~%" + num + (sql-val (list-ref l 0)) + (sql-val (list-ref l 1)) + (sql-val (list-ref l 2)) + (sql-val (list-ref l 3)) + (sql-val (list-ref l 4)) + (sql-val (list-ref l 5))) + (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val mood-str) + (sql-val key) + num) ))))) + (cdr mood))))) + +(define (flush-voice vstr conj-list) + (if conj-list + (for-each + (lambda (mood) + (flush-mood mood vstr)) + conj-list))) + +;;; Fush verb definition to the database +(define (verb-flush) + ;; + (case (verb-get #:action) + ((insert) + (run-query "INSERT INTO verb (verb,conj,augment,accmap,suffix_aor_path) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (verb-get-sql #:class) + (verb-get-sql #:augment) + (verb-get-sql #:accmap) + (verb-get-sql #:suffix)) + (flush-voice "act" (verb-get #:act)) + (flush-voice "pas" (verb-get #:pas))) + ((delete update) + (xmltrans:parse-error #f + "Sorry update and delete are not yet supported")))) + +;;;; XML definitions + +;;; Set the default handler +(define tag-list '()) + +(define (lingua:default-start tag attr) + (xmltrans:set-attr #f "__START__" 1) + #f) + +(xmltrans:set-default-start-handler lingua:default-start) + +(define (lingua:default-end tag attr text) + (if (xmltrans:attr attr "__START__") + (xmltrans:parse-error #f "Unhandled element " tag)) + (set! tag-list + (cons + (xmltrans:make-tag tag attr text) + tag-list)) + #f) + +(xmltrans:set-default-end-handler lingua:default-end) + +;; +(xmltrans:end-tag + "i" + (tag attr text) + #f) + + +;; ... - Verb definition +(xmltrans:end-tag + "v" + (tag attr text) + (check-parent tag "i") + (if (verbdef-validate) + (verb-flush)) + (verb-init) + #f) + +;; verb - Verb in dictionary form +(xmltrans:end-tag + "a" + (tag attr text) + (check-parent tag "v") + (cond ((verb-get #:verb) + (xmltrans:parse-error #f "Verb was already defined") + (mark-invalid))) + (verb-set #:verb text) + #f) + +;;; class - Set conjugation class +(xmltrans:end-tag + "c" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:class) + (xmltrans:parse-error #f "Verb class was already defined") + (mark-invalid)) + ((not (or (string=? text "A") + (string=? text "B1") + (string=? text "B2") + (string=? text "I"))) + (xmltrans:parse-warning #f "Unknown or misspelled verb class"))) + (verb-set #:class text) + #f) + +;;; insert|delete|update - Define action +(xmltrans:end-tag + "action" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:action) + (xmltrans:parse-error #f "Action was already defined") + (mark-invalid))) + (let ((act (string->symbol text))) + (case act + ((insert delete update) + (verb-set #:action act)) + (else + (xmltrans:parse-error #f "Unknown action ~A~%" text)))) + #f) + +;;; C - Define augment +(xmltrans:end-tag + "augment" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:augment) + (xmltrans:parse-error #f "Augment was already defined") + (mark-invalid)) + ((not (or (string= text "η") + (string= text "ε"))) + (xmltrans:parse-warning #f "Suspicious augment"))) + (verb-set #:augment text) + #f) + +;;; S - Define aorist suffix for B verbs +(xmltrans:end-tag + "suffix" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:suffix) + (xmltrans:parse-error #f "Suffix was already defined") + (mark-invalid))) + (verb-set #:suffix text) + #f) + +;;; +(define accmap-char-set + (char-set-adjoin (char-set-copy char-set:digit) #\s #\f #\-)) + +(define (valid-accent-map? accmap) + (let* ((acclist (string->list accmap)) + (len (length acclist))) + (and + (or (= len 6) (= len 7)) + (fold + (lambda (ch prev) + (char-set-contains? accmap-char-set ch)) + #t + (list-head acclist 6)) + (or (= len 6) (char=? (list-ref acclist 6) #\+))))) + +;;; MAP - Define accent map +(xmltrans:end-tag + "accmap" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:accmap) + (xmltrans:parse-error #f "Accmap was already defined") + (mark-invalid)) + ((not (valid-accent-map? text)) + (xmltrans:parse-error #f "Invalid accent map") + (mark-invalid)) + (else + (verb-set #:accmap text))) + #f) + +;;; ... - Define conjugation in active voice +(xmltrans:end-tag + "act" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:act) + (xmltrans:parse-error #f "Active voice was already defined") + (mark-invalid))) + (verb-set #:act (get-conjugation)) + #f) + +;;; ... - Define conjugation in passive voice +(xmltrans:end-tag + "pas" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:pas) + (xmltrans:parse-error #f "Passive voice was already defined") + (mark-invalid))) + (verb-set #:pas (get-conjugation)) + #f) + +;;; ... - Indicative +(xmltrans:end-tag + "ind" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:ind (get-mood)) + #f) + +;;; ... - Subjunctive +(xmltrans:end-tag + "sub" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:sub (get-mood)) + #f) + +;;; ... - Imperative +(xmltrans:end-tag + "imp" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:imp (get-mood)) + #f) + +;;; root - Define aorist root +(xmltrans:end-tag + "aor" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:root (cons "aor" text)) + #f) + +;;; root - Define aorist root +(xmltrans:end-tag + "root" + (tag attr text) + (check-parent tag "act" "pas") + (let ((theme (xmltrans:attr attr "theme"))) + (cond + ((not theme) + (xmltrans:parse-error #f "Required attribute `theme' not specified") + (mark-invalid)) + ((or (string=? theme "aor") + (string=? theme "sub") + (string=? theme "pres")) + (conjugation-set #:root (cons theme text))) + (else + (xmltrans:parse-error #f "Unknown verb theme") + (mark-invalid)))) + #f) + + +;;; ... - Define a tense +(xmltrans:start-tag + "t" + (tag attr) + (check-parent tag "ind" "sub" "imp") + (tense-init) + #f) + +(xmltrans:end-tag + "t" + (tag attr text) + (let ((name (xmltrans:attr attr "name"))) + (if (not name) + (begin + (xmltrans:parse-error #f "Required attribute `name' not specified") + (mark-invalid))) + (mood-set name (get-tense))) + #f) + +;;;

...

- Define a person +(xmltrans:end-tag + "p" + (tag attr text) + (check-parent tag "t") + (call-with-current-continuation + (lambda (return) + (let ((number (xmltrans:attr attr "n")) + (person (xmltrans:attr attr "p")) + (elt #f)) + (cond + ((not number) + (xmltrans:parse-error #f "Required attribute `n' not specified") + (return)) + ((not person) + (xmltrans:parse-error #f "Required attribute `p' not specified") + (return)) + ((string=? person "1") + (set! elt 0)) + ((string=? person "2") + (set! elt 1)) + ((string=? person "3") + (set! elt 2)) + (else + (xmltrans:parse-error #f "Invalid value for `p'") + (return))) + (cond + ((string=? number "s") 0) + ((string=? number "p") + (set! elt (+ 3 elt))) + (else + (xmltrans:parse-error #f "Invalid value for `n'") + (return))) + (tense-set elt text)))) + #f) + + + +;;; DB functions +(define (escape-string str) + (let loop ((lst '()) + (str str)) + (cond + ((string-index str #\") => + (lambda (pos) + (loop (append lst (list (substring str 0 pos) + "\\\"")) + (substring str (1+ pos))))) + (else + (apply string-append (append lst (list str))))))) + + +;;;; Main +(define grammar + `((check (single-char #\c)) + (cleanup) + (database (single-char #\d) (value #t)) + (host (single-char #\h) (value #t)) + (port (single-char #\P) (value #t)) + (password (single-char #\p) (value #t)) + (user (single-char #\u) (value #t)) + (dry-run (single-char #\n)) + (interface (value #t)) + (verbose (single-char #\v)) + (debug (value #t)) + (help))) + +(define (usage) + (display "usage: verbop OPTIONS FILES + +General options: + + --check Only check input syntax and consistency. Do not + update the database. This means that the program will + not access the database at all, so some errors + (mistyped parts of speech and the like) may slip in + unnoticed. + --verbose Verbosely display SQL queries and their results. + --debug NUMBER Set debugging level (0 < NUMBER <= 100) + --dry-run Do nothing, display what would have been done. + +SQL related options: + + --interface STRING Select SQL interface to use. STRING may be + either \"mysql\" (the default) or \"postgres\". + --host HOST-OR-PATH Set name or IP address of the host running SQL + database, or path to the database I/O socket. + --database NAME Set name of the database to use. + --port NUMBER Set the SQL port number + --user USER-NAME Set SQL user name. + --password STRING Set the SQL password + + --cleanup Clean up the database (delete all entries from all the + tables) before proceeding. Use this option with care. + +Informational options: + + --help Output this help info +\n")) + +(define (cons? p) + (and (pair? p) (not (list? p)))) + +(for-each + (lambda (x) + (cond + ((cons? x) + (case (car x) + ((cleanup) + (set! cleanup-option #t)) + ((database) + (add-conn-param #:db (cdr x))) + ((host) + (add-conn-param #:host (cdr x))) + ((port) + (add-conn-param #:port (string->number (cdr x)))) + ((password) + (add-conn-param #:pass (cdr x))) + ((user) + (add-conn-param #:user (cdr x))) + ((interface) + (add-conn-param #:iface (cdr x))) + ((verbose) + (set! verbose-option #t)) + ((preserve) + (set! preserve-option #t)) + ((debug) + (set! debug-level (string->number (cdr x)))) + ((dry-run) + (set! verbose-option #t) + (set! dry-run-option #t)) + ((help) + (usage) + (exit 0)))) + (else + (set! input-files (cdr x))))) + (getopt-long (command-line) grammar)) + +(if (null? input-files) + (begin + (display "Input files not specified\n" (current-error-port)) + (exit 1))) + +(cond + ((not dry-run-option) + (set! connection (sql-open-connection ellinika-sql-connection)) + (if (not connection) + (begin + (display "Cannot connect to the database\n" (current-error-port)) + (exit 1))) + (run-query "SET NAMES utf8") + (set! flect-ident (query-number "SELECT MAX(ident) FROM verbflect")))) + +(cond + (cleanup-option + (run-query "DELETE FROM verbflect where ident > 99") + (run-query "DELETE FROM verb") + (run-query "DELETE FROM irregular_root") + (run-query "DELETE FROM individual_verb"))) + +(for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files) + +(if connection + (sql-close-connection connection)) + + + diff --git a/src/ellinika/elmorph.c b/src/ellinika/elmorph.c index f55e010..1831610 100644 --- a/src/ellinika/elmorph.c +++ b/src/ellinika/elmorph.c @@ -352,7 +352,7 @@ SCM_DEFINE_PUBLIC(scm_elstr_accent_position, "elstr-accent-position", 1, 0, 0, #define FUNC_NAME s_scm_elstr_accent_position { struct elstr *elstr; - force_elstr(&elstr, el, 0, SCM_ARG1, FUNC_NAME); + force_elstr(&elstr, el, 1, SCM_ARG1, FUNC_NAME); return scm_from_uint(elstr->acc_pos); } #undef FUNC_NAME @@ -559,7 +559,7 @@ _elstr_set_accent(SCM el, SCM n, int destructive, const char *func_name) if (num > elstr->nsyl) scm_misc_error(func_name, "cannot set accent on syllable #~S: not enough syllables: ~S", - scm_list_2(el, n)); + scm_list_2(n, el)); acc_num = elstr->nsyl - num; if (acc_num == 0) start = 0; @@ -613,7 +613,67 @@ SCM_DEFINE_PUBLIC(scm_elstr_set_accent_x, "elstr-set-accent!", { return _elstr_set_accent(el, n, 1, s_scm_elstr_set_accent_x); } -#undef FUNC_NAME + +static SCM +_elstr_set_accent_on_char(SCM el, SCM n, int destructive, const char *func_name) +{ + struct elstr *elstr; + unsigned i; + unsigned acc_num, num, len, start; + SCM scm; + int dstate; + + if (destructive) { + SCM_ASSERT(scm_is_elstr(el), el, SCM_ARG1, func_name); + elstr = (struct elstr*) SCM_CDR(el); + } else + scm = force_elstr(&elstr, el, 0, SCM_ARG1, func_name); + + SCM_ASSERT(scm_is_integer(n), n, SCM_ARG2, func_name); + num = scm_to_uint(n); + if (num > elstr->len) + scm_misc_error(func_name, + "cannot set accent on character #~S: not enough characters: ~S", + scm_list_2(el, n)); + if (!elchr_isvowel(elstr->str[num])) + scm_misc_error(func_name, + "cannot set accent on character #~S: not a vowel: ~S", + scm_list_2(el, n)); + + if (destructive) + scm = SCM_UNSPECIFIED; + else if (scm == el) { + scm = _elstr_dup(elstr); + elstr = (struct elstr*) SCM_CDR(scm); + } + + /* Clear all accents */ + for (i = 0; i < elstr->len; i++) + elstr->str[i] = elchr_deaccent(elstr->str[i]); + + elstr->str[num] = elchr_accent(elstr->str[num], CHF_OXEIA); + _elstr_syllabize(elstr); + return scm; +} + +SCM_DEFINE_PUBLIC(scm_elstr_set_accent_character, "elstr-set-accent-character", + 2, 0, 0, + (SCM el, SCM n), +"Set accent on Nth character of EL\n") +{ + return _elstr_set_accent_on_char(el, n, 0, + s_scm_elstr_set_accent_character); +} + +SCM_DEFINE_PUBLIC(scm_elstr_set_accent_character_x, + "elstr-set-accent-character!", + 2, 0, 0, + (SCM el, SCM n), +"Set accent on Nth character of EL (destructive)\n") +{ + return _elstr_set_accent_on_char(el, n, 1, + s_scm_elstr_set_accent_character_x); +} SCM_DEFINE_PUBLIC(scm_elstr_char_prop_bitmask, "elstr-char-prop-bitmask", 2, 0, 0, -- cgit v1.2.1