#! /bin/sh # aside from this initial boilerplate, this is actually -*- scheme -*- code main='(module-ref (resolve-module '\''(src verbop)) '\'main')' exec ${GUILE-guile} -l $0 -c "(apply $main (command-line))" "$@" !# ;;;; This file is part of Ellinika ;;;; Copyright (C) 2011 Sergey Poznyakoff ;;;; ;;;; Ellinika is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; Ellinika is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program. If not, see . ;;;; ;;; Main procedure (define-module (src verbop) :export (verbop)) (use-modules (srfi srfi-1) (xmltools xmltrans) (ellinika elmorph) (ellinika sql) (ellinika tenses) (ice-9 getopt-long)) (define cleanup-option #f) (define force-option #f) (define debug-level 0) (define input-files '()) (define flect-ident 0) (define (next-flect-ident) (set! flect-ident (1+ flect-ident)) flect-ident) (define class-list '()) ; List of defined verb classes. (define sysconf-dir "=SYSCONFDIR=") (define config-file-name "ellinika.conf") (define ellinika-sql-connection '()) (define (add-conn-param key val) (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) ;;; Load the site defaults (let ((rc-file (string-append sysconf-dir "/" config-file-name))) (if (file-exists? rc-file) (load rc-file))) (define (debug level . rest) (if (>= debug-level level) (begin (for-each (lambda (x) (display x)) rest) (newline)))) (define (query-number q) (if ellinika:sql-dry-run 0 (let ((res (ellinika:sql-query q))) (if (null? res) #f (string->number (caar res)))))) (define (check-parent elt . rest) (call-with-current-continuation (lambda (return) (for-each (lambda (parent) (if (xmltrans:parent? parent) (return #t))) rest) (xmltrans:parse-error #f elt " not a child of " rest) (mark-invalid) (return #f)))) ;;;; Internal structures ;;; Tense is a list of 6 elements or #f (define tense #f) (define tense-prop '()) (define (tense-init) (set! tense (make-list 6 #f)) (set! tense-prop '())) (define (tense-set n val) (if (not tense) (tense-init)) (list-set! tense n val)) (define (get-tense) (let ((ret (append tense (if (and (empty-conjugation? tense) (not (null? tense-prop))) (cons (cons "default" #t) tense-prop) tense-prop)))) (tense-init) ret)) ;;; Mood is an associative list. Possible keys are: ;;; Tense (define mood '()) (define (get-mood) (let ((ret mood)) (set! mood '()) ret)) (define (mood-set key val) (set! mood (append mood (list (cons key val))))) ;;; Conjugation is an associative list of moods (define conjugation #f) (define (get-conjugation) (let ((ret conjugation)) (set! conjugation #f) ret)) (define (conjugation-set key val) (set! conjugation (if conjugation (append conjugation (list (cons key val))) (list (cons key val))))) ;;; Verb structure: (define verbdef '()) (define (verbdef:index c) (case c ((#:verb) 0) ((#:class) 1) ((#:action) 2) ((#:augment) 3) ((#:suffix) 4) ((#:accmap) 5) ((#:act) 6) ((#:pas) 7) ((#:validity) 8) (else (error "Unknown index " c)))) (define (verb-get what) (cond ((null? verbdef) #f) ((eq? what #:override) (if (verb-get #:suffix) "suffix" #f)) (else (list-ref verbdef (verbdef:index what))))) (define (verb-set what val) (if (null? verbdef) (verb-init)) (list-set! verbdef (verbdef:index what) val)) (define (verb-init) (set! verbdef (make-list 9 #f)) (verb-set #:act '()) (verb-set #:pas '()) (verb-set #:validity #t) (verb-set #:action 'insert)) (define (mark-invalid) (verb-set #:validity #f)) (define (verbdef-validate) (call-with-current-continuation (lambda (return) (if (verb-get #:validity) (let ((dict-form (verb-get #:verb))) (cond ((not dict-form) (xmltrans:parse-error #f "Dictionary form missing") (verb-set #:validity #f) (return #f))) (if (not (verb-get #:class)) (cond ((elstr-suffix? dict-form "άω") (verb-set #:class "B1")) ((elstr-suffix? dict-form "ώ") (xmltrans:parse-warning #f "Class not set, assuming B2") (verb-set #:class "B2")) (else (xmltrans:parse-warning #f "Class not set, assuming A") (verb-set #:class "A")))))) (return (verb-get #:validity))))) (define (empty-conjugation? conj) (call-with-current-continuation (lambda (return) (for-each (lambda (x) (if x (return #f))) (list-head conj 6)) (return #t)))) (define (insert-individual-verb voice mood tense ident) (ellinika:sql-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \ VALUES (~Q,~Q,~Q,~Q,~Q)" (verb-get #:verb) voice mood tense ident)) (define (flush-mood mood vstr) (if (eq? (car mood) #:root) (let ((val (cdr mood))) (ellinika:sql-query "INSERT INTO irregular_root (verb,voice,thema,root) \ VALUES (~Q,~Q,~Q,~Q)" (verb-get #:verb) vstr (car val) (cdr val))) (let ((mood-str (car mood))) (let ((lst (cdr mood))) (cond ((null? lst) (for-each (lambda (tense) (insert-individual-verb vstr mood-str tense 0)) (assoc-ref ellinika-tense-list mood-str))) (else ; (format #t "LST ~A~%" lst) (for-each (lambda (p) (let ((tense (car p))) (debug 1 "flush-mood: " p) (cond ((assoc-ref (list-tail p 7) "default") #t) ((empty-conjugation? (cdr p)) (insert-individual-verb vstr mood-str tense 0)) (else (let ((num (next-flect-ident)) (l (cdr p))) (ellinika:sql-query "INSERT INTO verbflect VALUES (~Q,~Q,~Q,~Q,~Q,~Q,~Q)" num (list-ref l 0) (list-ref l 1) (list-ref l 2) (list-ref l 3) (list-ref l 4) (list-ref l 5)) (insert-individual-verb vstr mood-str tense num) ))) (for-each (lambda (prop) ; (format #t "PROP ~A~%" prop) (let ((key (car prop))) (if (not (string=? key "default")) (ellinika:sql-query "INSERT INTO verbtense VALUES (~Q,~Q,~Q,~Q,~Q,~Q)" (verb-get #:verb) vstr mood-str tense (car prop) (cdr prop))))) (list-tail p 7)))) lst))))))) (define (flush-voice vstr conj-list) ; (format #t "VOICE ~A~%" conj-list) (cond ((not conj-list) (for-each (lambda (vp) (let ((mood (car vp))) (for-each (lambda (tense) (insert-individual-verb vstr mood tense 0)) (cdr vp)))) ellinika-tense-list)) (conj-list (for-each (lambda (mood) (flush-mood mood vstr)) conj-list)))) ;;; (define (preprocess-voice voice attrlist) ; (format #t "VOICE ~A~%" voice) (if voice (for-each (lambda (arg) (let* ((key (car arg)) (mtlist (cdr arg)) (value (verb-get key))) ; (format #t "KEY ~A / VALUE ~A; MTLIST ~A~%" key value mtlist) (if value (let ((attr (symbol->string (keyword->symbol key)))) (for-each (lambda (mood-tenses) (let* ((mood-name (car mood-tenses)) (mood-ref (or (assoc mood-name voice) (begin (set! voice (cons (cons mood-name '()) voice)) (car voice))))) ; (format #t "MOOD ~A~%" mood-ref) (for-each (lambda (tense-name) (let* ((tense (or (assoc tense-name (cdr mood-ref)) (begin (append! mood-ref (list (cons tense-name (append (make-list 6 #f) (list (cons "default" #t)))))) ; (format #t "NM ~A~%" mood-ref) (assoc tense-name (cdr mood-ref))))) (prop (begin ; (format #t "TENSE ~A~%" tense) (list-tail tense 7)))) ; (format #t "PROP ~A ~A~%" tense prop) (if (not (assoc attr prop)) (append! tense (list (cons attr value)))))) (cdr mood-tenses)))) mtlist))))) attrlist)) ; (format #t "BVOICE ~A~%" voice) voice) ;;; Fush verb definition to the database (define (verb-flush) ;; (case (verb-get #:action) ((insert) (ellinika:sql-query "INSERT INTO verbclass (verb,conj) VALUES (~Q,~Q)" (verb-get #:verb) (verb-get #:class)) (flush-voice "act" (preprocess-voice (verb-get #:act) (list (cons #:suffix '(("ind" . ("Αόριστος" "Παρακείμενος" "Υπερσυντέλικος" "Συντελεσμένος μέλλοντας" "Μέλλοντας στιγμιαίος")) ("sub" . ("Αόριστος")) ("imp" . ("Αόριστος")))) (cons #:accmap ellinika-tense-list) (cons #:augment '(("ind" . ("Αόριστος" "Παρακείμενος"))))))) (flush-voice "pas" (preprocess-voice (verb-get #:pas) (list (cons #:suffix '(("imp" . ("Αόριστος")))) (cons #:override '(("imp" . ("Αόριστος")))) (cons #:accmap ellinika-tense-list))))) ((delete update) (xmltrans:parse-error #f "Sorry update and delete are not yet supported")))) ;;;; XML definitions ;;; Set the default handler (define tag-list '()) (define (lingua:default-start tag attr) (xmltrans:set-attr #f "__START__" 1) #f) (xmltrans:set-default-start-handler lingua:default-start) (define (lingua:default-end tag attr text) (if (xmltrans:attr attr "__START__") (xmltrans:parse-error #f "Unhandled element " tag)) (set! tag-list (cons (xmltrans:make-tag tag attr text) tag-list)) #f) (xmltrans:set-default-end-handler lingua:default-end) ;; (xmltrans:end-tag "i" (tag attr text) #f) ;; ... - Verb definition (xmltrans:end-tag "v" (tag attr text) (check-parent tag "i") (if (verbdef-validate) (verb-flush)) (verb-init) #f) ;; verb - Verb in dictionary form (xmltrans:end-tag "a" (tag attr text) (check-parent tag "v") (cond ((verb-get #:verb) (xmltrans:parse-error #f "Verb was already defined") (mark-invalid))) (verb-set #:verb text) #f) ;;; class - Set conjugation class (xmltrans:end-tag "c" (tag attr text) (check-parent tag "v") (cond ((verb-get #:class) (xmltrans:parse-error #f "Verb class was already defined") (mark-invalid)) ((not (member text class-list)) (xmltrans:parse-warning #f "Unknown or misspelled verb class"))) (verb-set #:class text) #f) ;;; insert|delete|update - Define action (xmltrans:end-tag "action" (tag attr text) (check-parent tag "v") (cond ((verb-get #:action) (xmltrans:parse-error #f "Action was already defined") (mark-invalid))) (let ((act (string->symbol text))) (case act ((insert delete update) (verb-set #:action act)) (else (xmltrans:parse-error #f "Unknown action ~A~%" text)))) #f) ;;; C - Define augment (xmltrans:end-tag "augment" (tag attr text) (if (not (or (string= text "η") (string= text "ε"))) (xmltrans:parse-warning #f "Suspicious augment")) (cond ((xmltrans:parent? "v") (cond ((verb-get #:augment) (xmltrans:parse-error #f "Augment was already defined") (mark-invalid)) (verb-set #:augment text))) ((xmltrans:parent? "t") (set! tense-prop (cons (cons "augment" text) tense-prop))) (else (xmltrans:parse-error #f elt " not a child of v or t"))) #f) ;;; S - Define aorist suffix for B verbs (xmltrans:end-tag "suffix" (tag attr text) (cond ((xmltrans:parent? "v") (cond ((verb-get #:suffix) (xmltrans:parse-error #f "Suffix was already defined") (mark-invalid)) (else (verb-set #:suffix text)))) ((xmltrans:parent? "t") (set! tense-prop (cons (cons "suffix" text) tense-prop))) (else (xmltrans:parse-error #f elt " not a child of v or t"))) #f) ;;; (define accmap-char-set (char-set-adjoin (char-set-copy char-set:digit) #\s #\f #\-)) (define (valid-accent-map? accmap) (let* ((acclist (string->list accmap)) (len (length acclist))) (and (or (= len 6) (= len 7)) (fold (lambda (ch prev) (char-set-contains? accmap-char-set ch)) #t (list-head acclist 6)) (or (= len 6) (char=? (list-ref acclist 6) #\+))))) ;;; MAP - Define accent map (xmltrans:end-tag "accmap" (tag attr text) (cond ((not (valid-accent-map? text)) (xmltrans:parse-error #f "Invalid accent map") (mark-invalid)) ((xmltrans:parent? "v") (cond ((> (string-length text) 6) (xmltrans:parse-error #f "Use of augment not allowed in global accent map") (mark-invalid)) ((verb-get #:accmap) (xmltrans:parse-error #f "Accmap was already defined") (mark-invalid)) (else (verb-set #:accmap text)))) ((xmltrans:parent? "t") (set! tense-prop (cons (cons "accmap" text) tense-prop))) (else (xmltrans:parse-error #f "accmap not a child of v or t"))) #f) ;;; ... - Define conjugation in active voice (xmltrans:end-tag "act" (tag attr text) (check-parent tag "v") (cond ((null? (verb-get #:act)) (verb-set #:act (get-conjugation))) (else (xmltrans:parse-error #f "Active voice was already defined") (mark-invalid))) #f) ;;; ... - Define conjugation in passive voice (xmltrans:end-tag "pas" (tag attr text) (check-parent tag "v") (cond ((null? (verb-get #:pas)) (verb-set #:pas (get-conjugation))) (else (xmltrans:parse-error #f "Passive voice was already defined") (mark-invalid))) #f) ;;; ... - Indicative (xmltrans:end-tag "ind" (tag attr text) (check-parent tag "act" "pas") (conjugation-set "ind" (get-mood)) #f) ;;; ... - Subjunctive (xmltrans:end-tag "sub" (tag attr text) (check-parent tag "act" "pas") (conjugation-set "sub" (get-mood)) #f) ;;; ... - Imperative (xmltrans:end-tag "imp" (tag attr text) (check-parent tag "act" "pas") (conjugation-set "imp" (get-mood)) #f) ;;; root - Define aorist root (xmltrans:end-tag "aor" (tag attr text) (check-parent tag "act" "pas") (conjugation-set #:root (cons "aor" text)) #f) ;;; root - Define aorist root (xmltrans:end-tag "root" (tag attr text) (check-parent tag "act" "pas") (let ((theme (xmltrans:attr attr "theme"))) (cond ((not theme) (xmltrans:parse-error #f "Required attribute `theme' not specified") (mark-invalid)) ((or (string=? theme "aor") (string=? theme "sub") (string=? theme "pres")) (conjugation-set #:root (cons theme text))) (else (xmltrans:parse-error #f "Unknown verb theme") (mark-invalid)))) #f) ;;; ... - Define a tense (xmltrans:start-tag "t" (tag attr) (check-parent tag "ind" "sub" "imp") (tense-init) #f) (xmltrans:end-tag "t" (tag attr text) (let ((name (xmltrans:attr attr "name"))) (if (not name) (begin (xmltrans:parse-error #f "Required attribute `name' not specified") (mark-invalid))) (mood-set name (get-tense))) #f) (xmltrans:end-tag "prop" (tag attr text) (check-parent tag "t") (let ((name (xmltrans:attr attr "name"))) (cond ((not name) (begin (xmltrans:parse-error #f "Required attribute `name' not specified") (mark-invalid))) ((xmltrans:parent? "t") (set! tense-prop (cons (cons name text) tense-prop))) (else (xmltrans:parse-error #f "prop not a child of t")))) #f) ;;;

...

- Define a (grammatical) person (xmltrans:end-tag "p" (tag attr text) (check-parent tag "t") (call-with-current-continuation (lambda (return) (let ((number (xmltrans:attr attr "n")) (person (xmltrans:attr attr "p")) (elt #f)) (cond ((not number) (xmltrans:parse-error #f "Required attribute `n' not specified") (return)) ((not person) (xmltrans:parse-error #f "Required attribute `p' not specified") (return)) ((string=? person "1") (set! elt 0)) ((string=? person "2") (set! elt 1)) ((string=? person "3") (set! elt 2)) (else (xmltrans:parse-error #f "Invalid value for `p'") (return))) (cond ((string=? number "s") 0) ((string=? number "p") (set! elt (+ 3 elt))) (else (xmltrans:parse-error #f "Invalid value for `n'") (return))) (cond ((xmltrans:attr attr "prop") (set! tense-prop (cons (cons (number->string (1+ elt)) text) tense-prop)) (if (not (member "default" tense-prop)) (set! tense-prop (cons (cons "default" #t) tense-prop)))) (else (tense-set elt text)))))) #f) ;;; DB functions (define (escape-string str) (let loop ((lst '()) (str str)) (cond ((string-index str #\") => (lambda (pos) (loop (append lst (list (substring str 0 pos) "\\\"")) (substring str (1+ pos))))) (else (apply string-append (append lst (list str))))))) ;;;; Main (define grammar `((check (single-char #\c)) (cleanup) (database (single-char #\d) (value #t)) (host (single-char #\h) (value #t)) (port (single-char #\P) (value #t)) (password (single-char #\p) (value #t)) (user (single-char #\u) (value #t)) (dry-run (single-char #\n)) (interface (value #t)) (verbose (single-char #\v)) (debug (value #t)) (help))) (define (usage) (display "usage: verbop OPTIONS FILES General options: --check Only check input syntax and consistency. Do not update the database. This means that the program will not access the database at all, so some errors (mistyped parts of speech and the like) may slip in unnoticed. --verbose Verbosely display SQL queries and their results. --debug NUMBER Set debugging level (0 < NUMBER <= 100) --dry-run Do nothing, display what would have been done. SQL related options: --interface STRING Select SQL interface to use. STRING may be either \"mysql\" (the default) or \"postgres\". --host HOST-OR-PATH Set name or IP address of the host running SQL database, or path to the database I/O socket. --database NAME Set name of the database to use. --port NUMBER Set the SQL port number --user USER-NAME Set SQL user name. --password STRING Set the SQL password --cleanup Clean up the database (delete all entries from all the tables) before proceeding. Use this option with care. Informational options: --help Output this help info \n")) (define (cons? p) (and (pair? p) (not (list? p)))) (define (main . args) (for-each (lambda (x) (cond ((cons? x) (case (car x) ((cleanup) (set! cleanup-option #t)) ((database) (add-conn-param #:db (cdr x))) ((host) (add-conn-param #:host (cdr x))) ((port) (add-conn-param #:port (string->number (cdr x)))) ((password) (add-conn-param #:pass (cdr x))) ((user) (add-conn-param #:user (cdr x))) ((interface) (add-conn-param #:iface (cdr x))) ((verbose) (set! verbose-option #t)) ((preserve) (set! preserve-option #t)) ((debug) (set! debug-level (string->number (cdr x))) (set! ellinika:sql-verbose #t)) ((dry-run) (set! ellinika:sql-dry-run #t) (set! ellinika:sql-verbose #t)) ((help) (usage) (exit 0)))) (else (set! input-files (cdr x))))) (getopt-long args grammar)) (if (null? input-files) (begin (display "Input files not specified\n" (current-error-port)) (exit 1))) (if (not (ellinika:sql-connect ellinika-sql-connection)) (begin (display "Cannot connect to the database\n" (current-error-port)) (exit 1))) (set! flect-ident (query-number "SELECT MAX(ident) FROM verbflect")) (cond (cleanup-option (ellinika:sql-query "DELETE FROM verbflect where ident > 99") (ellinika:sql-query "DELETE FROM verbclass") (ellinika:sql-query "DELETE FROM verbtense") (ellinika:sql-query "DELETE FROM irregular_root") (ellinika:sql-query "DELETE FROM individual_verb"))) (set! class-list (cons "I" (if ellinika:sql-dry-run (list "A" "B1" "B2") (map car (ellinika:sql-query "SELECT DISTINCT conj FROM conjugation ORDER BY 1"))))) (for-each (lambda (x) (if (not (xmltrans:parse-file x)) (exit 1))) input-files) (ellinika:sql-disconnect)) ;;;; EOF