diff options
Diffstat (limited to 'scm/verbop.scm')
-rw-r--r-- | scm/verbop.scm | 676 |
1 files changed, 676 insertions, 0 deletions
diff --git a/scm/verbop.scm b/scm/verbop.scm new file mode 100644 index 0000000..bb54126 --- /dev/null +++ b/scm/verbop.scm @@ -0,0 +1,676 @@ +(use-modules (srfi srfi-1) + (xmltools xmltrans) + (ellinika elmorph) + (gamma sql) + (ellinika xlat) + (ice-9 getopt-long)) + +(define cleanup-option #f) +(define force-option #f) +(define verbose-option #f) +(define dry-run-option #f) +(define debug-level 0) +(define input-files '()) +(define flect-ident 0) + +(define (next-flect-ident) + (set! flect-ident (1+ flect-ident)) + flect-ident) + +(define connection #f) ; SQL connection + +(define sysconf-dir "=SYSCONFDIR=") +(define config-file-name "ellinika.conf") + +(define ellinika-sql-connection '()) + +(define (add-conn-param key val) + (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) + +;;; Load the site defaults +(let ((rc-file (string-append sysconf-dir "/" config-file-name))) + (if (file-exists? rc-file) + (load rc-file))) + +(define (debug level . rest) + (if (>= debug-level level) + (begin + (for-each + (lambda (x) + (display x)) + rest) + (newline)))) + +(define (sql-val val) + ;; FIXME: quote + (if (not val) + "NULL" + (string-append "\"" val "\""))) + +(define (run-query . rest) + (debug 100 rest) + (let ((q (apply format (cons #f rest)))) + (if verbose-option + (format #t "QUERY: ~S\n" q)) + (cond + (connection + (let ((res (sql-query connection q))) + (if verbose-option + (format #t "RESULT: ~S\n" res)) + res)) + (else + #f)))) + +(define (query-number q) + (let ((res (run-query q))) + (if (null? res) + #f + (string->number (caar res))))) + +(define (check-parent elt . rest) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (parent) + (if (xmltrans:parent? parent) + (return #t))) + rest) + (xmltrans:parse-error #f elt " not a child of " rest) + (mark-invalid) + (return #f)))) + +;;;; Internal structures + +;;; Tense is a list of 6 elements or #f +(define tense #f) + +(define (tense-init) + (set! tense (make-list 6 #f))) + +(define (tense-set n val) + (if (not tense) (tense-init)) + (list-set! tense n val)) + +(define (get-tense) + (let ((ret tense)) + (set! tense #f) + ret)) + +;;; Mood is an associative list. Possible keys are: +;;; Tense +(define mood '()) + +(define (get-mood) + (let ((ret mood)) + (set! mood '()) + ret)) + +(define (mood-set key val) + (set! mood (append mood (list (cons key val))))) + +;;; Conjugation is an associative list of moods + +(define conjugation '()) + +(define (get-conjugation) + (let ((ret conjugation)) + (set! conjugation '()) + ret)) + +(define (conjugation-set key val) + (set! conjugation (append conjugation (list (cons key val))))) + +;;; Verb structure: +(define verbdef '()) + +(define (verbdef:index c) + (case c + ((#:verb) 0) + ((#:class) 1) + ((#:action) 2) + ((#:augment) 3) + ((#:suffix) 4) + ((#:accmap) 5) + ((#:act) 6) + ((#:pas) 7) + ((#:validity) 8) + (else + (error "Unknown index " c)))) + +(define (verb-get what) + (if (null? verbdef) + #f + (list-ref verbdef (verbdef:index what)))) + +(define (verb-get-sql what) + (sql-val (verb-get what))) + +(define (verb-set what val) + (if (null? verbdef) + (verb-init)) + (list-set! verbdef (verbdef:index what) val)) + +(define (verb-init) + (set! verbdef (make-list 9 #f)) + (verb-set #:validity #t) + (verb-set #:action 'insert)) + +(define (mark-invalid) + (verb-set #:validity #f)) + +(define (verbdef-validate) + (call-with-current-continuation + (lambda (return) + (if (verb-get #:validity) + (let ((dict-form (verb-get #:verb))) + (cond ((not dict-form) + (xmltrans:parse-error #f "Dictionary form missing") + (verb-set #:validity #f) + (return #f))) + (if (not (verb-get #:class)) + (cond + ((elstr-suffix? dict-form "άω") + (verb-set #:class "B1")) + ((elstr-suffix? dict-form "ώ") + (xmltrans:parse-warning #f "Class not set, assuming B2") + (verb-set #:class "B2")) + (else + (xmltrans:parse-warning #f "Class not set, assuming A") + (verb-set #:class "A")))))) + (return (verb-get #:validity))))) + +(define (mood-key->string key) + (case key + ((#:ind) "ind") + ((#:sub) "sub") + ((#:imp) "imp") + (else + (error "Unknown mood key" key)))) + +(define (empty-conjugation? conj) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (x) + (if x + (return #f))) + conj) + (return #t)))) + +(define (flush-mood mood vstr) + (if (eq? (car mood) #:root) + (let ((val (cdr mood))) + (run-query "INSERT INTO irregular_root (verb,voice,thema,root) \ +VALUES (~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val (car val)) + (sql-val (cdr val)))) + (let ((mood-str (mood-key->string (car mood)))) + (for-each + (lambda (p) + (let ((key (car p))) + (debug 1 "flush-mood: " p) + (cond + ((empty-conjugation? (cdr p)) + (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val mood-str) + (sql-val key) + "0")) + (else + (let ((num (next-flect-ident)) + (l (cdr p))) + (run-query "INSERT INTO verbflect VALUES (~A,~A,~A,~A,~A,~A,~A);~%" + num + (sql-val (list-ref l 0)) + (sql-val (list-ref l 1)) + (sql-val (list-ref l 2)) + (sql-val (list-ref l 3)) + (sql-val (list-ref l 4)) + (sql-val (list-ref l 5))) + (run-query "INSERT INTO individual_verb (verb,voice,mood,tense,ident) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (sql-val vstr) + (sql-val mood-str) + (sql-val key) + num) ))))) + (cdr mood))))) + +(define (flush-voice vstr conj-list) + (if conj-list + (for-each + (lambda (mood) + (flush-mood mood vstr)) + conj-list))) + +;;; Fush verb definition to the database +(define (verb-flush) + ;; + (case (verb-get #:action) + ((insert) + (run-query "INSERT INTO verb (verb,conj,augment,accmap,suffix_aor_path) \ +VALUES (~A,~A,~A,~A,~A);~%" + (verb-get-sql #:verb) + (verb-get-sql #:class) + (verb-get-sql #:augment) + (verb-get-sql #:accmap) + (verb-get-sql #:suffix)) + (flush-voice "act" (verb-get #:act)) + (flush-voice "pas" (verb-get #:pas))) + ((delete update) + (xmltrans:parse-error #f + "Sorry update and delete are not yet supported")))) + +;;;; XML definitions + +;;; Set the default handler +(define tag-list '()) + +(define (lingua:default-start tag attr) + (xmltrans:set-attr #f "__START__" 1) + #f) + +(xmltrans:set-default-start-handler lingua:default-start) + +(define (lingua:default-end tag attr text) + (if (xmltrans:attr attr "__START__") + (xmltrans:parse-error #f "Unhandled element " tag)) + (set! tag-list + (cons + (xmltrans:make-tag tag attr text) + tag-list)) + #f) + +(xmltrans:set-default-end-handler lingua:default-end) + +;; <i> +(xmltrans:end-tag + "i" + (tag attr text) + #f) + + +;; <v>...</v> - Verb definition +(xmltrans:end-tag + "v" + (tag attr text) + (check-parent tag "i") + (if (verbdef-validate) + (verb-flush)) + (verb-init) + #f) + +;; <a>verb</a> - Verb in dictionary form +(xmltrans:end-tag + "a" + (tag attr text) + (check-parent tag "v") + (cond ((verb-get #:verb) + (xmltrans:parse-error #f "Verb was already defined") + (mark-invalid))) + (verb-set #:verb text) + #f) + +;;; <c>class</c> - Set conjugation class +(xmltrans:end-tag + "c" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:class) + (xmltrans:parse-error #f "Verb class was already defined") + (mark-invalid)) + ((not (or (string=? text "A") + (string=? text "B1") + (string=? text "B2") + (string=? text "I"))) + (xmltrans:parse-warning #f "Unknown or misspelled verb class"))) + (verb-set #:class text) + #f) + +;;; <action>insert|delete|update</action> - Define action +(xmltrans:end-tag + "action" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:action) + (xmltrans:parse-error #f "Action was already defined") + (mark-invalid))) + (let ((act (string->symbol text))) + (case act + ((insert delete update) + (verb-set #:action act)) + (else + (xmltrans:parse-error #f "Unknown action ~A~%" text)))) + #f) + +;;; <augment>C</augment> - Define augment +(xmltrans:end-tag + "augment" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:augment) + (xmltrans:parse-error #f "Augment was already defined") + (mark-invalid)) + ((not (or (string= text "η") + (string= text "ε"))) + (xmltrans:parse-warning #f "Suspicious augment"))) + (verb-set #:augment text) + #f) + +;;; <suffix>S</suffix> - Define aorist suffix for B verbs +(xmltrans:end-tag + "suffix" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:suffix) + (xmltrans:parse-error #f "Suffix was already defined") + (mark-invalid))) + (verb-set #:suffix text) + #f) + +;;; +(define accmap-char-set + (char-set-adjoin (char-set-copy char-set:digit) #\s #\f #\-)) + +(define (valid-accent-map? accmap) + (let* ((acclist (string->list accmap)) + (len (length acclist))) + (and + (or (= len 6) (= len 7)) + (fold + (lambda (ch prev) + (char-set-contains? accmap-char-set ch)) + #t + (list-head acclist 6)) + (or (= len 6) (char=? (list-ref acclist 6) #\+))))) + +;;; <accmap>MAP</suffix> - Define accent map +(xmltrans:end-tag + "accmap" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:accmap) + (xmltrans:parse-error #f "Accmap was already defined") + (mark-invalid)) + ((not (valid-accent-map? text)) + (xmltrans:parse-error #f "Invalid accent map") + (mark-invalid)) + (else + (verb-set #:accmap text))) + #f) + +;;; <act>...</act> - Define conjugation in active voice +(xmltrans:end-tag + "act" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:act) + (xmltrans:parse-error #f "Active voice was already defined") + (mark-invalid))) + (verb-set #:act (get-conjugation)) + #f) + +;;; <pas>...</pas> - Define conjugation in passive voice +(xmltrans:end-tag + "pas" + (tag attr text) + (check-parent tag "v") + (cond + ((verb-get #:pas) + (xmltrans:parse-error #f "Passive voice was already defined") + (mark-invalid))) + (verb-set #:pas (get-conjugation)) + #f) + +;;; <ind>...</ind> - Indicative +(xmltrans:end-tag + "ind" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:ind (get-mood)) + #f) + +;;; <sub>...</sub> - Subjunctive +(xmltrans:end-tag + "sub" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:sub (get-mood)) + #f) + +;;; <imp>...</imp> - Imperative +(xmltrans:end-tag + "imp" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:imp (get-mood)) + #f) + +;;; <aor>root</aor> - Define aorist root +(xmltrans:end-tag + "aor" + (tag attr text) + (check-parent tag "act" "pas") + (conjugation-set #:root (cons "aor" text)) + #f) + +;;; <root theme="aor|sub|pres">root</root> - Define aorist root +(xmltrans:end-tag + "root" + (tag attr text) + (check-parent tag "act" "pas") + (let ((theme (xmltrans:attr attr "theme"))) + (cond + ((not theme) + (xmltrans:parse-error #f "Required attribute `theme' not specified") + (mark-invalid)) + ((or (string=? theme "aor") + (string=? theme "sub") + (string=? theme "pres")) + (conjugation-set #:root (cons theme text))) + (else + (xmltrans:parse-error #f "Unknown verb theme") + (mark-invalid)))) + #f) + + +;;; <t name="S">...</t> - Define a tense +(xmltrans:start-tag + "t" + (tag attr) + (check-parent tag "ind" "sub" "imp") + (tense-init) + #f) + +(xmltrans:end-tag + "t" + (tag attr text) + (let ((name (xmltrans:attr attr "name"))) + (if (not name) + (begin + (xmltrans:parse-error #f "Required attribute `name' not specified") + (mark-invalid))) + (mood-set name (get-tense))) + #f) + +;;; <p n="[sp]" n="[123]">...</p> - Define a person +(xmltrans:end-tag + "p" + (tag attr text) + (check-parent tag "t") + (call-with-current-continuation + (lambda (return) + (let ((number (xmltrans:attr attr "n")) + (person (xmltrans:attr attr "p")) + (elt #f)) + (cond + ((not number) + (xmltrans:parse-error #f "Required attribute `n' not specified") + (return)) + ((not person) + (xmltrans:parse-error #f "Required attribute `p' not specified") + (return)) + ((string=? person "1") + (set! elt 0)) + ((string=? person "2") + (set! elt 1)) + ((string=? person "3") + (set! elt 2)) + (else + (xmltrans:parse-error #f "Invalid value for `p'") + (return))) + (cond + ((string=? number "s") 0) + ((string=? number "p") + (set! elt (+ 3 elt))) + (else + (xmltrans:parse-error #f "Invalid value for `n'") + (return))) + (tense-set elt text)))) + #f) + + + +;;; DB functions +(define (escape-string str) + (let loop ((lst '()) + (str str)) + (cond + ((string-index str #\") => + (lambda (pos) + (loop (append lst (list (substring str 0 pos) + "\\\"")) + (substring str (1+ pos))))) + (else + (apply string-append (append lst (list str))))))) + + +;;;; Main +(define grammar + `((check (single-char #\c)) + (cleanup) + (database (single-char #\d) (value #t)) + (host (single-char #\h) (value #t)) + (port (single-char #\P) (value #t)) + (password (single-char #\p) (value #t)) + (user (single-char #\u) (value #t)) + (dry-run (single-char #\n)) + (interface (value #t)) + (verbose (single-char #\v)) + (debug (value #t)) + (help))) + +(define (usage) + (display "usage: verbop OPTIONS FILES + +General options: + + --check Only check input syntax and consistency. Do not + update the database. This means that the program will + not access the database at all, so some errors + (mistyped parts of speech and the like) may slip in + unnoticed. + --verbose Verbosely display SQL queries and their results. + --debug NUMBER Set debugging level (0 < NUMBER <= 100) + --dry-run Do nothing, display what would have been done. + +SQL related options: + + --interface STRING Select SQL interface to use. STRING may be + either \"mysql\" (the default) or \"postgres\". + --host HOST-OR-PATH Set name or IP address of the host running SQL + database, or path to the database I/O socket. + --database NAME Set name of the database to use. + --port NUMBER Set the SQL port number + --user USER-NAME Set SQL user name. + --password STRING Set the SQL password + + --cleanup Clean up the database (delete all entries from all the + tables) before proceeding. Use this option with care. + +Informational options: + + --help Output this help info +\n")) + +(define (cons? p) + (and (pair? p) (not (list? p)))) + +(for-each + (lambda (x) + (cond + ((cons? x) + (case (car x) + ((cleanup) + (set! cleanup-option #t)) + ((database) + (add-conn-param #:db (cdr x))) + ((host) + (add-conn-param #:host (cdr x))) + ((port) + (add-conn-param #:port (string->number (cdr x)))) + ((password) + (add-conn-param #:pass (cdr x))) + ((user) + (add-conn-param #:user (cdr x))) + ((interface) + (add-conn-param #:iface (cdr x))) + ((verbose) + (set! verbose-option #t)) + ((preserve) + (set! preserve-option #t)) + ((debug) + (set! debug-level (string->number (cdr x)))) + ((dry-run) + (set! verbose-option #t) + (set! dry-run-option #t)) + ((help) + (usage) + (exit 0)))) + (else + (set! input-files (cdr x))))) + (getopt-long (command-line) grammar)) + +(if (null? input-files) + (begin + (display "Input files not specified\n" (current-error-port)) + (exit 1))) + +(cond + ((not dry-run-option) + (set! connection (sql-open-connection ellinika-sql-connection)) + (if (not connection) + (begin + (display "Cannot connect to the database\n" (current-error-port)) + (exit 1))) + (run-query "SET NAMES utf8") + (set! flect-ident (query-number "SELECT MAX(ident) FROM verbflect")))) + +(cond + (cleanup-option + (run-query "DELETE FROM verbflect where ident > 99") + (run-query "DELETE FROM verb") + (run-query "DELETE FROM irregular_root") + (run-query "DELETE FROM individual_verb"))) + +(for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files) + +(if connection + (sql-close-connection connection)) + + + |