From 7c6fc162fb2c9cba19025cb3237695288216e5c5 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Fri, 8 Oct 2004 18:51:46 +0000 Subject: Mostly finished :^) git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@198 941c8c0f-9102-463b-b60b-cd22ce0e6858 --- scm/dictrans.scm | 508 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 339 insertions(+), 169 deletions(-) (limited to 'scm') diff --git a/scm/dictrans.scm b/scm/dictrans.scm index 5cfc50e..ff99dea 100644 --- a/scm/dictrans.scm +++ b/scm/dictrans.scm @@ -80,8 +80,11 @@ ;;;; (set! %load-path (cons "/usr/local/share/guile-sql" %load-path)) +(set! %load-path (cons "/home/gray/linguae/ellinika" %load-path)) + (use-modules (xmltools xmltrans) (sql) + (ellinika xlat) (ice-9 getopt-long)) (use-syntax (ice-9 syncase)) @@ -110,6 +113,31 @@ (newline)))) +(define pos-xlat #f) + +(define (convert-pos pos) + (cond + (compile-only + pos) + ((assoc pos pos-xlat) => + (lambda (x) + (cdr x))) + (else + (xmltrans:parse-error #f "unknown or misspelled part of speech") + (mark-invalid) + pos))) + +(define (read-pos conn) + (set! pos-xlat + (map + (lambda (p) + (cons (car p) (string->number (cadr p)))) + (append + (run-query conn "SELECT abbr, id FROM pos") + (run-query conn "SELECT abbr_lat, id FROM pos") + (run-query conn "SELECT name, id FROM pos"))))) + + ;;;; XML definitions ;;; Set the default handler @@ -137,93 +165,105 @@ (define-syntax node-get (syntax-rules () - ((node-get #:key) - (vector-ref current-node 0)) - ((node-get #:forms) - (vector-ref current-node 1)) - ((node-get #:xref) - (vector-ref current-node 2)) - ((node-get #:p-list) - (vector-ref current-node 3)))) + ((node-get #:key node) + (vector-ref node 0)) + ((node-get #:forms node) + (vector-ref node 1)) + ((node-get #:xref node) + (vector-ref node 2)) + ((node-get #:p-list node) + (vector-ref node 3)))) + +(define-macro (current-node-get key) + `(node-get ,key current-node)) (define (mark-invalid) (xmltrans:set-attr "NODE" "__INVALID__" 1)) (define-syntax node-set (syntax-rules () - ((node-set #:key k) + ((node-set #:key node k) (cond - ((node-get #:key) => + ((node-get #:key node) => (lambda (klist) - (vector-set! current-node 0 (append klist (list k))))) + (vector-set! node 0 (append klist (list k))))) (else - (vector-set! current-node 0 (list k))))) - ((node-set #:forms f) + (vector-set! node 0 (list k))))) + ((node-set #:forms node f) (begin (cond - ((node-get #:forms) + ((node-get #:forms node) (xmltrans:parse-error #f "Forms already set") (mark-invalid))) - (vector-set! current-node 1 f))) - ((node-set #:xref x) + (vector-set! node 1 f))) + ((node-set #:xref node x) (cond - ((node-get #:xref) => + ((node-get #:xref node) => (lambda (xlist) - (vector-set! current-node 2 (append xlist (list x))))) + (vector-set! node 2 (append xlist (list x))))) (else - (vector-set! current-node 2 (list x))))) - ((node-set! #:p-article p) + (vector-set! node 2 (list x))))) + ((node-set #:p-article node p) (cond - ((node-get #:p-list) => + ((node-get #:p-list node) => (lambda (plist) - (vector-set! current-node 3 (append plist (list p))))) + (vector-set! node 3 (append plist (list p))))) (else - (vector-set! current-node 3 (list p))))) - ((node-set #:topic t) + (vector-set! node 3 (list p))))) + ((node-set #:topic node t) (for-each (lambda (p) - ;; FIXME: Use p-article-set - (vector-set! p 4 (append (vector-ref p 4) (list t)))) - (node-get #:p-list))))) + (p-article-set #:topic p t)) + (node-get #:p-list node))))) + +(define-macro (current-node-set key val) + `(node-set ,key current-node ,val)) + (define p-article #f) (define-syntax p-article-get (syntax-rules () - ((p-article-get #:pos) - (vector-ref p-article 0)) - ((p-article-get #:article) - (vector-ref p-article 1)) - ((p-article-get #:aref) - (vector-ref p-article 2)) - ((p-article-get #:xref) - (vector-ref p-article 3)) - ((p-article-get #:topic) - (vector-ref p-article 4)))) + ((p-article-get #:pos article) + (vector-ref article 0)) + ((p-article-get #:article article) + (vector-ref article 1)) + ((p-article-get #:aref article) + (vector-ref article 2)) + ((p-article-get #:xref article) + (vector-ref article 3)) + ((p-article-get #:topic article) + (vector-ref article 4)))) (define-syntax p-set (syntax-rules () - ((p-set key n val) - (vector-set! p-article n + ((p-set key article n val) + (vector-set! article n (cond - ((p-article-get key) => + ((p-article-get key article) => (lambda (alst) (append alst (list val)))) (else (list val))))))) - + +(define-macro (current-article-get key) + `(p-article-get ,key p-article)) + (define-syntax p-article-set (syntax-rules () - ((p-article-set #:pos val) - (vector-set! p-article 0 val)) - ((p-article-set #:article val) - (p-set #:article 1 val)) - ((p-article-set #:aref val) - (p-set #:aref 2 val)) - ((p-article-set #:xref val) - (p-set #:xref 3 val)) - ((p-article-set #:topic val) - (p-set #:topic 4 val)))) + ((p-article-set #:pos article val) + (vector-set! article 0 val)) + ((p-article-set #:article article val) + (p-set #:article article 1 val)) + ((p-article-set #:aref article val) + (p-set #:aref article 2 val)) + ((p-article-set #:xref article val) + (p-set #:xref article 3 val)) + ((p-article-set #:topic article val) + (p-set #:topic article 4 val)))) + +(define-macro (current-article-set key val) + `(p-article-set ,key p-article ,val)) ;;; Node list @@ -251,7 +291,7 @@ (debug 10 "APPEND") (for-each (lambda (x) - (p-article-set #:topic x)) + (current-article-set #:topic x)) topic-list)) ;;; @@ -299,17 +339,17 @@ (tag attr text) (cond (p-article - (if (not (null? (node-get #:p-list))) + (if (not (null? (current-node-get #:p-list))) (xmltrans:parse-warning #f "Mixed definition style")) (append-topics) - (node-set #:p-article p-article) + (current-node-set #:p-article p-article) (set! p-article #f))) (cond ((xmltrans:attr attr "__INVALID__")) ; Ignore - ((not (node-get #:key)) + ((not (current-node-get #:key)) (xmltrans:parse-error #f "K element is missing")) - ((or (null? (node-get #:p-list)) (null? (null? (node-get #:xref)))) + ((or (null? (current-node-get #:p-list)) (null? (null? (current-node-get #:xref)))) ; (display current-node)(newline) ; (display p-article)(newline) (xmltrans:parse-error #f "No articles and no cross references for the node")) @@ -338,9 +378,9 @@ (cond ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued ((xmltrans:parent? "P") - (p-article-set #:topic (xmltrans:attr attr "ID"))) + (current-article-set #:topic (xmltrans:attr attr "ID"))) ((xmltrans:parent? "NODE") - (node-set #:topic (xmltrans:attr attr "ID"))) + (current-node-set #:topic (xmltrans:attr attr "ID"))) (else (pop-topic))) #f) @@ -370,7 +410,7 @@ (xmltrans:parse-error #f "K tag cannot be used here") (mark-invalid))) (xmltrans:expect-next-tag "K" k-expect) - (node-set #:key text) + (current-node-set #:key text) #f) ;;; F - FORMS @@ -378,7 +418,7 @@ "F" (tag attr text) (expect-context tag "NODE") - (node-set #:forms text) + (current-node-set #:forms text) #f) ;;; P - PartOfSpeach @@ -398,15 +438,15 @@ (cond ((xmltrans:attr attr "ID") => (lambda (id) - (p-article-set #:pos id) + (current-article-set #:pos (convert-pos id)) (append-topics) - (node-set #:p-article p-article) + (current-node-set #:p-article p-article) (set! p-article #f))) - ((not (null? (p-article-get #:article))) ;; FIXME: other elements too + ((not (null? (current-article-get #:article))) ;; FIXME: other elements too (xmltrans:parse-error p-location "P element is missing ID, but has nested elements")) (else - (set! p-article (vector text '() '() '() '())))) + (set! p-article (vector (convert-pos text) '() '() '() '())))) #f) ;;; A - Antonym @@ -415,7 +455,7 @@ "A" (tag attr text) (expect-context tag "NODE" "P") - (p-article-set #:aref text) + (current-article-set #:aref text) #f) ;;; X - Xref @@ -427,13 +467,12 @@ ((string-null? text) (call-with-current-continuation (lambda (return) - ;; FIXME: Use node-get/p-article-get (do ((node node-list (cdr node))) ((null? node) #f) - (if (not (null? - (vector-ref - (car (vector-ref (car node) 3)) 1))) - (return (car (vector-ref (car node) 0)))))))) + (if (not (null? (p-article-get + #:article + (car (node-get #:p-list (car node)))))) + (return (car (node-get #:key (car node))))))))) (else text)))) @@ -442,9 +481,9 @@ (xmltrans:parse-error #f "Empty reference") (mark-invalid)) ((xmltrans:parent? "P") - (p-article-set #:xref ref)) + (current-article-set #:xref ref)) ((xmltrans:parent? "NODE") - (node-set #:xref ref)))) + (current-node-set #:xref ref)))) #f) ;;; M - MEANING @@ -452,7 +491,7 @@ "M" (tag attr text) (expect-context tag "P" "NODE") - (p-article-set #:article text) + (current-article-set #:article text) #f) ;;; Formatting elements @@ -461,7 +500,7 @@ (tag attr text) (expect-context tag "M" "F") (list - "" + "" text "")) @@ -470,10 +509,214 @@ (tag attr text) (expect-context tag "M" "F") (list - "" + "" text "")) + +(define (assert condition . rest) + (cond + (condition) + ((null? rest) + (throw 'misc-error 'assert + "Assertion failed" + (list) #f) + (exit 1)) + (else + (with-output-to-port + (current-error-port) + (lambda () + (for-each + (lambda (x) + (display x)) + rest) + (newline))) + (exit 1)))) + +(define (escape-string str) + (let loop ((lst '()) + (str str)) + (cond + ((string-index str #\") => + (lambda (pos) + (loop (append lst (list (substring str 0 pos) + "\\\"")) + (substring str (1+ pos))))) + (else + (apply string-append (append lst (list str))))))) + +(define (run-query conn q) + (if verbose-option + (format #t "QUERY: ~S\n" q)) + (let ((res (sql-query conn q))) + (if verbose-option + (format #t "RESULT: ~S\n" res)) + res)) + +(define (query-number conn q) + (let ((res (run-query conn q))) + (if (null? res) + #f + (string->number (caar res))))) + +(define (get-dict-index conn) + (set! dict-index + (or + (query-number conn + "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1") + 0))) + +(define (cleanup-db conn) + (run-query conn "DELETE FROM links") + (run-query conn "DELETE FROM articles") + (run-query conn "DELETE FROM dict") + (run-query conn "DELETE FROM topic") + (run-query conn "DELETE FROM topic_tab") + (run-query conn "DELETE FROM pending_links")) + +(define (pending-fixup conn) + (run-query conn + "INSERT IGNORE INTO links \ + SELECT p.type,p.originator,d.ident \ + FROM dict d, pending_links p \ + WHERE p.word = d.word AND p.type != 'CLOSED'") + (run-query conn + "INSERT IGNORE INTO links \ + SELECT p.type,d.ident,p.originator \ + FROM dict d, pending_links p \ + WHERE p.word = d.word AND p.type != 'CLOSED'") + (run-query + conn + (if (string=? sql-iface "mysql") + "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word" + ;; Else assume SQL92 + "UPDATE pending_links SET type='CLOSED' \ + WHERE originator in (SELECT d.ident from dict d, pending_links p \ + WHERE p.word=d.word)")) + + (if (not preserve-option) + run-query("DELETE FROM pending_links WHERE type = 'CLOSED'")) + + (let ((count (query-number conn + "SELECT count(*) FROM pending_links \ + WHERE type != 'CLOSED'"))) + ;;; GROUP BY word"))) + (if (> count 0) + (display (string-append + (number->string count) + " unresolved references\n") + (current-error-port))))) + +(define (update-stat conn) + (let ((count (query-number conn "SELECT count(*) from dict"))) + (run-query conn "DELETE from stat"); + (run-query conn + (string-append + "INSERT INTO stat (count,updated) VALUES(" + (number->string count) + ",now())")))) + + +(define dict-index 0) + +(define (insert-node conn node) + (letrec ((insert-link (lambda (type value) + (run-query + conn + (format + #f + "INSERT INTO pending_links (type,originator,word) VALUES('~A',~A,'~A')" + type + dict-index + value))))) + + (for-each + (lambda (p-article) + (for-each + (lambda (key) + (let ((sound (ellinika:sounds-like key))) + (set! dict-index (1+ dict-index)) + ;;; Insert 'heading' info + (run-query + conn + (cond + ((node-get #:forms node) => + (lambda (f) + (format + #f + "INSERT INTO dict (ident,word,sound,pos,forms) VALUES(~A,\"~A\",\"~A\",~A,\"~A\")" + + dict-index + key + sound + (p-article-get #:pos p-article) + f))) + (else + (format + #f + "INSERT INTO dict (ident,word,sound,pos) VALUES(~A,\"~A\",\"~A\",~A)" + dict-index + key + sound + (p-article-get #:pos p-article))))) + + ;;; Insert cross-references + (for-each + (lambda (x) + (insert-link "XREF" x)) + (append + (node-get #:xref node) + (p-article-get #:xref p-article))) + + ;;; Insert antonyms + (for-each + (lambda (x) + (insert-link "ANT" x)) + (p-article-get #:aref p-article)) + + ;;; Insert topics + (for-each + (lambda (t) + (let ((index (or + (query-number + conn + (format + #f + "SELECT ident FROM topic WHERE title=\"~A\"" + (escape-string t))) + (begin + (run-query + conn + (format + #f + "INSERT INTO topic (title) VALUES (\"~A\")" + (escape-string t))) + (query-number conn "SELECT LAST_INSERT_ID()"))))) + (run-query + conn + (format + #f + "INSERT INTO topic_tab VALUES(~A,~A)" + index + dict-index)))) + (p-article-get #:topic p-article)) + + ;;; Insert articles + (let ((article-index 0)) + (for-each + (lambda (text) + (run-query + conn + (format #f + "INSERT INTO articles VALUES (~A, ~A, \"~A\")" + dict-index + article-index + text)) + (set! article-index (1+ article-index))) + (p-article-get #:article p-article))))) + (node-get #:key node))) + (node-get #:p-list node)))) + ;;;; Main @@ -511,7 +754,7 @@ ((host) (set! sql-host (cdr x))) ((port) - (set! sql-port (cdr x))) + (set! sql-port (string->number (cdr x)))) ((password) (set! sql-password (cdr x))) ((user) @@ -537,115 +780,42 @@ (display "Input files not specified\n" (current-error-port)) (exit 1))) -(for-each - (lambda (x) - (if (not (xmltrans:parse-file x)) - (exit 1))) - input-files) - -(if compile-only - (exit 0)) - -(define (assert cond . rest) - (cond - (cond) - ((null? rest) - (throw 'misc-error 'assert - "Assertion failed" - (list) #f) - (exit 1)) - (else - (with-output-to-port - (current-error-port) - (lambda () - (for-each - (lambda (x) - (display x)) - rest) - (newline))) - (exit 1)))) - -(define dict-index 0) -(define article-index 0) - -(define (get-dict-index conn) - (let ((res (sql-query "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1"))) - (assert (not (null? res))) - (set! dict-index (car res)))) - -(define (cleanup-db conn) - (sql-query "DELETE FROM links") - (sql-query "DELETE FROM articles") - (sql-query "DELETE FROM dict") - (sql-query "DELETE FROM topic") - (sql-query "DELETE FROM topic_tab") - (sql-query "DELETE FROM pending_links")) - -(define (pending-fixup) - (sql-query "INSERT IGNORE INTO links \ - SELECT p.type,p.originator,d.ident \ - FROM dict d, pending_links p \ - WHERE p.word = d.word AND p.type != 'CLOSED'") - (sql-query "INSERT IGNORE INTO links \ - SELECT p.type,d.ident,p.originator \ - FROM dict d, pending_links p \ - WHERE p.word = d.word AND p.type != 'CLOSED'") - (sql-query - (if (string=? sql-iface "mysql") - "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word" - ;; Else assume SQL92 - "UPDATE pending_links SET type='CLOSED' \ - WHERE originator in (SELECT d.ident from dict d, pending_links p \ - WHERE p.word=d.word)")) - - (if (not preserve-pending-option) - sql_query("DELETE FROM pending_links WHERE type = 'CLOSED'")) - - (let ((count (query-number "SELECT count(*) FROM pending_links \ - WHERE type != 'CLOSED' \ - GROUP BY word"))) - (if (> count 0) - (display (string-append - (car res) - " unresolved references") - (current-error-port))))) - -(define (update-stat) - (let ((count (query-number("SELECT count(*) from dict")))) - (sql_query "DELETE from stat"); - (sql-query (string-append - "INSERT INTO stat (count,updated) VALUES(" - (number->string count) - ",now())")))) - - -(define (insert-node conn node) - (set! article-index 0) - (set! dict-index (1+ dict-index)) - - ;;; FIXME - ) +(cond + (compile-only + (for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files))) (let ((conn (sql-connect sql-iface sql-host sql-port sql-database sql-user sql-password))) - (if (not conn) (begin (display "Cannot connect to the database\n" (current-error-port)) (exit 1))) + (read-pos conn) + + (for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files) + + (if cleanup-option (cleanup-db conn)) - + (get-dict-index conn) (for-each (lambda (node) (insert-node conn node)) - node-list) + (reverse node-list)) - (pending-fixup) - (update-stat) + (pending-fixup conn) + (update-stat conn) (sql-connect-close conn)) -- cgit v1.2.1