summaryrefslogtreecommitdiffabout
path: root/scm/dictrans.scm
authorSergey Poznyakoff <gray@gnu.org.ua>2004-10-08 18:51:46 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2004-10-08 18:51:46 (GMT)
commit7c6fc162fb2c9cba19025cb3237695288216e5c5 (patch) (side-by-side diff)
treedc11b08785f5acb19dba16dbad7f5c2ce7e6f333 /scm/dictrans.scm
parent72d789af56bad636cfc1a8d6e55fee928118b22b (diff)
downloadellinika-7c6fc162fb2c9cba19025cb3237695288216e5c5.tar.gz
ellinika-7c6fc162fb2c9cba19025cb3237695288216e5c5.tar.bz2
Mostly finished :^)
git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@198 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'scm/dictrans.scm') (more/less context) (ignore whitespace changes)
-rw-r--r--scm/dictrans.scm508
1 files changed, 339 insertions, 169 deletions
diff --git a/scm/dictrans.scm b/scm/dictrans.scm
index 5cfc50e..ff99dea 100644
--- a/scm/dictrans.scm
+++ b/scm/dictrans.scm
@@ -80,8 +80,11 @@
;;;; </NODE>
(set! %load-path (cons "/usr/local/share/guile-sql" %load-path))
+(set! %load-path (cons "/home/gray/linguae/ellinika" %load-path))
+
(use-modules (xmltools xmltrans)
(sql)
+ (ellinika xlat)
(ice-9 getopt-long))
(use-syntax (ice-9 syncase))
@@ -110,6 +113,31 @@
(newline))))
+(define pos-xlat #f)
+
+(define (convert-pos pos)
+ (cond
+ (compile-only
+ pos)
+ ((assoc pos pos-xlat) =>
+ (lambda (x)
+ (cdr x)))
+ (else
+ (xmltrans:parse-error #f "unknown or misspelled part of speech")
+ (mark-invalid)
+ pos)))
+
+(define (read-pos conn)
+ (set! pos-xlat
+ (map
+ (lambda (p)
+ (cons (car p) (string->number (cadr p))))
+ (append
+ (run-query conn "SELECT abbr, id FROM pos")
+ (run-query conn "SELECT abbr_lat, id FROM pos")
+ (run-query conn "SELECT name, id FROM pos")))))
+
+
;;;; XML definitions
;;; Set the default handler
@@ -137,93 +165,105 @@
(define-syntax node-get
(syntax-rules ()
- ((node-get #:key)
- (vector-ref current-node 0))
- ((node-get #:forms)
- (vector-ref current-node 1))
- ((node-get #:xref)
- (vector-ref current-node 2))
- ((node-get #:p-list)
- (vector-ref current-node 3))))
+ ((node-get #:key node)
+ (vector-ref node 0))
+ ((node-get #:forms node)
+ (vector-ref node 1))
+ ((node-get #:xref node)
+ (vector-ref node 2))
+ ((node-get #:p-list node)
+ (vector-ref node 3))))
+
+(define-macro (current-node-get key)
+ `(node-get ,key current-node))
(define (mark-invalid)
(xmltrans:set-attr "NODE" "__INVALID__" 1))
(define-syntax node-set
(syntax-rules ()
- ((node-set #:key k)
+ ((node-set #:key node k)
(cond
- ((node-get #:key) =>
+ ((node-get #:key node) =>
(lambda (klist)
- (vector-set! current-node 0 (append klist (list k)))))
+ (vector-set! node 0 (append klist (list k)))))
(else
- (vector-set! current-node 0 (list k)))))
- ((node-set #:forms f)
+ (vector-set! node 0 (list k)))))
+ ((node-set #:forms node f)
(begin
(cond
- ((node-get #:forms)
+ ((node-get #:forms node)
(xmltrans:parse-error #f "Forms already set")
(mark-invalid)))
- (vector-set! current-node 1 f)))
- ((node-set #:xref x)
+ (vector-set! node 1 f)))
+ ((node-set #:xref node x)
(cond
- ((node-get #:xref) =>
+ ((node-get #:xref node) =>
(lambda (xlist)
- (vector-set! current-node 2 (append xlist (list x)))))
+ (vector-set! node 2 (append xlist (list x)))))
(else
- (vector-set! current-node 2 (list x)))))
- ((node-set! #:p-article p)
+ (vector-set! node 2 (list x)))))
+ ((node-set #:p-article node p)
(cond
- ((node-get #:p-list) =>
+ ((node-get #:p-list node) =>
(lambda (plist)
- (vector-set! current-node 3 (append plist (list p)))))
+ (vector-set! node 3 (append plist (list p)))))
(else
- (vector-set! current-node 3 (list p)))))
- ((node-set #:topic t)
+ (vector-set! node 3 (list p)))))
+ ((node-set #:topic node t)
(for-each
(lambda (p)
- ;; FIXME: Use p-article-set
- (vector-set! p 4 (append (vector-ref p 4) (list t))))
- (node-get #:p-list)))))
+ (p-article-set #:topic p t))
+ (node-get #:p-list node)))))
+
+(define-macro (current-node-set key val)
+ `(node-set ,key current-node ,val))
+
(define p-article #f)
(define-syntax p-article-get
(syntax-rules ()
- ((p-article-get #:pos)
- (vector-ref p-article 0))
- ((p-article-get #:article)
- (vector-ref p-article 1))
- ((p-article-get #:aref)
- (vector-ref p-article 2))
- ((p-article-get #:xref)
- (vector-ref p-article 3))
- ((p-article-get #:topic)
- (vector-ref p-article 4))))
+ ((p-article-get #:pos article)
+ (vector-ref article 0))
+ ((p-article-get #:article article)
+ (vector-ref article 1))
+ ((p-article-get #:aref article)
+ (vector-ref article 2))
+ ((p-article-get #:xref article)
+ (vector-ref article 3))
+ ((p-article-get #:topic article)
+ (vector-ref article 4))))
(define-syntax p-set
(syntax-rules ()
- ((p-set key n val)
- (vector-set! p-article n
+ ((p-set key article n val)
+ (vector-set! article n
(cond
- ((p-article-get key) =>
+ ((p-article-get key article) =>
(lambda (alst)
(append alst (list val))))
(else
(list val)))))))
-
+
+(define-macro (current-article-get key)
+ `(p-article-get ,key p-article))
+
(define-syntax p-article-set
(syntax-rules ()
- ((p-article-set #:pos val)
- (vector-set! p-article 0 val))
- ((p-article-set #:article val)
- (p-set #:article 1 val))
- ((p-article-set #:aref val)
- (p-set #:aref 2 val))
- ((p-article-set #:xref val)
- (p-set #:xref 3 val))
- ((p-article-set #:topic val)
- (p-set #:topic 4 val))))
+ ((p-article-set #:pos article val)
+ (vector-set! article 0 val))
+ ((p-article-set #:article article val)
+ (p-set #:article article 1 val))
+ ((p-article-set #:aref article val)
+ (p-set #:aref article 2 val))
+ ((p-article-set #:xref article val)
+ (p-set #:xref article 3 val))
+ ((p-article-set #:topic article val)
+ (p-set #:topic article 4 val))))
+
+(define-macro (current-article-set key val)
+ `(p-article-set ,key p-article ,val))
;;; Node list
@@ -251,7 +291,7 @@
(debug 10 "APPEND")
(for-each
(lambda (x)
- (p-article-set #:topic x))
+ (current-article-set #:topic x))
topic-list))
;;;
@@ -299,17 +339,17 @@
(tag attr text)
(cond
(p-article
- (if (not (null? (node-get #:p-list)))
+ (if (not (null? (current-node-get #:p-list)))
(xmltrans:parse-warning #f "Mixed definition style"))
(append-topics)
- (node-set #:p-article p-article)
+ (current-node-set #:p-article p-article)
(set! p-article #f)))
(cond
((xmltrans:attr attr "__INVALID__")) ; Ignore
- ((not (node-get #:key))
+ ((not (current-node-get #:key))
(xmltrans:parse-error #f "K element is missing"))
- ((or (null? (node-get #:p-list)) (null? (null? (node-get #:xref))))
+ ((or (null? (current-node-get #:p-list)) (null? (null? (current-node-get #:xref))))
; (display current-node)(newline)
; (display p-article)(newline)
(xmltrans:parse-error #f "No articles and no cross references for the node"))
@@ -338,9 +378,9 @@
(cond
((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued
((xmltrans:parent? "P")
- (p-article-set #:topic (xmltrans:attr attr "ID")))
+ (current-article-set #:topic (xmltrans:attr attr "ID")))
((xmltrans:parent? "NODE")
- (node-set #:topic (xmltrans:attr attr "ID")))
+ (current-node-set #:topic (xmltrans:attr attr "ID")))
(else
(pop-topic)))
#f)
@@ -370,7 +410,7 @@
(xmltrans:parse-error #f "K tag cannot be used here")
(mark-invalid)))
(xmltrans:expect-next-tag "K" k-expect)
- (node-set #:key text)
+ (current-node-set #:key text)
#f)
;;; F - FORMS
@@ -378,7 +418,7 @@
"F"
(tag attr text)
(expect-context tag "NODE")
- (node-set #:forms text)
+ (current-node-set #:forms text)
#f)
;;; P - PartOfSpeach
@@ -398,15 +438,15 @@
(cond
((xmltrans:attr attr "ID") =>
(lambda (id)
- (p-article-set #:pos id)
+ (current-article-set #:pos (convert-pos id))
(append-topics)
- (node-set #:p-article p-article)
+ (current-node-set #:p-article p-article)
(set! p-article #f)))
- ((not (null? (p-article-get #:article))) ;; FIXME: other elements too
+ ((not (null? (current-article-get #:article))) ;; FIXME: other elements too
(xmltrans:parse-error p-location
"P element is missing ID, but has nested elements"))
(else
- (set! p-article (vector text '() '() '() '()))))
+ (set! p-article (vector (convert-pos text) '() '() '() '()))))
#f)
;;; A - Antonym
@@ -415,7 +455,7 @@
"A"
(tag attr text)
(expect-context tag "NODE" "P")
- (p-article-set #:aref text)
+ (current-article-set #:aref text)
#f)
;;; X - Xref
@@ -427,13 +467,12 @@
((string-null? text)
(call-with-current-continuation
(lambda (return)
- ;; FIXME: Use node-get/p-article-get
(do ((node node-list (cdr node)))
((null? node) #f)
- (if (not (null?
- (vector-ref
- (car (vector-ref (car node) 3)) 1)))
- (return (car (vector-ref (car node) 0))))))))
+ (if (not (null? (p-article-get
+ #:article
+ (car (node-get #:p-list (car node))))))
+ (return (car (node-get #:key (car node)))))))))
(else
text))))
@@ -442,9 +481,9 @@
(xmltrans:parse-error #f "Empty reference")
(mark-invalid))
((xmltrans:parent? "P")
- (p-article-set #:xref ref))
+ (current-article-set #:xref ref))
((xmltrans:parent? "NODE")
- (node-set #:xref ref))))
+ (current-node-set #:xref ref))))
#f)
;;; M - MEANING
@@ -452,7 +491,7 @@
"M"
(tag attr text)
(expect-context tag "P" "NODE")
- (p-article-set #:article text)
+ (current-article-set #:article text)
#f)
;;; Formatting elements
@@ -461,7 +500,7 @@
(tag attr text)
(expect-context tag "M" "F")
(list
- "<span class=\"comment\">"
+ "<span class=\\\"comment\\\">"
text
"</span>"))
@@ -470,10 +509,214 @@
(tag attr text)
(expect-context tag "M" "F")
(list
- "<span class=\"expl\">"
+ "<span class=\\\"expl\\\">"
text
"</span>"))
+
+(define (assert condition . rest)
+ (cond
+ (condition)
+ ((null? rest)
+ (throw 'misc-error 'assert
+ "Assertion failed"
+ (list) #f)
+ (exit 1))
+ (else
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline)))
+ (exit 1))))
+
+(define (escape-string str)
+ (let loop ((lst '())
+ (str str))
+ (cond
+ ((string-index str #\") =>
+ (lambda (pos)
+ (loop (append lst (list (substring str 0 pos)
+ "\\\""))
+ (substring str (1+ pos)))))
+ (else
+ (apply string-append (append lst (list str)))))))
+
+(define (run-query conn q)
+ (if verbose-option
+ (format #t "QUERY: ~S\n" q))
+ (let ((res (sql-query conn q)))
+ (if verbose-option
+ (format #t "RESULT: ~S\n" res))
+ res))
+
+(define (query-number conn q)
+ (let ((res (run-query conn q)))
+ (if (null? res)
+ #f
+ (string->number (caar res)))))
+
+(define (get-dict-index conn)
+ (set! dict-index
+ (or
+ (query-number conn
+ "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1")
+ 0)))
+
+(define (cleanup-db conn)
+ (run-query conn "DELETE FROM links")
+ (run-query conn "DELETE FROM articles")
+ (run-query conn "DELETE FROM dict")
+ (run-query conn "DELETE FROM topic")
+ (run-query conn "DELETE FROM topic_tab")
+ (run-query conn "DELETE FROM pending_links"))
+
+(define (pending-fixup conn)
+ (run-query conn
+ "INSERT IGNORE INTO links \
+ SELECT p.type,p.originator,d.ident \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (run-query conn
+ "INSERT IGNORE INTO links \
+ SELECT p.type,d.ident,p.originator \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (run-query
+ conn
+ (if (string=? sql-iface "mysql")
+ "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word"
+ ;; Else assume SQL92
+ "UPDATE pending_links SET type='CLOSED' \
+ WHERE originator in (SELECT d.ident from dict d, pending_links p \
+ WHERE p.word=d.word)"))
+
+ (if (not preserve-option)
+ run-query("DELETE FROM pending_links WHERE type = 'CLOSED'"))
+
+ (let ((count (query-number conn
+ "SELECT count(*) FROM pending_links \
+ WHERE type != 'CLOSED'")))
+ ;;; GROUP BY word")))
+ (if (> count 0)
+ (display (string-append
+ (number->string count)
+ " unresolved references\n")
+ (current-error-port)))))
+
+(define (update-stat conn)
+ (let ((count (query-number conn "SELECT count(*) from dict")))
+ (run-query conn "DELETE from stat");
+ (run-query conn
+ (string-append
+ "INSERT INTO stat (count,updated) VALUES("
+ (number->string count)
+ ",now())"))))
+
+
+(define dict-index 0)
+
+(define (insert-node conn node)
+ (letrec ((insert-link (lambda (type value)
+ (run-query
+ conn
+ (format
+ #f
+ "INSERT INTO pending_links (type,originator,word) VALUES('~A',~A,'~A')"
+ type
+ dict-index
+ value)))))
+
+ (for-each
+ (lambda (p-article)
+ (for-each
+ (lambda (key)
+ (let ((sound (ellinika:sounds-like key)))
+ (set! dict-index (1+ dict-index))
+ ;;; Insert 'heading' info
+ (run-query
+ conn
+ (cond
+ ((node-get #:forms node) =>
+ (lambda (f)
+ (format
+ #f
+ "INSERT INTO dict (ident,word,sound,pos,forms) VALUES(~A,\"~A\",\"~A\",~A,\"~A\")"
+
+ dict-index
+ key
+ sound
+ (p-article-get #:pos p-article)
+ f)))
+ (else
+ (format
+ #f
+ "INSERT INTO dict (ident,word,sound,pos) VALUES(~A,\"~A\",\"~A\",~A)"
+ dict-index
+ key
+ sound
+ (p-article-get #:pos p-article)))))
+
+ ;;; Insert cross-references
+ (for-each
+ (lambda (x)
+ (insert-link "XREF" x))
+ (append
+ (node-get #:xref node)
+ (p-article-get #:xref p-article)))
+
+ ;;; Insert antonyms
+ (for-each
+ (lambda (x)
+ (insert-link "ANT" x))
+ (p-article-get #:aref p-article))
+
+ ;;; Insert topics
+ (for-each
+ (lambda (t)
+ (let ((index (or
+ (query-number
+ conn
+ (format
+ #f
+ "SELECT ident FROM topic WHERE title=\"~A\""
+ (escape-string t)))
+ (begin
+ (run-query
+ conn
+ (format
+ #f
+ "INSERT INTO topic (title) VALUES (\"~A\")"
+ (escape-string t)))
+ (query-number conn "SELECT LAST_INSERT_ID()")))))
+ (run-query
+ conn
+ (format
+ #f
+ "INSERT INTO topic_tab VALUES(~A,~A)"
+ index
+ dict-index))))
+ (p-article-get #:topic p-article))
+
+ ;;; Insert articles
+ (let ((article-index 0))
+ (for-each
+ (lambda (text)
+ (run-query
+ conn
+ (format #f
+ "INSERT INTO articles VALUES (~A, ~A, \"~A\")"
+ dict-index
+ article-index
+ text))
+ (set! article-index (1+ article-index)))
+ (p-article-get #:article p-article)))))
+ (node-get #:key node)))
+ (node-get #:p-list node))))
+
;;;; Main
@@ -511,7 +754,7 @@
((host)
(set! sql-host (cdr x)))
((port)
- (set! sql-port (cdr x)))
+ (set! sql-port (string->number (cdr x))))
((password)
(set! sql-password (cdr x)))
((user)
@@ -537,115 +780,42 @@
(display "Input files not specified\n" (current-error-port))
(exit 1)))
-(for-each
- (lambda (x)
- (if (not (xmltrans:parse-file x))
- (exit 1)))
- input-files)
-
-(if compile-only
- (exit 0))
-
-(define (assert cond . rest)
- (cond
- (cond)
- ((null? rest)
- (throw 'misc-error 'assert
- "Assertion failed"
- (list) #f)
- (exit 1))
- (else
- (with-output-to-port
- (current-error-port)
- (lambda ()
- (for-each
- (lambda (x)
- (display x))
- rest)
- (newline)))
- (exit 1))))
-
-(define dict-index 0)
-(define article-index 0)
-
-(define (get-dict-index conn)
- (let ((res (sql-query "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1")))
- (assert (not (null? res)))
- (set! dict-index (car res))))
-
-(define (cleanup-db conn)
- (sql-query "DELETE FROM links")
- (sql-query "DELETE FROM articles")
- (sql-query "DELETE FROM dict")
- (sql-query "DELETE FROM topic")
- (sql-query "DELETE FROM topic_tab")
- (sql-query "DELETE FROM pending_links"))
-
-(define (pending-fixup)
- (sql-query "INSERT IGNORE INTO links \
- SELECT p.type,p.originator,d.ident \
- FROM dict d, pending_links p \
- WHERE p.word = d.word AND p.type != 'CLOSED'")
- (sql-query "INSERT IGNORE INTO links \
- SELECT p.type,d.ident,p.originator \
- FROM dict d, pending_links p \
- WHERE p.word = d.word AND p.type != 'CLOSED'")
- (sql-query
- (if (string=? sql-iface "mysql")
- "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word"
- ;; Else assume SQL92
- "UPDATE pending_links SET type='CLOSED' \
- WHERE originator in (SELECT d.ident from dict d, pending_links p \
- WHERE p.word=d.word)"))
-
- (if (not preserve-pending-option)
- sql_query("DELETE FROM pending_links WHERE type = 'CLOSED'"))
-
- (let ((count (query-number "SELECT count(*) FROM pending_links \
- WHERE type != 'CLOSED' \
- GROUP BY word")))
- (if (> count 0)
- (display (string-append
- (car res)
- " unresolved references")
- (current-error-port)))))
-
-(define (update-stat)
- (let ((count (query-number("SELECT count(*) from dict"))))
- (sql_query "DELETE from stat");
- (sql-query (string-append
- "INSERT INTO stat (count,updated) VALUES("
- (number->string count)
- ",now())"))))
-
-
-(define (insert-node conn node)
- (set! article-index 0)
- (set! dict-index (1+ dict-index))
-
- ;;; FIXME
- )
+(cond
+ (compile-only
+ (for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)))
(let ((conn (sql-connect sql-iface sql-host sql-port sql-database
sql-user sql-password)))
-
(if (not conn)
(begin
(display "Cannot connect to the database\n" (current-error-port))
(exit 1)))
+ (read-pos conn)
+
+ (for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)
+
+
(if cleanup-option
(cleanup-db conn))
-
+
(get-dict-index conn)
(for-each
(lambda (node)
(insert-node conn node))
- node-list)
+ (reverse node-list))
- (pending-fixup)
- (update-stat)
+ (pending-fixup conn)
+ (update-stat conn)
(sql-connect-close conn))

Return to:

Send suggestions and report system problems to the System administrator.