aboutsummaryrefslogtreecommitdiff
path: root/dictrans.scm
diff options
context:
space:
mode:
Diffstat (limited to 'dictrans.scm')
-rw-r--r--dictrans.scm834
1 files changed, 0 insertions, 834 deletions
diff --git a/dictrans.scm b/dictrans.scm
deleted file mode 100644
index bbb6058..0000000
--- a/dictrans.scm
+++ /dev/null
@@ -1,834 +0,0 @@
-#! =GUILE_BINDIR=/guile -s
-!#
-;;;; This file is part of Ellinika
-;;;; Copyright (C) 2004 Sergey Poznyakoff
-;;;;
-;;;; Ellinika is free software; you can redistribute it and/or modify
-;;;; it under the terms of the GNU General Public License as published by
-;;;; the Free Software Foundation; either version 2 of the License, or
-;;;; (at your option) any later version.
-;;;;
-;;;; Ellinika is distributed in the hope that it will be useful,
-;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;;;; GNU General Public License for more details.
-;;;;
-;;;; You should have received a copy of the GNU General Public License
-;;;; along with Ellinika; if not, write to the Free Software
-;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-;;;;
-
-;;;; Dictionary structure
-;;;; Internal representation:
-;;;;
-;;;; * Each dictionary entry is represented as a vector:
-;;;;
-;;;; #(KEY FORMS XREF P-LIST)
-;;;;
-;;;; KEY is list of strings
-;;;;
-;;;; * FORMS is either #f or a string describing forms of the key if they
-;;;; are formed in an irregular manner.
-;;;;
-;;;; * XREF is a list of cross-references
-;;;;
-;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector:
-;;;;
-;;;; #(POS ARTICLE AREF XREF TOPIC)
-;;;;
-;;;; Member Type Meaning
-;;;; POS string part of speech
-;;;; ARTICLE list(string) Dictionary article associated with this key/pos
-;;;; AREF list(string) List of antonyms
-;;;; XREF list(string) List of cross-references
-;;;; TOPIC list(string) List of topics this item pertains to
-;;;;
-;;;;
-;;;; External representation (XML):
-;;;;
-;;;; <NODE>
-;;;; <K>string</K>+
-;;;; [<F>string</F>]
-;;;; <P ID="string">
-;;;; <M>string</M>+
-;;;; <A>string</A>*
-;;;; <X>string</X>*
-;;;; <T ID="string" />*
-;;;; </P>+
-;;;; <X>string</X>*
-;;;; </NODE>
-;;;;
-;;;; If only one P entry is present, the following alternative forms
-;;;; are understood:
-;;;;
-;;;; <NODE>
-;;;; <K>string</K>+
-;;;; [<F>string</F>]
-;;;; <P>string</P>
-;;;; <M>string</M>*
-;;;; <A>string</A>*
-;;;; <X>string</X>*
-;;;; <T ID="string" />*
-;;;; </NODE>
-;;;;
-;;;; or
-;;;;
-;;;; <NODE>
-;;;; <K>string</K>+
-;;;; [<F>string</F>]
-;;;; <P>string</P>
-;;;; <X>string</X>*
-;;;; <T ID="string" />*
-;;;; </NODE>
-;=UPDPATH=
-
-(use-modules (xmltools xmltrans)
- (sql)
- (ellinika xlat)
- (ice-9 getopt-long))
-
-(use-syntax (ice-9 syncase))
-
-(define compile-only #f)
-(define cleanup-option #f)
-(define preserve-option #f)
-
-(define sql-iface "mysql")
-(define sql-host "localhost")
-(define sql-database "ellinika")
-(define sql-port 3306)
-(define sql-password #f)
-(define sql-user #f)
-(define verbose-option #f)
-(define debug-level 0)
-(define input-files '())
-
-(define sysconf-dir "=SYSCONFDIR=")
-
-;;; Load the site defaults
-(let ((rc-file (string-append sysconf-dir "/" config-file-name)))
- (if (file-exists? rc-file)
- (load rc-file)))
-
-(define (debug level . rest)
- (if (>= debug-level level)
- (begin
- (for-each
- (lambda (x)
- (display x))
- rest)
- (newline))))
-
-
-(define pos-xlat #f)
-
-(define (convert-pos pos)
- (cond
- (compile-only
- pos)
- ((assoc pos pos-xlat) =>
- (lambda (x)
- (cdr x)))
- (else
- (xmltrans:parse-error #f "unknown or misspelled part of speech")
- (mark-invalid)
- pos)))
-
-(define (read-pos conn)
- (set! pos-xlat
- (map
- (lambda (p)
- (cons (car p) (string->number (cadr p))))
- (append
- (run-query conn "SELECT abbr, id FROM pos")
- (run-query conn "SELECT abbr_lat, id FROM pos")
- (run-query conn "SELECT name, id FROM pos")))))
-
-
-;;;; XML definitions
-
-;;; Set the default handler
-(define tag-list '())
-
-(define (lingua:default-start tag attr)
- (xmltrans:set-attr #f "__START__" 1)
- #f)
-
-(xmltrans:set-default-start-handler lingua:default-start)
-
-(define (lingua:default-end tag attr text)
- (if (xmltrans:attr attr "__START__")
- (xmltrans:parse-error #f "Unhandled element " tag))
- (set! tag-list
- (cons
- (xmltrans:make-tag tag attr text)
- tag-list))
- #f)
-
-(xmltrans:set-default-end-handler lingua:default-end)
-
-;;; Current node
-(define current-node #f)
-
-(define-syntax node-get
- (syntax-rules ()
- ((node-get #:key node)
- (vector-ref node 0))
- ((node-get #:forms node)
- (vector-ref node 1))
- ((node-get #:xref node)
- (vector-ref node 2))
- ((node-get #:p-list node)
- (vector-ref node 3))))
-
-(define-macro (current-node-get key)
- `(node-get ,key current-node))
-
-(define (mark-invalid)
- (xmltrans:set-attr "NODE" "__INVALID__" 1))
-
-(define-syntax node-set
- (syntax-rules ()
- ((node-set #:key node k)
- (cond
- ((node-get #:key node) =>
- (lambda (klist)
- (vector-set! node 0 (append klist (list k)))))
- (else
- (vector-set! node 0 (list k)))))
- ((node-set #:forms node f)
- (begin
- (cond
- ((node-get #:forms node)
- (xmltrans:parse-error #f "Forms already set")
- (mark-invalid)))
- (vector-set! node 1 f)))
- ((node-set #:xref node x)
- (cond
- ((node-get #:xref node) =>
- (lambda (xlist)
- (vector-set! node 2 (append xlist (list x)))))
- (else
- (vector-set! node 2 (list x)))))
- ((node-set #:p-article node p)
- (cond
- ((node-get #:p-list node) =>
- (lambda (plist)
- (vector-set! node 3 (append plist (list p)))))
- (else
- (vector-set! node 3 (list p)))))
- ((node-set #:topic node t)
- (for-each
- (lambda (p)
- (p-article-set #:topic p t))
- (node-get #:p-list node)))))
-
-(define-macro (current-node-set key val)
- `(node-set ,key current-node ,val))
-
-
-(define p-article #f)
-
-(define-syntax p-article-get
- (syntax-rules ()
- ((p-article-get #:pos article)
- (vector-ref article 0))
- ((p-article-get #:article article)
- (vector-ref article 1))
- ((p-article-get #:aref article)
- (vector-ref article 2))
- ((p-article-get #:xref article)
- (vector-ref article 3))
- ((p-article-get #:topic article)
- (vector-ref article 4))))
-
-(define-syntax p-set
- (syntax-rules ()
- ((p-set key article n val)
- (vector-set! article n
- (cond
- ((p-article-get key article) =>
- (lambda (alst)
- (append alst (list val))))
- (else
- (list val)))))))
-
-(define-macro (current-article-get key)
- `(p-article-get ,key p-article))
-
-(define-syntax p-article-set
- (syntax-rules ()
- ((p-article-set #:pos article val)
- (vector-set! article 0 val))
- ((p-article-set #:article article val)
- (p-set #:article article 1 val))
- ((p-article-set #:aref article val)
- (p-set #:aref article 2 val))
- ((p-article-set #:xref article val)
- (p-set #:xref article 3 val))
- ((p-article-set #:topic article val)
- (p-set #:topic article 4 val))))
-
-(define-macro (current-article-set key val)
- `(p-article-set ,key p-article ,val))
-
-
-;;; Node list
-(define node-list '())
-
-(define (push-node node)
- (if (>= debug-level 100)
- (begin
- (write node)
- (newline)))
- (set! node-list (cons current-node node-list)))
-
-;;; Topic stack
-(define topic-list '())
-
-(define (push-topic id)
- (debug 10 "PUSH " id)
- (set! topic-list (cons id topic-list)))
-
-(define (pop-topic)
- (debug 10 "POP " (car topic-list))
- (set! topic-list (cdr topic-list)))
-
-(define (append-topics)
- (debug 10 "APPEND")
- (for-each
- (lambda (x)
- (current-article-set #:topic x))
- topic-list))
-
-;;;
-(define (expect-context tag . rest)
- (let ((parent (xmltrans:get-parent #t)))
- (call-with-current-continuation
- (lambda (return)
- (for-each
- (lambda (ctx-name)
- (if (string=? ctx-name parent)
- (return)))
- rest)
- (xmltrans:parse-error #f tag " used in wrong context")))))
-
-;;; Hanlde main element
-(xmltrans:start-tag
- "DICT"
- (tag attr)
- #f)
-
-(xmltrans:end-tag
- "DICT"
- (tag attr text)
- ;; FIXME
- #f)
-
-
-(define expected-k #f)
-
-(define (k-expect)
- (set! expected-k #f))
-
-;;;
-(xmltrans:start-tag
- "NODE"
- (tag attr)
- (set! current-node (vector #f #f '() '()))
- (set! p-article #f)
- (set! expected-k #t)
- (xmltrans:expect-next-tag "K" k-expect)
- #f)
-
-(xmltrans:end-tag
- "NODE"
- (tag attr text)
- (cond
- (p-article
- (if (not (null? (current-node-get #:p-list)))
- (xmltrans:parse-warning #f "Mixed definition style"))
- (append-topics)
- (current-node-set #:p-article p-article)
- (set! p-article #f)))
-
- (cond
- ((xmltrans:attr attr "__INVALID__")) ; Ignore
- ((not (current-node-get #:key))
- (xmltrans:parse-error #f "K element is missing"))
- ((or (null? (current-node-get #:p-list)) (null? (null? (current-node-get #:xref))))
- (xmltrans:parse-error #f "No articles and no cross references for the node"))
- (else
- (push-node current-node)))
- #f)
-
-;;; Topic
-(xmltrans:start-tag
- "T"
- (tag attr)
- (if (not (xmltrans:attr attr "ID"))
- (letrec ((loc (xmltrans:get-input-location))
- (proc (lambda ()
- (xmltrans:parse-error
- loc
- "T element does not have ID attribute."))))
- (xmltrans:expect-next-tag "ID" proc))
- (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P")))
- (push-topic (xmltrans:attr attr "ID"))))
- #f)
-
-(xmltrans:end-tag
- "T"
- (tag attr text)
- (cond
- ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued
- ((xmltrans:parent? "P")
- (current-article-set #:topic (xmltrans:attr attr "ID")))
- ((xmltrans:parent? "NODE")
- (current-node-set #:topic (xmltrans:attr attr "ID")))
- (else
- (pop-topic)))
- #f)
-
-(xmltrans:end-tag
- "ID"
- (tag attr text)
- (cond
- ((xmltrans:parent? "P")) ;; OK
- ((not (xmltrans:parent? "T"))
- (xmltrans:parse-error #f "ID used in wrong context"))
- ((xmltrans:attr #t "ID")
- (xmltrans:parse-error #f "T element already has ID attribute"))
- (else
- (xmltrans:set-attr #t "ID" text)
- (if (not (xmltrans:parent? "NODE" "T"))
- (push-topic text))))
- #f)
-
-;;; K - KEY
-(xmltrans:end-tag
- "K"
- (tag attr text)
- (expect-context tag "NODE")
- (if (not expected-k)
- (begin
- (xmltrans:parse-error #f "K tag cannot be used here")
- (mark-invalid)))
- (xmltrans:expect-next-tag "K" k-expect)
- (current-node-set #:key text)
- #f)
-
-;;; F - FORMS
-(xmltrans:end-tag
- "F"
- (tag attr text)
- (expect-context tag "NODE")
- (current-node-set #:forms text)
- #f)
-
-;;; P - PartOfSpeach
-(define p-location #f)
-
-(xmltrans:start-tag
- "P"
- (tag attr)
- (set! p-location (xmltrans:get-input-location))
- (set! p-article (vector #f '() '() '() '()))
- #f)
-
-(xmltrans:end-tag
- "P"
- (tag attr text)
- (expect-context tag "NODE")
- (cond
- ((xmltrans:attr attr "ID") =>
- (lambda (id)
- (current-article-set #:pos (convert-pos id))
- (append-topics)
- (current-node-set #:p-article p-article)
- (set! p-article #f)))
- ((not (null? (current-article-get #:article))) ;; FIXME: other elements too
- (xmltrans:parse-error p-location
- "P element is missing ID, but has nested elements"))
- (else
- (set! p-article (vector (convert-pos text) '() '() '() '()))))
- #f)
-
-;;; A - Antonym
-
-(xmltrans:end-tag
- "A"
- (tag attr text)
- (expect-context tag "NODE" "P")
- (current-article-set #:aref text)
- #f)
-
-;;; X - Xref
-(xmltrans:end-tag
- "X"
- (tag attr text)
- (expect-context tag "NODE" "P")
- (let ((ref (cond
- ((string-null? text)
- (call-with-current-continuation
- (lambda (return)
- (do ((node node-list (cdr node)))
- ((null? node) #f)
- (if (not (null? (p-article-get
- #:article
- (car (node-get #:p-list (car node))))))
- (return (car (node-get #:key (car node)))))))))
- (else
- text))))
-
- (cond
- ((not ref)
- (xmltrans:parse-error #f "Empty reference")
- (mark-invalid))
- ((xmltrans:parent? "P")
- (current-article-set #:xref ref))
- ((xmltrans:parent? "NODE")
- (current-node-set #:xref ref))))
- #f)
-
-;;; M - MEANING
-(xmltrans:end-tag
- "M"
- (tag attr text)
- (expect-context tag "P" "NODE")
- (current-article-set #:article text)
- #f)
-
-;;; Formatting elements
-(xmltrans:end-tag
- "C"
- (tag attr text)
- (expect-context tag "M" "F")
- (list
- "<span class=\\\"comment\\\">"
- text
- "</span>"))
-
-(xmltrans:end-tag
- "E"
- (tag attr text)
- (expect-context tag "M" "F")
- (list
- "<span class=\\\"expl\\\">"
- text
- "</span>"))
-
-
-(define (assert condition . rest)
- (cond
- (condition)
- ((null? rest)
- (throw 'misc-error 'assert
- "Assertion failed"
- (list) #f)
- (exit 1))
- (else
- (with-output-to-port
- (current-error-port)
- (lambda ()
- (for-each
- (lambda (x)
- (display x))
- rest)
- (newline)))
- (exit 1))))
-
-(define (escape-string str)
- (let loop ((lst '())
- (str str))
- (cond
- ((string-index str #\") =>
- (lambda (pos)
- (loop (append lst (list (substring str 0 pos)
- "\\\""))
- (substring str (1+ pos)))))
- (else
- (apply string-append (append lst (list str)))))))
-
-(define (run-query conn q)
- (if verbose-option
- (format #t "QUERY: ~S\n" q))
- (let ((res (sql-query conn q)))
- (if verbose-option
- (format #t "RESULT: ~S\n" res))
- res))
-
-(define (query-number conn q)
- (let ((res (run-query conn q)))
- (if (null? res)
- #f
- (string->number (caar res)))))
-
-(define (get-dict-index conn)
- (set! dict-index
- (or
- (query-number conn
- "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1")
- 0)))
-
-(define (cleanup-db conn)
- (run-query conn "DELETE FROM links")
- (run-query conn "DELETE FROM articles")
- (run-query conn "DELETE FROM dict")
- (run-query conn "DELETE FROM topic")
- (run-query conn "DELETE FROM topic_tab")
- (run-query conn "DELETE FROM pending_links"))
-
-(define (pending-fixup conn)
- (run-query conn
- "INSERT IGNORE INTO links \
- SELECT p.type,p.originator,d.ident \
- FROM dict d, pending_links p \
- WHERE p.word = d.word AND p.type != 'CLOSED'")
- (run-query conn
- "INSERT IGNORE INTO links \
- SELECT p.type,d.ident,p.originator \
- FROM dict d, pending_links p \
- WHERE p.word = d.word AND p.type != 'CLOSED'")
- (run-query
- conn
- (if (string=? sql-iface "mysql")
- "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word"
- ;; Else assume SQL92
- "UPDATE pending_links SET type='CLOSED' \
- WHERE originator in (SELECT d.ident from dict d, pending_links p \
- WHERE p.word=d.word)"))
-
- (if (not preserve-option)
- run-query("DELETE FROM pending_links WHERE type = 'CLOSED'"))
-
- (let ((count (query-number conn
- "SELECT count(*) FROM pending_links \
- WHERE type != 'CLOSED'")))
- ;;; GROUP BY word")))
- (if (> count 0)
- (display (string-append
- (number->string count)
- " unresolved references\n")
- (current-error-port)))))
-
-(define (update-stat conn)
- (let ((count (query-number conn "SELECT count(*) from dict")))
- (run-query conn "DELETE from stat");
- (run-query conn
- (string-append
- "INSERT INTO stat (count,updated) VALUES("
- (number->string count)
- ",now())"))))
-
-
-(define dict-index 0)
-
-(define (insert-node conn node)
- (letrec ((insert-link (lambda (type value)
- (run-query
- conn
- (format
- #f
- "INSERT INTO pending_links (type,originator,word) VALUES('~A',~A,'~A')"
- type
- dict-index
- value)))))
-
- (for-each
- (lambda (p-article)
- (for-each
- (lambda (key)
- (let ((sound (ellinika:sounds-like key)))
- (set! dict-index (1+ dict-index))
- ;;; Insert 'heading' info
- (run-query
- conn
- (cond
- ((node-get #:forms node) =>
- (lambda (f)
- (format
- #f
- "INSERT INTO dict (ident,word,sound,pos,forms) VALUES(~A,\"~A\",\"~A\",~A,\"~A\")"
-
- dict-index
- key
- sound
- (p-article-get #:pos p-article)
- f)))
- (else
- (format
- #f
- "INSERT INTO dict (ident,word,sound,pos) VALUES(~A,\"~A\",\"~A\",~A)"
- dict-index
- key
- sound
- (p-article-get #:pos p-article)))))
-
- ;;; Insert cross-references
- (for-each
- (lambda (x)
- (insert-link "XREF" x))
- (append
- (node-get #:xref node)
- (p-article-get #:xref p-article)))
-
- ;;; Insert antonyms
- (for-each
- (lambda (x)
- (insert-link "ANT" x))
- (p-article-get #:aref p-article))
-
- ;;; Insert topics
- (for-each
- (lambda (t)
- (let ((index (or
- (query-number
- conn
- (format
- #f
- "SELECT ident FROM topic WHERE title=\"~A\""
- (escape-string t)))
- (begin
- (run-query
- conn
- (format
- #f
- "INSERT INTO topic (title) VALUES (\"~A\")"
- (escape-string t)))
- (query-number conn "SELECT LAST_INSERT_ID()")))))
- (run-query
- conn
- (format
- #f
- "INSERT INTO topic_tab VALUES(~A,~A)"
- index
- dict-index))))
- (p-article-get #:topic p-article))
-
- ;;; Insert articles
- (let ((article-index 0))
- (for-each
- (lambda (text)
- (run-query
- conn
- (format #f
- "INSERT INTO articles VALUES (~A, ~A, \"~A\")"
- dict-index
- article-index
- text))
- (set! article-index (1+ article-index)))
- (p-article-get #:article p-article)))))
- (node-get #:key node)))
- (node-get #:p-list node))))
-
-
-
-;;;; Main
-(define grammar
- `((check (single-char #\c))
- (cleanup)
- (database (single-char #\d) (value #t))
- (host (single-char #\h) (value #t))
- (port (single-char #\P) (value #t))
- (password (single-char #\p) (value #t))
- (user (single-char #\u) (value #t))
- (interface (value #t))
- (verbose (single-char #\v))
- (debug (value #t))
- (preserve (value #t))
- (help)))
-
-(define (usage)
- (display "usage: dictrans OPTIONS FILES\n"))
-
-(define (cons? p)
- (and (pair? p) (not (list? p))))
-
-(for-each
- (lambda (x)
- (cond
- ((cons? x)
- (case (car x)
- ((check)
- (set! compile-only #t))
- ((cleanup)
- (set! cleanup-option #t))
- ((database)
- (set! sql-database (cdr x)))
- ((host)
- (set! sql-host (cdr x)))
- ((port)
- (set! sql-port (string->number (cdr x))))
- ((password)
- (set! sql-password (cdr x)))
- ((user)
- (set! sql-user (cdr x)))
- ((interface)
- (set! sql-iface (cdr x)))
- ((verbose)
- (set! verbose-option #t))
- ((preserve)
- (set! preserve-option #t))
- ((debug)
- (set! debug-level (string->number (cdr x))))
- ((help)
- (usage)
- (exit 0))))
- (else
- (set! input-files (cdr x)))))
- (getopt-long (command-line) grammar))
-
-
-(if (null? input-files)
- (begin
- (display "Input files not specified\n" (current-error-port))
- (exit 1)))
-
-(cond
- (compile-only
- (for-each
- (lambda (x)
- (if (not (xmltrans:parse-file x))
- (exit 1)))
- input-files)))
-
-(let ((conn (sql-connect sql-iface sql-host sql-port sql-database
- sql-user sql-password)))
- (if (not conn)
- (begin
- (display "Cannot connect to the database\n" (current-error-port))
- (exit 1)))
-
- (read-pos conn)
-
- (for-each
- (lambda (x)
- (if (not (xmltrans:parse-file x))
- (exit 1)))
- input-files)
-
-
- (if cleanup-option
- (cleanup-db conn))
-
- (get-dict-index conn)
-
- (for-each
- (lambda (node)
- (insert-node conn node))
- (reverse node-list))
-
- (pending-fixup conn)
- (update-stat conn)
-
- (sql-connect-close conn))
-
-(exit 0)
-
-;;;; Local variables:
-;;;; mode: Scheme
-;;;; buffer-file-coding-system: utf-8
-;;;; End:
-
- \ No newline at end of file

Return to:

Send suggestions and report system problems to the System administrator.