From 50e4b2a479d0a9c33e4d856e311e94985fa7e070 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Fri, 8 Oct 2004 19:53:21 +0000 Subject: Moved to /scm git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@201 941c8c0f-9102-463b-b60b-cd22ce0e6858 --- dictrans.scm | 834 ----------------------------------------------------------- 1 file changed, 834 deletions(-) delete mode 100644 dictrans.scm diff --git a/dictrans.scm b/dictrans.scm deleted file mode 100644 index bbb6058..0000000 --- a/dictrans.scm +++ /dev/null @@ -1,834 +0,0 @@ -#! =GUILE_BINDIR=/guile -s -!# -;;;; This file is part of Ellinika -;;;; Copyright (C) 2004 Sergey Poznyakoff -;;;; -;;;; Ellinika is free software; you can redistribute it and/or modify -;;;; it under the terms of the GNU General Public License as published by -;;;; the Free Software Foundation; either version 2 of the License, or -;;;; (at your option) any later version. -;;;; -;;;; Ellinika is distributed in the hope that it will be useful, -;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;;;; GNU General Public License for more details. -;;;; -;;;; You should have received a copy of the GNU General Public License -;;;; along with Ellinika; if not, write to the Free Software -;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -;;;; - -;;;; Dictionary structure -;;;; Internal representation: -;;;; -;;;; * Each dictionary entry is represented as a vector: -;;;; -;;;; #(KEY FORMS XREF P-LIST) -;;;; -;;;; KEY is list of strings -;;;; -;;;; * FORMS is either #f or a string describing forms of the key if they -;;;; are formed in an irregular manner. -;;;; -;;;; * XREF is a list of cross-references -;;;; -;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector: -;;;; -;;;; #(POS ARTICLE AREF XREF TOPIC) -;;;; -;;;; Member Type Meaning -;;;; POS string part of speech -;;;; ARTICLE list(string) Dictionary article associated with this key/pos -;;;; AREF list(string) List of antonyms -;;;; XREF list(string) List of cross-references -;;;; TOPIC list(string) List of topics this item pertains to -;;;; -;;;; -;;;; External representation (XML): -;;;; -;;;; -;;;; string+ -;;;; [string] -;;;;

-;;;; string+ -;;;; string* -;;;; string* -;;;; * -;;;;

+ -;;;; string* -;;;;
-;;;; -;;;; If only one P entry is present, the following alternative forms -;;;; are understood: -;;;; -;;;; -;;;; string+ -;;;; [string] -;;;;

string

-;;;; string* -;;;; string* -;;;; string* -;;;; * -;;;;
-;;;; -;;;; or -;;;; -;;;; -;;;; string+ -;;;; [string] -;;;;

string

-;;;; string* -;;;; * -;;;;
-;=UPDPATH= - -(use-modules (xmltools xmltrans) - (sql) - (ellinika xlat) - (ice-9 getopt-long)) - -(use-syntax (ice-9 syncase)) - -(define compile-only #f) -(define cleanup-option #f) -(define preserve-option #f) - -(define sql-iface "mysql") -(define sql-host "localhost") -(define sql-database "ellinika") -(define sql-port 3306) -(define sql-password #f) -(define sql-user #f) -(define verbose-option #f) -(define debug-level 0) -(define input-files '()) - -(define sysconf-dir "=SYSCONFDIR=") - -;;; Load the site defaults -(let ((rc-file (string-append sysconf-dir "/" config-file-name))) - (if (file-exists? rc-file) - (load rc-file))) - -(define (debug level . rest) - (if (>= debug-level level) - (begin - (for-each - (lambda (x) - (display x)) - rest) - (newline)))) - - -(define pos-xlat #f) - -(define (convert-pos pos) - (cond - (compile-only - pos) - ((assoc pos pos-xlat) => - (lambda (x) - (cdr x))) - (else - (xmltrans:parse-error #f "unknown or misspelled part of speech") - (mark-invalid) - pos))) - -(define (read-pos conn) - (set! pos-xlat - (map - (lambda (p) - (cons (car p) (string->number (cadr p)))) - (append - (run-query conn "SELECT abbr, id FROM pos") - (run-query conn "SELECT abbr_lat, id FROM pos") - (run-query conn "SELECT name, id FROM pos"))))) - - -;;;; XML definitions - -;;; Set the default handler -(define tag-list '()) - -(define (lingua:default-start tag attr) - (xmltrans:set-attr #f "__START__" 1) - #f) - -(xmltrans:set-default-start-handler lingua:default-start) - -(define (lingua:default-end tag attr text) - (if (xmltrans:attr attr "__START__") - (xmltrans:parse-error #f "Unhandled element " tag)) - (set! tag-list - (cons - (xmltrans:make-tag tag attr text) - tag-list)) - #f) - -(xmltrans:set-default-end-handler lingua:default-end) - -;;; Current node -(define current-node #f) - -(define-syntax node-get - (syntax-rules () - ((node-get #:key node) - (vector-ref node 0)) - ((node-get #:forms node) - (vector-ref node 1)) - ((node-get #:xref node) - (vector-ref node 2)) - ((node-get #:p-list node) - (vector-ref node 3)))) - -(define-macro (current-node-get key) - `(node-get ,key current-node)) - -(define (mark-invalid) - (xmltrans:set-attr "NODE" "__INVALID__" 1)) - -(define-syntax node-set - (syntax-rules () - ((node-set #:key node k) - (cond - ((node-get #:key node) => - (lambda (klist) - (vector-set! node 0 (append klist (list k))))) - (else - (vector-set! node 0 (list k))))) - ((node-set #:forms node f) - (begin - (cond - ((node-get #:forms node) - (xmltrans:parse-error #f "Forms already set") - (mark-invalid))) - (vector-set! node 1 f))) - ((node-set #:xref node x) - (cond - ((node-get #:xref node) => - (lambda (xlist) - (vector-set! node 2 (append xlist (list x))))) - (else - (vector-set! node 2 (list x))))) - ((node-set #:p-article node p) - (cond - ((node-get #:p-list node) => - (lambda (plist) - (vector-set! node 3 (append plist (list p))))) - (else - (vector-set! node 3 (list p))))) - ((node-set #:topic node t) - (for-each - (lambda (p) - (p-article-set #:topic p t)) - (node-get #:p-list node))))) - -(define-macro (current-node-set key val) - `(node-set ,key current-node ,val)) - - -(define p-article #f) - -(define-syntax p-article-get - (syntax-rules () - ((p-article-get #:pos article) - (vector-ref article 0)) - ((p-article-get #:article article) - (vector-ref article 1)) - ((p-article-get #:aref article) - (vector-ref article 2)) - ((p-article-get #:xref article) - (vector-ref article 3)) - ((p-article-get #:topic article) - (vector-ref article 4)))) - -(define-syntax p-set - (syntax-rules () - ((p-set key article n val) - (vector-set! article n - (cond - ((p-article-get key article) => - (lambda (alst) - (append alst (list val)))) - (else - (list val))))))) - -(define-macro (current-article-get key) - `(p-article-get ,key p-article)) - -(define-syntax p-article-set - (syntax-rules () - ((p-article-set #:pos article val) - (vector-set! article 0 val)) - ((p-article-set #:article article val) - (p-set #:article article 1 val)) - ((p-article-set #:aref article val) - (p-set #:aref article 2 val)) - ((p-article-set #:xref article val) - (p-set #:xref article 3 val)) - ((p-article-set #:topic article val) - (p-set #:topic article 4 val)))) - -(define-macro (current-article-set key val) - `(p-article-set ,key p-article ,val)) - - -;;; Node list -(define node-list '()) - -(define (push-node node) - (if (>= debug-level 100) - (begin - (write node) - (newline))) - (set! node-list (cons current-node node-list))) - -;;; Topic stack -(define topic-list '()) - -(define (push-topic id) - (debug 10 "PUSH " id) - (set! topic-list (cons id topic-list))) - -(define (pop-topic) - (debug 10 "POP " (car topic-list)) - (set! topic-list (cdr topic-list))) - -(define (append-topics) - (debug 10 "APPEND") - (for-each - (lambda (x) - (current-article-set #:topic x)) - topic-list)) - -;;; -(define (expect-context tag . rest) - (let ((parent (xmltrans:get-parent #t))) - (call-with-current-continuation - (lambda (return) - (for-each - (lambda (ctx-name) - (if (string=? ctx-name parent) - (return))) - rest) - (xmltrans:parse-error #f tag " used in wrong context"))))) - -;;; Hanlde main element -(xmltrans:start-tag - "DICT" - (tag attr) - #f) - -(xmltrans:end-tag - "DICT" - (tag attr text) - ;; FIXME - #f) - - -(define expected-k #f) - -(define (k-expect) - (set! expected-k #f)) - -;;; -(xmltrans:start-tag - "NODE" - (tag attr) - (set! current-node (vector #f #f '() '())) - (set! p-article #f) - (set! expected-k #t) - (xmltrans:expect-next-tag "K" k-expect) - #f) - -(xmltrans:end-tag - "NODE" - (tag attr text) - (cond - (p-article - (if (not (null? (current-node-get #:p-list))) - (xmltrans:parse-warning #f "Mixed definition style")) - (append-topics) - (current-node-set #:p-article p-article) - (set! p-article #f))) - - (cond - ((xmltrans:attr attr "__INVALID__")) ; Ignore - ((not (current-node-get #:key)) - (xmltrans:parse-error #f "K element is missing")) - ((or (null? (current-node-get #:p-list)) (null? (null? (current-node-get #:xref)))) - (xmltrans:parse-error #f "No articles and no cross references for the node")) - (else - (push-node current-node))) - #f) - -;;; Topic -(xmltrans:start-tag - "T" - (tag attr) - (if (not (xmltrans:attr attr "ID")) - (letrec ((loc (xmltrans:get-input-location)) - (proc (lambda () - (xmltrans:parse-error - loc - "T element does not have ID attribute.")))) - (xmltrans:expect-next-tag "ID" proc)) - (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P"))) - (push-topic (xmltrans:attr attr "ID")))) - #f) - -(xmltrans:end-tag - "T" - (tag attr text) - (cond - ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued - ((xmltrans:parent? "P") - (current-article-set #:topic (xmltrans:attr attr "ID"))) - ((xmltrans:parent? "NODE") - (current-node-set #:topic (xmltrans:attr attr "ID"))) - (else - (pop-topic))) - #f) - -(xmltrans:end-tag - "ID" - (tag attr text) - (cond - ((xmltrans:parent? "P")) ;; OK - ((not (xmltrans:parent? "T")) - (xmltrans:parse-error #f "ID used in wrong context")) - ((xmltrans:attr #t "ID") - (xmltrans:parse-error #f "T element already has ID attribute")) - (else - (xmltrans:set-attr #t "ID" text) - (if (not (xmltrans:parent? "NODE" "T")) - (push-topic text)))) - #f) - -;;; K - KEY -(xmltrans:end-tag - "K" - (tag attr text) - (expect-context tag "NODE") - (if (not expected-k) - (begin - (xmltrans:parse-error #f "K tag cannot be used here") - (mark-invalid))) - (xmltrans:expect-next-tag "K" k-expect) - (current-node-set #:key text) - #f) - -;;; F - FORMS -(xmltrans:end-tag - "F" - (tag attr text) - (expect-context tag "NODE") - (current-node-set #:forms text) - #f) - -;;; P - PartOfSpeach -(define p-location #f) - -(xmltrans:start-tag - "P" - (tag attr) - (set! p-location (xmltrans:get-input-location)) - (set! p-article (vector #f '() '() '() '())) - #f) - -(xmltrans:end-tag - "P" - (tag attr text) - (expect-context tag "NODE") - (cond - ((xmltrans:attr attr "ID") => - (lambda (id) - (current-article-set #:pos (convert-pos id)) - (append-topics) - (current-node-set #:p-article p-article) - (set! p-article #f))) - ((not (null? (current-article-get #:article))) ;; FIXME: other elements too - (xmltrans:parse-error p-location - "P element is missing ID, but has nested elements")) - (else - (set! p-article (vector (convert-pos text) '() '() '() '())))) - #f) - -;;; A - Antonym - -(xmltrans:end-tag - "A" - (tag attr text) - (expect-context tag "NODE" "P") - (current-article-set #:aref text) - #f) - -;;; X - Xref -(xmltrans:end-tag - "X" - (tag attr text) - (expect-context tag "NODE" "P") - (let ((ref (cond - ((string-null? text) - (call-with-current-continuation - (lambda (return) - (do ((node node-list (cdr node))) - ((null? node) #f) - (if (not (null? (p-article-get - #:article - (car (node-get #:p-list (car node)))))) - (return (car (node-get #:key (car node))))))))) - (else - text)))) - - (cond - ((not ref) - (xmltrans:parse-error #f "Empty reference") - (mark-invalid)) - ((xmltrans:parent? "P") - (current-article-set #:xref ref)) - ((xmltrans:parent? "NODE") - (current-node-set #:xref ref)))) - #f) - -;;; M - MEANING -(xmltrans:end-tag - "M" - (tag attr text) - (expect-context tag "P" "NODE") - (current-article-set #:article text) - #f) - -;;; Formatting elements -(xmltrans:end-tag - "C" - (tag attr text) - (expect-context tag "M" "F") - (list - "" - text - "")) - -(xmltrans:end-tag - "E" - (tag attr text) - (expect-context tag "M" "F") - (list - "" - text - "")) - - -(define (assert condition . rest) - (cond - (condition) - ((null? rest) - (throw 'misc-error 'assert - "Assertion failed" - (list) #f) - (exit 1)) - (else - (with-output-to-port - (current-error-port) - (lambda () - (for-each - (lambda (x) - (display x)) - rest) - (newline))) - (exit 1)))) - -(define (escape-string str) - (let loop ((lst '()) - (str str)) - (cond - ((string-index str #\") => - (lambda (pos) - (loop (append lst (list (substring str 0 pos) - "\\\"")) - (substring str (1+ pos))))) - (else - (apply string-append (append lst (list str))))))) - -(define (run-query conn q) - (if verbose-option - (format #t "QUERY: ~S\n" q)) - (let ((res (sql-query conn q))) - (if verbose-option - (format #t "RESULT: ~S\n" res)) - res)) - -(define (query-number conn q) - (let ((res (run-query conn q))) - (if (null? res) - #f - (string->number (caar res))))) - -(define (get-dict-index conn) - (set! dict-index - (or - (query-number conn - "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1") - 0))) - -(define (cleanup-db conn) - (run-query conn "DELETE FROM links") - (run-query conn "DELETE FROM articles") - (run-query conn "DELETE FROM dict") - (run-query conn "DELETE FROM topic") - (run-query conn "DELETE FROM topic_tab") - (run-query conn "DELETE FROM pending_links")) - -(define (pending-fixup conn) - (run-query conn - "INSERT IGNORE INTO links \ - SELECT p.type,p.originator,d.ident \ - FROM dict d, pending_links p \ - WHERE p.word = d.word AND p.type != 'CLOSED'") - (run-query conn - "INSERT IGNORE INTO links \ - SELECT p.type,d.ident,p.originator \ - FROM dict d, pending_links p \ - WHERE p.word = d.word AND p.type != 'CLOSED'") - (run-query - conn - (if (string=? sql-iface "mysql") - "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word" - ;; Else assume SQL92 - "UPDATE pending_links SET type='CLOSED' \ - WHERE originator in (SELECT d.ident from dict d, pending_links p \ - WHERE p.word=d.word)")) - - (if (not preserve-option) - run-query("DELETE FROM pending_links WHERE type = 'CLOSED'")) - - (let ((count (query-number conn - "SELECT count(*) FROM pending_links \ - WHERE type != 'CLOSED'"))) - ;;; GROUP BY word"))) - (if (> count 0) - (display (string-append - (number->string count) - " unresolved references\n") - (current-error-port))))) - -(define (update-stat conn) - (let ((count (query-number conn "SELECT count(*) from dict"))) - (run-query conn "DELETE from stat"); - (run-query conn - (string-append - "INSERT INTO stat (count,updated) VALUES(" - (number->string count) - ",now())")))) - - -(define dict-index 0) - -(define (insert-node conn node) - (letrec ((insert-link (lambda (type value) - (run-query - conn - (format - #f - "INSERT INTO pending_links (type,originator,word) VALUES('~A',~A,'~A')" - type - dict-index - value))))) - - (for-each - (lambda (p-article) - (for-each - (lambda (key) - (let ((sound (ellinika:sounds-like key))) - (set! dict-index (1+ dict-index)) - ;;; Insert 'heading' info - (run-query - conn - (cond - ((node-get #:forms node) => - (lambda (f) - (format - #f - "INSERT INTO dict (ident,word,sound,pos,forms) VALUES(~A,\"~A\",\"~A\",~A,\"~A\")" - - dict-index - key - sound - (p-article-get #:pos p-article) - f))) - (else - (format - #f - "INSERT INTO dict (ident,word,sound,pos) VALUES(~A,\"~A\",\"~A\",~A)" - dict-index - key - sound - (p-article-get #:pos p-article))))) - - ;;; Insert cross-references - (for-each - (lambda (x) - (insert-link "XREF" x)) - (append - (node-get #:xref node) - (p-article-get #:xref p-article))) - - ;;; Insert antonyms - (for-each - (lambda (x) - (insert-link "ANT" x)) - (p-article-get #:aref p-article)) - - ;;; Insert topics - (for-each - (lambda (t) - (let ((index (or - (query-number - conn - (format - #f - "SELECT ident FROM topic WHERE title=\"~A\"" - (escape-string t))) - (begin - (run-query - conn - (format - #f - "INSERT INTO topic (title) VALUES (\"~A\")" - (escape-string t))) - (query-number conn "SELECT LAST_INSERT_ID()"))))) - (run-query - conn - (format - #f - "INSERT INTO topic_tab VALUES(~A,~A)" - index - dict-index)))) - (p-article-get #:topic p-article)) - - ;;; Insert articles - (let ((article-index 0)) - (for-each - (lambda (text) - (run-query - conn - (format #f - "INSERT INTO articles VALUES (~A, ~A, \"~A\")" - dict-index - article-index - text)) - (set! article-index (1+ article-index))) - (p-article-get #:article p-article))))) - (node-get #:key node))) - (node-get #:p-list node)))) - - - -;;;; Main -(define grammar - `((check (single-char #\c)) - (cleanup) - (database (single-char #\d) (value #t)) - (host (single-char #\h) (value #t)) - (port (single-char #\P) (value #t)) - (password (single-char #\p) (value #t)) - (user (single-char #\u) (value #t)) - (interface (value #t)) - (verbose (single-char #\v)) - (debug (value #t)) - (preserve (value #t)) - (help))) - -(define (usage) - (display "usage: dictrans OPTIONS FILES\n")) - -(define (cons? p) - (and (pair? p) (not (list? p)))) - -(for-each - (lambda (x) - (cond - ((cons? x) - (case (car x) - ((check) - (set! compile-only #t)) - ((cleanup) - (set! cleanup-option #t)) - ((database) - (set! sql-database (cdr x))) - ((host) - (set! sql-host (cdr x))) - ((port) - (set! sql-port (string->number (cdr x)))) - ((password) - (set! sql-password (cdr x))) - ((user) - (set! sql-user (cdr x))) - ((interface) - (set! sql-iface (cdr x))) - ((verbose) - (set! verbose-option #t)) - ((preserve) - (set! preserve-option #t)) - ((debug) - (set! debug-level (string->number (cdr x)))) - ((help) - (usage) - (exit 0)))) - (else - (set! input-files (cdr x))))) - (getopt-long (command-line) grammar)) - - -(if (null? input-files) - (begin - (display "Input files not specified\n" (current-error-port)) - (exit 1))) - -(cond - (compile-only - (for-each - (lambda (x) - (if (not (xmltrans:parse-file x)) - (exit 1))) - input-files))) - -(let ((conn (sql-connect sql-iface sql-host sql-port sql-database - sql-user sql-password))) - (if (not conn) - (begin - (display "Cannot connect to the database\n" (current-error-port)) - (exit 1))) - - (read-pos conn) - - (for-each - (lambda (x) - (if (not (xmltrans:parse-file x)) - (exit 1))) - input-files) - - - (if cleanup-option - (cleanup-db conn)) - - (get-dict-index conn) - - (for-each - (lambda (node) - (insert-node conn node)) - (reverse node-list)) - - (pending-fixup conn) - (update-stat conn) - - (sql-connect-close conn)) - -(exit 0) - -;;;; Local variables: -;;;; mode: Scheme -;;;; buffer-file-coding-system: utf-8 -;;;; End: - - \ No newline at end of file -- cgit v1.2.1