;;;; This file is part of Ellinika ;;;; Copyright (C) 2004 Sergey Poznyakoff ;;;; ;;;; Ellinika is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 2 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; Ellinika is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with Ellinika; if not, write to the Free Software ;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ;;;; ;;;; Dictionary structure ;;;; Internal representation: ;;;; ;;;; * Each dictionary entry is represented as a vector: ;;;; ;;;; #(KEY FORMS XREF P-LIST) ;;;; ;;;; KEY is list of strings ;;;; ;;;; * FORMS is either #f or a string describing forms of the key if they ;;;; are formed in an irregular manner. ;;;; ;;;; * XREF is a list of cross-references ;;;; ;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector: ;;;; ;;;; #(POS ARTICLE AREF XREF TOPIC) ;;;; ;;;; Member Type Meaning ;;;; POS string part of speech ;;;; ARTICLE list(string) Dictionary article associated with this key/pos ;;;; AREF list(string) List of antonyms ;;;; XREF list(string) List of cross-references ;;;; TOPIC list(string) List of topics this item pertains to ;;;; ;;;; ;;;; External representation (XML): ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

;;;; string+ ;;;; string* ;;;; string* ;;;; * ;;;;

+ ;;;; string* ;;;;
;;;; ;;;; If only one P entry is present, the following alternative forms ;;;; are understood: ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

string

;;;; string* ;;;; string* ;;;; string* ;;;; * ;;;;
;;;; ;;;; or ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

string

;;;; string* ;;;; * ;;;;
(set! %load-path (cons "/usr/local/share/guile-sql" %load-path)) (use-modules (xmltools xmltrans) (sql) (ice-9 getopt-long)) (use-syntax (ice-9 syncase)) (define compile-only #f) (define cleanup-option #f) (define preserve-option #f) (define sql-iface "mysql") (define sql-host "localhost") (define sql-database "ellinika") (define sql-port 3306) (define sql-password #f) (define sql-user #f) (define verbose-option #f) (define debug-level 0) (define input-files '()) (define (debug level . rest) (if (>= debug-level level) (begin (for-each (lambda (x) (display x)) rest) (newline)))) ;;;; XML definitions ;;; Set the default handler (define tag-list '()) (define (lingua:default-start tag attr) (xmltrans:set-attr #f "__START__" 1) #f) (xmltrans:set-default-start-handler lingua:default-start) (define (lingua:default-end tag attr text) (if (xmltrans:attr attr "__START__") (xmltrans:parse-error #f "Unhandled element " tag)) (set! tag-list (cons (xmltrans:make-tag tag attr text) tag-list)) #f) (xmltrans:set-default-end-handler lingua:default-end) ;;; Current node (define current-node #f) (define-syntax node-get (syntax-rules () ((node-get #:key) (vector-ref current-node 0)) ((node-get #:forms) (vector-ref current-node 1)) ((node-get #:xref) (vector-ref current-node 2)) ((node-get #:p-list) (vector-ref current-node 3)))) (define (mark-invalid) (xmltrans:set-attr "NODE" "__INVALID__" 1)) (define-syntax node-set (syntax-rules () ((node-set #:key k) (cond ((node-get #:key) => (lambda (klist) (vector-set! current-node 0 (append klist (list k))))) (else (vector-set! current-node 0 (list k))))) ((node-set #:forms f) (begin (cond ((node-get #:forms) (xmltrans:parse-error #f "Forms already set") (mark-invalid))) (vector-set! current-node 1 f))) ((node-set #:xref x) (cond ((node-get #:xref) => (lambda (xlist) (vector-set! current-node 2 (append xlist (list x))))) (else (vector-set! current-node 2 (list x))))) ((node-set! #:p-article p) (cond ((node-get #:p-list) => (lambda (plist) (vector-set! current-node 3 (append plist (list p))))) (else (vector-set! current-node 3 (list p))))) ((node-set #:topic t) (for-each (lambda (p) ;; FIXME: Use p-article-set (vector-set! p 4 (append (vector-ref p 4) (list t)))) (node-get #:p-list))))) (define p-article #f) (define-syntax p-article-get (syntax-rules () ((p-article-get #:pos) (vector-ref p-article 0)) ((p-article-get #:article) (vector-ref p-article 1)) ((p-article-get #:aref) (vector-ref p-article 2)) ((p-article-get #:xref) (vector-ref p-article 3)) ((p-article-get #:topic) (vector-ref p-article 4)))) (define-syntax p-set (syntax-rules () ((p-set key n val) (vector-set! p-article n (cond ((p-article-get key) => (lambda (alst) (append alst (list val)))) (else (list val))))))) (define-syntax p-article-set (syntax-rules () ((p-article-set #:pos val) (vector-set! p-article 0 val)) ((p-article-set #:article val) (p-set #:article 1 val)) ((p-article-set #:aref val) (p-set #:aref 2 val)) ((p-article-set #:xref val) (p-set #:xref 3 val)) ((p-article-set #:topic val) (p-set #:topic 4 val)))) ;;; Node list (define node-list '()) (define (push-node node) (if (>= debug-level 100) (begin (write node) (newline))) (set! node-list (cons current-node node-list))) ;;; Topic stack (define topic-list '()) (define (push-topic id) (debug 10 "PUSH " id) (set! topic-list (cons id topic-list))) (define (pop-topic) (debug 10 "POP " (car topic-list)) (set! topic-list (cdr topic-list))) (define (append-topics) (debug 10 "APPEND") (for-each (lambda (x) (p-article-set #:topic x)) topic-list)) ;;; (define (expect-context tag . rest) (let ((parent (xmltrans:get-parent #t))) (call-with-current-continuation (lambda (return) (for-each (lambda (ctx-name) (if (string=? ctx-name parent) (return))) rest) (xmltrans:parse-error #f tag " used in wrong context"))))) ;;; Hanlde main element (xmltrans:start-tag "DICT" (tag attr) #f) (xmltrans:end-tag "DICT" (tag attr text) ;; FIXME #f) (define expected-k #f) (define (k-expect) (set! expected-k #f)) ;;; (xmltrans:start-tag "NODE" (tag attr) (set! current-node (vector #f #f '() '())) (set! p-article #f) (set! expected-k #t) (xmltrans:expect-next-tag "K" k-expect) #f) (xmltrans:end-tag "NODE" (tag attr text) (cond (p-article (if (not (null? (node-get #:p-list))) (xmltrans:parse-warning #f "Mixed definition style")) (append-topics) (node-set #:p-article p-article) (set! p-article #f))) (cond ((xmltrans:attr attr "__INVALID__")) ; Ignore ((not (node-get #:key)) (xmltrans:parse-error #f "K element is missing")) ((or (null? (node-get #:p-list)) (null? (null? (node-get #:xref)))) ; (display current-node)(newline) ; (display p-article)(newline) (xmltrans:parse-error #f "No articles and no cross references for the node")) (else (push-node current-node))) #f) ;;; Topic (xmltrans:start-tag "T" (tag attr) (if (not (xmltrans:attr attr "ID")) (letrec ((loc (xmltrans:get-input-location)) (proc (lambda () (xmltrans:parse-error loc "T element does not have ID attribute.")))) (xmltrans:expect-next-tag "ID" proc)) (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P"))) (push-topic (xmltrans:attr attr "ID")))) #f) (xmltrans:end-tag "T" (tag attr text) (cond ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued ((xmltrans:parent? "P") (p-article-set #:topic (xmltrans:attr attr "ID"))) ((xmltrans:parent? "NODE") (node-set #:topic (xmltrans:attr attr "ID"))) (else (pop-topic))) #f) (xmltrans:end-tag "ID" (tag attr text) (cond ((xmltrans:parent? "P")) ;; OK ((not (xmltrans:parent? "T")) (xmltrans:parse-error #f "ID used in wrong context")) ((xmltrans:attr #t "ID") (xmltrans:parse-error #f "T element already has ID attribute")) (else (xmltrans:set-attr #t "ID" text) (if (not (xmltrans:parent? "NODE" "T")) (push-topic text)))) #f) ;;; K - KEY (xmltrans:end-tag "K" (tag attr text) (expect-context tag "NODE") (if (not expected-k) (begin (xmltrans:parse-error #f "K tag cannot be used here") (mark-invalid))) (xmltrans:expect-next-tag "K" k-expect) (node-set #:key text) #f) ;;; F - FORMS (xmltrans:end-tag "F" (tag attr text) (expect-context tag "NODE") (node-set #:forms text) #f) ;;; P - PartOfSpeach (define p-location #f) (xmltrans:start-tag "P" (tag attr) (set! p-location (xmltrans:get-input-location)) (set! p-article (vector #f '() '() '() '())) #f) (xmltrans:end-tag "P" (tag attr text) (expect-context tag "NODE") (cond ((xmltrans:attr attr "ID") => (lambda (id) (p-article-set #:pos id) (append-topics) (node-set #:p-article p-article) (set! p-article #f))) ((not (null? (p-article-get #:article))) ;; FIXME: other elements too (xmltrans:parse-error p-location "P element is missing ID, but has nested elements")) (else (set! p-article (vector text '() '() '() '())))) #f) ;;; A - Antonym (xmltrans:end-tag "A" (tag attr text) (expect-context tag "NODE" "P") (p-article-set #:aref text) #f) ;;; X - Xref (xmltrans:end-tag "X" (tag attr text) (expect-context tag "NODE" "P") (let ((ref (cond ((string-null? text) (call-with-current-continuation (lambda (return) ;; FIXME: Use node-get/p-article-get (do ((node node-list (cdr node))) ((null? node) #f) (if (not (null? (vector-ref (car (vector-ref (car node) 3)) 1))) (return (car (vector-ref (car node) 0)))))))) (else text)))) (cond ((not ref) (xmltrans:parse-error #f "Empty reference") (mark-invalid)) ((xmltrans:parent? "P") (p-article-set #:xref ref)) ((xmltrans:parent? "NODE") (node-set #:xref ref)))) #f) ;;; M - MEANING (xmltrans:end-tag "M" (tag attr text) (expect-context tag "P" "NODE") (p-article-set #:article text) #f) ;;; Formatting elements (xmltrans:end-tag "C" (tag attr text) (expect-context tag "M" "F") (list "" text "")) (xmltrans:end-tag "E" (tag attr text) (expect-context tag "M" "F") (list "" text "")) ;;;; Main (define grammar `((check (single-char #\c)) (cleanup) (database (single-char #\d) (value #t)) (host (single-char #\h) (value #t)) (port (single-char #\P) (value #t)) (password (single-char #\p) (value #t)) (user (single-char #\u) (value #t)) (interface (value #t)) (verbose (single-char #\v)) (debug (value #t)) (preserve (value #t)) (help))) (define (usage) (display "usage: dictrans OPTIONS FILES\n")) (define (cons? p) (and (pair? p) (not (list? p)))) (for-each (lambda (x) (cond ((cons? x) (case (car x) ((check) (set! compile-only #t)) ((cleanup) (set! cleanup-option #t)) ((database) (set! sql-database (cdr x))) ((host) (set! sql-host (cdr x))) ((port) (set! sql-port (cdr x))) ((password) (set! sql-password (cdr x))) ((user) (set! sql-user (cdr x))) ((interface) (set! sql-iface (cdr x))) ((verbose) (set! verbose-option #t)) ((preserve) (set! preserve-option #t)) ((debug) (set! debug-level (string->number (cdr x)))) ((help) (usage) (exit 0)))) (else (set! input-files (cdr x))))) (getopt-long (command-line) grammar)) (if (null? input-files) (begin (display "Input files not specified\n" (current-error-port)) (exit 1))) (for-each (lambda (x) (if (not (xmltrans:parse-file x)) (exit 1))) input-files) (if compile-only (exit 0)) (define (assert cond . rest) (cond (cond) ((null? rest) (throw 'misc-error 'assert "Assertion failed" (list) #f) (exit 1)) (else (with-output-to-port (current-error-port) (lambda () (for-each (lambda (x) (display x)) rest) (newline))) (exit 1)))) (define dict-index 0) (define article-index 0) (define (get-dict-index conn) (let ((res (sql-query "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1"))) (assert (not (null? res))) (set! dict-index (car res)))) (define (cleanup-db conn) (sql-query "DELETE FROM links") (sql-query "DELETE FROM articles") (sql-query "DELETE FROM dict") (sql-query "DELETE FROM topic") (sql-query "DELETE FROM topic_tab") (sql-query "DELETE FROM pending_links")) (define (pending-fixup) (sql-query "INSERT IGNORE INTO links \ SELECT p.type,p.originator,d.ident \ FROM dict d, pending_links p \ WHERE p.word = d.word AND p.type != 'CLOSED'") (sql-query "INSERT IGNORE INTO links \ SELECT p.type,d.ident,p.originator \ FROM dict d, pending_links p \ WHERE p.word = d.word AND p.type != 'CLOSED'") (sql-query (if (string=? sql-iface "mysql") "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word" ;; Else assume SQL92 "UPDATE pending_links SET type='CLOSED' \ WHERE originator in (SELECT d.ident from dict d, pending_links p \ WHERE p.word=d.word)")) (if (not preserve-pending-option) sql_query("DELETE FROM pending_links WHERE type = 'CLOSED'")) (let ((count (query-number "SELECT count(*) FROM pending_links \ WHERE type != 'CLOSED' \ GROUP BY word"))) (if (> count 0) (display (string-append (car res) " unresolved references") (current-error-port))))) (define (update-stat) (let ((count (query-number("SELECT count(*) from dict")))) (sql_query "DELETE from stat"); (sql-query (string-append "INSERT INTO stat (count,updated) VALUES(" (number->string count) ",now())")))) (define (insert-node conn node) (set! article-index 0) (set! dict-index (1+ dict-index)) ;;; FIXME ) (let ((conn (sql-connect sql-iface sql-host sql-port sql-database sql-user sql-password))) (if (not conn) (begin (display "Cannot connect to the database\n" (current-error-port)) (exit 1))) (if cleanup-option (cleanup-db conn)) (get-dict-index conn) (for-each (lambda (node) (insert-node conn node)) node-list) (pending-fixup) (update-stat) (sql-connect-close conn)) (exit 0)