aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2004-10-07 16:00:25 +0000
committerSergey Poznyakoff <gray@gnu.org.ua>2004-10-07 16:00:25 +0000
commitb27976bda04ef7a2219c44dfc7150441b62b3a0a (patch)
treee003a51f0b829d1664358b6cc4dfac79ef67f7be
parent21ee0a3c2fc48d51b9a3e29a61a661afdaee4fc9 (diff)
downloadellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.gz
ellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.bz2
Dictionary parser
git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@195 941c8c0f-9102-463b-b60b-cd22ce0e6858
-rw-r--r--ChangeLog11
-rw-r--r--data/dict.m44
-rw-r--r--dictrans.scm654
-rw-r--r--scm/dictrans.scm654
-rw-r--r--src/gram.y123
-rw-r--r--src/input.l4
-rw-r--r--src/main.c8
7 files changed, 1359 insertions, 99 deletions
diff --git a/ChangeLog b/ChangeLog
index 3e39e87..2b40a64 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2004-10-07 Sergey Poznyakoff <gray@Mirddin.farlep.net>
+
+ * dictrans.scm: Dictionary parser
+ * data/0.xml: Converted dictionary file
+ * data/1.xml: Converted dictionary file
+ * data/2.xml: Converted dictionary file
+ * data/3.xml: Converted dictionary file
+ * data/4.xml: Converted dictionary file
+ * data/5.xml: Converted dictionary file
+ * data/7.xml: Converted dictionary file
+
2004-10-04 Sergey Poznyakoff <gray@Mirddin.farlep.net>
* xml/COPYING.FDL: New file
diff --git a/data/dict.m4 b/data/dict.m4
index 59d7468..e4fb72d 100644
--- a/data/dict.m4
+++ b/data/dict.m4
@@ -1,7 +1,7 @@
changequote([,])
-define([COMMENT],[<span class=\"comment\">$1</span>])
-define([EXPL],[(<span class=\"expl\">$1</span>)])
+define([COMMENT],[<C>$1</C>])
+define([EXPL],[(<E>$1</E>)])
define([OR],COMMENT(или))
define([AND],COMMENT(и))
define([LOCATION],[LINE "__file__" __line__])
diff --git a/dictrans.scm b/dictrans.scm
new file mode 100644
index 0000000..5cfc50e
--- /dev/null
+++ b/dictrans.scm
@@ -0,0 +1,654 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2004 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 2 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with Ellinika; if not, write to the Free Software
+;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;;;
+
+;;;; Dictionary structure
+;;;; Internal representation:
+;;;;
+;;;; * Each dictionary entry is represented as a vector:
+;;;;
+;;;; #(KEY FORMS XREF P-LIST)
+;;;;
+;;;; KEY is list of strings
+;;;;
+;;;; * FORMS is either #f or a string describing forms of the key if they
+;;;; are formed in an irregular manner.
+;;;;
+;;;; * XREF is a list of cross-references
+;;;;
+;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector:
+;;;;
+;;;; #(POS ARTICLE AREF XREF TOPIC)
+;;;;
+;;;; Member Type Meaning
+;;;; POS string part of speech
+;;;; ARTICLE list(string) Dictionary article associated with this key/pos
+;;;; AREF list(string) List of antonyms
+;;;; XREF list(string) List of cross-references
+;;;; TOPIC list(string) List of topics this item pertains to
+;;;;
+;;;;
+;;;; External representation (XML):
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P ID="string">
+;;;; <M>string</M>+
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </P>+
+;;;; <X>string</X>*
+;;;; </NODE>
+;;;;
+;;;; If only one P entry is present, the following alternative forms
+;;;; are understood:
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <M>string</M>*
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+;;;;
+;;;; or
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+
+(set! %load-path (cons "/usr/local/share/guile-sql" %load-path))
+(use-modules (xmltools xmltrans)
+ (sql)
+ (ice-9 getopt-long))
+
+(use-syntax (ice-9 syncase))
+
+(define compile-only #f)
+(define cleanup-option #f)
+(define preserve-option #f)
+
+(define sql-iface "mysql")
+(define sql-host "localhost")
+(define sql-database "ellinika")
+(define sql-port 3306)
+(define sql-password #f)
+(define sql-user #f)
+(define verbose-option #f)
+(define debug-level 0)
+(define input-files '())
+
+(define (debug level . rest)
+ (if (>= debug-level level)
+ (begin
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline))))
+
+
+;;;; XML definitions
+
+;;; Set the default handler
+(define tag-list '())
+
+(define (lingua:default-start tag attr)
+ (xmltrans:set-attr #f "__START__" 1)
+ #f)
+
+(xmltrans:set-default-start-handler lingua:default-start)
+
+(define (lingua:default-end tag attr text)
+ (if (xmltrans:attr attr "__START__")
+ (xmltrans:parse-error #f "Unhandled element " tag))
+ (set! tag-list
+ (cons
+ (xmltrans:make-tag tag attr text)
+ tag-list))
+ #f)
+
+(xmltrans:set-default-end-handler lingua:default-end)
+
+;;; Current node
+(define current-node #f)
+
+(define-syntax node-get
+ (syntax-rules ()
+ ((node-get #:key)
+ (vector-ref current-node 0))
+ ((node-get #:forms)
+ (vector-ref current-node 1))
+ ((node-get #:xref)
+ (vector-ref current-node 2))
+ ((node-get #:p-list)
+ (vector-ref current-node 3))))
+
+(define (mark-invalid)
+ (xmltrans:set-attr "NODE" "__INVALID__" 1))
+
+(define-syntax node-set
+ (syntax-rules ()
+ ((node-set #:key k)
+ (cond
+ ((node-get #:key) =>
+ (lambda (klist)
+ (vector-set! current-node 0 (append klist (list k)))))
+ (else
+ (vector-set! current-node 0 (list k)))))
+ ((node-set #:forms f)
+ (begin
+ (cond
+ ((node-get #:forms)
+ (xmltrans:parse-error #f "Forms already set")
+ (mark-invalid)))
+ (vector-set! current-node 1 f)))
+ ((node-set #:xref x)
+ (cond
+ ((node-get #:xref) =>
+ (lambda (xlist)
+ (vector-set! current-node 2 (append xlist (list x)))))
+ (else
+ (vector-set! current-node 2 (list x)))))
+ ((node-set! #:p-article p)
+ (cond
+ ((node-get #:p-list) =>
+ (lambda (plist)
+ (vector-set! current-node 3 (append plist (list p)))))
+ (else
+ (vector-set! current-node 3 (list p)))))
+ ((node-set #:topic t)
+ (for-each
+ (lambda (p)
+ ;; FIXME: Use p-article-set
+ (vector-set! p 4 (append (vector-ref p 4) (list t))))
+ (node-get #:p-list)))))
+
+(define p-article #f)
+
+(define-syntax p-article-get
+ (syntax-rules ()
+ ((p-article-get #:pos)
+ (vector-ref p-article 0))
+ ((p-article-get #:article)
+ (vector-ref p-article 1))
+ ((p-article-get #:aref)
+ (vector-ref p-article 2))
+ ((p-article-get #:xref)
+ (vector-ref p-article 3))
+ ((p-article-get #:topic)
+ (vector-ref p-article 4))))
+
+(define-syntax p-set
+ (syntax-rules ()
+ ((p-set key n val)
+ (vector-set! p-article n
+ (cond
+ ((p-article-get key) =>
+ (lambda (alst)
+ (append alst (list val))))
+ (else
+ (list val)))))))
+
+(define-syntax p-article-set
+ (syntax-rules ()
+ ((p-article-set #:pos val)
+ (vector-set! p-article 0 val))
+ ((p-article-set #:article val)
+ (p-set #:article 1 val))
+ ((p-article-set #:aref val)
+ (p-set #:aref 2 val))
+ ((p-article-set #:xref val)
+ (p-set #:xref 3 val))
+ ((p-article-set #:topic val)
+ (p-set #:topic 4 val))))
+
+
+;;; Node list
+(define node-list '())
+
+(define (push-node node)
+ (if (>= debug-level 100)
+ (begin
+ (write node)
+ (newline)))
+ (set! node-list (cons current-node node-list)))
+
+;;; Topic stack
+(define topic-list '())
+
+(define (push-topic id)
+ (debug 10 "PUSH " id)
+ (set! topic-list (cons id topic-list)))
+
+(define (pop-topic)
+ (debug 10 "POP " (car topic-list))
+ (set! topic-list (cdr topic-list)))
+
+(define (append-topics)
+ (debug 10 "APPEND")
+ (for-each
+ (lambda (x)
+ (p-article-set #:topic x))
+ topic-list))
+
+;;;
+(define (expect-context tag . rest)
+ (let ((parent (xmltrans:get-parent #t)))
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (ctx-name)
+ (if (string=? ctx-name parent)
+ (return)))
+ rest)
+ (xmltrans:parse-error #f tag " used in wrong context")))))
+
+;;; Hanlde main element
+(xmltrans:start-tag
+ "DICT"
+ (tag attr)
+ #f)
+
+(xmltrans:end-tag
+ "DICT"
+ (tag attr text)
+ ;; FIXME
+ #f)
+
+
+(define expected-k #f)
+
+(define (k-expect)
+ (set! expected-k #f))
+
+;;;
+(xmltrans:start-tag
+ "NODE"
+ (tag attr)
+ (set! current-node (vector #f #f '() '()))
+ (set! p-article #f)
+ (set! expected-k #t)
+ (xmltrans:expect-next-tag "K" k-expect)
+ #f)
+
+(xmltrans:end-tag
+ "NODE"
+ (tag attr text)
+ (cond
+ (p-article
+ (if (not (null? (node-get #:p-list)))
+ (xmltrans:parse-warning #f "Mixed definition style"))
+ (append-topics)
+ (node-set #:p-article p-article)
+ (set! p-article #f)))
+
+ (cond
+ ((xmltrans:attr attr "__INVALID__")) ; Ignore
+ ((not (node-get #:key))
+ (xmltrans:parse-error #f "K element is missing"))
+ ((or (null? (node-get #:p-list)) (null? (null? (node-get #:xref))))
+; (display current-node)(newline)
+; (display p-article)(newline)
+ (xmltrans:parse-error #f "No articles and no cross references for the node"))
+ (else
+ (push-node current-node)))
+ #f)
+
+;;; Topic
+(xmltrans:start-tag
+ "T"
+ (tag attr)
+ (if (not (xmltrans:attr attr "ID"))
+ (letrec ((loc (xmltrans:get-input-location))
+ (proc (lambda ()
+ (xmltrans:parse-error
+ loc
+ "T element does not have ID attribute."))))
+ (xmltrans:expect-next-tag "ID" proc))
+ (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P")))
+ (push-topic (xmltrans:attr attr "ID"))))
+ #f)
+
+(xmltrans:end-tag
+ "T"
+ (tag attr text)
+ (cond
+ ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued
+ ((xmltrans:parent? "P")
+ (p-article-set #:topic (xmltrans:attr attr "ID")))
+ ((xmltrans:parent? "NODE")
+ (node-set #:topic (xmltrans:attr attr "ID")))
+ (else
+ (pop-topic)))
+ #f)
+
+(xmltrans:end-tag
+ "ID"
+ (tag attr text)
+ (cond
+ ((xmltrans:parent? "P")) ;; OK
+ ((not (xmltrans:parent? "T"))
+ (xmltrans:parse-error #f "ID used in wrong context"))
+ ((xmltrans:attr #t "ID")
+ (xmltrans:parse-error #f "T element already has ID attribute"))
+ (else
+ (xmltrans:set-attr #t "ID" text)
+ (if (not (xmltrans:parent? "NODE" "T"))
+ (push-topic text))))
+ #f)
+
+;;; K - KEY
+(xmltrans:end-tag
+ "K"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (if (not expected-k)
+ (begin
+ (xmltrans:parse-error #f "K tag cannot be used here")
+ (mark-invalid)))
+ (xmltrans:expect-next-tag "K" k-expect)
+ (node-set #:key text)
+ #f)
+
+;;; F - FORMS
+(xmltrans:end-tag
+ "F"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (node-set #:forms text)
+ #f)
+
+;;; P - PartOfSpeach
+(define p-location #f)
+
+(xmltrans:start-tag
+ "P"
+ (tag attr)
+ (set! p-location (xmltrans:get-input-location))
+ (set! p-article (vector #f '() '() '() '()))
+ #f)
+
+(xmltrans:end-tag
+ "P"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (cond
+ ((xmltrans:attr attr "ID") =>
+ (lambda (id)
+ (p-article-set #:pos id)
+ (append-topics)
+ (node-set #:p-article p-article)
+ (set! p-article #f)))
+ ((not (null? (p-article-get #:article))) ;; FIXME: other elements too
+ (xmltrans:parse-error p-location
+ "P element is missing ID, but has nested elements"))
+ (else
+ (set! p-article (vector text '() '() '() '()))))
+ #f)
+
+;;; A - Antonym
+
+(xmltrans:end-tag
+ "A"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (p-article-set #:aref text)
+ #f)
+
+;;; X - Xref
+(xmltrans:end-tag
+ "X"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (let ((ref (cond
+ ((string-null? text)
+ (call-with-current-continuation
+ (lambda (return)
+ ;; FIXME: Use node-get/p-article-get
+ (do ((node node-list (cdr node)))
+ ((null? node) #f)
+ (if (not (null?
+ (vector-ref
+ (car (vector-ref (car node) 3)) 1)))
+ (return (car (vector-ref (car node) 0))))))))
+ (else
+ text))))
+
+ (cond
+ ((not ref)
+ (xmltrans:parse-error #f "Empty reference")
+ (mark-invalid))
+ ((xmltrans:parent? "P")
+ (p-article-set #:xref ref))
+ ((xmltrans:parent? "NODE")
+ (node-set #:xref ref))))
+ #f)
+
+;;; M - MEANING
+(xmltrans:end-tag
+ "M"
+ (tag attr text)
+ (expect-context tag "P" "NODE")
+ (p-article-set #:article text)
+ #f)
+
+;;; Formatting elements
+(xmltrans:end-tag
+ "C"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\"comment\">"
+ text
+ "</span>"))
+
+(xmltrans:end-tag
+ "E"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\"expl\">"
+ text
+ "</span>"))
+
+
+
+;;;; Main
+(define grammar
+ `((check (single-char #\c))
+ (cleanup)
+ (database (single-char #\d) (value #t))
+ (host (single-char #\h) (value #t))
+ (port (single-char #\P) (value #t))
+ (password (single-char #\p) (value #t))
+ (user (single-char #\u) (value #t))
+ (interface (value #t))
+ (verbose (single-char #\v))
+ (debug (value #t))
+ (preserve (value #t))
+ (help)))
+
+(define (usage)
+ (display "usage: dictrans OPTIONS FILES\n"))
+
+(define (cons? p)
+ (and (pair? p) (not (list? p))))
+
+(for-each
+ (lambda (x)
+ (cond
+ ((cons? x)
+ (case (car x)
+ ((check)
+ (set! compile-only #t))
+ ((cleanup)
+ (set! cleanup-option #t))
+ ((database)
+ (set! sql-database (cdr x)))
+ ((host)
+ (set! sql-host (cdr x)))
+ ((port)
+ (set! sql-port (cdr x)))
+ ((password)
+ (set! sql-password (cdr x)))
+ ((user)
+ (set! sql-user (cdr x)))
+ ((interface)
+ (set! sql-iface (cdr x)))
+ ((verbose)
+ (set! verbose-option #t))
+ ((preserve)
+ (set! preserve-option #t))
+ ((debug)
+ (set! debug-level (string->number (cdr x))))
+ ((help)
+ (usage)
+ (exit 0))))
+ (else
+ (set! input-files (cdr x)))))
+ (getopt-long (command-line) grammar))
+
+
+(if (null? input-files)
+ (begin
+ (display "Input files not specified\n" (current-error-port))
+ (exit 1)))
+
+(for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)
+
+(if compile-only
+ (exit 0))
+
+(define (assert cond . rest)
+ (cond
+ (cond)
+ ((null? rest)
+ (throw 'misc-error 'assert
+ "Assertion failed"
+ (list) #f)
+ (exit 1))
+ (else
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline)))
+ (exit 1))))
+
+(define dict-index 0)
+(define article-index 0)
+
+(define (get-dict-index conn)
+ (let ((res (sql-query "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1")))
+ (assert (not (null? res)))
+ (set! dict-index (car res))))
+
+(define (cleanup-db conn)
+ (sql-query "DELETE FROM links")
+ (sql-query "DELETE FROM articles")
+ (sql-query "DELETE FROM dict")
+ (sql-query "DELETE FROM topic")
+ (sql-query "DELETE FROM topic_tab")
+ (sql-query "DELETE FROM pending_links"))
+
+(define (pending-fixup)
+ (sql-query "INSERT IGNORE INTO links \
+ SELECT p.type,p.originator,d.ident \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (sql-query "INSERT IGNORE INTO links \
+ SELECT p.type,d.ident,p.originator \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (sql-query
+ (if (string=? sql-iface "mysql")
+ "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word"
+ ;; Else assume SQL92
+ "UPDATE pending_links SET type='CLOSED' \
+ WHERE originator in (SELECT d.ident from dict d, pending_links p \
+ WHERE p.word=d.word)"))
+
+ (if (not preserve-pending-option)
+ sql_query("DELETE FROM pending_links WHERE type = 'CLOSED'"))
+
+ (let ((count (query-number "SELECT count(*) FROM pending_links \
+ WHERE type != 'CLOSED' \
+ GROUP BY word")))
+ (if (> count 0)
+ (display (string-append
+ (car res)
+ " unresolved references")
+ (current-error-port)))))
+
+(define (update-stat)
+ (let ((count (query-number("SELECT count(*) from dict"))))
+ (sql_query "DELETE from stat");
+ (sql-query (string-append
+ "INSERT INTO stat (count,updated) VALUES("
+ (number->string count)
+ ",now())"))))
+
+
+(define (insert-node conn node)
+ (set! article-index 0)
+ (set! dict-index (1+ dict-index))
+
+ ;;; FIXME
+ )
+
+(let ((conn (sql-connect sql-iface sql-host sql-port sql-database
+ sql-user sql-password)))
+
+ (if (not conn)
+ (begin
+ (display "Cannot connect to the database\n" (current-error-port))
+ (exit 1)))
+
+ (if cleanup-option
+ (cleanup-db conn))
+
+ (get-dict-index conn)
+
+ (for-each
+ (lambda (node)
+ (insert-node conn node))
+ node-list)
+
+ (pending-fixup)
+ (update-stat)
+
+ (sql-connect-close conn))
+
+(exit 0)
+
+ \ No newline at end of file
diff --git a/scm/dictrans.scm b/scm/dictrans.scm
new file mode 100644
index 0000000..5cfc50e
--- /dev/null
+++ b/scm/dictrans.scm
@@ -0,0 +1,654 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2004 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 2 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with Ellinika; if not, write to the Free Software
+;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;;;
+
+;;;; Dictionary structure
+;;;; Internal representation:
+;;;;
+;;;; * Each dictionary entry is represented as a vector:
+;;;;
+;;;; #(KEY FORMS XREF P-LIST)
+;;;;
+;;;; KEY is list of strings
+;;;;
+;;;; * FORMS is either #f or a string describing forms of the key if they
+;;;; are formed in an irregular manner.
+;;;;
+;;;; * XREF is a list of cross-references
+;;;;
+;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector:
+;;;;
+;;;; #(POS ARTICLE AREF XREF TOPIC)
+;;;;
+;;;; Member Type Meaning
+;;;; POS string part of speech
+;;;; ARTICLE list(string) Dictionary article associated with this key/pos
+;;;; AREF list(string) List of antonyms
+;;;; XREF list(string) List of cross-references
+;;;; TOPIC list(string) List of topics this item pertains to
+;;;;
+;;;;
+;;;; External representation (XML):
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P ID="string">
+;;;; <M>string</M>+
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </P>+
+;;;; <X>string</X>*
+;;;; </NODE>
+;;;;
+;;;; If only one P entry is present, the following alternative forms
+;;;; are understood:
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <M>string</M>*
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+;;;;
+;;;; or
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+
+(set! %load-path (cons "/usr/local/share/guile-sql" %load-path))
+(use-modules (xmltools xmltrans)
+ (sql)
+ (ice-9 getopt-long))
+
+(use-syntax (ice-9 syncase))
+
+(define compile-only #f)
+(define cleanup-option #f)
+(define preserve-option #f)
+
+(define sql-iface "mysql")
+(define sql-host "localhost")
+(define sql-database "ellinika")
+(define sql-port 3306)
+(define sql-password #f)
+(define sql-user #f)
+(define verbose-option #f)
+(define debug-level 0)
+(define input-files '())
+
+(define (debug level . rest)
+ (if (>= debug-level level)
+ (begin
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline))))
+
+
+;;;; XML definitions
+
+;;; Set the default handler
+(define tag-list '())
+
+(define (lingua:default-start tag attr)
+ (xmltrans:set-attr #f "__START__" 1)
+ #f)
+
+(xmltrans:set-default-start-handler lingua:default-start)
+
+(define (lingua:default-end tag attr text)
+ (if (xmltrans:attr attr "__START__")
+ (xmltrans:parse-error #f "Unhandled element " tag))
+ (set! tag-list
+ (cons
+ (xmltrans:make-tag tag attr text)
+ tag-list))
+ #f)
+
+(xmltrans:set-default-end-handler lingua:default-end)
+
+;;; Current node
+(define current-node #f)
+
+(define-syntax node-get
+ (syntax-rules ()
+ ((node-get #:key)
+ (vector-ref current-node 0))
+ ((node-get #:forms)
+ (vector-ref current-node 1))
+ ((node-get #:xref)
+ (vector-ref current-node 2))
+ ((node-get #:p-list)
+ (vector-ref current-node 3))))
+
+(define (mark-invalid)
+ (xmltrans:set-attr "NODE" "__INVALID__" 1))
+
+(define-syntax node-set
+ (syntax-rules ()
+ ((node-set #:key k)
+ (cond
+ ((node-get #:key) =>
+ (lambda (klist)
+ (vector-set! current-node 0 (append klist (list k)))))
+ (else
+ (vector-set! current-node 0 (list k)))))
+ ((node-set #:forms f)
+ (begin
+ (cond
+ ((node-get #:forms)
+ (xmltrans:parse-error #f "Forms already set")
+ (mark-invalid)))
+ (vector-set! current-node 1 f)))
+ ((node-set #:xref x)
+ (cond
+ ((node-get #:xref) =>
+ (lambda (xlist)
+ (vector-set! current-node 2 (append xlist (list x)))))
+ (else
+ (vector-set! current-node 2 (list x)))))
+ ((node-set! #:p-article p)
+ (cond
+ ((node-get #:p-list) =>
+ (lambda (plist)
+ (vector-set! current-node 3 (append plist (list p)))))
+ (else
+ (vector-set! current-node 3 (list p)))))
+ ((node-set #:topic t)
+ (for-each
+ (lambda (p)
+ ;; FIXME: Use p-article-set
+ (vector-set! p 4 (append (vector-ref p 4) (list t))))
+ (node-get #:p-list)))))
+
+(define p-article #f)
+
+(define-syntax p-article-get
+ (syntax-rules ()
+ ((p-article-get #:pos)
+ (vector-ref p-article 0))
+ ((p-article-get #:article)
+ (vector-ref p-article 1))
+ ((p-article-get #:aref)
+ (vector-ref p-article 2))
+ ((p-article-get #:xref)
+ (vector-ref p-article 3))
+ ((p-article-get #:topic)
+ (vector-ref p-article 4))))
+
+(define-syntax p-set
+ (syntax-rules ()
+ ((p-set key n val)
+ (vector-set! p-article n
+ (cond
+ ((p-article-get key) =>
+ (lambda (alst)
+ (append alst (list val))))
+ (else
+ (list val)))))))
+
+(define-syntax p-article-set
+ (syntax-rules ()
+ ((p-article-set #:pos val)
+ (vector-set! p-article 0 val))
+ ((p-article-set #:article val)
+ (p-set #:article 1 val))
+ ((p-article-set #:aref val)
+ (p-set #:aref 2 val))
+ ((p-article-set #:xref val)
+ (p-set #:xref 3 val))
+ ((p-article-set #:topic val)
+ (p-set #:topic 4 val))))
+
+
+;;; Node list
+(define node-list '())
+
+(define (push-node node)
+ (if (>= debug-level 100)
+ (begin
+ (write node)
+ (newline)))
+ (set! node-list (cons current-node node-list)))
+
+;;; Topic stack
+(define topic-list '())
+
+(define (push-topic id)
+ (debug 10 "PUSH " id)
+ (set! topic-list (cons id topic-list)))
+
+(define (pop-topic)
+ (debug 10 "POP " (car topic-list))
+ (set! topic-list (cdr topic-list)))
+
+(define (append-topics)
+ (debug 10 "APPEND")
+ (for-each
+ (lambda (x)
+ (p-article-set #:topic x))
+ topic-list))
+
+;;;
+(define (expect-context tag . rest)
+ (let ((parent (xmltrans:get-parent #t)))
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (ctx-name)
+ (if (string=? ctx-name parent)
+ (return)))
+ rest)
+ (xmltrans:parse-error #f tag " used in wrong context")))))
+
+;;; Hanlde main element
+(xmltrans:start-tag
+ "DICT"
+ (tag attr)
+ #f)
+
+(xmltrans:end-tag
+ "DICT"
+ (tag attr text)
+ ;; FIXME
+ #f)
+
+
+(define expected-k #f)
+
+(define (k-expect)
+ (set! expected-k #f))
+
+;;;
+(xmltrans:start-tag
+ "NODE"
+ (tag attr)
+ (set! current-node (vector #f #f '() '()))
+ (set! p-article #f)
+ (set! expected-k #t)
+ (xmltrans:expect-next-tag "K" k-expect)
+ #f)
+
+(xmltrans:end-tag
+ "NODE"
+ (tag attr text)
+ (cond
+ (p-article
+ (if (not (null? (node-get #:p-list)))
+ (xmltrans:parse-warning #f "Mixed definition style"))
+ (append-topics)
+ (node-set #:p-article p-article)
+ (set! p-article #f)))
+
+ (cond
+ ((xmltrans:attr attr "__INVALID__")) ; Ignore
+ ((not (node-get #:key))
+ (xmltrans:parse-error #f "K element is missing"))
+ ((or (null? (node-get #:p-list)) (null? (null? (node-get #:xref))))
+; (display current-node)(newline)
+; (display p-article)(newline)
+ (xmltrans:parse-error #f "No articles and no cross references for the node"))
+ (else
+ (push-node current-node)))
+ #f)
+
+;;; Topic
+(xmltrans:start-tag
+ "T"
+ (tag attr)
+ (if (not (xmltrans:attr attr "ID"))
+ (letrec ((loc (xmltrans:get-input-location))
+ (proc (lambda ()
+ (xmltrans:parse-error
+ loc
+ "T element does not have ID attribute."))))
+ (xmltrans:expect-next-tag "ID" proc))
+ (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P")))
+ (push-topic (xmltrans:attr attr "ID"))))
+ #f)
+
+(xmltrans:end-tag
+ "T"
+ (tag attr text)
+ (cond
+ ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warnings has already been issued
+ ((xmltrans:parent? "P")
+ (p-article-set #:topic (xmltrans:attr attr "ID")))
+ ((xmltrans:parent? "NODE")
+ (node-set #:topic (xmltrans:attr attr "ID")))
+ (else
+ (pop-topic)))
+ #f)
+
+(xmltrans:end-tag
+ "ID"
+ (tag attr text)
+ (cond
+ ((xmltrans:parent? "P")) ;; OK
+ ((not (xmltrans:parent? "T"))
+ (xmltrans:parse-error #f "ID used in wrong context"))
+ ((xmltrans:attr #t "ID")
+ (xmltrans:parse-error #f "T element already has ID attribute"))
+ (else
+ (xmltrans:set-attr #t "ID" text)
+ (if (not (xmltrans:parent? "NODE" "T"))
+ (push-topic text))))
+ #f)
+
+;;; K - KEY
+(xmltrans:end-tag
+ "K"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (if (not expected-k)
+ (begin
+ (xmltrans:parse-error #f "K tag cannot be used here")
+ (mark-invalid)))
+ (xmltrans:expect-next-tag "K" k-expect)
+ (node-set #:key text)
+ #f)
+
+;;; F - FORMS
+(xmltrans:end-tag
+ "F"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (node-set #:forms text)
+ #f)
+
+;;; P - PartOfSpeach
+(define p-location #f)
+
+(xmltrans:start-tag
+ "P"
+ (tag attr)
+ (set! p-location (xmltrans:get-input-location))
+ (set! p-article (vector #f '() '() '() '()))
+ #f)
+
+(xmltrans:end-tag
+ "P"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (cond
+ ((xmltrans:attr attr "ID") =>
+ (lambda (id)
+ (p-article-set #:pos id)
+ (append-topics)
+ (node-set #:p-article p-article)
+ (set! p-article #f)))
+ ((not (null? (p-article-get #:article))) ;; FIXME: other elements too
+ (xmltrans:parse-error p-location
+ "P element is missing ID, but has nested elements"))
+ (else
+ (set! p-article (vector text '() '() '() '()))))
+ #f)
+
+;;; A - Antonym
+
+(xmltrans:end-tag
+ "A"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (p-article-set #:aref text)
+ #f)
+
+;;; X - Xref
+(xmltrans:end-tag
+ "X"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (let ((ref (cond
+ ((string-null? text)
+ (call-with-current-continuation
+ (lambda (return)
+ ;; FIXME: Use node-get/p-article-get
+ (do ((node node-list (cdr node)))
+ ((null? node) #f)
+ (if (not (null?
+ (vector-ref
+ (car (vector-ref (car node) 3)) 1)))
+ (return (car (vector-ref (car node) 0))))))))
+ (else
+ text))))
+
+ (cond
+ ((not ref)
+ (xmltrans:parse-error #f "Empty reference")
+ (mark-invalid))
+ ((xmltrans:parent? "P")
+ (p-article-set #:xref ref))
+ ((xmltrans:parent? "NODE")
+ (node-set #:xref ref))))
+ #f)
+
+;;; M - MEANING
+(xmltrans:end-tag
+ "M"
+ (tag attr text)
+ (expect-context tag "P" "NODE")
+ (p-article-set #:article text)
+ #f)
+
+;;; Formatting elements
+(xmltrans:end-tag
+ "C"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\"comment\">"
+ text
+ "</span>"))
+
+(xmltrans:end-tag
+ "E"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\"expl\">"
+ text
+ "</span>"))
+
+
+
+;;;; Main
+(define grammar
+ `((check (single-char #\c))
+ (cleanup)
+ (database (single-char #\d) (value #t))
+ (host (single-char #\h) (value #t))
+ (port (single-char #\P) (value #t))
+ (password (single-char #\p) (value #t))
+ (user (single-char #\u) (value #t))
+ (interface (value #t))
+ (verbose (single-char #\v))
+ (debug (value #t))
+ (preserve (value #t))
+ (help)))
+
+(define (usage)
+ (display "usage: dictrans OPTIONS FILES\n"))
+
+(define (cons? p)
+ (and (pair? p) (not (list? p))))
+
+(for-each
+ (lambda (x)
+ (cond
+ ((cons? x)
+ (case (car x)
+ ((check)
+ (set! compile-only #t))
+ ((cleanup)
+ (set! cleanup-option #t))
+ ((database)
+ (set! sql-database (cdr x)))
+ ((host)
+ (set! sql-host (cdr x)))
+ ((port)
+ (set! sql-port (cdr x)))
+ ((password)
+ (set! sql-password (cdr x)))
+ ((user)
+ (set! sql-user (cdr x)))
+ ((interface)
+ (set! sql-iface (cdr x)))
+ ((verbose)
+ (set! verbose-option #t))
+ ((preserve)
+ (set! preserve-option #t))
+ ((debug)
+ (set! debug-level (string->number (cdr x))))
+ ((help)
+ (usage)
+ (exit 0))))
+ (else
+ (set! input-files (cdr x)))))
+ (getopt-long (command-line) grammar))
+
+
+(if (null? input-files)
+ (begin
+ (display "Input files not specified\n" (current-error-port))
+ (exit 1)))
+
+(for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)
+
+(if compile-only
+ (exit 0))
+
+(define (assert cond . rest)
+ (cond
+ (cond)
+ ((null? rest)
+ (throw 'misc-error 'assert
+ "Assertion failed"
+ (list) #f)
+ (exit 1))
+ (else
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline)))
+ (exit 1))))
+
+(define dict-index 0)
+(define article-index 0)
+
+(define (get-dict-index conn)
+ (let ((res (sql-query "SELECT ident FROM dict ORDER BY ident DESC LIMIT 1")))
+ (assert (not (null? res)))
+ (set! dict-index (car res))))
+
+(define (cleanup-db conn)
+ (sql-query "DELETE FROM links")
+ (sql-query "DELETE FROM articles")
+ (sql-query "DELETE FROM dict")
+ (sql-query "DELETE FROM topic")
+ (sql-query "DELETE FROM topic_tab")
+ (sql-query "DELETE FROM pending_links"))
+
+(define (pending-fixup)
+ (sql-query "INSERT IGNORE INTO links \
+ SELECT p.type,p.originator,d.ident \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (sql-query "INSERT IGNORE INTO links \
+ SELECT p.type,d.ident,p.originator \
+ FROM dict d, pending_links p \
+ WHERE p.word = d.word AND p.type != 'CLOSED'")
+ (sql-query
+ (if (string=? sql-iface "mysql")
+ "UPDATE pending_links p, dict d SET p.type='CLOSED' WHERE p.word = d.word"
+ ;; Else assume SQL92
+ "UPDATE pending_links SET type='CLOSED' \
+ WHERE originator in (SELECT d.ident from dict d, pending_links p \
+ WHERE p.word=d.word)"))
+
+ (if (not preserve-pending-option)
+ sql_query("DELETE FROM pending_links WHERE type = 'CLOSED'"))
+
+ (let ((count (query-number "SELECT count(*) FROM pending_links \
+ WHERE type != 'CLOSED' \
+ GROUP BY word")))
+ (if (> count 0)
+ (display (string-append
+ (car res)
+ " unresolved references")
+ (current-error-port)))))
+
+(define (update-stat)
+ (let ((count (query-number("SELECT count(*) from dict"))))
+ (sql_query "DELETE from stat");
+ (sql-query (string-append
+ "INSERT INTO stat (count,updated) VALUES("
+ (number->string count)
+ ",now())"))))
+
+
+(define (insert-node conn node)
+ (set! article-index 0)
+ (set! dict-index (1+ dict-index))
+
+ ;;; FIXME
+ )
+
+(let ((conn (sql-connect sql-iface sql-host sql-port sql-database
+ sql-user sql-password)))
+
+ (if (not conn)
+ (begin
+ (display "Cannot connect to the database\n" (current-error-port))
+ (exit 1)))
+
+ (if cleanup-option
+ (cleanup-db conn))
+
+ (get-dict-index conn)
+
+ (for-each
+ (lambda (node)
+ (insert-node conn node))
+ node-list)
+
+ (pending-fixup)
+ (update-stat)
+
+ (sql-connect-close conn))
+
+(exit 0)
+
+ \ No newline at end of file
diff --git a/src/gram.y b/src/gram.y
index 93484a7..a60f170 100644
--- a/src/gram.y
+++ b/src/gram.y
@@ -34,12 +34,7 @@ static int convert_pos(char *text, int *pos);
%token NODE POS END MEANING ALIAS ANT TOPIC FORMS XREF
%token <string> STRING
-%type <num> pos
-%type <string> string forms
-%type <header> nodehdr alias
-%type <descr> descr
-%type <list> list aliases descrlist header
-%type <item> item
+%type <string> string
%union {
int num;
@@ -54,146 +49,90 @@ static int convert_pos(char *text, int *pos);
%%
input : list
- {
- node_list = $1;
- }
;
list : item
- {
- $$ = list_create();
- switch ($1.type) {
- case item_node:
- list_append($$, $1.v.node);
- break;
-
- case item_list:
- list_concat($$, $1.v.list);
- list_destroy(&$1.v.list, NULL, NULL);
- }
- }
| list item
- {
- switch ($2.type) {
- case item_node:
- list_append($1, $2.v.node);
- break;
-
- case item_list:
- list_concat($1, $2.v.list);
- list_destroy(&$2.v.list, NULL, NULL);
- }
- $$ = $1;
- }
;
-item : header descrlist end
+item : header descrlist END
{
- $$.type = item_node;
- $$.v.node = create_node($1, $2);
+ printf("</NODE>\n\n");
}
- | TOPIC string list end
+ | thead list END
{
- $$.type = item_list;
- $$.v.list = $3;
- list_iterate($3, _register_topic,
- make_descr(descr_topic, $2));
+ printf("</T>\n");
}
;
+thead : TOPIC string
+ {
+ printf("<T ID=\"%s\">\n", $2);
+ }
+ ;
+
end : END
;
header : nodehdr
- {
- $$ = list_create();
- list_append($$, $1);
- }
| nodehdr aliases
- {
- list_prepend($2, $1);
- $$ = $2;
- }
;
-nodehdr : NODE string pos forms
- {
- $$ = emalloc(sizeof(*$$));
- $$->key = $2;
- $$->pos = $3;
- $$->forms = $4;
- }
+nodehdr : node key pos forms
;
-pos : /* empty */
- {
- $$ = -1;
+node : NODE
+ {
+ printf("<NODE>\n");
+ }
+ ;
+
+key : string
+ {
+ printf(" <K>%s</K>\n", $1);
}
+ ;
+
+pos : /* empty */
| POS string
{
- if (convert_pos($2, &$$))
- YYERROR;
+ printf(" <P>%s</P>\n", $2);
}
;
forms : /* empty */
- {
- $$ = NULL;
- }
| FORMS string
{
- $$ = $2;
+ printf(" <F>%s</F>\n", $2);
}
;
aliases : alias
- {
- $$ = list_create();
- list_append($$, $1);
- }
| aliases alias
- {
- list_append($1, $2);
- $$ = $1;
- }
;
-alias : ALIAS string pos forms
- {
- $$ = emalloc(sizeof(*$$));
- $$->key = $2;
- $$->pos = $3;
- $$->forms = $4;
- }
+alias : ALIAS key pos forms
;
descrlist: descr
- {
- $$ = list_create();
- list_append($$, $1);
- }
| descrlist descr
- {
- list_append($1, $2);
- $$ = $1;
- }
;
descr : TOPIC string
{
- $$ = make_descr(descr_topic, $2);
+ printf(" <T ID=\"%s\" />\n", $2);
}
| MEANING string
{
- $$ = make_descr(descr_meaning, $2);
+ printf(" <M>%s</M>\n", $2);
}
| ANT string
{
- $$ = make_descr(descr_antonym, $2);
+ printf(" <A>%s</A>\n", $2);
}
| XREF string
{
- $$ = make_descr(descr_xref, $2);
+ printf(" <X>%s</X>\n", $2);
}
;
diff --git a/src/input.l b/src/input.l
index f05a6b5..02dd499 100644
--- a/src/input.l
+++ b/src/input.l
@@ -32,7 +32,9 @@ void set_location();
MWS [ \t]*
WS [ \t]+
%%
-#.*\n input_line++;
+#.*\n {
+ printf("<!-- %*.*s -->\n",yyleng-2,yyleng-2,yytext+1);
+ input_line++; }
^[nN][oO][dD][eE] return NODE;
^[pP][oO][sS] return POS;
^[eE][nN][dD]{MWS} return END;
diff --git a/src/main.c b/src/main.c
index 4d0f2cf..cacfe5b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -347,12 +347,12 @@ main(int argc, char **argv)
case ARG_VERSION:
printf("trans (%s)\n", PACKAGE_STRING);
exit(0);
- }
+x }
}
make_m4_args (m4_bin, include_list);
- sql_connect();
+// sql_connect();
node_list = list_create();
@@ -361,7 +361,7 @@ main(int argc, char **argv)
if (parse(argc, argv) || error_count)
return 1;
- if (compile_only)
+// if (compile_only)
return 0;
if (cleanup_flag)
@@ -369,7 +369,7 @@ main(int argc, char **argv)
sql_query_n(&dict_index,
"SELECT ident FROM dict ORDER BY ident DESC LIMIT 1");
-
+
list_iterate(node_list, emit_node, NULL);
pending_fixup();

Return to:

Send suggestions and report system problems to the System administrator.