diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-09-17 13:14:28 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-09-17 13:14:28 +0300 |
commit | 73c4f370284c1e98ba1ac830d47865fe728cedab (patch) | |
tree | b323247f145e87fc516572c899349f18ea1f4ded | |
parent | 9be3f798a55ae3b5db89ee9439ce2a14664b8b25 (diff) | |
download | ellinika-master.tar.gz ellinika-master.tar.bz2 |
* scm/freedict.scm: New file.
-rw-r--r-- | scm/freedict.scm | 847 |
1 files changed, 847 insertions, 0 deletions
diff --git a/scm/freedict.scm b/scm/freedict.scm new file mode 100644 index 0000000..f931299 --- /dev/null +++ b/scm/freedict.scm @@ -0,0 +1,847 @@ +#! =GUILE_BINDIR=/guile -s +=AUTOGENERATED= +!# +;;;; This file is part of Ellinika +;;;; Copyright (C) 2004, 2005, 2007, 2010, 2015 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. +;;;; + +;;;; Dictionary structure +;;;; Internal representation: +;;;; +;;;; * Each dictionary entry is represented as a vector: +;;;; +;;;; #(KEY FORMS XREF P-LIST LOC) +;;;; +;;;; KEY is list of strings +;;;; +;;;; * FORMS is either #f or a string describing forms of the key if they +;;;; are formed in an irregular manner. +;;;; +;;;; * XREF is a list of cross-references +;;;; +;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector: +;;;; +;;;; #(POS ARTICLE AREF XREF TOPIC) +;;;; +;;;; Member Type Meaning +;;;; POS string part of speech +;;;; ARTICLE list(string) Dictionary article associated with this key/pos +;;;; AREF list(string) List of antonyms +;;;; XREF list(string) List of cross-references +;;;; TOPIC list(string) List of topics this item pertains to +;;;; +;;;; * LOC is source location where the entry was defined (cons FILE LINE). +;;;; +;;;; External representation (XML): +;;;; +;;;; <NODE> +;;;; <K>string</K>+ +;;;; [<F>string</F>] +;;;; <P ID="string"> +;;;; <M>string</M>+ +;;;; <A>string</A>* +;;;; <X>string</X>* +;;;; <T ID="string" />* +;;;; </P>+ +;;;; <X>string</X>* +;;;; </NODE> +;;;; +;;;; If only one P entry is present, the following alternative forms +;;;; are understood: +;;;; +;;;; <NODE> +;;;; <K>string</K>+ +;;;; [<F>string</F>] +;;;; <P>string</P> +;;;; <M>string</M>* +;;;; <A>string</A>* +;;;; <X>string</X>* +;;;; <T ID="string" />* +;;;; </NODE> +;;;; +;;;; or +;;;; +;;;; <NODE> +;;;; <K>string</K>+ +;;;; [<F>string</F>] +;;;; <P>string</P> +;;;; <X>string</X>* +;;;; <T ID="string" />* +;;;; </NODE> + +;;; Tailor this statement to your needs if necessary. +;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path)) +(set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path)) +(use-modules (xmltools xmltrans) + (gamma sql) + (ellinika xlat) + (ellinika elmorph) + (ice-9 getopt-long) + (ice-9 regex)) + +(setlocale LC_ALL "") + +(define compile-only #f) +(define cleanup-option #f) +(define preserve-option #f) + +(define ellinika-sql-connection '()) +(define verbose-option #f) +(define debug-level 0) +(define input-files '()) + +(define sysconf-dir "=SYSCONFDIR=") +(define config-file-name "ellinika.conf") + +(define dict-cgi-path #f) + +;;; Load the site defaults +(let ((rc-file (string-append sysconf-dir "/" config-file-name))) + (if (file-exists? rc-file) + (load rc-file))) + +(define (debug level . rest) + (if (>= debug-level level) + (begin + (for-each + (lambda (x) + (display x)) + rest) + (newline)))) + +(define (add-conn-param key val) + (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) + + +(define pos-xlat #f) + +;FIXME +(define (convert-pos pos) pos) + +;FIXME +(define (read-pos conn) + #t) + +;;;; XML definitions + +;;; Set the default handler +(define tag-list '()) + +(define (lingua:default-start tag attr) + (xmltrans:set-attr #f "__START__" 1) + #f) + +(xmltrans:set-default-start-handler lingua:default-start) + +(define (lingua:default-end tag attr text) + (if (xmltrans:attr attr "__START__") + (xmltrans:parse-error #f "Unhandled element " tag)) + (set! tag-list + (cons + (xmltrans:make-tag tag attr text) + tag-list)) + #f) + +(xmltrans:set-default-end-handler lingua:default-end) + +;;; Themes class list +(define class-list '()) + +;;; Current node +(define current-node #f) + +(define-syntax node-get + (syntax-rules () + ((node-get #:key node) + (vector-ref node 0)) + ((node-get #:forms node) + (vector-ref node 1)) + ((node-get #:xref node) + (vector-ref node 2)) + ((node-get #:p-list node) + (vector-ref node 3)) + ((node-get #:locus node) + (vector-ref node 4)))) + +(define-macro (current-node-get key) + `(node-get ,key current-node)) + +(define (mark-invalid) + (xmltrans:set-attr "NODE" "__INVALID__" 1)) + +(define-syntax node-set + (syntax-rules () + ((node-set #:key node k) + (cond + ((node-get #:key node) => + (lambda (klist) + (vector-set! node 0 (append klist (list k))))) + (else + (vector-set! node 0 (list k))))) + ((node-set #:forms node f) + (begin + (cond + ((node-get #:forms node) + (xmltrans:parse-error #f "Forms already set") + (mark-invalid))) + (vector-set! node 1 f))) + ((node-set #:xref node x) + (vector-set! node 2 (append (node-get #:xref node) (list x)))) + ((node-set #:p-article node p) + (vector-set! node 3 (append (node-get #:p-list node) (list p)))) + ((node-set #:topic node t) + ;; FIXME: Scope of <T> is position-dependent relative to <P> + (let ((pl (node-get #:p-list node))) + (if (null? pl) + (current-article-set #:topic t) + (for-each + (lambda (p) + (p-article-set #:topic p t)) + pl)))) + ((node-set #:locus node loc) + (vector-set! node 4 loc)))) + + +(define-macro (current-node-set key val) + `(node-set ,key current-node ,val)) + +(define p-article #f) + +(define-syntax p-article-get + (syntax-rules () + ((p-article-get #:pos article) + (vector-ref article 0)) + ((p-article-get #:article article) + (vector-ref article 1)) + ((p-article-get #:aref article) + (vector-ref article 2)) + ((p-article-get #:xref article) + (vector-ref article 3)) + ((p-article-get #:topic article) + (vector-ref article 4)))) + +(define-syntax p-set + (syntax-rules () + ((p-set key article n val) + (vector-set! article n + (cond + ((p-article-get key article) => + (lambda (alst) + (append alst (list val)))) + (else + (list val))))))) + +(define-macro (current-article-get key) + `(p-article-get ,key p-article)) + +(define-syntax p-article-set + (syntax-rules () + ((p-article-set #:pos article val) + (vector-set! article 0 val)) + ((p-article-set #:article article val) + (p-set #:article article 1 val)) + ((p-article-set #:aref article val) + (p-set #:aref article 2 val)) + ((p-article-set #:xref article val) + (p-set #:xref article 3 val)) + ((p-article-set #:topic article val) + (p-set #:topic article 4 val)))) + +;(define-macro (current-article-set key val) +; `(p-article-set ,key p-article ,val)) + +(define-syntax current-article-set + (syntax-rules () + ((current-article-set key val) + (if p-article + (p-article-set key p-article val) + (for-each + (lambda (x) + (p-article-set key x val)) + (current-node-get #:p-list)))))) + +;;; Node list +(define node-list '()) + +(define (push-node node) + (if (>= debug-level 100) + (begin + (write node) + (newline))) + (set! node-list (cons node node-list))) + +;;; Topic stack +(define topic-list '()) + +(define (push-topic id) + (debug 10 "PUSH " id) + (set! topic-list (cons id topic-list))) + +(define (pop-topic) + (debug 10 "POP " (car topic-list)) + (set! topic-list (cdr topic-list))) + +(define (append-topics) + (debug 10 "APPEND") + (for-each + (lambda (x) + (current-article-set #:topic x)) + topic-list)) + +;;; +(define (expect-context tag . rest) + (let ((parent (xmltrans:get-parent #t))) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (ctx-name) + (if (string=? ctx-name parent) + (return))) + rest) + (xmltrans:parse-error #f tag " used in wrong context"))))) + +;;; Handle main element +(xmltrans:start-tag + "DICT" + (tag attr) + (let ((lang (xmltrans:attr attr "LANG"))) + (if (not lang) + (xmltrans:parse-error #f "Required attribute LANG not specified")) + (push-node lang)) + #f) + +(xmltrans:end-tag + "DICT" + (tag attr text) + ;; FIXME + #f) + + +(define expected-k #f) + +(define (k-expect) + (set! expected-k #f)) + +;;;; NODE +(xmltrans:start-tag + "NODE" + (tag attr) + (set! current-node (vector #f #f '() '() (xmltrans:get-input-location))) + (set! p-article #f) + (set! expected-k #t) + (xmltrans:expect-next-tag "K" k-expect) + #f) + +;;;; INCLUDE +(xmltrans:end-tag + "INCLUDE" + (tag attr text) + (let ((fname (xmltrans:attr attr "FILE"))) + (cond + ((not fname) + (xmltrans:parse-error #f "File name not specified")) + ((string? fname) + (xmltrans:parse-file fname)) + (else + (xmltrans:parse-error #f "FILE must be a valid string")))) + #f) + +(xmltrans:end-tag + "NODE" + (tag attr text) + (cond + (p-article + (if (not (null? (current-node-get #:p-list))) + (xmltrans:parse-warning #f "Mixed definition style")) + (append-topics) + (current-node-set #:p-article p-article) + (set! p-article #f))) + + (cond + ((xmltrans:attr attr "__INVALID__")) ; Ignore + ((not (current-node-get #:key)) + (xmltrans:parse-error #f "K element is missing")) + ((and (null? (current-node-get #:p-list)) (null? (current-node-get #:xref))) + (xmltrans:parse-error #f "No articles and no cross references for the node")) + (else + (push-node current-node))) + #f) + +;;; Topic +(xmltrans:start-tag + "T" + (tag attr) + (if (not (xmltrans:attr attr "ID")) + (letrec ((loc (xmltrans:get-input-location)) + (proc (lambda () + (xmltrans:parse-error + loc + "T element does not have ID attribute.")))) + (xmltrans:expect-next-tag "ID" proc)) + (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P"))) + (push-topic (xmltrans:attr attr "ID")))) + #f) + +(xmltrans:end-tag + "T" + (tag attr text) + (cond + ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warning has already been issued + ((xmltrans:parent? "P") + (current-article-set #:topic (xmltrans:attr attr "ID"))) + ((xmltrans:parent? "NODE") + (current-node-set #:topic (xmltrans:attr attr "ID"))) + (else + (pop-topic))) + #f) + +(xmltrans:end-tag + "ID" + (tag attr text) + (cond + ((xmltrans:parent? "P")) ;; OK + ((not (xmltrans:parent? "T")) + (xmltrans:parse-error #f "ID used in wrong context")) + ((xmltrans:attr #t "ID") + (xmltrans:parse-error #f "T element already has ID attribute")) + (else + (xmltrans:set-attr #t "ID" text) + (if (not (xmltrans:parent? "NODE" "T")) + (push-topic text)))) + #f) + +;;; K - KEY +(xmltrans:end-tag + "K" + (tag attr text) + (expect-context tag "NODE") + (if (not expected-k) + (begin + (xmltrans:parse-error #f "K tag cannot be used here") + (mark-invalid))) + (xmltrans:expect-next-tag "K" k-expect) + (current-node-set #:key text) + #f) + +;;; F - FORMS +(xmltrans:end-tag + "F" + (tag attr text) + (expect-context tag "NODE") + (current-node-set #:forms text) + #f) + +;;; P - PartOfSpeach +(define p-location #f) + +(xmltrans:start-tag + "P" + (tag attr) + (set! p-location (xmltrans:get-input-location)) + (set! p-article (vector #f '() '() '() '())) + #f) + +(xmltrans:end-tag + "P" + (tag attr text) + (expect-context tag "NODE") + (cond + ((xmltrans:attr attr "ID") => + (lambda (id) + (current-article-set #:pos (convert-pos id)) + (append-topics) + (current-node-set #:p-article p-article) + (set! p-article #f))) + ((not (null? (current-article-get #:article))) ;; FIXME: other elements too + (xmltrans:parse-error p-location + "P element is missing ID, but has nested elements")) + (else + (set! p-article (vector (convert-pos text) '() '() '() '())))) + #f) + +;;; A - Antonym + +(xmltrans:end-tag + "A" + (tag attr text) + (expect-context tag "NODE" "P") + (current-article-set #:aref text) + #f) + +;;; X - Xref +(xmltrans:end-tag + "X" + (tag attr text) + (expect-context tag "NODE" "P") + (let ((ref (cond + ((string-null? text) + (call-with-current-continuation + (lambda (return) + (do ((node node-list (cdr node))) + ((null? node) #f) + (if (not (null? (p-article-get + #:article + (car (node-get #:p-list (car node)))))) + (return (car (node-get #:key (car node))))))))) + (else + text)))) + + (cond + ((not ref) + (xmltrans:parse-error #f "Empty reference") + (mark-invalid)) + ((xmltrans:parent? "P") + (current-article-set #:xref ref)) + ((xmltrans:parent? "NODE") + (current-node-set #:xref ref)))) + #f) + +;;; M - MEANING +(xmltrans:end-tag + "M" + (tag attr text) + (expect-context tag "P" "NODE") + (cond + ((and (not p-article) (null? (current-node-get #:p-list))) + (xmltrans:parse-error #f "P element not defined") + (mark-invalid)) + (else + (current-article-set #:article text))) + #f) + + +;;; CLASS +(xmltrans:end-tag + "CLASS" + (tag attr text) + (expect-context tag "DICT") + (let ((lang (xmltrans:attr #t "LANG")) + (id (xmltrans:attr attr "ID")) + (descr (xmltrans:attr attr "DESCR")) + (tds (xmltrans:attr attr "__TDS__"))) + (set! class-list (cons + (list id descr lang tds) + class-list))) + #f) + +;;; D +(xmltrans:end-tag + "D" + (tag attr text) + (expect-context tag "CLASS") + ;; FIXME: Check for duplicates + (xmltrans:set-attr #t "DESCR" text) + #f) + +;;; TD +(xmltrans:end-tag + "TD" + (tag attr text) + (expect-context tag "CLASS") + (xmltrans:set-attr #t "__TDS__" + (cons + text + (or (xmltrans:attr #t "__TDS__") '()))) + #f) + + +;;; Formatting elements +(xmltrans:end-tag + "C" + (tag attr text) + (expect-context tag "M" "F") + (list + "<span class=\\\"comment\\\">" + text + "</span>")) + +(xmltrans:end-tag + "E" + (tag attr text) + (expect-context tag "M" "F") + (list + "<span class=\\\"expl\\\">" + text + "</span>")) + + +(define (assert condition . rest) + (cond + (condition) + ((null? rest) + (throw 'misc-error 'assert + "Assertion failed" + (list) #f) + (exit 1)) + (else + (with-output-to-port + (current-error-port) + (lambda () + (for-each + (lambda (x) + (display x)) + rest) + (newline))) + (exit 1)))) + +;FIXME: Removed SQL statements + +(define (format-dict-node node) +; (format #t "NODE ~a~%" node) + (format #t "~a~%" (string-join (node-get #:key node) ", ")) + (cond + ((node-get #:forms node) => + (lambda (forms) + (format #t "~a~%" forms)))) + (do ((i 1 (1+ i)) + (p-list (node-get #:p-list node) (cdr p-list))) + ((null? p-list)) + (let ((art (car p-list)) + (j 1)) + (if (p-article-get #:pos art) + (format #t "[~a] " (p-article-get #:pos art))) + (let ((p-art (p-article-get #:article art))) + (if (not (null? p-art)) + (if (null? (cdr p-art)) + (format #t "~a~%" (car p-art)) + (for-each + (lambda (text) + (format #t "~a. ~a~%" j + (regexp-substitute/global #f + "<[a-zA-Z]+[^>]*>([^<]*)</[a-zA-Z]+>" + text + 'pre + (lambda (m) + (match:substring m 1)) + 'post)) + (set! j (1+ j))) + (p-article-get #:article art)))) + (newline) + (let ((ant (p-article-get #:aref art))) + (cond ((not (null? ant)) + (format #t "(ант: ~a)~%" (string-join ant ", "))))) + (let ((xref (p-article-get #:xref art))) + (cond ((not (null? xref)) + (format #t "см. также: ~a~%" (string-join xref ", ")))))))) + + (let ((xref (node-get #:xref node))) + (if (not (null? xref)) + (format #t "см. также: ~a~%" (string-join xref ", ")))) + (newline) ) + +(define b64_alpha + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") +(define (b64_encode val) + (if (= 0 val) + "A" + (string-trim + (string + (string-ref b64_alpha (ash (logand val #xc0000000) -30)) + (string-ref b64_alpha (ash (logand val #x3f000000) -24)) + (string-ref b64_alpha (ash (logand val #x00fc0000) -18)) + (string-ref b64_alpha (ash (logand val #x0003f000) -12)) + (string-ref b64_alpha (ash (logand val #x00000fc0) -6)) + (string-ref b64_alpha (logand val #x0000003f))) + #\A))) + + +;;;; Main +(define dbname "greek") + +(define grammar + `((check (single-char #\c)) + (name (single-char #\n) (value #t)) + (cleanup) + (database (single-char #\d) (value #t)) + (host (single-char #\h) (value #t)) + (port (single-char #\P) (value #t)) + (password (single-char #\p) (value #t)) + (user (single-char #\u) (value #t)) + (interface (value #t)) + (verbose (single-char #\v)) + (debug (value #t)) + (preserve (value #t)) + (help))) + +(define (usage) + (display "usage: dictrans OPTIONS FILES +dictrans parses XML dictionary files in Ellinika dictionary format +and stores them into SQL database. + +General options: + + --check Only check input syntax and consistency. Do not + update the database. This means that dictrans will + not access the database at all, so some errors + (mistyped parts of speech and the like) may slip in + unnoticed. + --verbose Verbosely display SQL queries and their results. + --debug NUMBER Set debugging level (0 < NUMBER <= 100) + +SQL related options: + + --interface STRING Select SQL interface to use. STRING may be + either \"mysql\" (the default) or \"postgres\". + --host HOST-OR-PATH Set name or IP address of the host running SQL + database, or path to the database I/O socket. + --database NAME Set name of the database to use. + --port NUMBER Set the SQL port number + --user USER-NAME Set SQL user name. + --password STRING Set the SQL password + + --cleanup Clean up the database (delete all entries from all the + tables) before proceeding. Use this option with care. + --preserve Do not delete resolved entries from pending_links + table. This is intended mainly for debugging. + +Informational options: + + --help Output this help info +\n")) + +(define (cons? p) + (and (pair? p) (not (list? p)))) + +(for-each + (lambda (x) + (cond + ((cons? x) + (case (car x) + ((check) + (set! compile-only #t)) + ((verbose) + (set! verbose-option #t)) + ((name) + (set! dbname (cdr x))) + ((debug) + (set! debug-level (string->number (cdr x)))) + ((help) + (usage) + (exit 0)))) + (else + (set! input-files (cdr x))))) + (getopt-long (command-line) grammar)) + + +(if (and (null? input-files) (not cleanup-option)) + (begin + (display "Input files not specified\n" (current-error-port)) + (exit 1))) + +(define (push-special-node key val) + (push-node + (let ((node (vector #f #f '() '() (xmltrans:get-input-location)))) + (node-set #:key node key) + (node-set #:p-article node + (let ((art (vector #f '() '() '() '()))) + (p-article-set #:article art val) + art)) + node))) + +(push-special-node "00-database-info" + (string-join + (list + "Small greek-russian dictionary." + "" + "This file was converted from the original database on" + (strftime " %c" (localtime (current-time))) + "" + "The original data is available from:" + " http://git.gnu.org.ua/cgit/ellinika.git" + "" + "This dictionary is part of the Ellinika project (http://ellinika.gnu.org.ua)" + "Copyright (C) 2010 Sergey Poznyakoff." + "Distributed under the GPLv3 or later. See http://www.gnu.org/licenses/gpl.html") "\n")) + +(push-special-node "00-database-short" + "Греческо-русский словарь") +(push-special-node "00-database-url" + "http://git.gnu.org.ua/cgit/ellinika.git") +(push-special-node "00-database-utf8" "\n") +(push-special-node "00-database-allchars" "\n") + +(for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files) + +(if compile-only + (exit 0)) + +(define (index<? a b) + (string<? (car a) (car b))) + +(let ((index (list))) + (with-output-to-file (string-append dbname ".dict") + (lambda () + (let ((lang #f)) + (for-each + (lambda (node) + (cond + ((string? node) + ; skip language marker + ) + ((vector? node) + (let ((start (ftell (current-output-port)))) + (format-dict-node node) + (let ((kwl (node-get #:key node))) + (if (and (= (length kwl) 1) + (string-prefix? "00-database" (car kwl))) + (set! start (+ start (string-length (car kwl)) 1)))) + (let ((bstart (b64_encode start)) + (blen (b64_encode (- (ftell (current-output-port)) start)))) + (for-each + (lambda (key) + (set! index (cons + (list key bstart blen) + index)) + (if (not (string-prefix? "00-database" key)) + (let ((skey (elstr->string + (elstr-deaccent (string->elstr key))))) + (if (not (string=? key skey)) + (set! index + (cons + (list skey bstart blen key) + index)))))) + (node-get #:key node))))) + (else + (display "Unexpected node type!\n") + (exit 1)))) + (reverse node-list))))) + + (with-output-to-file (string-append dbname ".index") + (lambda () + (for-each + (lambda (entry) + (format #t "~a\t~a\t~a" + (list-ref entry 0) + (list-ref entry 1) + (list-ref entry 2)) + (if (= (length entry) 4) + (format #t "\t~a" (list-ref entry 3))) + (newline)) + (sort-list index index<?))))) + + +(exit 0) + +;;;; Local variables: +;;;; mode: Scheme +;;;; buffer-file-coding-system: utf-8 +;;;; End: + + |