#! =GUILE_BINDIR=/guile -s =AUTOGENERATED= !# ;;;; This file is part of Ellinika ;;;; Copyright (C) 2004, 2005, 2007, 2010, 2015 Sergey Poznyakoff ;;;; ;;;; Ellinika is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; Ellinika is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program. If not, see . ;;;; ;;;; Dictionary structure ;;;; Internal representation: ;;;; ;;;; * Each dictionary entry is represented as a vector: ;;;; ;;;; #(KEY FORMS XREF P-LIST LOC) ;;;; ;;;; KEY is list of strings ;;;; ;;;; * FORMS is either #f or a string describing forms of the key if they ;;;; are formed in an irregular manner. ;;;; ;;;; * XREF is a list of cross-references ;;;; ;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector: ;;;; ;;;; #(POS ARTICLE AREF XREF TOPIC) ;;;; ;;;; Member Type Meaning ;;;; POS string part of speech ;;;; ARTICLE list(string) Dictionary article associated with this key/pos ;;;; AREF list(string) List of antonyms ;;;; XREF list(string) List of cross-references ;;;; TOPIC list(string) List of topics this item pertains to ;;;; ;;;; * LOC is source location where the entry was defined (cons FILE LINE). ;;;; ;;;; External representation (XML): ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

;;;; string+ ;;;; string* ;;;; string* ;;;; * ;;;;

+ ;;;; string* ;;;;
;;;; ;;;; If only one P entry is present, the following alternative forms ;;;; are understood: ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

string

;;;; string* ;;;; string* ;;;; string* ;;;; * ;;;;
;;;; ;;;; or ;;;; ;;;; ;;;; string+ ;;;; [string] ;;;;

string

;;;; string* ;;;; * ;;;;
;;; Tailor this statement to your needs if necessary. ;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path)) (set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path)) (use-modules (xmltools xmltrans) (gamma sql) (ellinika xlat) (ellinika elmorph) (ice-9 getopt-long) (ice-9 regex)) (setlocale LC_ALL "") (define compile-only #f) (define cleanup-option #f) (define preserve-option #f) (define ellinika-sql-connection '()) (define verbose-option #f) (define debug-level 0) (define input-files '()) (define sysconf-dir "=SYSCONFDIR=") (define config-file-name "ellinika.conf") (define dict-cgi-path #f) ;;; Load the site defaults (let ((rc-file (string-append sysconf-dir "/" config-file-name))) (if (file-exists? rc-file) (load rc-file))) (define (debug level . rest) (if (>= debug-level level) (begin (for-each (lambda (x) (display x)) rest) (newline)))) (define (add-conn-param key val) (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) (define pos-xlat #f) ;FIXME (define (convert-pos pos) pos) ;FIXME (define (read-pos conn) #t) ;;;; XML definitions ;;; Set the default handler (define tag-list '()) (define (lingua:default-start tag attr) (xmltrans:set-attr #f "__START__" 1) #f) (xmltrans:set-default-start-handler lingua:default-start) (define (lingua:default-end tag attr text) (if (xmltrans:attr attr "__START__") (xmltrans:parse-error #f "Unhandled element " tag)) (set! tag-list (cons (xmltrans:make-tag tag attr text) tag-list)) #f) (xmltrans:set-default-end-handler lingua:default-end) ;;; Themes class list (define class-list '()) ;;; Current node (define current-node #f) (define-syntax node-get (syntax-rules () ((node-get #:key node) (vector-ref node 0)) ((node-get #:forms node) (vector-ref node 1)) ((node-get #:xref node) (vector-ref node 2)) ((node-get #:p-list node) (vector-ref node 3)) ((node-get #:locus node) (vector-ref node 4)))) (define-macro (current-node-get key) `(node-get ,key current-node)) (define (mark-invalid) (xmltrans:set-attr "NODE" "__INVALID__" 1)) (define-syntax node-set (syntax-rules () ((node-set #:key node k) (cond ((node-get #:key node) => (lambda (klist) (vector-set! node 0 (append klist (list k))))) (else (vector-set! node 0 (list k))))) ((node-set #:forms node f) (begin (cond ((node-get #:forms node) (xmltrans:parse-error #f "Forms already set") (mark-invalid))) (vector-set! node 1 f))) ((node-set #:xref node x) (vector-set! node 2 (append (node-get #:xref node) (list x)))) ((node-set #:p-article node p) (vector-set! node 3 (append (node-get #:p-list node) (list p)))) ((node-set #:topic node t) ;; FIXME: Scope of is position-dependent relative to

(let ((pl (node-get #:p-list node))) (if (null? pl) (current-article-set #:topic t) (for-each (lambda (p) (p-article-set #:topic p t)) pl)))) ((node-set #:locus node loc) (vector-set! node 4 loc)))) (define-macro (current-node-set key val) `(node-set ,key current-node ,val)) (define p-article #f) (define-syntax p-article-get (syntax-rules () ((p-article-get #:pos article) (vector-ref article 0)) ((p-article-get #:article article) (vector-ref article 1)) ((p-article-get #:aref article) (vector-ref article 2)) ((p-article-get #:xref article) (vector-ref article 3)) ((p-article-get #:topic article) (vector-ref article 4)))) (define-syntax p-set (syntax-rules () ((p-set key article n val) (vector-set! article n (cond ((p-article-get key article) => (lambda (alst) (append alst (list val)))) (else (list val))))))) (define-macro (current-article-get key) `(p-article-get ,key p-article)) (define-syntax p-article-set (syntax-rules () ((p-article-set #:pos article val) (vector-set! article 0 val)) ((p-article-set #:article article val) (p-set #:article article 1 val)) ((p-article-set #:aref article val) (p-set #:aref article 2 val)) ((p-article-set #:xref article val) (p-set #:xref article 3 val)) ((p-article-set #:topic article val) (p-set #:topic article 4 val)))) ;(define-macro (current-article-set key val) ; `(p-article-set ,key p-article ,val)) (define-syntax current-article-set (syntax-rules () ((current-article-set key val) (if p-article (p-article-set key p-article val) (for-each (lambda (x) (p-article-set key x val)) (current-node-get #:p-list)))))) ;;; Node list (define node-list '()) (define (push-node node) (if (>= debug-level 100) (begin (write node) (newline))) (set! node-list (cons node node-list))) ;;; Topic stack (define topic-list '()) (define (push-topic id) (debug 10 "PUSH " id) (set! topic-list (cons id topic-list))) (define (pop-topic) (debug 10 "POP " (car topic-list)) (set! topic-list (cdr topic-list))) (define (append-topics) (debug 10 "APPEND") (for-each (lambda (x) (current-article-set #:topic x)) topic-list)) ;;; (define (expect-context tag . rest) (let ((parent (xmltrans:get-parent #t))) (call-with-current-continuation (lambda (return) (for-each (lambda (ctx-name) (if (string=? ctx-name parent) (return))) rest) (xmltrans:parse-error #f tag " used in wrong context"))))) ;;; Handle main element (xmltrans:start-tag "DICT" (tag attr) (let ((lang (xmltrans:attr attr "LANG"))) (if (not lang) (xmltrans:parse-error #f "Required attribute LANG not specified")) (push-node lang)) #f) (xmltrans:end-tag "DICT" (tag attr text) ;; FIXME #f) (define expected-k #f) (define (k-expect) (set! expected-k #f)) ;;;; NODE (xmltrans:start-tag "NODE" (tag attr) (set! current-node (vector #f #f '() '() (xmltrans:get-input-location))) (set! p-article #f) (set! expected-k #t) (xmltrans:expect-next-tag "K" k-expect) #f) ;;;; INCLUDE (xmltrans:end-tag "INCLUDE" (tag attr text) (let ((fname (xmltrans:attr attr "FILE"))) (cond ((not fname) (xmltrans:parse-error #f "File name not specified")) ((string? fname) (xmltrans:parse-file fname)) (else (xmltrans:parse-error #f "FILE must be a valid string")))) #f) (xmltrans:end-tag "NODE" (tag attr text) (cond (p-article (if (not (null? (current-node-get #:p-list))) (xmltrans:parse-warning #f "Mixed definition style")) (append-topics) (current-node-set #:p-article p-article) (set! p-article #f))) (cond ((xmltrans:attr attr "__INVALID__")) ; Ignore ((not (current-node-get #:key)) (xmltrans:parse-error #f "K element is missing")) ((and (null? (current-node-get #:p-list)) (null? (current-node-get #:xref))) (xmltrans:parse-error #f "No articles and no cross references for the node")) (else (push-node current-node))) #f) ;;; Topic (xmltrans:start-tag "T" (tag attr) (if (not (xmltrans:attr attr "ID")) (letrec ((loc (xmltrans:get-input-location)) (proc (lambda () (xmltrans:parse-error loc "T element does not have ID attribute.")))) (xmltrans:expect-next-tag "ID" proc)) (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P"))) (push-topic (xmltrans:attr attr "ID")))) #f) (xmltrans:end-tag "T" (tag attr text) (cond ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warning has already been issued ((xmltrans:parent? "P") (current-article-set #:topic (xmltrans:attr attr "ID"))) ((xmltrans:parent? "NODE") (current-node-set #:topic (xmltrans:attr attr "ID"))) (else (pop-topic))) #f) (xmltrans:end-tag "ID" (tag attr text) (cond ((xmltrans:parent? "P")) ;; OK ((not (xmltrans:parent? "T")) (xmltrans:parse-error #f "ID used in wrong context")) ((xmltrans:attr #t "ID") (xmltrans:parse-error #f "T element already has ID attribute")) (else (xmltrans:set-attr #t "ID" text) (if (not (xmltrans:parent? "NODE" "T")) (push-topic text)))) #f) ;;; K - KEY (xmltrans:end-tag "K" (tag attr text) (expect-context tag "NODE") (if (not expected-k) (begin (xmltrans:parse-error #f "K tag cannot be used here") (mark-invalid))) (xmltrans:expect-next-tag "K" k-expect) (current-node-set #:key text) #f) ;;; F - FORMS (xmltrans:end-tag "F" (tag attr text) (expect-context tag "NODE") (current-node-set #:forms text) #f) ;;; P - PartOfSpeach (define p-location #f) (xmltrans:start-tag "P" (tag attr) (set! p-location (xmltrans:get-input-location)) (set! p-article (vector #f '() '() '() '())) #f) (xmltrans:end-tag "P" (tag attr text) (expect-context tag "NODE") (cond ((xmltrans:attr attr "ID") => (lambda (id) (current-article-set #:pos (convert-pos id)) (append-topics) (current-node-set #:p-article p-article) (set! p-article #f))) ((not (null? (current-article-get #:article))) ;; FIXME: other elements too (xmltrans:parse-error p-location "P element is missing ID, but has nested elements")) (else (set! p-article (vector (convert-pos text) '() '() '() '())))) #f) ;;; A - Antonym (xmltrans:end-tag "A" (tag attr text) (expect-context tag "NODE" "P") (current-article-set #:aref text) #f) ;;; X - Xref (xmltrans:end-tag "X" (tag attr text) (expect-context tag "NODE" "P") (let ((ref (cond ((string-null? text) (call-with-current-continuation (lambda (return) (do ((node node-list (cdr node))) ((null? node) #f) (if (not (null? (p-article-get #:article (car (node-get #:p-list (car node)))))) (return (car (node-get #:key (car node))))))))) (else text)))) (cond ((not ref) (xmltrans:parse-error #f "Empty reference") (mark-invalid)) ((xmltrans:parent? "P") (current-article-set #:xref ref)) ((xmltrans:parent? "NODE") (current-node-set #:xref ref)))) #f) ;;; M - MEANING (xmltrans:end-tag "M" (tag attr text) (expect-context tag "P" "NODE") (cond ((and (not p-article) (null? (current-node-get #:p-list))) (xmltrans:parse-error #f "P element not defined") (mark-invalid)) (else (current-article-set #:article text))) #f) ;;; CLASS (xmltrans:end-tag "CLASS" (tag attr text) (expect-context tag "DICT") (let ((lang (xmltrans:attr #t "LANG")) (id (xmltrans:attr attr "ID")) (descr (xmltrans:attr attr "DESCR")) (tds (xmltrans:attr attr "__TDS__"))) (set! class-list (cons (list id descr lang tds) class-list))) #f) ;;; D (xmltrans:end-tag "D" (tag attr text) (expect-context tag "CLASS") ;; FIXME: Check for duplicates (xmltrans:set-attr #t "DESCR" text) #f) ;;; TD (xmltrans:end-tag "TD" (tag attr text) (expect-context tag "CLASS") (xmltrans:set-attr #t "__TDS__" (cons text (or (xmltrans:attr #t "__TDS__") '()))) #f) ;;; Formatting elements (xmltrans:end-tag "C" (tag attr text) (expect-context tag "M" "F") (list "" text "")) (xmltrans:end-tag "E" (tag attr text) (expect-context tag "M" "F") (list "" text "")) (define (assert condition . rest) (cond (condition) ((null? rest) (throw 'misc-error 'assert "Assertion failed" (list) #f) (exit 1)) (else (with-output-to-port (current-error-port) (lambda () (for-each (lambda (x) (display x)) rest) (newline))) (exit 1)))) ;FIXME: Removed SQL statements (define (format-dict-node node) ; (format #t "NODE ~a~%" node) (format #t "~a~%" (string-join (node-get #:key node) ", ")) (cond ((node-get #:forms node) => (lambda (forms) (format #t "~a~%" forms)))) (do ((i 1 (1+ i)) (p-list (node-get #:p-list node) (cdr p-list))) ((null? p-list)) (let ((art (car p-list)) (j 1)) (if (p-article-get #:pos art) (format #t "[~a] " (p-article-get #:pos art))) (let ((p-art (p-article-get #:article art))) (if (not (null? p-art)) (if (null? (cdr p-art)) (format #t "~a~%" (car p-art)) (for-each (lambda (text) (format #t "~a. ~a~%" j (regexp-substitute/global #f "<[a-zA-Z]+[^>]*>([^<]*)" text 'pre (lambda (m) (match:substring m 1)) 'post)) (set! j (1+ j))) (p-article-get #:article art)))) (newline) (let ((ant (p-article-get #:aref art))) (cond ((not (null? ant)) (format #t "(ант: ~a)~%" (string-join ant ", "))))) (let ((xref (p-article-get #:xref art))) (cond ((not (null? xref)) (format #t "см. также: ~a~%" (string-join xref ", ")))))))) (let ((xref (node-get #:xref node))) (if (not (null? xref)) (format #t "см. также: ~a~%" (string-join xref ", ")))) (newline) ) (define b64_alpha "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") (define (b64_encode val) (if (= 0 val) "A" (string-trim (string (string-ref b64_alpha (ash (logand val #xc0000000) -30)) (string-ref b64_alpha (ash (logand val #x3f000000) -24)) (string-ref b64_alpha (ash (logand val #x00fc0000) -18)) (string-ref b64_alpha (ash (logand val #x0003f000) -12)) (string-ref b64_alpha (ash (logand val #x00000fc0) -6)) (string-ref b64_alpha (logand val #x0000003f))) #\A))) ;;;; Main (define dbname "greek") (define grammar `((check (single-char #\c)) (name (single-char #\n) (value #t)) (cleanup) (database (single-char #\d) (value #t)) (host (single-char #\h) (value #t)) (port (single-char #\P) (value #t)) (password (single-char #\p) (value #t)) (user (single-char #\u) (value #t)) (interface (value #t)) (verbose (single-char #\v)) (debug (value #t)) (preserve (value #t)) (help))) (define (usage) (display "usage: dictrans OPTIONS FILES dictrans parses XML dictionary files in Ellinika dictionary format and stores them into SQL database. General options: --check Only check input syntax and consistency. Do not update the database. This means that dictrans will not access the database at all, so some errors (mistyped parts of speech and the like) may slip in unnoticed. --verbose Verbosely display SQL queries and their results. --debug NUMBER Set debugging level (0 < NUMBER <= 100) SQL related options: --interface STRING Select SQL interface to use. STRING may be either \"mysql\" (the default) or \"postgres\". --host HOST-OR-PATH Set name or IP address of the host running SQL database, or path to the database I/O socket. --database NAME Set name of the database to use. --port NUMBER Set the SQL port number --user USER-NAME Set SQL user name. --password STRING Set the SQL password --cleanup Clean up the database (delete all entries from all the tables) before proceeding. Use this option with care. --preserve Do not delete resolved entries from pending_links table. This is intended mainly for debugging. Informational options: --help Output this help info \n")) (define (cons? p) (and (pair? p) (not (list? p)))) (for-each (lambda (x) (cond ((cons? x) (case (car x) ((check) (set! compile-only #t)) ((verbose) (set! verbose-option #t)) ((name) (set! dbname (cdr x))) ((debug) (set! debug-level (string->number (cdr x)))) ((help) (usage) (exit 0)))) (else (set! input-files (cdr x))))) (getopt-long (command-line) grammar)) (if (and (null? input-files) (not cleanup-option)) (begin (display "Input files not specified\n" (current-error-port)) (exit 1))) (define (push-special-node key val) (push-node (let ((node (vector #f #f '() '() (xmltrans:get-input-location)))) (node-set #:key node key) (node-set #:p-article node (let ((art (vector #f '() '() '() '()))) (p-article-set #:article art val) art)) node))) (push-special-node "00-database-info" (string-join (list "Small greek-russian dictionary." "" "This file was converted from the original database on" (strftime " %c" (localtime (current-time))) "" "The original data is available from:" " http://git.gnu.org.ua/cgit/ellinika.git" "" "This dictionary is part of the Ellinika project (http://ellinika.gnu.org.ua)" "Copyright (C) 2010 Sergey Poznyakoff." "Distributed under the GPLv3 or later. See http://www.gnu.org/licenses/gpl.html") "\n")) (push-special-node "00-database-short" "Греческо-русский словарь") (push-special-node "00-database-url" "http://git.gnu.org.ua/cgit/ellinika.git") (push-special-node "00-database-utf8" "\n") (push-special-node "00-database-allchars" "\n") (for-each (lambda (x) (if (not (xmltrans:parse-file x)) (exit 1))) input-files) (if compile-only (exit 0)) (define (indexstring (elstr-deaccent (string->elstr key))))) (if (not (string=? key skey)) (set! index (cons (list skey bstart blen key) index)))))) (node-get #:key node))))) (else (display "Unexpected node type!\n") (exit 1)))) (reverse node-list))))) (with-output-to-file (string-append dbname ".index") (lambda () (for-each (lambda (entry) (format #t "~a\t~a\t~a" (list-ref entry 0) (list-ref entry 1) (list-ref entry 2)) (if (= (length entry) 4) (format #t "\t~a" (list-ref entry 3))) (newline)) (sort-list index index