;;;;
string
;;;;string
;;;;(let ((pl (node-get #:p-list node))) (if (null? pl) (current-article-set #:topic t) (for-each (lambda (p) (p-article-set #:topic p t)) pl)))) ((node-set #:locus node loc) (vector-set! node 4 loc)))) (define-macro (current-node-set key val) `(node-set ,key current-node ,val)) (define p-article #f) (define-syntax p-article-get (syntax-rules () ((p-article-get #:pos article) (vector-ref article 0)) ((p-article-get #:article article) (vector-ref article 1)) ((p-article-get #:aref article) (vector-ref article 2)) ((p-article-get #:xref article) (vector-ref article 3)) ((p-article-get #:topic article) (vector-ref article 4)))) (define-syntax p-set (syntax-rules () ((p-set key article n val) (vector-set! article n (cond ((p-article-get key article) => (lambda (alst) (append alst (list val)))) (else (list val))))))) (define-macro (current-article-get key) `(p-article-get ,key p-article)) (define-syntax p-article-set (syntax-rules () ((p-article-set #:pos article val) (vector-set! article 0 val)) ((p-article-set #:article article val) (p-set #:article article 1 val)) ((p-article-set #:aref article val) (p-set #:aref article 2 val)) ((p-article-set #:xref article val) (p-set #:xref article 3 val)) ((p-article-set #:topic article val) (p-set #:topic article 4 val)))) ;(define-macro (current-article-set key val) ; `(p-article-set ,key p-article ,val)) (define-syntax current-article-set (syntax-rules () ((current-article-set key val) (if p-article (p-article-set key p-article val) (for-each (lambda (x) (p-article-set key x val)) (current-node-get #:p-list)))))) ;;; Node list (define node-list '()) (define (push-node node) (if (>= debug-level 100) (begin (write node) (newline))) (set! node-list (cons node node-list))) ;;; Topic stack (define topic-list '()) (define (push-topic id) (debug 10 "PUSH " id) (set! topic-list (cons id topic-list))) (define (pop-topic) (debug 10 "POP " (car topic-list)) (set! topic-list (cdr topic-list))) (define (append-topics) (debug 10 "APPEND") (for-each (lambda (x) (current-article-set #:topic x)) topic-list)) ;;; (define (expect-context tag . rest) (let ((parent (xmltrans:get-parent #t))) (call-with-current-continuation (lambda (return) (for-each (lambda (ctx-name) (if (string=? ctx-name parent) (return))) rest) (xmltrans:parse-error #f tag " used in wrong context"))))) ;;; Handle main element (xmltrans:start-tag "DICT" (tag attr) (let ((lang (xmltrans:attr attr "LANG"))) (if (not lang) (xmltrans:parse-error #f "Required attribute LANG not specified")) (push-node lang)) #f) (xmltrans:end-tag "DICT" (tag attr text) ;; FIXME #f) (define expected-k #f) (define (k-expect) (set! expected-k #f)) ;;;; NODE (xmltrans:start-tag "NODE" (tag attr) (set! current-node (vector #f #f '() '() (xmltrans:get-input-location))) (set! p-article #f) (set! expected-k #t) (xmltrans:expect-next-tag "K" k-expect) #f) ;;;; INCLUDE (xmltrans:end-tag "INCLUDE" (tag attr text) (let ((fname (xmltrans:attr attr "FILE"))) (cond ((not fname) (xmltrans:parse-error #f "File name not specified")) ((string? fname) (xmltrans:parse-file fname)) (else (xmltrans:parse-error #f "FILE must be a valid string")))) #f) (xmltrans:end-tag "NODE" (tag attr text) (cond (p-article (if (not (null? (current-node-get #:p-list))) (xmltrans:parse-warning #f "Mixed definition style")) (append-topics) (current-node-set #:p-article p-article) (set! p-article #f))) (cond ((xmltrans:attr attr "__INVALID__")) ; Ignore ((not (current-node-get #:key)) (xmltrans:parse-error #f "K element is missing")) ((and (null? (current-node-get #:p-list)) (null? (current-node-get #:xref))) (xmltrans:parse-error #f "No articles and no cross references for the node")) (else (push-node current-node))) #f) ;;; Topic (xmltrans:start-tag "T" (tag attr) (if (not (xmltrans:attr attr "ID")) (letrec ((loc (xmltrans:get-input-location)) (proc (lambda () (xmltrans:parse-error loc "T element does not have ID attribute.")))) (xmltrans:expect-next-tag "ID" proc)) (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P"))) (push-topic (xmltrans:attr attr "ID")))) #f) (xmltrans:end-tag "T" (tag attr text) (cond ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warning has already been issued ((xmltrans:parent? "P") (current-article-set #:topic (xmltrans:attr attr "ID"))) ((xmltrans:parent? "NODE") (current-node-set #:topic (xmltrans:attr attr "ID"))) (else (pop-topic))) #f) (xmltrans:end-tag "ID" (tag attr text) (cond ((xmltrans:parent? "P")) ;; OK ((not (xmltrans:parent? "T")) (xmltrans:parse-error #f "ID used in wrong context")) ((xmltrans:attr #t "ID") (xmltrans:parse-error #f "T element already has ID attribute")) (else (xmltrans:set-attr #t "ID" text) (if (not (xmltrans:parent? "NODE" "T")) (push-topic text)))) #f) ;;; K - KEY (xmltrans:end-tag "K" (tag attr text) (expect-context tag "NODE") (if (not expected-k) (begin (xmltrans:parse-error #f "K tag cannot be used here") (mark-invalid))) (xmltrans:expect-next-tag "K" k-expect) (current-node-set #:key text) #f) ;;; F - FORMS (xmltrans:end-tag "F" (tag attr text) (expect-context tag "NODE") (current-node-set #:forms text) #f) ;;; P - PartOfSpeach (define p-location #f) (xmltrans:start-tag "P" (tag attr) (set! p-location (xmltrans:get-input-location)) (set! p-article (vector #f '() '() '() '())) #f) (xmltrans:end-tag "P" (tag attr text) (expect-context tag "NODE") (cond ((xmltrans:attr attr "ID") => (lambda (id) (current-article-set #:pos (convert-pos id)) (append-topics) (current-node-set #:p-article p-article) (set! p-article #f))) ((not (null? (current-article-get #:article))) ;; FIXME: other elements too (xmltrans:parse-error p-location "P element is missing ID, but has nested elements")) (else (set! p-article (vector (convert-pos text) '() '() '() '())))) #f) ;;; A - Antonym (xmltrans:end-tag "A" (tag attr text) (expect-context tag "NODE" "P") (current-article-set #:aref text) #f) ;;; X - Xref (xmltrans:end-tag "X" (tag attr text) (expect-context tag "NODE" "P") (let ((ref (cond ((string-null? text) (call-with-current-continuation (lambda (return) (do ((node node-list (cdr node))) ((null? node) #f) (if (not (null? (p-article-get #:article (car (node-get #:p-list (car node)))))) (return (car (node-get #:key (car node))))))))) (else text)))) (cond ((not ref) (xmltrans:parse-error #f "Empty reference") (mark-invalid)) ((xmltrans:parent? "P") (current-article-set #:xref ref)) ((xmltrans:parent? "NODE") (current-node-set #:xref ref)))) #f) ;;; M - MEANING (xmltrans:end-tag "M" (tag attr text) (expect-context tag "P" "NODE") (cond ((and (not p-article) (null? (current-node-get #:p-list))) (xmltrans:parse-error #f "P element not defined") (mark-invalid)) (else (current-article-set #:article text))) #f) ;;; CLASS (xmltrans:end-tag "CLASS" (tag attr text) (expect-context tag "DICT") (let ((lang (xmltrans:attr #t "LANG")) (id (xmltrans:attr attr "ID")) (descr (xmltrans:attr attr "DESCR")) (tds (xmltrans:attr attr "__TDS__"))) (set! class-list (cons (list id descr lang tds) class-list))) #f) ;;; D (xmltrans:end-tag "D" (tag attr text) (expect-context tag "CLASS") ;; FIXME: Check for duplicates (xmltrans:set-attr #t "DESCR" text) #f) ;;; TD (xmltrans:end-tag "TD" (tag attr text) (expect-context tag "CLASS") (xmltrans:set-attr #t "__TDS__" (cons text (or (xmltrans:attr #t "__TDS__") '()))) #f) ;;; Formatting elements (xmltrans:end-tag "C" (tag attr text) (expect-context tag "M" "F") (list "" text "")) (xmltrans:end-tag "E" (tag attr text) (expect-context tag "M" "F") (list "" text "")) (define (assert condition . rest) (cond (condition) ((null? rest) (throw 'misc-error 'assert "Assertion failed" (list) #f) (exit 1)) (else (with-output-to-port (current-error-port) (lambda () (for-each (lambda (x) (display x)) rest) (newline))) (exit 1)))) ;FIXME: Removed SQL statements (define (format-dict-node node) ; (format #t "NODE ~a~%" node) (format #t "~a~%" (string-join (node-get #:key node) ", ")) (cond ((node-get #:forms node) => (lambda (forms) (format #t "~a~%" forms)))) (do ((i 1 (1+ i)) (p-list (node-get #:p-list node) (cdr p-list))) ((null? p-list)) (let ((art (car p-list)) (j 1)) (if (p-article-get #:pos art) (format #t "[~a] " (p-article-get #:pos art))) (let ((p-art (p-article-get #:article art))) (if (not (null? p-art)) (if (null? (cdr p-art)) (format #t "~a~%" (car p-art)) (for-each (lambda (text) (format #t "~a. ~a~%" j (regexp-substitute/global #f "<[a-zA-Z]+[^>]*>([^<]*)[a-zA-Z]+>" text 'pre (lambda (m) (match:substring m 1)) 'post)) (set! j (1+ j))) (p-article-get #:article art)))) (newline) (let ((ant (p-article-get #:aref art))) (cond ((not (null? ant)) (format #t "(ант: ~a)~%" (string-join ant ", "))))) (let ((xref (p-article-get #:xref art))) (cond ((not (null? xref)) (format #t "см. также: ~a~%" (string-join xref ", ")))))))) (let ((xref (node-get #:xref node))) (if (not (null? xref)) (format #t "см. также: ~a~%" (string-join xref ", ")))) (newline) ) (define b64_alpha "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") (define (b64_encode val) (if (= 0 val) "A" (string-trim (string (string-ref b64_alpha (ash (logand val #xc0000000) -30)) (string-ref b64_alpha (ash (logand val #x3f000000) -24)) (string-ref b64_alpha (ash (logand val #x00fc0000) -18)) (string-ref b64_alpha (ash (logand val #x0003f000) -12)) (string-ref b64_alpha (ash (logand val #x00000fc0) -6)) (string-ref b64_alpha (logand val #x0000003f))) #\A))) ;;;; Main (define dbname "greek") (define grammar `((check (single-char #\c)) (name (single-char #\n) (value #t)) (cleanup) (database (single-char #\d) (value #t)) (host (single-char #\h) (value #t)) (port (single-char #\P) (value #t)) (password (single-char #\p) (value #t)) (user (single-char #\u) (value #t)) (interface (value #t)) (verbose (single-char #\v)) (debug (value #t)) (preserve (value #t)) (help))) (define (usage) (display "usage: dictrans OPTIONS FILES dictrans parses XML dictionary files in Ellinika dictionary format and stores them into SQL database. General options: --check Only check input syntax and consistency. Do not update the database. This means that dictrans will not access the database at all, so some errors (mistyped parts of speech and the like) may slip in unnoticed. --verbose Verbosely display SQL queries and their results. --debug NUMBER Set debugging level (0 < NUMBER <= 100) SQL related options: --interface STRING Select SQL interface to use. STRING may be either \"mysql\" (the default) or \"postgres\". --host HOST-OR-PATH Set name or IP address of the host running SQL database, or path to the database I/O socket. --database NAME Set name of the database to use. --port NUMBER Set the SQL port number --user USER-NAME Set SQL user name. --password STRING Set the SQL password --cleanup Clean up the database (delete all entries from all the tables) before proceeding. Use this option with care. --preserve Do not delete resolved entries from pending_links table. This is intended mainly for debugging. Informational options: --help Output this help info \n")) (define (cons? p) (and (pair? p) (not (list? p)))) (for-each (lambda (x) (cond ((cons? x) (case (car x) ((check) (set! compile-only #t)) ((verbose) (set! verbose-option #t)) ((name) (set! dbname (cdr x))) ((debug) (set! debug-level (string->number (cdr x)))) ((help) (usage) (exit 0)))) (else (set! input-files (cdr x))))) (getopt-long (command-line) grammar)) (if (and (null? input-files) (not cleanup-option)) (begin (display "Input files not specified\n" (current-error-port)) (exit 1))) (define (push-special-node key val) (push-node (let ((node (vector #f #f '() '() (xmltrans:get-input-location)))) (node-set #:key node key) (node-set #:p-article node (let ((art (vector #f '() '() '() '()))) (p-article-set #:article art val) art)) node))) (push-special-node "00-database-info" (string-join (list "Small greek-russian dictionary." "" "This file was converted from the original database on" (strftime " %c" (localtime (current-time))) "" "The original data is available from:" " http://git.gnu.org.ua/cgit/ellinika.git" "" "This dictionary is part of the Ellinika project (http://ellinika.gnu.org.ua)" "Copyright (C) 2010 Sergey Poznyakoff." "Distributed under the GPLv3 or later. See http://www.gnu.org/licenses/gpl.html") "\n")) (push-special-node "00-database-short" "Греческо-русский словарь") (push-special-node "00-database-url" "http://git.gnu.org.ua/cgit/ellinika.git") (push-special-node "00-database-utf8" "\n") (push-special-node "00-database-allchars" "\n") (for-each (lambda (x) (if (not (xmltrans:parse-file x)) (exit 1))) input-files) (if compile-only (exit 0)) (define (index a b) (string (car a) (car b))) (let ((index (list))) (with-output-to-file (string-append dbname ".dict") (lambda () (let ((lang #f)) (for-each (lambda (node) (cond ((string? node) ; skip language marker ) ((vector? node) (let ((start (ftell (current-output-port)))) (format-dict-node node) (let ((kwl (node-get #:key node))) (if (and (= (length kwl) 1) (string-prefix? "00-database" (car kwl))) (set! start (+ start (string-length (car kwl)) 1)))) (let ((bstart (b64_encode start)) (blen (b64_encode (- (ftell (current-output-port)) start)))) (for-each (lambda (key) (set! index (cons (list key bstart blen) index)) (if (not (string-prefix? "00-database" key)) (let ((skey (elstr->string (elstr-deaccent (string->elstr key))))) (if (not (string=? key skey)) (set! index (cons (list skey bstart blen key) index)))))) (node-get #:key node))))) (else (display "Unexpected node type!\n") (exit 1)))) (reverse node-list))))) (with-output-to-file (string-append dbname ".index") (lambda () (for-each (lambda (entry) (format #t "~a\t~a\t~a" (list-ref entry 0) (list-ref entry 1) (list-ref entry 2)) (if (= (length entry) 4) (format #t "\t~a" (list-ref entry 3))) (newline)) (sort-list index index))))) (exit 0) ;;;; Local variables: ;;;; mode: Scheme ;;;; buffer-file-coding-system: utf-8 ;;;; End: