aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2018-09-17 13:14:28 +0300
committerSergey Poznyakoff <gray@gnu.org>2018-09-17 13:14:28 +0300
commit73c4f370284c1e98ba1ac830d47865fe728cedab (patch)
treeb323247f145e87fc516572c899349f18ea1f4ded
parent9be3f798a55ae3b5db89ee9439ce2a14664b8b25 (diff)
downloadellinika-master.tar.gz
ellinika-master.tar.bz2
New utility to convert dictionary to freedict formatHEADmaster
* scm/freedict.scm: New file.
-rw-r--r--scm/freedict.scm847
1 files changed, 847 insertions, 0 deletions
diff --git a/scm/freedict.scm b/scm/freedict.scm
new file mode 100644
index 0000000..f931299
--- /dev/null
+++ b/scm/freedict.scm
@@ -0,0 +1,847 @@
+#! =GUILE_BINDIR=/guile -s
+=AUTOGENERATED=
+!#
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2004, 2005, 2007, 2010, 2015 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+
+;;;; Dictionary structure
+;;;; Internal representation:
+;;;;
+;;;; * Each dictionary entry is represented as a vector:
+;;;;
+;;;; #(KEY FORMS XREF P-LIST LOC)
+;;;;
+;;;; KEY is list of strings
+;;;;
+;;;; * FORMS is either #f or a string describing forms of the key if they
+;;;; are formed in an irregular manner.
+;;;;
+;;;; * XREF is a list of cross-references
+;;;;
+;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector:
+;;;;
+;;;; #(POS ARTICLE AREF XREF TOPIC)
+;;;;
+;;;; Member Type Meaning
+;;;; POS string part of speech
+;;;; ARTICLE list(string) Dictionary article associated with this key/pos
+;;;; AREF list(string) List of antonyms
+;;;; XREF list(string) List of cross-references
+;;;; TOPIC list(string) List of topics this item pertains to
+;;;;
+;;;; * LOC is source location where the entry was defined (cons FILE LINE).
+;;;;
+;;;; External representation (XML):
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P ID="string">
+;;;; <M>string</M>+
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </P>+
+;;;; <X>string</X>*
+;;;; </NODE>
+;;;;
+;;;; If only one P entry is present, the following alternative forms
+;;;; are understood:
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <M>string</M>*
+;;;; <A>string</A>*
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+;;;;
+;;;; or
+;;;;
+;;;; <NODE>
+;;;; <K>string</K>+
+;;;; [<F>string</F>]
+;;;; <P>string</P>
+;;;; <X>string</X>*
+;;;; <T ID="string" />*
+;;;; </NODE>
+
+;;; Tailor this statement to your needs if necessary.
+;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path))
+(set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path))
+(use-modules (xmltools xmltrans)
+ (gamma sql)
+ (ellinika xlat)
+ (ellinika elmorph)
+ (ice-9 getopt-long)
+ (ice-9 regex))
+
+(setlocale LC_ALL "")
+
+(define compile-only #f)
+(define cleanup-option #f)
+(define preserve-option #f)
+
+(define ellinika-sql-connection '())
+(define verbose-option #f)
+(define debug-level 0)
+(define input-files '())
+
+(define sysconf-dir "=SYSCONFDIR=")
+(define config-file-name "ellinika.conf")
+
+(define dict-cgi-path #f)
+
+;;; Load the site defaults
+(let ((rc-file (string-append sysconf-dir "/" config-file-name)))
+ (if (file-exists? rc-file)
+ (load rc-file)))
+
+(define (debug level . rest)
+ (if (>= debug-level level)
+ (begin
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline))))
+
+(define (add-conn-param key val)
+ (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection)))
+
+
+(define pos-xlat #f)
+
+;FIXME
+(define (convert-pos pos) pos)
+
+;FIXME
+(define (read-pos conn)
+ #t)
+
+;;;; XML definitions
+
+;;; Set the default handler
+(define tag-list '())
+
+(define (lingua:default-start tag attr)
+ (xmltrans:set-attr #f "__START__" 1)
+ #f)
+
+(xmltrans:set-default-start-handler lingua:default-start)
+
+(define (lingua:default-end tag attr text)
+ (if (xmltrans:attr attr "__START__")
+ (xmltrans:parse-error #f "Unhandled element " tag))
+ (set! tag-list
+ (cons
+ (xmltrans:make-tag tag attr text)
+ tag-list))
+ #f)
+
+(xmltrans:set-default-end-handler lingua:default-end)
+
+;;; Themes class list
+(define class-list '())
+
+;;; Current node
+(define current-node #f)
+
+(define-syntax node-get
+ (syntax-rules ()
+ ((node-get #:key node)
+ (vector-ref node 0))
+ ((node-get #:forms node)
+ (vector-ref node 1))
+ ((node-get #:xref node)
+ (vector-ref node 2))
+ ((node-get #:p-list node)
+ (vector-ref node 3))
+ ((node-get #:locus node)
+ (vector-ref node 4))))
+
+(define-macro (current-node-get key)
+ `(node-get ,key current-node))
+
+(define (mark-invalid)
+ (xmltrans:set-attr "NODE" "__INVALID__" 1))
+
+(define-syntax node-set
+ (syntax-rules ()
+ ((node-set #:key node k)
+ (cond
+ ((node-get #:key node) =>
+ (lambda (klist)
+ (vector-set! node 0 (append klist (list k)))))
+ (else
+ (vector-set! node 0 (list k)))))
+ ((node-set #:forms node f)
+ (begin
+ (cond
+ ((node-get #:forms node)
+ (xmltrans:parse-error #f "Forms already set")
+ (mark-invalid)))
+ (vector-set! node 1 f)))
+ ((node-set #:xref node x)
+ (vector-set! node 2 (append (node-get #:xref node) (list x))))
+ ((node-set #:p-article node p)
+ (vector-set! node 3 (append (node-get #:p-list node) (list p))))
+ ((node-set #:topic node t)
+ ;; FIXME: Scope of <T> is position-dependent relative to <P>
+ (let ((pl (node-get #:p-list node)))
+ (if (null? pl)
+ (current-article-set #:topic t)
+ (for-each
+ (lambda (p)
+ (p-article-set #:topic p t))
+ pl))))
+ ((node-set #:locus node loc)
+ (vector-set! node 4 loc))))
+
+
+(define-macro (current-node-set key val)
+ `(node-set ,key current-node ,val))
+
+(define p-article #f)
+
+(define-syntax p-article-get
+ (syntax-rules ()
+ ((p-article-get #:pos article)
+ (vector-ref article 0))
+ ((p-article-get #:article article)
+ (vector-ref article 1))
+ ((p-article-get #:aref article)
+ (vector-ref article 2))
+ ((p-article-get #:xref article)
+ (vector-ref article 3))
+ ((p-article-get #:topic article)
+ (vector-ref article 4))))
+
+(define-syntax p-set
+ (syntax-rules ()
+ ((p-set key article n val)
+ (vector-set! article n
+ (cond
+ ((p-article-get key article) =>
+ (lambda (alst)
+ (append alst (list val))))
+ (else
+ (list val)))))))
+
+(define-macro (current-article-get key)
+ `(p-article-get ,key p-article))
+
+(define-syntax p-article-set
+ (syntax-rules ()
+ ((p-article-set #:pos article val)
+ (vector-set! article 0 val))
+ ((p-article-set #:article article val)
+ (p-set #:article article 1 val))
+ ((p-article-set #:aref article val)
+ (p-set #:aref article 2 val))
+ ((p-article-set #:xref article val)
+ (p-set #:xref article 3 val))
+ ((p-article-set #:topic article val)
+ (p-set #:topic article 4 val))))
+
+;(define-macro (current-article-set key val)
+; `(p-article-set ,key p-article ,val))
+
+(define-syntax current-article-set
+ (syntax-rules ()
+ ((current-article-set key val)
+ (if p-article
+ (p-article-set key p-article val)
+ (for-each
+ (lambda (x)
+ (p-article-set key x val))
+ (current-node-get #:p-list))))))
+
+;;; Node list
+(define node-list '())
+
+(define (push-node node)
+ (if (>= debug-level 100)
+ (begin
+ (write node)
+ (newline)))
+ (set! node-list (cons node node-list)))
+
+;;; Topic stack
+(define topic-list '())
+
+(define (push-topic id)
+ (debug 10 "PUSH " id)
+ (set! topic-list (cons id topic-list)))
+
+(define (pop-topic)
+ (debug 10 "POP " (car topic-list))
+ (set! topic-list (cdr topic-list)))
+
+(define (append-topics)
+ (debug 10 "APPEND")
+ (for-each
+ (lambda (x)
+ (current-article-set #:topic x))
+ topic-list))
+
+;;;
+(define (expect-context tag . rest)
+ (let ((parent (xmltrans:get-parent #t)))
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (ctx-name)
+ (if (string=? ctx-name parent)
+ (return)))
+ rest)
+ (xmltrans:parse-error #f tag " used in wrong context")))))
+
+;;; Handle main element
+(xmltrans:start-tag
+ "DICT"
+ (tag attr)
+ (let ((lang (xmltrans:attr attr "LANG")))
+ (if (not lang)
+ (xmltrans:parse-error #f "Required attribute LANG not specified"))
+ (push-node lang))
+ #f)
+
+(xmltrans:end-tag
+ "DICT"
+ (tag attr text)
+ ;; FIXME
+ #f)
+
+
+(define expected-k #f)
+
+(define (k-expect)
+ (set! expected-k #f))
+
+;;;; NODE
+(xmltrans:start-tag
+ "NODE"
+ (tag attr)
+ (set! current-node (vector #f #f '() '() (xmltrans:get-input-location)))
+ (set! p-article #f)
+ (set! expected-k #t)
+ (xmltrans:expect-next-tag "K" k-expect)
+ #f)
+
+;;;; INCLUDE
+(xmltrans:end-tag
+ "INCLUDE"
+ (tag attr text)
+ (let ((fname (xmltrans:attr attr "FILE")))
+ (cond
+ ((not fname)
+ (xmltrans:parse-error #f "File name not specified"))
+ ((string? fname)
+ (xmltrans:parse-file fname))
+ (else
+ (xmltrans:parse-error #f "FILE must be a valid string"))))
+ #f)
+
+(xmltrans:end-tag
+ "NODE"
+ (tag attr text)
+ (cond
+ (p-article
+ (if (not (null? (current-node-get #:p-list)))
+ (xmltrans:parse-warning #f "Mixed definition style"))
+ (append-topics)
+ (current-node-set #:p-article p-article)
+ (set! p-article #f)))
+
+ (cond
+ ((xmltrans:attr attr "__INVALID__")) ; Ignore
+ ((not (current-node-get #:key))
+ (xmltrans:parse-error #f "K element is missing"))
+ ((and (null? (current-node-get #:p-list)) (null? (current-node-get #:xref)))
+ (xmltrans:parse-error #f "No articles and no cross references for the node"))
+ (else
+ (push-node current-node)))
+ #f)
+
+;;; Topic
+(xmltrans:start-tag
+ "T"
+ (tag attr)
+ (if (not (xmltrans:attr attr "ID"))
+ (letrec ((loc (xmltrans:get-input-location))
+ (proc (lambda ()
+ (xmltrans:parse-error
+ loc
+ "T element does not have ID attribute."))))
+ (xmltrans:expect-next-tag "ID" proc))
+ (if (not (or (xmltrans:parent? "NODE") (xmltrans:parent? "P")))
+ (push-topic (xmltrans:attr attr "ID"))))
+ #f)
+
+(xmltrans:end-tag
+ "T"
+ (tag attr text)
+ (cond
+ ((not (xmltrans:attr attr "ID"))) ;; Ignore. Warning has already been issued
+ ((xmltrans:parent? "P")
+ (current-article-set #:topic (xmltrans:attr attr "ID")))
+ ((xmltrans:parent? "NODE")
+ (current-node-set #:topic (xmltrans:attr attr "ID")))
+ (else
+ (pop-topic)))
+ #f)
+
+(xmltrans:end-tag
+ "ID"
+ (tag attr text)
+ (cond
+ ((xmltrans:parent? "P")) ;; OK
+ ((not (xmltrans:parent? "T"))
+ (xmltrans:parse-error #f "ID used in wrong context"))
+ ((xmltrans:attr #t "ID")
+ (xmltrans:parse-error #f "T element already has ID attribute"))
+ (else
+ (xmltrans:set-attr #t "ID" text)
+ (if (not (xmltrans:parent? "NODE" "T"))
+ (push-topic text))))
+ #f)
+
+;;; K - KEY
+(xmltrans:end-tag
+ "K"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (if (not expected-k)
+ (begin
+ (xmltrans:parse-error #f "K tag cannot be used here")
+ (mark-invalid)))
+ (xmltrans:expect-next-tag "K" k-expect)
+ (current-node-set #:key text)
+ #f)
+
+;;; F - FORMS
+(xmltrans:end-tag
+ "F"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (current-node-set #:forms text)
+ #f)
+
+;;; P - PartOfSpeach
+(define p-location #f)
+
+(xmltrans:start-tag
+ "P"
+ (tag attr)
+ (set! p-location (xmltrans:get-input-location))
+ (set! p-article (vector #f '() '() '() '()))
+ #f)
+
+(xmltrans:end-tag
+ "P"
+ (tag attr text)
+ (expect-context tag "NODE")
+ (cond
+ ((xmltrans:attr attr "ID") =>
+ (lambda (id)
+ (current-article-set #:pos (convert-pos id))
+ (append-topics)
+ (current-node-set #:p-article p-article)
+ (set! p-article #f)))
+ ((not (null? (current-article-get #:article))) ;; FIXME: other elements too
+ (xmltrans:parse-error p-location
+ "P element is missing ID, but has nested elements"))
+ (else
+ (set! p-article (vector (convert-pos text) '() '() '() '()))))
+ #f)
+
+;;; A - Antonym
+
+(xmltrans:end-tag
+ "A"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (current-article-set #:aref text)
+ #f)
+
+;;; X - Xref
+(xmltrans:end-tag
+ "X"
+ (tag attr text)
+ (expect-context tag "NODE" "P")
+ (let ((ref (cond
+ ((string-null? text)
+ (call-with-current-continuation
+ (lambda (return)
+ (do ((node node-list (cdr node)))
+ ((null? node) #f)
+ (if (not (null? (p-article-get
+ #:article
+ (car (node-get #:p-list (car node))))))
+ (return (car (node-get #:key (car node)))))))))
+ (else
+ text))))
+
+ (cond
+ ((not ref)
+ (xmltrans:parse-error #f "Empty reference")
+ (mark-invalid))
+ ((xmltrans:parent? "P")
+ (current-article-set #:xref ref))
+ ((xmltrans:parent? "NODE")
+ (current-node-set #:xref ref))))
+ #f)
+
+;;; M - MEANING
+(xmltrans:end-tag
+ "M"
+ (tag attr text)
+ (expect-context tag "P" "NODE")
+ (cond
+ ((and (not p-article) (null? (current-node-get #:p-list)))
+ (xmltrans:parse-error #f "P element not defined")
+ (mark-invalid))
+ (else
+ (current-article-set #:article text)))
+ #f)
+
+
+;;; CLASS
+(xmltrans:end-tag
+ "CLASS"
+ (tag attr text)
+ (expect-context tag "DICT")
+ (let ((lang (xmltrans:attr #t "LANG"))
+ (id (xmltrans:attr attr "ID"))
+ (descr (xmltrans:attr attr "DESCR"))
+ (tds (xmltrans:attr attr "__TDS__")))
+ (set! class-list (cons
+ (list id descr lang tds)
+ class-list)))
+ #f)
+
+;;; D
+(xmltrans:end-tag
+ "D"
+ (tag attr text)
+ (expect-context tag "CLASS")
+ ;; FIXME: Check for duplicates
+ (xmltrans:set-attr #t "DESCR" text)
+ #f)
+
+;;; TD
+(xmltrans:end-tag
+ "TD"
+ (tag attr text)
+ (expect-context tag "CLASS")
+ (xmltrans:set-attr #t "__TDS__"
+ (cons
+ text
+ (or (xmltrans:attr #t "__TDS__") '())))
+ #f)
+
+
+;;; Formatting elements
+(xmltrans:end-tag
+ "C"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\\\"comment\\\">"
+ text
+ "</span>"))
+
+(xmltrans:end-tag
+ "E"
+ (tag attr text)
+ (expect-context tag "M" "F")
+ (list
+ "<span class=\\\"expl\\\">"
+ text
+ "</span>"))
+
+
+(define (assert condition . rest)
+ (cond
+ (condition)
+ ((null? rest)
+ (throw 'misc-error 'assert
+ "Assertion failed"
+ (list) #f)
+ (exit 1))
+ (else
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline)))
+ (exit 1))))
+
+;FIXME: Removed SQL statements
+
+(define (format-dict-node node)
+; (format #t "NODE ~a~%" node)
+ (format #t "~a~%" (string-join (node-get #:key node) ", "))
+ (cond
+ ((node-get #:forms node) =>
+ (lambda (forms)
+ (format #t "~a~%" forms))))
+ (do ((i 1 (1+ i))
+ (p-list (node-get #:p-list node) (cdr p-list)))
+ ((null? p-list))
+ (let ((art (car p-list))
+ (j 1))
+ (if (p-article-get #:pos art)
+ (format #t "[~a] " (p-article-get #:pos art)))
+ (let ((p-art (p-article-get #:article art)))
+ (if (not (null? p-art))
+ (if (null? (cdr p-art))
+ (format #t "~a~%" (car p-art))
+ (for-each
+ (lambda (text)
+ (format #t "~a. ~a~%" j
+ (regexp-substitute/global #f
+ "<[a-zA-Z]+[^>]*>([^<]*)</[a-zA-Z]+>"
+ text
+ 'pre
+ (lambda (m)
+ (match:substring m 1))
+ 'post))
+ (set! j (1+ j)))
+ (p-article-get #:article art))))
+ (newline)
+ (let ((ant (p-article-get #:aref art)))
+ (cond ((not (null? ant))
+ (format #t "(ант: ~a)~%" (string-join ant ", ")))))
+ (let ((xref (p-article-get #:xref art)))
+ (cond ((not (null? xref))
+ (format #t "см. также: ~a~%" (string-join xref ", "))))))))
+
+ (let ((xref (node-get #:xref node)))
+ (if (not (null? xref))
+ (format #t "см. также: ~a~%" (string-join xref ", "))))
+ (newline) )
+
+(define b64_alpha
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
+(define (b64_encode val)
+ (if (= 0 val)
+ "A"
+ (string-trim
+ (string
+ (string-ref b64_alpha (ash (logand val #xc0000000) -30))
+ (string-ref b64_alpha (ash (logand val #x3f000000) -24))
+ (string-ref b64_alpha (ash (logand val #x00fc0000) -18))
+ (string-ref b64_alpha (ash (logand val #x0003f000) -12))
+ (string-ref b64_alpha (ash (logand val #x00000fc0) -6))
+ (string-ref b64_alpha (logand val #x0000003f)))
+ #\A)))
+
+
+;;;; Main
+(define dbname "greek")
+
+(define grammar
+ `((check (single-char #\c))
+ (name (single-char #\n) (value #t))
+ (cleanup)
+ (database (single-char #\d) (value #t))
+ (host (single-char #\h) (value #t))
+ (port (single-char #\P) (value #t))
+ (password (single-char #\p) (value #t))
+ (user (single-char #\u) (value #t))
+ (interface (value #t))
+ (verbose (single-char #\v))
+ (debug (value #t))
+ (preserve (value #t))
+ (help)))
+
+(define (usage)
+ (display "usage: dictrans OPTIONS FILES
+dictrans parses XML dictionary files in Ellinika dictionary format
+and stores them into SQL database.
+
+General options:
+
+ --check Only check input syntax and consistency. Do not
+ update the database. This means that dictrans will
+ not access the database at all, so some errors
+ (mistyped parts of speech and the like) may slip in
+ unnoticed.
+ --verbose Verbosely display SQL queries and their results.
+ --debug NUMBER Set debugging level (0 < NUMBER <= 100)
+
+SQL related options:
+
+ --interface STRING Select SQL interface to use. STRING may be
+ either \"mysql\" (the default) or \"postgres\".
+ --host HOST-OR-PATH Set name or IP address of the host running SQL
+ database, or path to the database I/O socket.
+ --database NAME Set name of the database to use.
+ --port NUMBER Set the SQL port number
+ --user USER-NAME Set SQL user name.
+ --password STRING Set the SQL password
+
+ --cleanup Clean up the database (delete all entries from all the
+ tables) before proceeding. Use this option with care.
+ --preserve Do not delete resolved entries from pending_links
+ table. This is intended mainly for debugging.
+
+Informational options:
+
+ --help Output this help info
+\n"))
+
+(define (cons? p)
+ (and (pair? p) (not (list? p))))
+
+(for-each
+ (lambda (x)
+ (cond
+ ((cons? x)
+ (case (car x)
+ ((check)
+ (set! compile-only #t))
+ ((verbose)
+ (set! verbose-option #t))
+ ((name)
+ (set! dbname (cdr x)))
+ ((debug)
+ (set! debug-level (string->number (cdr x))))
+ ((help)
+ (usage)
+ (exit 0))))
+ (else
+ (set! input-files (cdr x)))))
+ (getopt-long (command-line) grammar))
+
+
+(if (and (null? input-files) (not cleanup-option))
+ (begin
+ (display "Input files not specified\n" (current-error-port))
+ (exit 1)))
+
+(define (push-special-node key val)
+ (push-node
+ (let ((node (vector #f #f '() '() (xmltrans:get-input-location))))
+ (node-set #:key node key)
+ (node-set #:p-article node
+ (let ((art (vector #f '() '() '() '())))
+ (p-article-set #:article art val)
+ art))
+ node)))
+
+(push-special-node "00-database-info"
+ (string-join
+ (list
+ "Small greek-russian dictionary."
+ ""
+ "This file was converted from the original database on"
+ (strftime " %c" (localtime (current-time)))
+ ""
+ "The original data is available from:"
+ " http://git.gnu.org.ua/cgit/ellinika.git"
+ ""
+ "This dictionary is part of the Ellinika project (http://ellinika.gnu.org.ua)"
+ "Copyright (C) 2010 Sergey Poznyakoff."
+ "Distributed under the GPLv3 or later. See http://www.gnu.org/licenses/gpl.html") "\n"))
+
+(push-special-node "00-database-short"
+ "Греческо-русский словарь")
+(push-special-node "00-database-url"
+ "http://git.gnu.org.ua/cgit/ellinika.git")
+(push-special-node "00-database-utf8" "\n")
+(push-special-node "00-database-allchars" "\n")
+
+(for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)
+
+(if compile-only
+ (exit 0))
+
+(define (index<? a b)
+ (string<? (car a) (car b)))
+
+(let ((index (list)))
+ (with-output-to-file (string-append dbname ".dict")
+ (lambda ()
+ (let ((lang #f))
+ (for-each
+ (lambda (node)
+ (cond
+ ((string? node)
+ ; skip language marker
+ )
+ ((vector? node)
+ (let ((start (ftell (current-output-port))))
+ (format-dict-node node)
+ (let ((kwl (node-get #:key node)))
+ (if (and (= (length kwl) 1)
+ (string-prefix? "00-database" (car kwl)))
+ (set! start (+ start (string-length (car kwl)) 1))))
+ (let ((bstart (b64_encode start))
+ (blen (b64_encode (- (ftell (current-output-port)) start))))
+ (for-each
+ (lambda (key)
+ (set! index (cons
+ (list key bstart blen)
+ index))
+ (if (not (string-prefix? "00-database" key))
+ (let ((skey (elstr->string
+ (elstr-deaccent (string->elstr key)))))
+ (if (not (string=? key skey))
+ (set! index
+ (cons
+ (list skey bstart blen key)
+ index))))))
+ (node-get #:key node)))))
+ (else
+ (display "Unexpected node type!\n")
+ (exit 1))))
+ (reverse node-list)))))
+
+ (with-output-to-file (string-append dbname ".index")
+ (lambda ()
+ (for-each
+ (lambda (entry)
+ (format #t "~a\t~a\t~a"
+ (list-ref entry 0)
+ (list-ref entry 1)
+ (list-ref entry 2))
+ (if (= (length entry) 4)
+ (format #t "\t~a" (list-ref entry 3)))
+ (newline))
+ (sort-list index index<?)))))
+
+
+(exit 0)
+
+;;;; Local variables:
+;;;; mode: Scheme
+;;;; buffer-file-coding-system: utf-8
+;;;; End:
+
+

Return to:

Send suggestions and report system problems to the System administrator.