diff options
-rw-r--r-- | scm/neatrans.scm | 478 |
1 files changed, 478 insertions, 0 deletions
diff --git a/scm/neatrans.scm b/scm/neatrans.scm new file mode 100644 index 0000000..1797ef5 --- /dev/null +++ b/scm/neatrans.scm @@ -0,0 +1,478 @@ +#! =GUILE_BINDIR=/guile -s +=AUTOGENERATED= +!# +;;;; This file is part of Ellinika +;;;; Copyright (C) 2006 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 2 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with Ellinika; if not, write to the Free Software +;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +;;;; + +;;;; Dictionary structure +;;;; Internal representation: + +;;;; External representation (XML): +;;;; <NODE> +;;;; [<L>string</L>] ; Language +;;;; [<D>string</D>] ; Date +;;;; <H>string</H> ; Header +;;;; <T>string</H> ; Text +;;;; </NODE> + +;;; Tailor this statement to your needs if necessary. +;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path)) +(set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path)) +(use-modules (xmltools xmltrans) + (gamma sql) + (ellinika xlat) + (ice-9 getopt-long)) + +(use-syntax (ice-9 syncase)) + +(define compile-only #f) +(define cleanup-option #f) +(define force-option #f) +(define sql-iface "mysql") +(define sql-host "localhost") +(define sql-database "ellinika") +(define sql-port 3306) +(define sql-password #f) +(define sql-username #f) +(define verbose-option #f) +(define debug-level 0) +(define input-files '()) + +(define sysconf-dir "=SYSCONFDIR=") +(define config-file-name "ellinika.conf") + +(define dict-cgi-path #f) + +;;; Load the site defaults +(let ((rc-file (string-append sysconf-dir "/" config-file-name))) + (if (file-exists? rc-file) + (load rc-file))) + +(define (debug level . rest) + (if (>= debug-level level) + (begin + (for-each + (lambda (x) + (display x)) + rest) + (newline)))) + + +;;;; XML definitions + +;;; Set the default handler +(define tag-list '()) + +(define (lingua:default-start tag attr) + (xmltrans:set-attr #f "__START__" 1) + #f) + +(xmltrans:set-default-start-handler lingua:default-start) + +(define (lingua:default-end tag attr text) + (if (xmltrans:attr attr "__START__") + (xmltrans:parse-error #f "Unhandled element " tag)) + (set! tag-list + (cons + (xmltrans:make-tag tag attr text) + tag-list)) + #f) + +(xmltrans:set-default-end-handler lingua:default-end) + +;;; Themes class list +(define class-list '()) + +;;; Current node +(define current-node #f) + +(define-syntax node-get + (syntax-rules () + ((node-get #:lang node) + (vector-ref node 0)) + ((node-get #:date node) + (vector-ref node 1)) + ((node-get #:header node) + (vector-ref node 2)) + ((node-get #:text node) + (vector-ref node 3)) + ((node-get #:id node) + (vector-ref node 4)))) + +(define-macro (current-node-get key) + `(node-get ,key current-node)) + +(define (mark-invalid) + (xmltrans:set-attr "NODE" "__INVALID__" 1)) + +(define-syntax node-set + (syntax-rules () + ((node-set #:lang node val) + (vector-set! node 0 val)) + ((node-set #:date node val) + (vector-set! node 1 val)) + ((node-set #:header node val) + (vector-set! node 2 val)) + ((node-set #:text node val) + (vector-set! node 3 val)) + ((node-set #:id node val) + (vector-set! node 4 val)))) + +(define-macro (current-node-set key val) + `(node-set ,key current-node ,val)) + +;;; Node list +(define node-list '()) + +(define (push-node node) + (if (>= debug-level 100) + (begin + (display "PUSH ") + (write node) + (newline))) + (set! node-list (cons node node-list))) + +;;; +(define (expect-context tag . rest) + (let ((parent (xmltrans:get-parent #t))) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (ctx-name) + (if (string=? ctx-name parent) + (return))) + rest) + (xmltrans:parse-error #f tag " used in wrong context"))))) + +;;; Default language +(define default-language #f) + +;;; Handle main element +(xmltrans:start-tag + "NEA" + (tag attr) + (let ((lang (xmltrans:attr attr "LANG"))) + (if (not lang) + (xmltrans:parse-error #f "Required attribute LANG not specified") + (set! default-language lang))) + #f) + +;;;; NODE +(xmltrans:start-tag + "NODE" + (tag attr) + (set! current-node (vector #f #f #f #f #f)) + #f) + +(xmltrans:end-tag + "NODE" + (tag attr text) + (let ((valid (not (xmltrans:attr attr "__INVALID__")))) + (cond + (valid + + (cond + ((not (current-node-get #:lang)) + (cond ((not default-language) + (xmltrans:parse-error #f "<L> element is missing and NEA.LANG was not set") + (set! valid #f)) + (else + (current-node-set #:lang default-language))))) + (cond + ((not (current-node-get #:header)) + (xmltrans:parse-error #f "missing header (<H> element)") + (set! valid #f))) + (cond + ((not (current-node-get #:text)) + (xmltrans:parse-error #f "missing text (<T> element)") + (set! valid #f))))) + + (if valid + (push-node current-node)) + #f)) + +(xmltrans:end-tag + "L" + (tag attr text) + (cond ((current-node-get #:lang) + (xmltrans:parse-error #f "Language was already set") + (mark-invalid))) + (current-node-set #:lang text) + #f) + +(xmltrans:end-tag + "D" + (tag attr text) + (cond ((current-node-get #:date) + (xmltrans:parse-error #f "Date was already set") + (mark-invalid))) + (current-node-set #:date text) + #f) + +(xmltrans:end-tag + "H" + (tag attr text) + (cond ((current-node-get #:header) + (xmltrans:parse-error #f "Header was already set") + (mark-invalid))) + (current-node-set #:header text) + #f) + +(xmltrans:end-tag + "T" + (tag attr text) + (cond ((current-node-get #:text) + (xmltrans:parse-error #f "Article text was already set") + (mark-invalid))) + (current-node-set #:text text) + #f) + +(xmltrans:end-tag + "I" + (tag attr text) + (cond ((current-node-get #:id) + (xmltrans:parse-error #f "Article id was already set") + (mark-invalid))) + (current-node-set #:id text) + #f) + +;;; DB functions +(define (escape-string str) + (let loop ((lst '()) + (str str)) + (cond + ((string-index str #\") => + (lambda (pos) + (loop (append lst (list (substring str 0 pos) + "\\\"")) + (substring str (1+ pos))))) + (else + (apply string-append (append lst (list str))))))) + +(define (run-query conn . rest) + (let ((q (apply string-append rest))) + (if verbose-option + (format #t "QUERY: ~S\n" q)) + (let ((res (sql-query conn q))) + (if verbose-option + (format #t "RESULT: ~S\n" res)) + res))) + +(define (query-number conn q) + (let ((res (run-query conn q))) + (if (null? res) + #f + (string->number (caar res))))) + + +;;; DB functions (nea-specific) + +(define error-count 0) + +(define (db-cleanup conn) + (run-query conn "DELETE FROM news") + (run-query conn "DELETE FROM newsart")) + +(define (db-insert-node conn node) + (let ((res (run-query + conn + "SELECT n.ident,n.date,a.header FROM news n, newsart a WHERE " + "a.header=\"" + (escape-string (node-get #:header node)) + "\"" + " AND a.lang='" (node-get #:lang node) "'" + " AND a.ident=n.ident"))) + (cond + ((and (not force-option) (not (null? res))) + (format (current-error-port) "Found entry with that header: ~A~%" + (car res)) + (format (current-error-port) "Use --force to override~%") + (set! error-count (1+ error-count))) + (else + (debug 2 "INSERTING NODE " node) + (let ((date (if (node-get #:date node) + (string-append "'" (node-get #:date node) "'") + "now()"))) + (run-query conn "INSERT INTO news (date) VALUES(" date ")") + (let ((id (query-number conn "SELECT LAST_INSERT_ID()"))) + (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES (" + (number->string id) ",'" (node-get #:lang node) "',\"" + (escape-string (node-get #:header node)) "\",\"" + (escape-string (node-get #:text node)) "\")"))))))) + +(define (db-update-translation conn node) + (let ((res (run-query conn "SELECT a.lang FROM news n, newsart a WHERE " + "n.ident=" (node-get #:id node) + " AND n.ident=a.ident"))) + (cond + ((null? res) + (format (current-error-port) "Found no record with ID ~A~%" + (node-get #:id node)) + (set! error-count (1+ error-count))) + ((assoc (node-get #:lang node) res) + (format (current-error-port) "Article ~A in language ~A already exists~%" + (node-get #:id node) + (node-get #:lang node)) + (set! error-count (1+ error-count))) + (else + (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES (" + (node-get #:id node) ",'" (node-get #:lang node) "',\"" + (escape-string (node-get #:header node)) "\",\"" + (escape-string (node-get #:text node)) "\")"))))) + + +;;;; Main +(define grammar + `((check (single-char #\c)) + (cleanup) + (database (single-char #\d) (value #t)) + (host (single-char #\h) (value #t)) + (port (single-char #\P) (value #t)) + (password (single-char #\p) (value #t)) + (user (single-char #\u) (value #t)) + (force) + (interface (value #t)) + (verbose (single-char #\v)) + (debug (value #t)) + (help))) + +(define (usage) + (display "usage: neatrans OPTIONS FILES +dictrans parses XML news files in Ellinika news format +and stores them into SQL database. + +General options: + + --check Only check input syntax and consistency. Do not + update the database. This means that dictrans will + not access the database at all, so some errors + (mistyped parts of speech and the like) may slip in + unnoticed. + --verbose Verbosely display SQL queries and their results. + --debug NUMBER Set debugging level (0 < NUMBER <= 100) + +SQL related options: + + --interface STRING Select SQL interface to use. STRING may be + either \"mysql\" (the default) or \"postgres\". + --host HOST-OR-PATH Set name or IP address of the host running SQL + database, or path to the database I/O socket. + --database NAME Set name of the database to use. + --port NUMBER Set the SQL port number + --user USER-NAME Set SQL user name. + --password STRING Set the SQL password + + --cleanup Clean up the database (delete all entries from all the + tables) before proceeding. Use this option with care. + +Informational options: + + --help Output this help info +\n")) + +(define (cons? p) + (and (pair? p) (not (list? p)))) + +(for-each + (lambda (x) + (cond + ((cons? x) + (case (car x) + ((check) + (set! compile-only #t)) + ((cleanup) + (set! cleanup-option #t)) + ((database) + (set! sql-database (cdr x))) + ((host) + (set! sql-host (cdr x))) + ((port) + (set! sql-port (string->number (cdr x)))) + ((password) + (set! sql-password (cdr x))) + ((user) + (set! sql-username (cdr x))) + ((interface) + (set! sql-iface (cdr x))) + ((verbose) + (set! verbose-option #t)) + ((preserve) + (set! preserve-option #t)) + ((debug) + (set! debug-level (string->number (cdr x)))) + ((force) + (set! force-option #t)) + ((help) + (usage) + (exit 0)))) + (else + (set! input-files (cdr x))))) + (getopt-long (command-line) grammar)) + +(if (null? input-files) + (begin + (display "Input files not specified\n" (current-error-port)) + (exit 1))) + +(for-each + (lambda (x) + (if (not (xmltrans:parse-file x)) + (exit 1))) + input-files) + +(if compile-only + (exit 0)) + +(let ((conn (sql-connect sql-iface sql-host sql-port sql-database + sql-username sql-password))) + (if (not conn) + (begin + (display "Cannot connect to the database\n" (current-error-port)) + (exit 1))) + + (run-query conn "SET NAMES utf8") + + (if cleanup-option + (db-cleanup conn)) + + (for-each + (lambda (node) + (if (node-get #:id node) + (db-update-translation conn node) + (db-insert-node conn node))) + (reverse node-list)) + + (sql-connect-close conn)) + +(exit (if (= error-count 0) 0 1)) + +;;;; Local variables: +;;;; mode: Scheme +;;;; buffer-file-coding-system: utf-8 +;;;; End: + + + + + + + + + + + |