aboutsummaryrefslogtreecommitdiff
path: root/scm/neatrans.scm
diff options
context:
space:
mode:
Diffstat (limited to 'scm/neatrans.scm')
-rw-r--r--scm/neatrans.scm478
1 files changed, 478 insertions, 0 deletions
diff --git a/scm/neatrans.scm b/scm/neatrans.scm
new file mode 100644
index 0000000..1797ef5
--- /dev/null
+++ b/scm/neatrans.scm
@@ -0,0 +1,478 @@
+#! =GUILE_BINDIR=/guile -s
+=AUTOGENERATED=
+!#
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2006 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 2 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with Ellinika; if not, write to the Free Software
+;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;;;
+
+;;;; Dictionary structure
+;;;; Internal representation:
+
+;;;; External representation (XML):
+;;;; <NODE>
+;;;; [<L>string</L>] ; Language
+;;;; [<D>string</D>] ; Date
+;;;; <H>string</H> ; Header
+;;;; <T>string</H> ; Text
+;;;; </NODE>
+
+;;; Tailor this statement to your needs if necessary.
+;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path))
+(set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path))
+(use-modules (xmltools xmltrans)
+ (gamma sql)
+ (ellinika xlat)
+ (ice-9 getopt-long))
+
+(use-syntax (ice-9 syncase))
+
+(define compile-only #f)
+(define cleanup-option #f)
+(define force-option #f)
+(define sql-iface "mysql")
+(define sql-host "localhost")
+(define sql-database "ellinika")
+(define sql-port 3306)
+(define sql-password #f)
+(define sql-username #f)
+(define verbose-option #f)
+(define debug-level 0)
+(define input-files '())
+
+(define sysconf-dir "=SYSCONFDIR=")
+(define config-file-name "ellinika.conf")
+
+(define dict-cgi-path #f)
+
+;;; Load the site defaults
+(let ((rc-file (string-append sysconf-dir "/" config-file-name)))
+ (if (file-exists? rc-file)
+ (load rc-file)))
+
+(define (debug level . rest)
+ (if (>= debug-level level)
+ (begin
+ (for-each
+ (lambda (x)
+ (display x))
+ rest)
+ (newline))))
+
+
+;;;; XML definitions
+
+;;; Set the default handler
+(define tag-list '())
+
+(define (lingua:default-start tag attr)
+ (xmltrans:set-attr #f "__START__" 1)
+ #f)
+
+(xmltrans:set-default-start-handler lingua:default-start)
+
+(define (lingua:default-end tag attr text)
+ (if (xmltrans:attr attr "__START__")
+ (xmltrans:parse-error #f "Unhandled element " tag))
+ (set! tag-list
+ (cons
+ (xmltrans:make-tag tag attr text)
+ tag-list))
+ #f)
+
+(xmltrans:set-default-end-handler lingua:default-end)
+
+;;; Themes class list
+(define class-list '())
+
+;;; Current node
+(define current-node #f)
+
+(define-syntax node-get
+ (syntax-rules ()
+ ((node-get #:lang node)
+ (vector-ref node 0))
+ ((node-get #:date node)
+ (vector-ref node 1))
+ ((node-get #:header node)
+ (vector-ref node 2))
+ ((node-get #:text node)
+ (vector-ref node 3))
+ ((node-get #:id node)
+ (vector-ref node 4))))
+
+(define-macro (current-node-get key)
+ `(node-get ,key current-node))
+
+(define (mark-invalid)
+ (xmltrans:set-attr "NODE" "__INVALID__" 1))
+
+(define-syntax node-set
+ (syntax-rules ()
+ ((node-set #:lang node val)
+ (vector-set! node 0 val))
+ ((node-set #:date node val)
+ (vector-set! node 1 val))
+ ((node-set #:header node val)
+ (vector-set! node 2 val))
+ ((node-set #:text node val)
+ (vector-set! node 3 val))
+ ((node-set #:id node val)
+ (vector-set! node 4 val))))
+
+(define-macro (current-node-set key val)
+ `(node-set ,key current-node ,val))
+
+;;; Node list
+(define node-list '())
+
+(define (push-node node)
+ (if (>= debug-level 100)
+ (begin
+ (display "PUSH ")
+ (write node)
+ (newline)))
+ (set! node-list (cons node node-list)))
+
+;;;
+(define (expect-context tag . rest)
+ (let ((parent (xmltrans:get-parent #t)))
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (ctx-name)
+ (if (string=? ctx-name parent)
+ (return)))
+ rest)
+ (xmltrans:parse-error #f tag " used in wrong context")))))
+
+;;; Default language
+(define default-language #f)
+
+;;; Handle main element
+(xmltrans:start-tag
+ "NEA"
+ (tag attr)
+ (let ((lang (xmltrans:attr attr "LANG")))
+ (if (not lang)
+ (xmltrans:parse-error #f "Required attribute LANG not specified")
+ (set! default-language lang)))
+ #f)
+
+;;;; NODE
+(xmltrans:start-tag
+ "NODE"
+ (tag attr)
+ (set! current-node (vector #f #f #f #f #f))
+ #f)
+
+(xmltrans:end-tag
+ "NODE"
+ (tag attr text)
+ (let ((valid (not (xmltrans:attr attr "__INVALID__"))))
+ (cond
+ (valid
+
+ (cond
+ ((not (current-node-get #:lang))
+ (cond ((not default-language)
+ (xmltrans:parse-error #f "<L> element is missing and NEA.LANG was not set")
+ (set! valid #f))
+ (else
+ (current-node-set #:lang default-language)))))
+ (cond
+ ((not (current-node-get #:header))
+ (xmltrans:parse-error #f "missing header (<H> element)")
+ (set! valid #f)))
+ (cond
+ ((not (current-node-get #:text))
+ (xmltrans:parse-error #f "missing text (<T> element)")
+ (set! valid #f)))))
+
+ (if valid
+ (push-node current-node))
+ #f))
+
+(xmltrans:end-tag
+ "L"
+ (tag attr text)
+ (cond ((current-node-get #:lang)
+ (xmltrans:parse-error #f "Language was already set")
+ (mark-invalid)))
+ (current-node-set #:lang text)
+ #f)
+
+(xmltrans:end-tag
+ "D"
+ (tag attr text)
+ (cond ((current-node-get #:date)
+ (xmltrans:parse-error #f "Date was already set")
+ (mark-invalid)))
+ (current-node-set #:date text)
+ #f)
+
+(xmltrans:end-tag
+ "H"
+ (tag attr text)
+ (cond ((current-node-get #:header)
+ (xmltrans:parse-error #f "Header was already set")
+ (mark-invalid)))
+ (current-node-set #:header text)
+ #f)
+
+(xmltrans:end-tag
+ "T"
+ (tag attr text)
+ (cond ((current-node-get #:text)
+ (xmltrans:parse-error #f "Article text was already set")
+ (mark-invalid)))
+ (current-node-set #:text text)
+ #f)
+
+(xmltrans:end-tag
+ "I"
+ (tag attr text)
+ (cond ((current-node-get #:id)
+ (xmltrans:parse-error #f "Article id was already set")
+ (mark-invalid)))
+ (current-node-set #:id text)
+ #f)
+
+;;; DB functions
+(define (escape-string str)
+ (let loop ((lst '())
+ (str str))
+ (cond
+ ((string-index str #\") =>
+ (lambda (pos)
+ (loop (append lst (list (substring str 0 pos)
+ "\\\""))
+ (substring str (1+ pos)))))
+ (else
+ (apply string-append (append lst (list str)))))))
+
+(define (run-query conn . rest)
+ (let ((q (apply string-append rest)))
+ (if verbose-option
+ (format #t "QUERY: ~S\n" q))
+ (let ((res (sql-query conn q)))
+ (if verbose-option
+ (format #t "RESULT: ~S\n" res))
+ res)))
+
+(define (query-number conn q)
+ (let ((res (run-query conn q)))
+ (if (null? res)
+ #f
+ (string->number (caar res)))))
+
+
+;;; DB functions (nea-specific)
+
+(define error-count 0)
+
+(define (db-cleanup conn)
+ (run-query conn "DELETE FROM news")
+ (run-query conn "DELETE FROM newsart"))
+
+(define (db-insert-node conn node)
+ (let ((res (run-query
+ conn
+ "SELECT n.ident,n.date,a.header FROM news n, newsart a WHERE "
+ "a.header=\""
+ (escape-string (node-get #:header node))
+ "\""
+ " AND a.lang='" (node-get #:lang node) "'"
+ " AND a.ident=n.ident")))
+ (cond
+ ((and (not force-option) (not (null? res)))
+ (format (current-error-port) "Found entry with that header: ~A~%"
+ (car res))
+ (format (current-error-port) "Use --force to override~%")
+ (set! error-count (1+ error-count)))
+ (else
+ (debug 2 "INSERTING NODE " node)
+ (let ((date (if (node-get #:date node)
+ (string-append "'" (node-get #:date node) "'")
+ "now()")))
+ (run-query conn "INSERT INTO news (date) VALUES(" date ")")
+ (let ((id (query-number conn "SELECT LAST_INSERT_ID()")))
+ (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES ("
+ (number->string id) ",'" (node-get #:lang node) "',\""
+ (escape-string (node-get #:header node)) "\",\""
+ (escape-string (node-get #:text node)) "\")")))))))
+
+(define (db-update-translation conn node)
+ (let ((res (run-query conn "SELECT a.lang FROM news n, newsart a WHERE "
+ "n.ident=" (node-get #:id node)
+ " AND n.ident=a.ident")))
+ (cond
+ ((null? res)
+ (format (current-error-port) "Found no record with ID ~A~%"
+ (node-get #:id node))
+ (set! error-count (1+ error-count)))
+ ((assoc (node-get #:lang node) res)
+ (format (current-error-port) "Article ~A in language ~A already exists~%"
+ (node-get #:id node)
+ (node-get #:lang node))
+ (set! error-count (1+ error-count)))
+ (else
+ (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES ("
+ (node-get #:id node) ",'" (node-get #:lang node) "',\""
+ (escape-string (node-get #:header node)) "\",\""
+ (escape-string (node-get #:text node)) "\")")))))
+
+
+;;;; Main
+(define grammar
+ `((check (single-char #\c))
+ (cleanup)
+ (database (single-char #\d) (value #t))
+ (host (single-char #\h) (value #t))
+ (port (single-char #\P) (value #t))
+ (password (single-char #\p) (value #t))
+ (user (single-char #\u) (value #t))
+ (force)
+ (interface (value #t))
+ (verbose (single-char #\v))
+ (debug (value #t))
+ (help)))
+
+(define (usage)
+ (display "usage: neatrans OPTIONS FILES
+dictrans parses XML news files in Ellinika news format
+and stores them into SQL database.
+
+General options:
+
+ --check Only check input syntax and consistency. Do not
+ update the database. This means that dictrans will
+ not access the database at all, so some errors
+ (mistyped parts of speech and the like) may slip in
+ unnoticed.
+ --verbose Verbosely display SQL queries and their results.
+ --debug NUMBER Set debugging level (0 < NUMBER <= 100)
+
+SQL related options:
+
+ --interface STRING Select SQL interface to use. STRING may be
+ either \"mysql\" (the default) or \"postgres\".
+ --host HOST-OR-PATH Set name or IP address of the host running SQL
+ database, or path to the database I/O socket.
+ --database NAME Set name of the database to use.
+ --port NUMBER Set the SQL port number
+ --user USER-NAME Set SQL user name.
+ --password STRING Set the SQL password
+
+ --cleanup Clean up the database (delete all entries from all the
+ tables) before proceeding. Use this option with care.
+
+Informational options:
+
+ --help Output this help info
+\n"))
+
+(define (cons? p)
+ (and (pair? p) (not (list? p))))
+
+(for-each
+ (lambda (x)
+ (cond
+ ((cons? x)
+ (case (car x)
+ ((check)
+ (set! compile-only #t))
+ ((cleanup)
+ (set! cleanup-option #t))
+ ((database)
+ (set! sql-database (cdr x)))
+ ((host)
+ (set! sql-host (cdr x)))
+ ((port)
+ (set! sql-port (string->number (cdr x))))
+ ((password)
+ (set! sql-password (cdr x)))
+ ((user)
+ (set! sql-username (cdr x)))
+ ((interface)
+ (set! sql-iface (cdr x)))
+ ((verbose)
+ (set! verbose-option #t))
+ ((preserve)
+ (set! preserve-option #t))
+ ((debug)
+ (set! debug-level (string->number (cdr x))))
+ ((force)
+ (set! force-option #t))
+ ((help)
+ (usage)
+ (exit 0))))
+ (else
+ (set! input-files (cdr x)))))
+ (getopt-long (command-line) grammar))
+
+(if (null? input-files)
+ (begin
+ (display "Input files not specified\n" (current-error-port))
+ (exit 1)))
+
+(for-each
+ (lambda (x)
+ (if (not (xmltrans:parse-file x))
+ (exit 1)))
+ input-files)
+
+(if compile-only
+ (exit 0))
+
+(let ((conn (sql-connect sql-iface sql-host sql-port sql-database
+ sql-username sql-password)))
+ (if (not conn)
+ (begin
+ (display "Cannot connect to the database\n" (current-error-port))
+ (exit 1)))
+
+ (run-query conn "SET NAMES utf8")
+
+ (if cleanup-option
+ (db-cleanup conn))
+
+ (for-each
+ (lambda (node)
+ (if (node-get #:id node)
+ (db-update-translation conn node)
+ (db-insert-node conn node)))
+ (reverse node-list))
+
+ (sql-connect-close conn))
+
+(exit (if (= error-count 0) 0 1))
+
+;;;; Local variables:
+;;;; mode: Scheme
+;;;; buffer-file-coding-system: utf-8
+;;;; End:
+
+
+
+
+
+
+
+
+
+
+

Return to:

Send suggestions and report system problems to the System administrator.