#! =GUILE_BINDIR=/guile -s =AUTOGENERATED= !# ;;;; This file is part of Ellinika ;;;; Copyright (C) 2006, 2007, 2010 Sergey Poznyakoff ;;;; ;;;; Ellinika is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; Ellinika is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program. If not, see . ;;;; ;;;; Dictionary structure ;;;; Internal representation: ;;;; External representation (XML): ;;;; ;;;; [string] ; Language ;;;; [string] ; Date ;;;; string ; Header ;;;; string ; Text ;;;; ;;; Tailor this statement to your needs if necessary. ;=GUILE_COMMENT=;(set! %load-path (cons "=GUILE_SITE=" %load-path)) (set! %load-path (cons "/home/gray/linguae/ellinika/" %load-path)) (use-modules (xmltools xmltrans) (gamma sql) (ellinika xlat) (ice-9 getopt-long)) (use-syntax (ice-9 syncase)) (define compile-only #f) (define cleanup-option #f) (define force-option #f) (define verbose-option #f) (define debug-level 0) (define input-files '()) (define sysconf-dir "=SYSCONFDIR=") (define config-file-name "ellinika.conf") (define dict-cgi-path #f) (define ellinika-sql-connection '()) (define (add-conn-param key val) (set! ellinika-sql-connection (cons (cons key val) ellinika-sql-connection))) ;;; Load the site defaults (let ((rc-file (string-append sysconf-dir "/" config-file-name))) (if (file-exists? rc-file) (load rc-file))) (define (debug level . rest) (if (>= debug-level level) (begin (for-each (lambda (x) (display x)) rest) (newline)))) ;;;; XML definitions ;;; Set the default handler (define tag-list '()) (define (lingua:default-start tag attr) (xmltrans:set-attr #f "__START__" 1) #f) (xmltrans:set-default-start-handler lingua:default-start) (define (lingua:default-end tag attr text) (if (xmltrans:attr attr "__START__") (xmltrans:parse-error #f "Unhandled element " tag)) (set! tag-list (cons (xmltrans:make-tag tag attr text) tag-list)) #f) (xmltrans:set-default-end-handler lingua:default-end) ;;; Themes class list (define class-list '()) ;;; Current node (define current-node #f) (define-syntax node-get (syntax-rules () ((node-get #:lang node) (vector-ref node 0)) ((node-get #:date node) (vector-ref node 1)) ((node-get #:header node) (vector-ref node 2)) ((node-get #:text node) (vector-ref node 3)) ((node-get #:id node) (vector-ref node 4)))) (define-macro (current-node-get key) `(node-get ,key current-node)) (define (mark-invalid) (xmltrans:set-attr "NODE" "__INVALID__" 1)) (define-syntax node-set (syntax-rules () ((node-set #:lang node val) (vector-set! node 0 val)) ((node-set #:date node val) (vector-set! node 1 val)) ((node-set #:header node val) (vector-set! node 2 val)) ((node-set #:text node val) (vector-set! node 3 val)) ((node-set #:id node val) (vector-set! node 4 val)))) (define-macro (current-node-set key val) `(node-set ,key current-node ,val)) ;;; Node list (define node-list '()) (define (push-node node) (if (>= debug-level 100) (begin (display "PUSH ") (write node) (newline))) (set! node-list (cons node node-list))) ;;; (define (expect-context tag . rest) (let ((parent (xmltrans:get-parent #t))) (call-with-current-continuation (lambda (return) (for-each (lambda (ctx-name) (if (string=? ctx-name parent) (return))) rest) (xmltrans:parse-error #f tag " used in wrong context"))))) ;;; Default language (define default-language #f) ;;; Handle main element (xmltrans:start-tag "NEA" (tag attr) (let ((lang (xmltrans:attr attr "LANG"))) (if (not lang) (xmltrans:parse-error #f "Required attribute LANG not specified") (set! default-language lang))) #f) ;;;; NODE (xmltrans:start-tag "NODE" (tag attr) (set! current-node (vector #f #f #f #f #f)) #f) (xmltrans:end-tag "NODE" (tag attr text) (let ((valid (not (xmltrans:attr attr "__INVALID__")))) (cond (valid (cond ((not (current-node-get #:lang)) (cond ((not default-language) (xmltrans:parse-error #f " element is missing and NEA.LANG was not set") (set! valid #f)) (else (current-node-set #:lang default-language))))) (cond ((not (current-node-get #:header)) (xmltrans:parse-error #f "missing header ( element)") (set! valid #f))) (cond ((not (current-node-get #:text)) (xmltrans:parse-error #f "missing text ( element)") (set! valid #f))))) (if valid (push-node current-node)) #f)) (xmltrans:end-tag "L" (tag attr text) (cond ((current-node-get #:lang) (xmltrans:parse-error #f "Language was already set") (mark-invalid))) (current-node-set #:lang text) #f) (xmltrans:end-tag "D" (tag attr text) (cond ((current-node-get #:date) (xmltrans:parse-error #f "Date was already set") (mark-invalid))) (current-node-set #:date text) #f) (xmltrans:end-tag "H" (tag attr text) (cond ((current-node-get #:header) (xmltrans:parse-error #f "Header was already set") (mark-invalid))) (current-node-set #:header text) #f) (xmltrans:end-tag "T" (tag attr text) (cond ((current-node-get #:text) (xmltrans:parse-error #f "Article text was already set") (mark-invalid))) (current-node-set #:text text) #f) (xmltrans:end-tag "I" (tag attr text) (cond ((current-node-get #:id) (xmltrans:parse-error #f "Article id was already set") (mark-invalid))) (current-node-set #:id text) #f) ;;; DB functions (define (escape-string str) (let loop ((lst '()) (str str)) (cond ((string-index str #\") => (lambda (pos) (loop (append lst (list (substring str 0 pos) "\\\"")) (substring str (1+ pos))))) (else (apply string-append (append lst (list str))))))) (define (run-query conn . rest) (let ((q (apply string-append rest))) (if verbose-option (format #t "QUERY: ~S\n" q)) (let ((res (sql-query conn q))) (if verbose-option (format #t "RESULT: ~S\n" res)) res))) (define (query-number conn q) (let ((res (run-query conn q))) (if (null? res) #f (string->number (caar res))))) ;;; DB functions (nea-specific) (define error-count 0) (define (db-cleanup conn) (run-query conn "DELETE FROM news") (run-query conn "DELETE FROM newsart")) (define (db-insert-node conn node) (let ((res (run-query conn "SELECT n.ident,n.date,a.header FROM news n, newsart a WHERE " "a.header=\"" (escape-string (node-get #:header node)) "\"" " AND a.lang='" (node-get #:lang node) "'" " AND a.ident=n.ident"))) (cond ((and (not force-option) (not (null? res))) (format (current-error-port) "Found entry with that header: ~A~%" (car res)) (format (current-error-port) "Use --force to override~%") (set! error-count (1+ error-count))) (else (debug 2 "INSERTING NODE " node) (let ((date (if (node-get #:date node) (string-append "'" (node-get #:date node) "'") "now()"))) (run-query conn "INSERT INTO news (date) VALUES(" date ")") (let ((id (query-number conn "SELECT LAST_INSERT_ID()"))) (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES (" (number->string id) ",'" (node-get #:lang node) "',\"" (escape-string (node-get #:header node)) "\",\"" (escape-string (node-get #:text node)) "\")"))))))) (define (db-update-translation conn node) (let ((res (run-query conn "SELECT a.lang FROM news n, newsart a WHERE " "n.ident=" (node-get #:id node) " AND n.ident=a.ident"))) (cond ((null? res) (format (current-error-port) "Found no record with ID ~A~%" (node-get #:id node)) (set! error-count (1+ error-count))) ((assoc (node-get #:lang node) res) (format (current-error-port) "Article ~A in language ~A already exists~%" (node-get #:id node) (node-get #:lang node)) (set! error-count (1+ error-count))) (else (run-query conn "INSERT INTO newsart (ident,lang,header,text) VALUES (" (node-get #:id node) ",'" (node-get #:lang node) "',\"" (escape-string (node-get #:header node)) "\",\"" (escape-string (node-get #:text node)) "\")"))))) ;;;; Main (define grammar `((check (single-char #\c)) (cleanup) (database (single-char #\d) (value #t)) (host (single-char #\h) (value #t)) (port (single-char #\P) (value #t)) (password (single-char #\p) (value #t)) (user (single-char #\u) (value #t)) (force) (interface (value #t)) (verbose (single-char #\v)) (debug (value #t)) (help))) (define (usage) (display "usage: neatrans OPTIONS FILES dictrans parses XML news files in Ellinika news format and stores them into SQL database. General options: --check Only check input syntax and consistency. Do not update the database. This means that dictrans will not access the database at all, so some errors (mistyped parts of speech and the like) may slip in unnoticed. --verbose Verbosely display SQL queries and their results. --debug NUMBER Set debugging level (0 < NUMBER <= 100) SQL related options: --interface STRING Select SQL interface to use. STRING may be either \"mysql\" (the default) or \"postgres\". --host HOST-OR-PATH Set name or IP address of the host running SQL database, or path to the database I/O socket. --database NAME Set name of the database to use. --port NUMBER Set the SQL port number --user USER-NAME Set SQL user name. --password STRING Set the SQL password --cleanup Clean up the database (delete all entries from all the tables) before proceeding. Use this option with care. Informational options: --help Output this help info \n")) (define (cons? p) (and (pair? p) (not (list? p)))) (for-each (lambda (x) (cond ((cons? x) (case (car x) ((check) (set! compile-only #t)) ((cleanup) (set! cleanup-option #t)) ((database) (add-conn-param #:db (cdr x))) ((host) (add-conn-param #:host (cdr x))) ((port) (add-conn-param #:port (string->number (cdr x)))) ((password) (add-conn-param #:pass (cdr x))) ((user) (add-conn-param #:user (cdr x))) ((interface) (add-conn-param #:iface (cdr x))) ((verbose) (set! verbose-option #t)) ((preserve) (set! preserve-option #t)) ((debug) (set! debug-level (string->number (cdr x)))) ((force) (set! force-option #t)) ((help) (usage) (exit 0)))) (else (set! input-files (cdr x))))) (getopt-long (command-line) grammar)) (if (null? input-files) (begin (display "Input files not specified\n" (current-error-port)) (exit 1))) (for-each (lambda (x) (if (not (xmltrans:parse-file x)) (exit 1))) input-files) (if compile-only (exit 0)) (let ((conn (sql-open-connection ellinika-sql-connection))) (if (not conn) (begin (display "Cannot connect to the database\n" (current-error-port)) (exit 1))) (run-query conn "SET NAMES utf8") (if cleanup-option (db-cleanup conn)) (for-each (lambda (node) (if (node-get #:id node) (db-update-translation conn node) (db-insert-node conn node))) (reverse node-list)) (sql-close-connection conn)) (exit (if (= error-count 0) 0 1)) ;;;; Local variables: ;;;; mode: Scheme ;;;; buffer-file-coding-system: utf-8 ;;;; End: