summaryrefslogtreecommitdiffabout
path: root/src/ellinika
authorSergey Poznyakoff <gray@gnu.org.ua>2008-06-22 07:33:31 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2008-06-22 07:33:31 (GMT)
commit4eb48d2f187bc9bb3266cee025da2ea61270e4c4 (patch) (side-by-side diff)
treea99c057485f72d9074d22ea518804a4c8d2eb5bb /src/ellinika
parent64af4748a2cae68c7ff4aa48d3dc7c93a2d469f6 (diff)
downloadellinika-4eb48d2f187bc9bb3266cee025da2ea61270e4c4.tar.gz
ellinika-4eb48d2f187bc9bb3266cee025da2ea61270e4c4.tar.bz2
Move cgi-bin and ellinika to src.
* src: New dir * src/Makefile.am: New file. * cgi-bin, ellinika: Move to src. * configure.ac: Reflect the above changes. git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@525 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'src/ellinika') (more/less context) (ignore whitespace changes)
-rw-r--r--src/ellinika/Makefile.am43
-rw-r--r--src/ellinika/cgi.scm4169
-rw-r--r--src/ellinika/config.scm442
-rw-r--r--src/ellinika/dico.scm306
-rw-r--r--src/ellinika/i18n.scm308
-rw-r--r--src/ellinika/xlat.scm309
6 files changed, 1177 insertions, 0 deletions
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am
new file mode 100644
index 0000000..136b44f
--- a/dev/null
+++ b/src/ellinika/Makefile.am
@@ -0,0 +1,43 @@
+# This file is part of Ellinika project.
+# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff
+#
+# Ellinika is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ellinika is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+guiledir=$(GUILE_SITE)/$(PACKAGE)
+guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm
+
+cgi.m4: Makefile
+ echo 'divert(-1)' > $@
+ echo 'changequote([,])' >> $@
+ echo 'changecom([;],[' >> $@
+ echo '])' >> $@
+ echo 'undefine([format])' >> $@
+ echo 'define([IFACE],$(APACHE_IFACE))' >> $@
+ echo 'define([GUILE_BINDIR],$(GUILE_BINDIR))' >> $@
+ echo 'define([GUILE_SITE],@GUILE_SITE@)' >> $@
+ echo 'define([PACKAGE],$(PACKAGE))'>> $@
+ echo 'define([PREFIX],$(prefix))' >> $@
+ echo 'define([SYSCONFDIR],$(sysconfdir))' >> $@
+ echo 'define([LOCALEDIR],$(datadir)/locale)' >> $@
+ echo 'define([HTMLDIR],$(HTMLDIR))' >> $@
+ echo 'divert(0)dnl' >> $@
+ echo '@AUTOGENERATED@' >> $@
+
+SUFFIXES = .scm4 .scm
+
+.scm4.scm:
+ m4 cgi.m4 $< > $@
+
+cgi.scm: cgi.scm4 cgi.m4
+config.scm: config.scm4 cgi.m4
diff --git a/src/ellinika/cgi.scm4 b/src/ellinika/cgi.scm4
new file mode 100644
index 0000000..38fd3de
--- a/dev/null
+++ b/src/ellinika/cgi.scm4
@@ -0,0 +1,169 @@
+;;;; -*- scheme -*-
+;;;; Greek Dictionary Web Engine
+;;;; Copyright (C) 2005, 2007 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(define-module (ellinika cgi)
+ #:use-module (ellinika config)
+ #:use-module (ellinika i18n)
+ #:use-module ifelse(IFACE,[CGI],(www cgi),(guile-user))
+ #:re-export (base-dir html-dir sysconf-dir locale-path
+ sql-iface sql-host sql-port sql-database
+ sql-username sql-password
+ config-file-name ))
+
+
+ifelse(IFACE,[CGI],,dnl
+(define form-data
+ (append
+ (parse-form-data
+ (table:get (request-rec:subprocess-env Request) "QUERY_STRING"))
+ (if (= (request-rec:method-number Request) 2)
+ (parse-form-data (read-post-data Request))
+ '())))
+
+(define-public (cgi:value name)
+ (assoc-ref form-data name))
+
+(define-public (cgi:names)
+ (map car form-data))
+
+(define-public cgi-script-name
+ (table:get (request-rec:subprocess-env Request)
+ "SCRIPT_NAME"))
+
+(define-public cgi-server-hostname
+ (table:get (request-rec:subprocess-env Request)
+ "SERVER_NAME"))
+
+(define-public cgi-server-protocol-name #f)
+(define-public cgi-server-protocol-version #f)
+
+(let* ((server-protocol (table:get (request-rec:subprocess-env Request)
+ "SERVER_PROTOCOL")))
+ (if server-protocol
+ (let ((slash (string-index server-protocol #\/)))
+ (set! cgi-server-protocol-name (substring server-protocol
+ 0 slash))
+ (set! cgi-server-protocol-version (substring server-protocol
+ (1+ slash))))))
+
+)
+
+
+;;; User-definable variables
+(define-public dict-template-file-name "dict.html")
+(define-public nea-template-file-name "nea.html")
+(define-public monima-nea-template-file-name "monima.html")
+(define-public target-language "el_GR")
+
+(define-public word-forms-reference '())
+
+(define-public ref-loc #f)
+
+;; Number of colums in fuzzy search output
+(define-public match-list-columns 4)
+;;; End of user-definable variables
+
+(define-public (language-code lang)
+ (cond
+ ((string-index lang #\_) =>
+ (lambda (len)
+ (substring lang 0 len)))
+ (else
+ lang)))
+
+(define-public (template-file lang template-file-name)
+ (string-append html-dir "/" (language-code lang) "/" template-file-name))
+
+(define-public (make-cgi-name cgi-path . rest)
+ (apply
+ string-append
+ (cons
+ cgi-path
+ (let ((arglist (let ((lang (cgi:value "LANG")))
+ (do ((ilist (if lang
+ (cons "LANG" (cons lang rest))
+ rest) (cdr ilist))
+ (i 1 (1+ i))
+ (olist '()))
+ ((null? ilist) (if (null? olist)
+ olist
+ (reverse (cdr olist))))
+ (set! olist (cons (car ilist) olist))
+ (set! olist (cons
+ (if (odd? i) "=" "&amp;")
+ olist))))))
+ (if (null? arglist)
+ arglist
+ (cons "?" arglist))))))
+
+(define-public (expand-template explist template)
+ "(expand-template EXPLIST TEMPLATE)
+
+Expands string TEMPLATE in accordance with EXPLIST. EXPLIST is a list
+of elements:
+
+ (cons WORD THUNK)
+
+Each occurrence of WORD in TEMPLATE is replaced with the return value of
+THUNK.
+"
+ (let loop ((template template))
+ (cond
+ ((string-index template #\@) =>
+ (lambda (w)
+ (display (substring template 0 w))
+ (if (and (< (+ w 2) (string-length template))
+ (char=? (string-ref template (1+ w)) #\@))
+ (let ((end-pos (string-index template #\@ (+ w 2))))
+ (if (and end-pos
+ (< (1+ end-pos) (string-length template))
+ (char=? (string-ref template (1+ end-pos)) #\@))
+ (let* ((name (substring template w (+ end-pos 2)))
+ (entry (assoc name explist)))
+ (cond
+ (entry
+ ((cdr entry))
+ (loop (substring template (+ end-pos 2))))
+ (else
+ (display "@@")
+ (loop (substring template (+ w 2))))))
+ (begin
+ (display "@")
+ (loop (substring template (+ w 1))))))
+ (begin
+ (display "@")
+ (loop (substring template (1+ w)))))))
+ (else
+ (display template)))))
+
+
+(define-public (ellinika-cgi-init template-file-name)
+ ;;; Load the site defaults
+ (ellinika-config-setup)
+
+ ;;; Load the language-specific defaults
+ (cond
+ ((cgi:value "LANG") =>
+ (lambda (x)
+ (if (file-exists? (template-file x template-file-name))
+ (set! target-language x)))))
+ ;;; Initialize i18n
+ (let ((x (locale-setup target-language "PACKAGE" locale-path)))
+ (if x
+ (set! target-language x))))
+
+;;; End of cgi.scmi
diff --git a/src/ellinika/config.scm4 b/src/ellinika/config.scm4
new file mode 100644
index 0000000..8032409
--- a/dev/null
+++ b/src/ellinika/config.scm4
@@ -0,0 +1,42 @@
+;;;; -*- scheme -*-
+;;;; Greek Dictionary Web Engine
+;;;; Copyright (C) 2005, 2007, 2008 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+
+(define-module (ellinika config))
+
+(define-public base-dir "PREFIX")
+(define-public html-dir "HTMLDIR")
+(define-public sysconf-dir "SYSCONFDIR")
+(define-public locale-path "LOCALEDIR:/usr/share/locale:/usr/local/share/locale")
+(define-public sql-iface "mysql") ;; SQL interface ("mysql" or "postgres")
+;; SQL server hostname or a path to the UNIX socket
+(define-public sql-host "localhost")
+(define-public sql-port 3306) ;; SQL port number (0 for sockaddr_un
+ ;; connection)
+(define-public sql-database "ellinika") ;; Name of the database
+(define-public sql-username "gray") ;; Database user name
+(define-public sql-password "") ;; Password for that user name
+
+(define-public config-file-name "ellinika.conf")
+
+(define-public (ellinika-config-setup)
+ ;;; Load the site defaults
+ (let ((rc-file (string-append sysconf-dir "/" config-file-name)))
+ (if (file-exists? rc-file)
+ (load rc-file))))
+
+
diff --git a/src/ellinika/dico.scm b/src/ellinika/dico.scm
new file mode 100644
index 0000000..9383d1f
--- a/dev/null
+++ b/src/ellinika/dico.scm
@@ -0,0 +1,306 @@
+;;;; A Dico module for Greek Dictionary Web Engine -*- scheme -*-
+;;;; Copyright (C) 2008 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+
+(define-module (ellinika dico))
+
+(use-modules (guile-user)
+ (ice-9 rdelim)
+ (gamma sql)
+ (xmltools dict)
+ (ellinika xlat)
+ (ellinika config))
+
+(define (sql-error-handler err descr)
+ (format #t "cannot connect to the database")
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (display err)
+ (display ": ")
+ (display descr))))
+
+(define (my-sql-query conn query)
+ (catch #t
+ (lambda ()
+ (sql-query conn query))
+ (lambda args
+ '())))
+
+;; END of FIXME
+
+(define (dico-error err . rest)
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (display err)
+ (for-each
+ display
+ rest)
+ (newline))))
+
+;; Dico interface
+
+(define (open-module name . rest)
+ (let ((target-language "el")
+ (type 'dict))
+ (for-each (lambda (arg)
+ (let ((av (string-split arg #\=)))
+ (case (length av)
+ ((1) (cond
+ ((string=? (car av) "synonym")
+ (set! type 'synonim))
+ ((string=? (car av) "antonym")
+ (set! type 'antonym))
+ (else
+ (dico-error "Unknown option " (car av)))))
+ ((2) (cond
+ ((string=? (car av) "lang")
+ (set! target-language (cadr av)))
+ (else
+ (dico-error "Unknown option " (car av)))))
+ (else
+ (dico-error "Unknown option " (car av))))))
+ (cdr rest))
+ (let ((db-connection (sql-connect
+ sql-iface sql-host sql-port sql-database
+ sql-username sql-password)))
+ (sql-query db-connection "SET NAMES utf8")
+ (list db-connection target-language type))))
+
+(defmacro dbh:conn (dbh) `(list-ref ,dbh 0))
+(defmacro dbh:lang (dbh) `(list-ref ,dbh 1))
+(defmacro dbh:type (dbh) `(list-ref ,dbh 2))
+
+(define (close-module dbh)
+ (sql-connect-close (dbh:conn dbh)))
+
+(define descr-list
+ '(("pl" . "Słownik grecko-polski")
+ ("uk" . "Грецько-украЇнський словник")
+ ("ru" . "Греческо-русский словарь")))
+
+(define (descr dbh)
+ (case (dbh:type dbh)
+ ((dict)
+ (let ((res (assoc (dbh:lang dbh) descr-list)))
+ (if res
+ (cdr res)
+ "Ellinika (no description available)")))
+ ((antonym)
+ "Λέξικο αντωνύμων της Ελληνικής γλώσσας")
+ ((synonym)
+ "a")))
+
+(define (info dbh)
+ (string-append "Ellinika - A greek dictionary.\n\
+See http://ellinika.gnu.org.ua/cgi-bin/dict.cgi?LANG="
+ (dbh:lang dbh) "\n\
+Copyright © 2004, 2005, 2006, 2007, 2008 Sergey Poznyakoff\n\
+\n\
+Permission is granted to copy, distribute and/or modify this document\n\
+under the terms of the GNU Free Documentation License, Version 1.2 or\n\
+any later version published by the Free Software Foundation; with no\n\
+Invariant Sections, no Front-Cover and Back-Cover Texts"))
+
+(define (define-word-dict dbh key)
+ (let ((result '())
+ (last-id -1)
+ (word '())
+ (articles '()))
+ (for-each
+ (lambda (tuple)
+ (cond
+ ((not (= last-id (string->number (car tuple))))
+ (if (not (null? articles))
+ (set! result (cons
+ (cons word (reverse articles))
+ result)))
+ (set! last-id (string->number (car tuple)))
+ (set! word (cons (list-ref tuple 1)
+ (list-ref tuple 2))); FIXME: forms?
+ (set! articles '())))
+ (set! articles (cons
+ (cons (list-ref tuple 4)
+ (list-ref tuple 5))
+ articles)))
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT dict.ident,dict.word,pos.abbr,dict.forms,articles.subindex,articles.meaning "
+ "FROM dict,articles,pos WHERE dict.word=\""
+ key
+ "\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' "
+ "AND dict.pos=pos.id AND pos.canonical='Y' ORDER BY dict.ident, articles.subindex")))
+ (if (not (null? articles))
+ (set! result (cons
+ (cons word (reverse articles))
+ result)))
+ (cons 'define-word-dict (reverse result))))
+
+(define (define-word-x dbh word link-type)
+ (let ((res (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict,links,articles "
+ "WHERE links.type='" link-type "' AND links.ident IN "
+ "(SELECT ident FROM dict WHERE word=\"" word "\") "
+ "AND dict.ident=links.xref "
+ "AND dict.ident=articles.ident and articles.lang=\""
+ (dbh:lang dbh)
+ "\" ORDER BY word"))))
+ (if (and res (not (null? res)))
+ (cons 'define-word-x (list (cons word (map car res))))
+ #f)))
+
+(define (define-word-antonym dbh word)
+ (define-word-x dbh word "XREF"))
+
+(define (define-word-synonym dbh word)
+ (define-word-x dbh word "ANT"))
+
+(define define-list
+ (list (cons 'dict define-word-dict)
+ (cons 'synonym define-word-synonym)
+ (cons 'antonym define-word-antonym)))
+
+(define (define-word dbh word)
+ (let ((key (ellinika:translate-input word))
+ (x (assoc (dbh:type dbh) define-list)))
+ (if x
+ ((cdr x) dbh key)
+ #f)))
+
+(define (match-exact dbh strat word)
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word=\""
+ (ellinika:translate-input word)
+ "\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1")))
+
+(define (match-prefix dbh strat word)
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word LIKE \""
+ (ellinika:translate-input word)
+ "%\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1")))
+
+(define (match-suffix dbh strat word)
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word LIKE \"%"
+ (ellinika:translate-input word)
+ "\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1")))
+
+(define (match-extnd-regex dbh strat word)
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word regexp \""
+ (ellinika:translate-input word)
+ "\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1")))
+
+(define (match-basic-regex dbh strat word)
+ #f) ;FIXME
+
+(define (match-default dbh strat word)
+ (my-sql-query
+ (dbh:conn dbh)
+ (string-append
+ "SELECT DISTINCT dict.word FROM dict,articles WHERE dict.sound LIKE \""
+ (ellinika:sounds-like word)
+ "%\" AND dict.ident=articles.ident "
+ "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1")))
+
+
+(define strategy-list
+ (list (cons "exact" match-exact)
+ (cons "prefix" match-prefix)
+ (cons "suffix" match-suffix)
+ (cons "re" match-extnd-regex)
+ (cons "regexp" match-basic-regex)))
+
+(define (match-word dbh strat word)
+ (let ((sp (assoc (dico-strat-name strat) strategy-list)))
+ (let ((res (if sp
+ ((cdr sp) dbh strat word)
+ (match-default dbh strat word))))
+ (if res
+ (cons #f (map car res))
+ #f))))
+
+(define (output res n)
+ (let ((type (car res))
+ (contents (list-ref (cdr res) n)))
+ (case type
+ ((define-word-dict)
+ (let ((word-pair (car contents))
+ (defn (cdr contents)))
+ (display (car word-pair))
+ (display ", <")
+ (display (cdr word-pair))
+ (display ">")
+ (for-each
+ (lambda (article)
+ (newline)
+ (display (1+ (string->number (car article))))
+ (display ". ")
+ (display (cdr article))
+ (display ";"))
+ defn)))
+ ((define-word-x)
+ (let ((word (car contents))
+ (defn (cdr contents)))
+ (display word)
+ (display " -- ")
+ (display (car defn))
+ (if (cdr defn)
+ (for-each
+ (lambda (elt)
+ (display ", ")
+ (display elt))
+ (cdr defn)))))
+ (else
+ (display contents)))))
+
+(define (result-count res)
+ (length (cdr res)))
+
+(define-public (dico-ellinika-init arg)
+ (list (cons "open" open-module)
+ (cons "close" close-module)
+ (cons "descr" descr)
+ (cons "info" info)
+ (cons "define" define-word)
+ (cons "match" match-word)
+ (cons "output" output)
+ (cons "result-count" result-count)))
+
+;;
+;; Setup
+(ellinika-config-setup)
+(dico-register-strat "suffix" "Match word suffixes")
+
+
+
diff --git a/src/ellinika/i18n.scm b/src/ellinika/i18n.scm
new file mode 100644
index 0000000..474c8c9
--- a/dev/null
+++ b/src/ellinika/i18n.scm
@@ -0,0 +1,308 @@
+;;;; This file is part of Greek Dictionary Web Engine
+;;;; Copyright (C) 2006, 2007 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3, or (at your option)
+;;;; any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (ellinika i18n)
+ #:use-syntax (ice-9 syncase)
+ #:export-syntax (_)
+ #:export (locale-setup))
+
+(define-syntax _
+ (syntax-rules ()
+ ((_ msg) (gettext msg))))
+
+
+;;; ISO 639 language code => ISO 3166 country code
+;;; The corresponding country codes where selected using the following
+;;; principles:
+;;; 1. If the language is spoken in only one country, this country code is
+;;; used.
+;;; 2. If the language is spoken in more than one country, select the code of
+;;; that country where it has official status.
+;;; 3. If the language does not have official status, select the country with
+;;; greater number of speakers
+;;;
+;;; The table does not list artificial languages (Esperanto, Ido, Interlingua,
+;;; etc), as the notion of a territory does not apply to them.
+;;;
+;;; If you find any inconsistency in this table, please let me know.
+;;;
+
+(define defterr
+ '((aa . "ET") ; Afar
+ (ab . "GE") ; Abkhazian
+ (ae . "IR") ; Avestan
+ (af . "ZA") ; Afrikaans
+ (ak . "GH") ; Akan # or ak_CI
+ (am . "ET") ; Amharic
+ (an . "ES") ; Aragonese
+ (ar . "SA") ; Arabic
+ (as . "IN") ; Assamese
+ (av . "RU") ; Avaric # Spoken mainly in Dagestan
+ (ay . "BO") ; Aymara
+ (az . "AZ") ; Azerbaijani
+
+ (ba . "RU") ; Bashkir
+ (be . "BY") ; Byelorussian; Belarusian
+ (bg . "BG") ; Bulgarian
+ (bh . "IN") ; Bihari
+ (bi . "VU") ; Bislama
+ (bm . "ML") ; Bambara
+ (bn . "BD") ; Bengali; Bangla
+ (bo . "CN") ; Tibetan
+ (br . "FR") ; Breton
+ (bs . "BA") ; Bosnian
+
+ (ca . "ES") ; Catalan
+ (ce . "RU") ; Chechen
+ (ch . "GU") ; Chamorro
+ (co . "FR") ; Corsican
+ (cr . "CA") ; Cree
+ (cs . "CZ") ; Czech
+ (cu . "BG") ; Church Slavic
+ (cv . "RU") ; Chuvash
+ (cy . "GB") ; Welsh
+
+ (da . "DK") ; Danish
+ (de . "DE") ; German
+ (dv . "MV") ; Divehi
+ (dz . "BT") ; Dzongkha; Bhutani
+
+ (ee . "GH") ; @'Ew@'e
+ (el . "GR") ; Greek
+ (en . "US") ; English
+ (es . "ES") ; Spanish
+ (et . "EE") ; Estonian
+ (eu . "ES") ; Basque
+
+ (fa . "IR") ; Persian
+ (ff . "CM") ; Fulah # Also NG, MR, and many others
+ (fi . "FI") ; Finnish
+ (fj . "FJ") ; Fijian; Fiji
+ (fo . "FO") ; Faroese
+ (fr . "FR") ; French
+ (fy . "NL") ; Frisian
+
+ (ga . "IE") ; Irish
+ (gd . "GB") ; Scots; Gaelic
+ (gl . "ES") ; Gallegan; Galician
+ (gn . "PE") ; Guarani
+ (gu . "IN") ; Gujarati
+ (gv . "GB") ; Manx
+
+ (ha . "NG") ; Hausa (?)
+ (he . "IL") ; Hebrew (formerly iw)
+ (hi . "IN") ; Hindi
+ (ho . "PG") ; Hiri Motu
+ (hr . "HR") ; Croatian
+ (ht . "HT") ; Haitian; Haitian Creole
+ (hu . "HU") ; Hungarian
+ (hy . "AM") ; Armenian
+ (hz . "NA") ; Herero
+
+ (id . "ID") ; Indonesian (formerly in)
+ (ig . "NG") ; Igbo
+ (ii . "CN") ; Sichuan Yi
+ (ik . "CA") ; Inupiak
+ (is . "IS") ; Icelandic
+ (it . "IT") ; Italian
+ (iu . "CA") ; Inuktitut
+
+ (ja . "JP") ; Japanese
+ (jv . "ID") ; Javanese
+
+ (ka . "GE") ; Georgian
+ (kg . "CG") ; Kongo # also CD and AO
+ (ki . "KE") ; Kikuyu
+ (kj . "AO") ; Kuanyama
+ (kk . "KZ") ; Kazakh
+ (kl . "DK") ; Kalaallisut; Greenlandic
+ (km . "KH") ; Khmer; Cambodian
+ (kn . "IN") ; Kannada
+ (ko . "KR") ; Korean
+ (kr . "NG") ; Kanuri
+ (ks . "IN") ; Kashmiri
+ (ku . "IQ") ; Kurdish
+ (kv . "RU") ; Komi
+ (kw . "GB") ; Cornish
+ (ky . "KG") ; Kirghiz
+
+ (la . "VA") ; Latin
+ (lb . "LU") ; Letzeburgesch
+ (lg . "UG") ; Ganda
+ (li . "NL") ; Limburgish; Limburger; Limburgan
+ (ln . "CD") ; Lingala
+ (lo . "LA") ; Lao; Laotian
+ (lt . "LT") ; Lithuanian
+ (lu . "CD") ; Luba-Katanga
+ (lv . "LV") ; Latvian; Lettish
+
+ (mg . "MG") ; Malagasy
+ (mh . "MH") ; Marshall
+ (mi . "NZ") ; Maori
+ (mk . "MK") ; Macedonian
+ (ml . "IN") ; Malayalam
+ (mn . "MN") ; Mongolian
+ (mo . "MD") ; Moldavian
+ (mr . "IN") ; Marathi
+ (ms . "MY") ; Malay
+ (mt . "MT") ; Maltese
+ (my . "MM") ; Burmese
+
+ (na . "NR") ; Nauru
+ (nb . "NO") ; Norwegian Bokm@aa{}l
+ (nd . "ZA") ; Ndebele, North
+ (ne . "NP") ; Nepali
+ (ng . "NA") ; Ndonga
+ (nl . "NL") ; Dutch
+ (nn . "NO") ; Norwegian Nynorsk
+ (no . "NO") ; Norwegian
+ (nr . "ZA") ; Ndebele, South
+ (nv . "US") ; Navajo
+ (ny . "MW") ; Chichewa; Nyanja
+
+ (oc . "FR") ; Occitan; Proven@,{c}al
+ (oj . "CA") ; Ojibwa
+ (om . "ET") ; (Afan) Oromo
+ (or . "IN") ; Oriya
+ (os . "RU") ; Ossetian; Ossetic
+
+ (pa . "IN") ; Panjabi; Punjabi
+ (pi . "IN") ; Pali
+ (pl . "PL") ; Polish
+ (ps . "AF") ; Pashto, Pushto
+ (pt . "PT") ; Portuguese
+
+ (qu . "PE") ; Quechua
+
+ (rm . "FR") ; Rhaeto-Romance
+ (rn . "BI") ; Rundi; Kirundi
+ (ro . "RO") ; Romanian
+ (ru . "RU") ; Russian
+ (rw . "RW") ; Kinyarwanda
+
+ (sa . "IN") ; Sanskrit
+ (sc . "IT") ; Sardinian
+ (sd . "PK") ; Sindhi
+ (se . "NO") ; Northern Sami
+ (sg . "CF") ; Sango; Sangro
+ (si . "LK") ; Sinhalese
+ (sk . "SK") ; Slovak
+ (sl . "SI") ; Slovenian
+ (sm . "WS") ; Samoan
+ (sn . "ZW") ; Shona
+ (so . "SO") ; Somali
+ (sq . "AL") ; Albanian
+ (sr . "CS") ; Serbian
+ (ss . "SZ") ; Swati; Siswati
+ (st . "LS") ; Sesotho; Sotho, Southern
+ (su . "ID") ; Sundanese
+ (sv . "SE") ; Swedish
+ (sw . "TZ") ; Swahili # Also KE
+
+ (ta . "IN") ; Tamil
+ (te . "IN") ; Telugu
+ (tg . "TJ") ; Tajik
+ (th . "TH") ; Thai
+ (ti . "ER") ; Tigrinya
+ (tk . "TM") ; Turkmen
+ (tl . "PH") ; Tagalog
+ (tn . "BW") ; Tswana; Setswana
+ (to . "ZM") ; Tonga (?) # Also ZW ; MW
+ (tr . "TR") ; Turkish
+ (ts . "MZ") ; Tsonga # ZA SZ XW
+ (tt . "RU") ; Tatar
+ (tw . "GH") ; Twi
+ (ty . "PF") ; Tahitian
+
+ (ug . "RU") ; Uighur
+ (uk . "UA") ; Ukrainian
+ (ur . "IN") ; Urdu
+ (uz . "UZ") ; Uzbek
+
+ (ve . "ZA") ; Venda
+ (vi . "VN") ; Vietnamese
+
+ (wa . "FR") ; Walloon
+ (wo . "SN") ; Wolof
+
+ (xh . "ZA") ; Xhosa
+
+ (yi . "IL") ; Yiddish (formerly ji)
+ (yo . "NG") ; Yoruba
+
+ (za . "CN") ; Zhuang
+ (zh . "CN") ; Chinese
+ (zu . "ZA"))); Zulu
+
+(define (supported-locale-dir lang textdomain locale-path)
+ (call-with-current-continuation
+ (lambda (return)
+ (for-each
+ (lambda (dir)
+ (let ((name (string-append dir "/" lang "/LC_MESSAGES/" textdomain)))
+ (if (or (access? (string-append name ".mo") R_OK)
+ (access? (string-append name ".gmo") R_OK))
+ (return dir))))
+ (string-split locale-path #\:))
+ #f)))
+
+(define (locale-setup lang domain-name locale-path)
+ (catch 'system-error
+ (lambda ()
+ (cond
+ ((not lang)
+ (setenv "LC_ALL" "C")
+ (setlocale LC_ALL "C")
+ #f)
+ (else
+ (let ((curlocale (if (> (string-length lang) 2)
+ (string-downcase (substring lang 0 2))
+ lang))
+ (terr #f)
+ (sublocale #f)
+ (domaindir #f))
+
+ (cond
+ ((and (> (string-length lang) 2) (char=? (string-ref lang 2) #\-))
+ (set! terr (string-upcase (substring lang 3 2)))
+ (set! sublocale (string-append curlocale "_" terr)))
+ ((assoc-ref defterr (string->symbol curlocale)) =>
+ (lambda (elt)
+ (set! sublocale curlocale)
+ (set! terr elt)))
+ (else
+ (set! sublocale curlocale)
+ (set! terr "XX"))) ; Hack for languages without defined territory.
+
+ (let ((domaindir
+ (supported-locale-dir sublocale domain-name locale-path)))
+ (cond
+ (domaindir
+ (let ((locale (string-append curlocale "_" terr ".UTF-8")))
+ (setenv "LC_ALL" locale)
+ (setlocale LC_ALL locale)
+ (textdomain domain-name)
+ (bindtextdomain domain-name domaindir)
+ locale))
+ ((setenv "LC_ALL" "C")
+ (setlocale LC_ALL "C")
+ #f)))))))
+ (lambda args
+ #f)))
+
+
+
+
diff --git a/src/ellinika/xlat.scm b/src/ellinika/xlat.scm
new file mode 100644
index 0000000..c51edaa
--- a/dev/null
+++ b/src/ellinika/xlat.scm
@@ -0,0 +1,309 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2004, 2007 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 3 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(define-module (ellinika xlat))
+
+(define greek-postfix-map
+ (list
+ (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ")
+ (cons "ί" "ΐ") (cons "ύ" "ΰ")
+ (cons "θ" "ϊ") (cons "θ" "ϋ")))
+ (cons #\' (list (cons "α" "ά") (cons "Α" "Ά")
+ (cons "ε" "έ") (cons "Ε" "Έ")
+ (cons "η" "ή") (cons "Η" "Ή")
+ (cons "ι" "ί") (cons "Ι" "Ί")
+ (cons "ϊ" "ΐ") (cons "Ϊ" "Ϊ")
+ (cons "ο" "ό") (cons "Ο" "Ό")
+ (cons "υ" "ύ") (cons "Υ" "Ύ")
+ (cons "θ" "ύ") (cons "Θ" "Ύ")
+ (cons "ϋ" "ΰ") (cons "Ϋ" "Ϋ")
+ (cons "ω" "ώ") (cons "Ω" "Ώ")))
+ (cons #\s (list (cons "κ" "ξ") (cons "π" "ψ")))))
+
+(define greek-kbd-map
+ (list (cons #\a "α")
+ (cons #\A "Α")
+ (cons #\b "β")
+ (cons #\B "Β")
+ (cons #\g "γ")
+ (cons #\G "Γ")
+ (cons #\d "δ")
+ (cons #\D "Δ")
+ (cons #\e "ε")
+ (cons #\E "Ε")
+ (cons #\z "ζ")
+ (cons #\Z "Ζ")
+ (cons #\h "η")
+ (cons #\H "Η")
+ (cons #\u "θ")
+ (cons #\U "Θ")
+ (cons #\i "ι")
+ (cons #\I "Ι")
+ (cons #\k "κ")
+ (cons #\K "Κ")
+ (cons #\l "λ")
+ (cons #\L "Λ")
+ (cons #\m "μ")
+ (cons #\M "Μ")
+ (cons #\n "ν")
+ (cons #\M "Ν")
+ (cons #\j "ξ")
+ (cons #\J "Ξ")
+ (cons #\o "ο")
+ (cons #\O "Ο")
+ (cons #\p "π")
+ (cons #\P "Π")
+ (cons #\r "ρ")
+ (cons #\R "Ρ")
+ (cons #\s "σ")
+ (cons #\S "Σ")
+ (cons #\w "ς")
+ (cons #\W "Σ")
+ (cons #\t "τ")
+ (cons #\T "Τ")
+ (cons #\y "υ")
+ (cons #\Y "Υ")
+ (cons #\f "φ")
+ (cons #\F "Φ")
+ (cons #\x "χ")
+ (cons #\X "Χ")
+ (cons #\c "ψ")
+ (cons #\C "Ψ")
+ (cons #\v "ω")
+ (cons #\V "Ω")))
+
+
+(define (after-thita? c)
+ (member c (list #\a #\e #\i #\o #\y #\v)))
+
+;;; Given input string in Greek transliteration, convert it to
+;;; an equivalent Greek word in UTF-8 encoding. The input string is
+;;; supposed to follow the traditional Greek keyboard layout:
+;;;
+;;; +----------------------------------------------------------------+
+;;; | 1! | 2@ | 3# | 4$ | 5% | 6^ | 7& | 8* | 9( | 0) | -_ | =+ | `~ |
+;;; +----------------------------------------------------------------+
+;;; | ·― | ςΣ | εΕ | ρΡ | τΤ | υΥ | θΘ | ιΙ | οΟ | πΠ | [{ | ]} |
+;;; +------------------------------------------------------------+
+;;; | αΑ | σΣ | δΔ | φΦ | γΓ | ηΗ | ξΞ | κΚ | λΛ | ΄¨ | '" | \| |
+;;; +-----------------------------------------------------------+
+;;; | ζΖ | χΧ | ψΨ | ωΩ | βΒ | νΝ | μΜ | ,; | .: | /? |
+;;; +-------------------------------------------------+
+;;; +-----------------------------+
+;;; | space bar |
+;;; +-----------------------------+
+;;;
+;;;
+;;; The followin escape sequences are recognized:
+;;;
+;;; '\ks' -> 'ξ'
+;;; '\ps' -> 'ψ'
+;;; '\th' -> 'θ'
+;;;
+;;; Additionally some spell fixing heuristics is applied:
+;;;
+;;; 's' at the end of the word -> 'ς'
+;;; 'w' anywhere but at the end of the word -> 'ω'
+;;; 'ks' -> 'ξ'
+;;; 'ps' -> 'ψ'
+;;; "th" -> 'θ' unless followed by a consonant
+;;; "u'" -> 'ύ'
+;;;
+;;; FIXME: The case of less obvious spelling errors, like e.g. 'ou' -> 'ου'
+;;; will be handled by later spelling corrections if fuzzy search is
+;;; enabled
+(define-public (ellinika:translate-kbd str)
+ (apply
+ string-append
+ (do ((sl (string->list str) (cdr sl))
+ (l '()))
+ ((null? sl) (reverse l))
+ (letrec ((decode-kbd-map
+ (lambda ()
+ (let ((g (assoc
+ (let ((c (car sl)))
+ (cond
+ ((and (char=? c #\w) (not (null? (cdr sl))))
+ #\v)
+ ((and (char=? c #\s) (null? (cdr sl)))
+ #\w)
+ (else
+ c)))
+ greek-kbd-map)))
+ (if g
+ (set! l (cons (cdr g) l))
+ (if (char=? (car sl) #\\)
+ (cond
+ ((> (length sl) 2)
+ (cond
+ ((char=? (car (cdr (cdr sl))) #\s)
+ (let ((c (car (cdr sl))))
+ (cond
+ ((char=? c #\k)
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "ξ" l)))
+ ((char=? c #\p)
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "ψ" l)))
+ (else
+ (set! sl (cdr sl))))))
+ ((and (char=? (car (cdr sl)))
+ (char=? (car (cdr (cdr sl))) #\h))
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "θ" l)))))
+
+ (else
+ (set! l (cons (string (car sl)) l))))))))))
+ (if (null? l)
+ (decode-kbd-map)
+ (cond
+ ((char=? (car sl) #\h)
+ (if (and (not (null? (cdr sl)))
+ (after-thita? (car (cdr sl))))
+ (set-car! l "θ")
+ (decode-kbd-map)))
+ ((assoc (car sl) greek-postfix-map) =>
+ (lambda (cmap)
+ (let ((x (assoc (car l) (cdr cmap))))
+ (if x
+ (set-car! l (cdr x))
+ (decode-kbd-map)))))
+ (else
+ (decode-kbd-map))))))))
+
+
+;; Translate the input string to UTF-8 if necessary.
+(define-public (ellinika:translate-input input)
+ (if (and input
+ (not (string-null? input))
+ (< (char->integer (string-ref input 0)) 127))
+ (ellinika:translate-kbd input)
+ input))
+
+
+
+(define transcription-list
+ (list
+ (cons "μπ" "b" )
+ (cons "γγ" "g" )
+ (cons "γκ" "g" )
+ (cons "γχ" "g" )
+ (cons "ντ" "d" )
+ (cons "αι" "e" )
+ (cons "αί" "e" )
+ (cons "αυ" "au")
+ (cons "αύ" "au")
+ (cons "ου" "ou")
+ (cons "ού" "ou")
+ (cons "ευ" "eu")
+ (cons "εύ" "eu")
+ (cons "οι" "i" )
+ (cons "ει" "i" )
+ (cons "εί" "i" )
+ (cons "υι" "i" )
+
+ (cons "α" "a" )
+ (cons "Α" "a" )
+ (cons "Ά" "a" )
+ (cons "ά" "a" )
+ (cons "β" "b" )
+ (cons "Β" "b" )
+ (cons "γ" "g" )
+ (cons "Γ" "g" )
+ (cons "δ" "d" )
+ (cons "Δ" "d" )
+ (cons "ε" "e" )
+ (cons "Ε" "e" )
+ (cons "Έ" "e" )
+ (cons "έ" "e" )
+ (cons "ζ" "z" )
+ (cons "Ζ" "z" )
+ (cons "η" "i" )
+ (cons "Η" "i" )
+ (cons "Ή" "i" )
+ (cons "ή" "i" )
+ (cons "θ" "t" )
+ (cons "Θ" "t" )
+ (cons "ι" "i" )
+ (cons "Ι" "i" )
+ (cons "Ί" "i" )
+ (cons "ί" "i" )
+ (cons "κ" "k" )
+ (cons "Κ" "k" )
+ (cons "λ" "l" )
+ (cons "Λ" "l" )
+ (cons "μ" "m" )
+ (cons "Μ" "m" )
+ (cons "ν" "n" )
+ (cons "Ν" "n" )
+ (cons "ξ" "x" )
+ (cons "Ξ" "x" )
+ (cons "ο" "o" )
+ (cons "Ο" "o" )
+ (cons "Ό" "o" )
+ (cons "ό" "o" )
+ (cons "π" "p" )
+ (cons "Π" "p" )
+ (cons "ρ" "r" )
+ (cons "Ρ" "r" )
+ (cons "σ" "s" )
+ (cons "Σ" "s" )
+ (cons "ς" "s" )
+ (cons "τ" "t" )
+ (cons "Τ" "t" )
+ (cons "υ" "i" )
+ (cons "Υ" "i" )
+ (cons "Ύ" "i" )
+ (cons "ύ" "i" )
+ (cons "φ" "f" )
+ (cons "Φ" "f" )
+ (cons "χ" "h" )
+ (cons "Χ" "h" )
+ (cons "ψ" "P" )
+ (cons "Ψ" "P" )
+ (cons "ω" "o" )
+ (cons "Ω" "o" )
+ (cons "Ώ" "o" )
+ (cons "ώ" "o" )
+ (cons "Ϊ" "i" )
+ (cons "ΐ" "i" )
+ (cons "Ϋ" "i" )
+ (cons "ΰ" "i" )))
+
+(define-public (ellinika:sounds-like str)
+ (let ((len (string-length str)))
+ (do ((i 0)
+ (sl '()))
+ ((= i len) (apply string-append (reverse sl)))
+ (set! sl (cons
+ (cond
+ ((and (<= (+ i 4) len)
+ (assoc (substring str i (+ i 4)) transcription-list)) =>
+ (lambda (x)
+ (set! i (+ i 4))
+ (cdr x)))
+ ((and (<= (+ i 2) len)
+ (assoc (substring str i (+ i 2)) transcription-list)) =>
+ (lambda (x)
+ (set! i (+ i 2))
+ (cdr x)))
+ (else
+ (set! i (1+ i))
+ (substring str (- i 1) i)))
+ sl)))))
+
+;;;; End of ellinika.scm
+

Return to:

Send suggestions and report system problems to the System administrator.