diff options
Diffstat (limited to 'src/ellinika')
-rw-r--r-- | src/ellinika/Makefile.am | 43 | ||||
-rw-r--r-- | src/ellinika/cgi.scm4 | 169 | ||||
-rw-r--r-- | src/ellinika/config.scm4 | 42 | ||||
-rw-r--r-- | src/ellinika/dico.scm | 306 | ||||
-rw-r--r-- | src/ellinika/i18n.scm | 308 | ||||
-rw-r--r-- | src/ellinika/xlat.scm | 309 |
6 files changed, 1177 insertions, 0 deletions
diff --git a/src/ellinika/Makefile.am b/src/ellinika/Makefile.am new file mode 100644 index 0000000..136b44f --- /dev/null +++ b/src/ellinika/Makefile.am @@ -0,0 +1,43 @@ +# This file is part of Ellinika project. +# Copyright (C) 2004,2006,2007,2008 Sergey Poznyakoff +# +# Ellinika is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Ellinika is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +guiledir=$(GUILE_SITE)/$(PACKAGE) +guile_DATA=xlat.scm cgi.scm i18n.scm config.scm dico.scm + +cgi.m4: Makefile + echo 'divert(-1)' > $@ + echo 'changequote([,])' >> $@ + echo 'changecom([;],[' >> $@ + echo '])' >> $@ + echo 'undefine([format])' >> $@ + echo 'define([IFACE],$(APACHE_IFACE))' >> $@ + echo 'define([GUILE_BINDIR],$(GUILE_BINDIR))' >> $@ + echo 'define([GUILE_SITE],@GUILE_SITE@)' >> $@ + echo 'define([PACKAGE],$(PACKAGE))'>> $@ + echo 'define([PREFIX],$(prefix))' >> $@ + echo 'define([SYSCONFDIR],$(sysconfdir))' >> $@ + echo 'define([LOCALEDIR],$(datadir)/locale)' >> $@ + echo 'define([HTMLDIR],$(HTMLDIR))' >> $@ + echo 'divert(0)dnl' >> $@ + echo '@AUTOGENERATED@' >> $@ + +SUFFIXES = .scm4 .scm + +.scm4.scm: + m4 cgi.m4 $< > $@ + +cgi.scm: cgi.scm4 cgi.m4 +config.scm: config.scm4 cgi.m4 diff --git a/src/ellinika/cgi.scm4 b/src/ellinika/cgi.scm4 new file mode 100644 index 0000000..38fd3de --- /dev/null +++ b/src/ellinika/cgi.scm4 @@ -0,0 +1,169 @@ +;;;; -*- scheme -*- +;;;; Greek Dictionary Web Engine +;;;; Copyright (C) 2005, 2007 Sergey Poznyakoff +;;;; +;;;; This program is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. +;;;; +(define-module (ellinika cgi) + #:use-module (ellinika config) + #:use-module (ellinika i18n) + #:use-module ifelse(IFACE,[CGI],(www cgi),(guile-user)) + #:re-export (base-dir html-dir sysconf-dir locale-path + sql-iface sql-host sql-port sql-database + sql-username sql-password + config-file-name )) + + +ifelse(IFACE,[CGI],,dnl +(define form-data + (append + (parse-form-data + (table:get (request-rec:subprocess-env Request) "QUERY_STRING")) + (if (= (request-rec:method-number Request) 2) + (parse-form-data (read-post-data Request)) + '()))) + +(define-public (cgi:value name) + (assoc-ref form-data name)) + +(define-public (cgi:names) + (map car form-data)) + +(define-public cgi-script-name + (table:get (request-rec:subprocess-env Request) + "SCRIPT_NAME")) + +(define-public cgi-server-hostname + (table:get (request-rec:subprocess-env Request) + "SERVER_NAME")) + +(define-public cgi-server-protocol-name #f) +(define-public cgi-server-protocol-version #f) + +(let* ((server-protocol (table:get (request-rec:subprocess-env Request) + "SERVER_PROTOCOL"))) + (if server-protocol + (let ((slash (string-index server-protocol #\/))) + (set! cgi-server-protocol-name (substring server-protocol + 0 slash)) + (set! cgi-server-protocol-version (substring server-protocol + (1+ slash)))))) + +) + + +;;; User-definable variables +(define-public dict-template-file-name "dict.html") +(define-public nea-template-file-name "nea.html") +(define-public monima-nea-template-file-name "monima.html") +(define-public target-language "el_GR") + +(define-public word-forms-reference '()) + +(define-public ref-loc #f) + +;; Number of colums in fuzzy search output +(define-public match-list-columns 4) +;;; End of user-definable variables + +(define-public (language-code lang) + (cond + ((string-index lang #\_) => + (lambda (len) + (substring lang 0 len))) + (else + lang))) + +(define-public (template-file lang template-file-name) + (string-append html-dir "/" (language-code lang) "/" template-file-name)) + +(define-public (make-cgi-name cgi-path . rest) + (apply + string-append + (cons + cgi-path + (let ((arglist (let ((lang (cgi:value "LANG"))) + (do ((ilist (if lang + (cons "LANG" (cons lang rest)) + rest) (cdr ilist)) + (i 1 (1+ i)) + (olist '())) + ((null? ilist) (if (null? olist) + olist + (reverse (cdr olist)))) + (set! olist (cons (car ilist) olist)) + (set! olist (cons + (if (odd? i) "=" "&") + olist)))))) + (if (null? arglist) + arglist + (cons "?" arglist)))))) + +(define-public (expand-template explist template) + "(expand-template EXPLIST TEMPLATE) + +Expands string TEMPLATE in accordance with EXPLIST. EXPLIST is a list +of elements: + + (cons WORD THUNK) + +Each occurrence of WORD in TEMPLATE is replaced with the return value of +THUNK. +" + (let loop ((template template)) + (cond + ((string-index template #\@) => + (lambda (w) + (display (substring template 0 w)) + (if (and (< (+ w 2) (string-length template)) + (char=? (string-ref template (1+ w)) #\@)) + (let ((end-pos (string-index template #\@ (+ w 2)))) + (if (and end-pos + (< (1+ end-pos) (string-length template)) + (char=? (string-ref template (1+ end-pos)) #\@)) + (let* ((name (substring template w (+ end-pos 2))) + (entry (assoc name explist))) + (cond + (entry + ((cdr entry)) + (loop (substring template (+ end-pos 2)))) + (else + (display "@@") + (loop (substring template (+ w 2)))))) + (begin + (display "@") + (loop (substring template (+ w 1)))))) + (begin + (display "@") + (loop (substring template (1+ w))))))) + (else + (display template))))) + + +(define-public (ellinika-cgi-init template-file-name) + ;;; Load the site defaults + (ellinika-config-setup) + + ;;; Load the language-specific defaults + (cond + ((cgi:value "LANG") => + (lambda (x) + (if (file-exists? (template-file x template-file-name)) + (set! target-language x))))) + ;;; Initialize i18n + (let ((x (locale-setup target-language "PACKAGE" locale-path))) + (if x + (set! target-language x)))) + +;;; End of cgi.scmi diff --git a/src/ellinika/config.scm4 b/src/ellinika/config.scm4 new file mode 100644 index 0000000..8032409 --- /dev/null +++ b/src/ellinika/config.scm4 @@ -0,0 +1,42 @@ +;;;; -*- scheme -*- +;;;; Greek Dictionary Web Engine +;;;; Copyright (C) 2005, 2007, 2008 Sergey Poznyakoff +;;;; +;;;; This program is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. +;;;; + +(define-module (ellinika config)) + +(define-public base-dir "PREFIX") +(define-public html-dir "HTMLDIR") +(define-public sysconf-dir "SYSCONFDIR") +(define-public locale-path "LOCALEDIR:/usr/share/locale:/usr/local/share/locale") +(define-public sql-iface "mysql") ;; SQL interface ("mysql" or "postgres") +;; SQL server hostname or a path to the UNIX socket +(define-public sql-host "localhost") +(define-public sql-port 3306) ;; SQL port number (0 for sockaddr_un + ;; connection) +(define-public sql-database "ellinika") ;; Name of the database +(define-public sql-username "gray") ;; Database user name +(define-public sql-password "") ;; Password for that user name + +(define-public config-file-name "ellinika.conf") + +(define-public (ellinika-config-setup) + ;;; Load the site defaults + (let ((rc-file (string-append sysconf-dir "/" config-file-name))) + (if (file-exists? rc-file) + (load rc-file)))) + + diff --git a/src/ellinika/dico.scm b/src/ellinika/dico.scm new file mode 100644 index 0000000..9383d1f --- /dev/null +++ b/src/ellinika/dico.scm @@ -0,0 +1,306 @@ +;;;; A Dico module for Greek Dictionary Web Engine -*- scheme -*- +;;;; Copyright (C) 2008 Sergey Poznyakoff +;;;; +;;;; This program is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. +;;;; + +(define-module (ellinika dico)) + +(use-modules (guile-user) + (ice-9 rdelim) + (gamma sql) + (xmltools dict) + (ellinika xlat) + (ellinika config)) + +(define (sql-error-handler err descr) + (format #t "cannot connect to the database") + (with-output-to-port + (current-error-port) + (lambda () + (display err) + (display ": ") + (display descr)))) + +(define (my-sql-query conn query) + (catch #t + (lambda () + (sql-query conn query)) + (lambda args + '()))) + +;; END of FIXME + +(define (dico-error err . rest) + (with-output-to-port + (current-error-port) + (lambda () + (display err) + (for-each + display + rest) + (newline)))) + +;; Dico interface + +(define (open-module name . rest) + (let ((target-language "el") + (type 'dict)) + (for-each (lambda (arg) + (let ((av (string-split arg #\=))) + (case (length av) + ((1) (cond + ((string=? (car av) "synonym") + (set! type 'synonim)) + ((string=? (car av) "antonym") + (set! type 'antonym)) + (else + (dico-error "Unknown option " (car av))))) + ((2) (cond + ((string=? (car av) "lang") + (set! target-language (cadr av))) + (else + (dico-error "Unknown option " (car av))))) + (else + (dico-error "Unknown option " (car av)))))) + (cdr rest)) + (let ((db-connection (sql-connect + sql-iface sql-host sql-port sql-database + sql-username sql-password))) + (sql-query db-connection "SET NAMES utf8") + (list db-connection target-language type)))) + +(defmacro dbh:conn (dbh) `(list-ref ,dbh 0)) +(defmacro dbh:lang (dbh) `(list-ref ,dbh 1)) +(defmacro dbh:type (dbh) `(list-ref ,dbh 2)) + +(define (close-module dbh) + (sql-connect-close (dbh:conn dbh))) + +(define descr-list + '(("pl" . "Słownik grecko-polski") + ("uk" . "Грецько-украЇнський словник") + ("ru" . "Греческо-русский словарь"))) + +(define (descr dbh) + (case (dbh:type dbh) + ((dict) + (let ((res (assoc (dbh:lang dbh) descr-list))) + (if res + (cdr res) + "Ellinika (no description available)"))) + ((antonym) + "Λέξικο αντωνύμων της Ελληνικής γλώσσας") + ((synonym) + "a"))) + +(define (info dbh) + (string-append "Ellinika - A greek dictionary.\n\ +See http://ellinika.gnu.org.ua/cgi-bin/dict.cgi?LANG=" + (dbh:lang dbh) "\n\ +Copyright © 2004, 2005, 2006, 2007, 2008 Sergey Poznyakoff\n\ +\n\ +Permission is granted to copy, distribute and/or modify this document\n\ +under the terms of the GNU Free Documentation License, Version 1.2 or\n\ +any later version published by the Free Software Foundation; with no\n\ +Invariant Sections, no Front-Cover and Back-Cover Texts")) + +(define (define-word-dict dbh key) + (let ((result '()) + (last-id -1) + (word '()) + (articles '())) + (for-each + (lambda (tuple) + (cond + ((not (= last-id (string->number (car tuple)))) + (if (not (null? articles)) + (set! result (cons + (cons word (reverse articles)) + result))) + (set! last-id (string->number (car tuple))) + (set! word (cons (list-ref tuple 1) + (list-ref tuple 2))); FIXME: forms? + (set! articles '()))) + (set! articles (cons + (cons (list-ref tuple 4) + (list-ref tuple 5)) + articles))) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT dict.ident,dict.word,pos.abbr,dict.forms,articles.subindex,articles.meaning " + "FROM dict,articles,pos WHERE dict.word=\"" + key + "\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' " + "AND dict.pos=pos.id AND pos.canonical='Y' ORDER BY dict.ident, articles.subindex"))) + (if (not (null? articles)) + (set! result (cons + (cons word (reverse articles)) + result))) + (cons 'define-word-dict (reverse result)))) + +(define (define-word-x dbh word link-type) + (let ((res (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict,links,articles " + "WHERE links.type='" link-type "' AND links.ident IN " + "(SELECT ident FROM dict WHERE word=\"" word "\") " + "AND dict.ident=links.xref " + "AND dict.ident=articles.ident and articles.lang=\"" + (dbh:lang dbh) + "\" ORDER BY word")))) + (if (and res (not (null? res))) + (cons 'define-word-x (list (cons word (map car res)))) + #f))) + +(define (define-word-antonym dbh word) + (define-word-x dbh word "XREF")) + +(define (define-word-synonym dbh word) + (define-word-x dbh word "ANT")) + +(define define-list + (list (cons 'dict define-word-dict) + (cons 'synonym define-word-synonym) + (cons 'antonym define-word-antonym))) + +(define (define-word dbh word) + (let ((key (ellinika:translate-input word)) + (x (assoc (dbh:type dbh) define-list))) + (if x + ((cdr x) dbh key) + #f))) + +(define (match-exact dbh strat word) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word=\"" + (ellinika:translate-input word) + "\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1"))) + +(define (match-prefix dbh strat word) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word LIKE \"" + (ellinika:translate-input word) + "%\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1"))) + +(define (match-suffix dbh strat word) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word LIKE \"%" + (ellinika:translate-input word) + "\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1"))) + +(define (match-extnd-regex dbh strat word) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict, articles WHERE dict.word regexp \"" + (ellinika:translate-input word) + "\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1"))) + +(define (match-basic-regex dbh strat word) + #f) ;FIXME + +(define (match-default dbh strat word) + (my-sql-query + (dbh:conn dbh) + (string-append + "SELECT DISTINCT dict.word FROM dict,articles WHERE dict.sound LIKE \"" + (ellinika:sounds-like word) + "%\" AND dict.ident=articles.ident " + "AND articles.lang='" (dbh:lang dbh) "' ORDER BY 1"))) + + +(define strategy-list + (list (cons "exact" match-exact) + (cons "prefix" match-prefix) + (cons "suffix" match-suffix) + (cons "re" match-extnd-regex) + (cons "regexp" match-basic-regex))) + +(define (match-word dbh strat word) + (let ((sp (assoc (dico-strat-name strat) strategy-list))) + (let ((res (if sp + ((cdr sp) dbh strat word) + (match-default dbh strat word)))) + (if res + (cons #f (map car res)) + #f)))) + +(define (output res n) + (let ((type (car res)) + (contents (list-ref (cdr res) n))) + (case type + ((define-word-dict) + (let ((word-pair (car contents)) + (defn (cdr contents))) + (display (car word-pair)) + (display ", <") + (display (cdr word-pair)) + (display ">") + (for-each + (lambda (article) + (newline) + (display (1+ (string->number (car article)))) + (display ". ") + (display (cdr article)) + (display ";")) + defn))) + ((define-word-x) + (let ((word (car contents)) + (defn (cdr contents))) + (display word) + (display " -- ") + (display (car defn)) + (if (cdr defn) + (for-each + (lambda (elt) + (display ", ") + (display elt)) + (cdr defn))))) + (else + (display contents))))) + +(define (result-count res) + (length (cdr res))) + +(define-public (dico-ellinika-init arg) + (list (cons "open" open-module) + (cons "close" close-module) + (cons "descr" descr) + (cons "info" info) + (cons "define" define-word) + (cons "match" match-word) + (cons "output" output) + (cons "result-count" result-count))) + +;; +;; Setup +(ellinika-config-setup) +(dico-register-strat "suffix" "Match word suffixes") + + + diff --git a/src/ellinika/i18n.scm b/src/ellinika/i18n.scm new file mode 100644 index 0000000..474c8c9 --- /dev/null +++ b/src/ellinika/i18n.scm @@ -0,0 +1,308 @@ +;;;; This file is part of Greek Dictionary Web Engine +;;;; Copyright (C) 2006, 2007 Sergey Poznyakoff +;;;; +;;;; This program is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3, or (at your option) +;;;; any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +(define-module (ellinika i18n) + #:use-syntax (ice-9 syncase) + #:export-syntax (_) + #:export (locale-setup)) + +(define-syntax _ + (syntax-rules () + ((_ msg) (gettext msg)))) + + +;;; ISO 639 language code => ISO 3166 country code +;;; The corresponding country codes where selected using the following +;;; principles: +;;; 1. If the language is spoken in only one country, this country code is +;;; used. +;;; 2. If the language is spoken in more than one country, select the code of +;;; that country where it has official status. +;;; 3. If the language does not have official status, select the country with +;;; greater number of speakers +;;; +;;; The table does not list artificial languages (Esperanto, Ido, Interlingua, +;;; etc), as the notion of a territory does not apply to them. +;;; +;;; If you find any inconsistency in this table, please let me know. +;;; + +(define defterr + '((aa . "ET") ; Afar + (ab . "GE") ; Abkhazian + (ae . "IR") ; Avestan + (af . "ZA") ; Afrikaans + (ak . "GH") ; Akan # or ak_CI + (am . "ET") ; Amharic + (an . "ES") ; Aragonese + (ar . "SA") ; Arabic + (as . "IN") ; Assamese + (av . "RU") ; Avaric # Spoken mainly in Dagestan + (ay . "BO") ; Aymara + (az . "AZ") ; Azerbaijani + + (ba . "RU") ; Bashkir + (be . "BY") ; Byelorussian; Belarusian + (bg . "BG") ; Bulgarian + (bh . "IN") ; Bihari + (bi . "VU") ; Bislama + (bm . "ML") ; Bambara + (bn . "BD") ; Bengali; Bangla + (bo . "CN") ; Tibetan + (br . "FR") ; Breton + (bs . "BA") ; Bosnian + + (ca . "ES") ; Catalan + (ce . "RU") ; Chechen + (ch . "GU") ; Chamorro + (co . "FR") ; Corsican + (cr . "CA") ; Cree + (cs . "CZ") ; Czech + (cu . "BG") ; Church Slavic + (cv . "RU") ; Chuvash + (cy . "GB") ; Welsh + + (da . "DK") ; Danish + (de . "DE") ; German + (dv . "MV") ; Divehi + (dz . "BT") ; Dzongkha; Bhutani + + (ee . "GH") ; @'Ew@'e + (el . "GR") ; Greek + (en . "US") ; English + (es . "ES") ; Spanish + (et . "EE") ; Estonian + (eu . "ES") ; Basque + + (fa . "IR") ; Persian + (ff . "CM") ; Fulah # Also NG, MR, and many others + (fi . "FI") ; Finnish + (fj . "FJ") ; Fijian; Fiji + (fo . "FO") ; Faroese + (fr . "FR") ; French + (fy . "NL") ; Frisian + + (ga . "IE") ; Irish + (gd . "GB") ; Scots; Gaelic + (gl . "ES") ; Gallegan; Galician + (gn . "PE") ; Guarani + (gu . "IN") ; Gujarati + (gv . "GB") ; Manx + + (ha . "NG") ; Hausa (?) + (he . "IL") ; Hebrew (formerly iw) + (hi . "IN") ; Hindi + (ho . "PG") ; Hiri Motu + (hr . "HR") ; Croatian + (ht . "HT") ; Haitian; Haitian Creole + (hu . "HU") ; Hungarian + (hy . "AM") ; Armenian + (hz . "NA") ; Herero + + (id . "ID") ; Indonesian (formerly in) + (ig . "NG") ; Igbo + (ii . "CN") ; Sichuan Yi + (ik . "CA") ; Inupiak + (is . "IS") ; Icelandic + (it . "IT") ; Italian + (iu . "CA") ; Inuktitut + + (ja . "JP") ; Japanese + (jv . "ID") ; Javanese + + (ka . "GE") ; Georgian + (kg . "CG") ; Kongo # also CD and AO + (ki . "KE") ; Kikuyu + (kj . "AO") ; Kuanyama + (kk . "KZ") ; Kazakh + (kl . "DK") ; Kalaallisut; Greenlandic + (km . "KH") ; Khmer; Cambodian + (kn . "IN") ; Kannada + (ko . "KR") ; Korean + (kr . "NG") ; Kanuri + (ks . "IN") ; Kashmiri + (ku . "IQ") ; Kurdish + (kv . "RU") ; Komi + (kw . "GB") ; Cornish + (ky . "KG") ; Kirghiz + + (la . "VA") ; Latin + (lb . "LU") ; Letzeburgesch + (lg . "UG") ; Ganda + (li . "NL") ; Limburgish; Limburger; Limburgan + (ln . "CD") ; Lingala + (lo . "LA") ; Lao; Laotian + (lt . "LT") ; Lithuanian + (lu . "CD") ; Luba-Katanga + (lv . "LV") ; Latvian; Lettish + + (mg . "MG") ; Malagasy + (mh . "MH") ; Marshall + (mi . "NZ") ; Maori + (mk . "MK") ; Macedonian + (ml . "IN") ; Malayalam + (mn . "MN") ; Mongolian + (mo . "MD") ; Moldavian + (mr . "IN") ; Marathi + (ms . "MY") ; Malay + (mt . "MT") ; Maltese + (my . "MM") ; Burmese + + (na . "NR") ; Nauru + (nb . "NO") ; Norwegian Bokm@aa{}l + (nd . "ZA") ; Ndebele, North + (ne . "NP") ; Nepali + (ng . "NA") ; Ndonga + (nl . "NL") ; Dutch + (nn . "NO") ; Norwegian Nynorsk + (no . "NO") ; Norwegian + (nr . "ZA") ; Ndebele, South + (nv . "US") ; Navajo + (ny . "MW") ; Chichewa; Nyanja + + (oc . "FR") ; Occitan; Proven@,{c}al + (oj . "CA") ; Ojibwa + (om . "ET") ; (Afan) Oromo + (or . "IN") ; Oriya + (os . "RU") ; Ossetian; Ossetic + + (pa . "IN") ; Panjabi; Punjabi + (pi . "IN") ; Pali + (pl . "PL") ; Polish + (ps . "AF") ; Pashto, Pushto + (pt . "PT") ; Portuguese + + (qu . "PE") ; Quechua + + (rm . "FR") ; Rhaeto-Romance + (rn . "BI") ; Rundi; Kirundi + (ro . "RO") ; Romanian + (ru . "RU") ; Russian + (rw . "RW") ; Kinyarwanda + + (sa . "IN") ; Sanskrit + (sc . "IT") ; Sardinian + (sd . "PK") ; Sindhi + (se . "NO") ; Northern Sami + (sg . "CF") ; Sango; Sangro + (si . "LK") ; Sinhalese + (sk . "SK") ; Slovak + (sl . "SI") ; Slovenian + (sm . "WS") ; Samoan + (sn . "ZW") ; Shona + (so . "SO") ; Somali + (sq . "AL") ; Albanian + (sr . "CS") ; Serbian + (ss . "SZ") ; Swati; Siswati + (st . "LS") ; Sesotho; Sotho, Southern + (su . "ID") ; Sundanese + (sv . "SE") ; Swedish + (sw . "TZ") ; Swahili # Also KE + + (ta . "IN") ; Tamil + (te . "IN") ; Telugu + (tg . "TJ") ; Tajik + (th . "TH") ; Thai + (ti . "ER") ; Tigrinya + (tk . "TM") ; Turkmen + (tl . "PH") ; Tagalog + (tn . "BW") ; Tswana; Setswana + (to . "ZM") ; Tonga (?) # Also ZW ; MW + (tr . "TR") ; Turkish + (ts . "MZ") ; Tsonga # ZA SZ XW + (tt . "RU") ; Tatar + (tw . "GH") ; Twi + (ty . "PF") ; Tahitian + + (ug . "RU") ; Uighur + (uk . "UA") ; Ukrainian + (ur . "IN") ; Urdu + (uz . "UZ") ; Uzbek + + (ve . "ZA") ; Venda + (vi . "VN") ; Vietnamese + + (wa . "FR") ; Walloon + (wo . "SN") ; Wolof + + (xh . "ZA") ; Xhosa + + (yi . "IL") ; Yiddish (formerly ji) + (yo . "NG") ; Yoruba + + (za . "CN") ; Zhuang + (zh . "CN") ; Chinese + (zu . "ZA"))); Zulu + +(define (supported-locale-dir lang textdomain locale-path) + (call-with-current-continuation + (lambda (return) + (for-each + (lambda (dir) + (let ((name (string-append dir "/" lang "/LC_MESSAGES/" textdomain))) + (if (or (access? (string-append name ".mo") R_OK) + (access? (string-append name ".gmo") R_OK)) + (return dir)))) + (string-split locale-path #\:)) + #f))) + +(define (locale-setup lang domain-name locale-path) + (catch 'system-error + (lambda () + (cond + ((not lang) + (setenv "LC_ALL" "C") + (setlocale LC_ALL "C") + #f) + (else + (let ((curlocale (if (> (string-length lang) 2) + (string-downcase (substring lang 0 2)) + lang)) + (terr #f) + (sublocale #f) + (domaindir #f)) + + (cond + ((and (> (string-length lang) 2) (char=? (string-ref lang 2) #\-)) + (set! terr (string-upcase (substring lang 3 2))) + (set! sublocale (string-append curlocale "_" terr))) + ((assoc-ref defterr (string->symbol curlocale)) => + (lambda (elt) + (set! sublocale curlocale) + (set! terr elt))) + (else + (set! sublocale curlocale) + (set! terr "XX"))) ; Hack for languages without defined territory. + + (let ((domaindir + (supported-locale-dir sublocale domain-name locale-path))) + (cond + (domaindir + (let ((locale (string-append curlocale "_" terr ".UTF-8"))) + (setenv "LC_ALL" locale) + (setlocale LC_ALL locale) + (textdomain domain-name) + (bindtextdomain domain-name domaindir) + locale)) + ((setenv "LC_ALL" "C") + (setlocale LC_ALL "C") + #f))))))) + (lambda args + #f))) + + + + diff --git a/src/ellinika/xlat.scm b/src/ellinika/xlat.scm new file mode 100644 index 0000000..c51edaa --- /dev/null +++ b/src/ellinika/xlat.scm @@ -0,0 +1,309 @@ +;;;; This file is part of Ellinika +;;;; Copyright (C) 2004, 2007 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 3 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program. If not, see <http://www.gnu.org/licenses/>. +;;;; +(define-module (ellinika xlat)) + +(define greek-postfix-map + (list + (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ") + (cons "ί" "ΐ") (cons "ύ" "ΰ") + (cons "θ" "ϊ") (cons "θ" "ϋ"))) + (cons #\' (list (cons "α" "ά") (cons "Α" "Ά") + (cons "ε" "έ") (cons "Ε" "Έ") + (cons "η" "ή") (cons "Η" "Ή") + (cons "ι" "ί") (cons "Ι" "Ί") + (cons "ϊ" "ΐ") (cons "Ϊ" "Ϊ") + (cons "ο" "ό") (cons "Ο" "Ό") + (cons "υ" "ύ") (cons "Υ" "Ύ") + (cons "θ" "ύ") (cons "Θ" "Ύ") + (cons "ϋ" "ΰ") (cons "Ϋ" "Ϋ") + (cons "ω" "ώ") (cons "Ω" "Ώ"))) + (cons #\s (list (cons "κ" "ξ") (cons "π" "ψ"))))) + +(define greek-kbd-map + (list (cons #\a "α") + (cons #\A "Α") + (cons #\b "β") + (cons #\B "Β") + (cons #\g "γ") + (cons #\G "Γ") + (cons #\d "δ") + (cons #\D "Δ") + (cons #\e "ε") + (cons #\E "Ε") + (cons #\z "ζ") + (cons #\Z "Ζ") + (cons #\h "η") + (cons #\H "Η") + (cons #\u "θ") + (cons #\U "Θ") + (cons #\i "ι") + (cons #\I "Ι") + (cons #\k "κ") + (cons #\K "Κ") + (cons #\l "λ") + (cons #\L "Λ") + (cons #\m "μ") + (cons #\M "Μ") + (cons #\n "ν") + (cons #\M "Ν") + (cons #\j "ξ") + (cons #\J "Ξ") + (cons #\o "ο") + (cons #\O "Ο") + (cons #\p "π") + (cons #\P "Π") + (cons #\r "ρ") + (cons #\R "Ρ") + (cons #\s "σ") + (cons #\S "Σ") + (cons #\w "ς") + (cons #\W "Σ") + (cons #\t "τ") + (cons #\T "Τ") + (cons #\y "υ") + (cons #\Y "Υ") + (cons #\f "φ") + (cons #\F "Φ") + (cons #\x "χ") + (cons #\X "Χ") + (cons #\c "ψ") + (cons #\C "Ψ") + (cons #\v "ω") + (cons #\V "Ω"))) + + +(define (after-thita? c) + (member c (list #\a #\e #\i #\o #\y #\v))) + +;;; Given input string in Greek transliteration, convert it to +;;; an equivalent Greek word in UTF-8 encoding. The input string is +;;; supposed to follow the traditional Greek keyboard layout: +;;; +;;; +----------------------------------------------------------------+ +;;; | 1! | 2@ | 3# | 4$ | 5% | 6^ | 7& | 8* | 9( | 0) | -_ | =+ | `~ | +;;; +----------------------------------------------------------------+ +;;; | ·― | ςΣ | εΕ | ρΡ | τΤ | υΥ | θΘ | ιΙ | οΟ | πΠ | [{ | ]} | +;;; +------------------------------------------------------------+ +;;; | αΑ | σΣ | δΔ | φΦ | γΓ | ηΗ | ξΞ | κΚ | λΛ | ΄¨ | '" | \| | +;;; +-----------------------------------------------------------+ +;;; | ζΖ | χΧ | ψΨ | ωΩ | βΒ | νΝ | μΜ | ,; | .: | /? | +;;; +-------------------------------------------------+ +;;; +-----------------------------+ +;;; | space bar | +;;; +-----------------------------+ +;;; +;;; +;;; The followin escape sequences are recognized: +;;; +;;; '\ks' -> 'ξ' +;;; '\ps' -> 'ψ' +;;; '\th' -> 'θ' +;;; +;;; Additionally some spell fixing heuristics is applied: +;;; +;;; 's' at the end of the word -> 'ς' +;;; 'w' anywhere but at the end of the word -> 'ω' +;;; 'ks' -> 'ξ' +;;; 'ps' -> 'ψ' +;;; "th" -> 'θ' unless followed by a consonant +;;; "u'" -> 'ύ' +;;; +;;; FIXME: The case of less obvious spelling errors, like e.g. 'ou' -> 'ου' +;;; will be handled by later spelling corrections if fuzzy search is +;;; enabled +(define-public (ellinika:translate-kbd str) + (apply + string-append + (do ((sl (string->list str) (cdr sl)) + (l '())) + ((null? sl) (reverse l)) + (letrec ((decode-kbd-map + (lambda () + (let ((g (assoc + (let ((c (car sl))) + (cond + ((and (char=? c #\w) (not (null? (cdr sl)))) + #\v) + ((and (char=? c #\s) (null? (cdr sl))) + #\w) + (else + c))) + greek-kbd-map))) + (if g + (set! l (cons (cdr g) l)) + (if (char=? (car sl) #\\) + (cond + ((> (length sl) 2) + (cond + ((char=? (car (cdr (cdr sl))) #\s) + (let ((c (car (cdr sl)))) + (cond + ((char=? c #\k) + (set! sl (cdr (cdr sl))) + (set! l (cons "ξ" l))) + ((char=? c #\p) + (set! sl (cdr (cdr sl))) + (set! l (cons "ψ" l))) + (else + (set! sl (cdr sl)))))) + ((and (char=? (car (cdr sl))) + (char=? (car (cdr (cdr sl))) #\h)) + (set! sl (cdr (cdr sl))) + (set! l (cons "θ" l))))) + + (else + (set! l (cons (string (car sl)) l)))))))))) + (if (null? l) + (decode-kbd-map) + (cond + ((char=? (car sl) #\h) + (if (and (not (null? (cdr sl))) + (after-thita? (car (cdr sl)))) + (set-car! l "θ") + (decode-kbd-map))) + ((assoc (car sl) greek-postfix-map) => + (lambda (cmap) + (let ((x (assoc (car l) (cdr cmap)))) + (if x + (set-car! l (cdr x)) + (decode-kbd-map))))) + (else + (decode-kbd-map)))))))) + + +;; Translate the input string to UTF-8 if necessary. +(define-public (ellinika:translate-input input) + (if (and input + (not (string-null? input)) + (< (char->integer (string-ref input 0)) 127)) + (ellinika:translate-kbd input) + input)) + + + +(define transcription-lis |