From 5c3839c300b45980ac9c106cfa22ce2a68237aa4 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Fri, 8 Oct 2004 19:56:02 +0000 Subject: New file git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@204 941c8c0f-9102-463b-b60b-cd22ce0e6858 --- ellinika/Makefile.am | 20 ++++ ellinika/xlat.scm | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++ scm/Makefile.am | 29 +++++ 3 files changed, 357 insertions(+) create mode 100644 ellinika/Makefile.am create mode 100644 ellinika/xlat.scm create mode 100644 scm/Makefile.am diff --git a/ellinika/Makefile.am b/ellinika/Makefile.am new file mode 100644 index 0000000..394c660 --- /dev/null +++ b/ellinika/Makefile.am @@ -0,0 +1,20 @@ +# This file is part of Ellinika project. +# Copyright (C) 2004 Sergey Poznyakoff +# +# Ellinika is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ellinika is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ellinika; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +pkgmoddir=@GUILE_SITE@ +pkgmod_DATA=xlat.scm + diff --git a/ellinika/xlat.scm b/ellinika/xlat.scm new file mode 100644 index 0000000..401a9a6 --- /dev/null +++ b/ellinika/xlat.scm @@ -0,0 +1,308 @@ +;;;; This file is part of Ellinika +;;;; Copyright (C) 2004 Sergey Poznyakoff +;;;; +;;;; Ellinika is free software; you can redistribute it and/or modify +;;;; it under the terms of the GNU General Public License as published by +;;;; the Free Software Foundation; either version 2 of the License, or +;;;; (at your option) any later version. +;;;; +;;;; Ellinika is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with Ellinika; if not, write to the Free Software +;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +;;;; +(define-module (ellinika xlat)) + +(define greek-postfix-map + (list + (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ") + (cons "ί" "ΐ") (cons "ύ" "ΰ") + (cons "θ" "ϊ") (cons "θ" "ϋ"))) + (cons #\' (list (cons "α" "ά") (cons "Α" "Ά") + (cons "ε" "έ") (cons "Ε" "Έ") + (cons "η" "ή") (cons "Η" "Ή") + (cons "ι" "ί") (cons "Ι" "Ί") + (cons "ϊ" "ΐ") (cons "Ϊ" "Ϊ") + (cons "ο" "ό") (cons "Ο" "Ό") + (cons "υ" "ύ") (cons "Υ" "Ύ") + (cons "θ" "ύ") (cons "Θ" "Ύ") + (cons "ϋ" "ΰ") (cons "Ϋ" "Ϋ") + (cons "ω" "ώ") (cons "Ω" "Ώ"))) + (cons #\s (list (cons "κ" "ξ") (cons "π" "ψ"))))) + +(define greek-kbd-map + (list (cons #\a "α") + (cons #\A "Α") + (cons #\b "β") + (cons #\B "Β") + (cons #\g "γ") + (cons #\G "Γ") + (cons #\d "δ") + (cons #\D "Δ") + (cons #\e "ε") + (cons #\E "Ε") + (cons #\z "ζ") + (cons #\Z "Ζ") + (cons #\h "η") + (cons #\H "Η") + (cons #\u "θ") + (cons #\U "Θ") + (cons #\i "ι") + (cons #\I "Ι") + (cons #\k "κ") + (cons #\K "Κ") + (cons #\l "λ") + (cons #\L "Λ") + (cons #\m "μ") + (cons #\M "Μ") + (cons #\n "ν") + (cons #\M "Ν") + (cons #\j "ξ") + (cons #\J "Ξ") + (cons #\o "ο") + (cons #\O "Ο") + (cons #\p "π") + (cons #\P "Π") + (cons #\r "ρ") + (cons #\R "Ρ") + (cons #\s "σ") + (cons #\S "Σ") + (cons #\w "ς") + (cons #\W "Σ") + (cons #\t "τ") + (cons #\T "Τ") + (cons #\y "υ") + (cons #\Y "Υ") + (cons #\f "φ") + (cons #\F "Φ") + (cons #\x "χ") + (cons #\X "Χ") + (cons #\c "ψ") + (cons #\C "Ψ") + (cons #\v "ω") + (cons #\V "Ω"))) + + +(define (after-thita? c) + (member c (list #\a #\e #\i #\o #\y #\v))) + +;;; Given input string in Greek transliteration, convert it to +;;; an equivalent Greek word in UTF-8 encoding. The input string is +;;; supposed to follow the traditional Greek keyboard layout: +;;; +;;; +----------------------------------------------------------------+ +;;; | 1! | 2@ | 3# | 4$ | 5% | 6^ | 7& | 8* | 9( | 0) | -_ | =+ | `~ | +;;; +----------------------------------------------------------------+ +;;; | ·― | ςΣ | εΕ | ρΡ | τΤ | υΥ | θΘ | ιΙ | οΟ | πΠ | [{ | ]} | +;;; +------------------------------------------------------------+ +;;; | αΑ | σΣ | δΔ | φΦ | γΓ | ηΗ | ξΞ | κΚ | λΛ | ΄¨ | '" | \| | +;;; +-----------------------------------------------------------+ +;;; | ζΖ | χΧ | ψΨ | ωΩ | βΒ | νΝ | μΜ | ,; | .: | /? | +;;; +-------------------------------------------------+ +;;; +-----------------------------+ +;;; | space bar | +;;; +-----------------------------+ +;;; +;;; +;;; The followin escape sequences are recognized: +;;; +;;; '\ks' -> 'ξ' +;;; '\ps' -> 'ψ' +;;; '\th' -> 'θ' +;;; +;;; Additionally some spell fixing heuristics is applied: +;;; +;;; 's' at the end of the word -> 'ς' +;;; 'w' anywhere but at the end of the word -> 'ω' +;;; 'ks' -> 'ξ' +;;; 'ps' -> 'ψ' +;;; "th" -> 'θ' unless followed by a consonant +;;; "u'" -> 'ύ' +;;; +;;; FIXME: The case of less obvious spelling errors, like e.g. 'ou' -> 'ου' +;;; will be handled by later spelling corrections if fuzzy search is +;;; enabled +(define-public (ellinika:translate-kbd str) + (apply + string-append + (do ((sl (string->list str) (cdr sl)) + (l '())) + ((null? sl) (reverse l)) + (letrec ((decode-kbd-map + (lambda () + (let ((g (assoc + (let ((c (car sl))) + (cond + ((and (char=? c #\w) (not (null? (cdr sl)))) + #\v) + ((and (char=? c #\s) (null? (cdr sl))) + #\w) + (else + c))) + greek-kbd-map))) + (if g + (set! l (cons (cdr g) l)) + (if (char=? (car sl) #\\) + (cond + ((> (length sl) 2) + (cond + ((char=? (car (cdr (cdr sl))) #\s) + (let ((c (car (cdr sl)))) + (cond + ((char=? c #\k) + (set! sl (cdr (cdr sl))) + (set! l (cons "ξ" l))) + ((char=? c #\p) + (set! sl (cdr (cdr sl))) + (set! l (cons "ψ" l))) + (else + (set! sl (cdr sl)))))) + ((and (char=? (car (cdr sl))) + (char=? (car (cdr (cdr sl))) #\h)) + (set! sl (cdr (cdr sl))) + (set! l (cons "θ" l))))) + + (else + (set! l (cons (string (car sl)) l)))))))))) + (if (null? l) + (decode-kbd-map) + (cond + ((char=? (car sl) #\h) + (if (and (not (null? (cdr sl))) + (after-thita? (car (cdr sl)))) + (set-car! l "θ") + (decode-kbd-map))) + ((assoc (car sl) greek-postfix-map) => + (lambda (cmap) + (let ((x (assoc (car l) (cdr cmap)))) + (if x + (set-car! l (cdr x)) + (decode-kbd-map))))) + (else + (decode-kbd-map)))))))) + + +;; Translate the input string to UTF-8 if necessary. +(define-public (ellinika:translate-input input) + (if (< (char->integer (string-ref input 0)) 127) + (ellinika:translate-kbd input) + input)) + + + +(define transcription-list + (list + (cons "μπ" "b" ) + (cons "γγ" "g" ) + (cons "γκ" "g" ) + (cons "γχ" "g" ) + (cons "ντ" "d" ) + (cons "αι" "e" ) + (cons "αί" "e" ) + (cons "αυ" "au") + (cons "αύ" "au") + (cons "ου" "ou") + (cons "ού" "ou") + (cons "ευ" "eu") + (cons "εύ" "eu") + (cons "οι" "i" ) + (cons "ει" "i" ) + (cons "εί" "i" ) + (cons "υι" "i" ) + + (cons "α" "a" ) + (cons "Α" "a" ) + (cons "Ά" "a" ) + (cons "ά" "a" ) + (cons "β" "b" ) + (cons "Β" "b" ) + (cons "γ" "g" ) + (cons "Γ" "g" ) + (cons "δ" "d" ) + (cons "Δ" "d" ) + (cons "ε" "e" ) + (cons "Ε" "e" ) + (cons "Έ" "e" ) + (cons "έ" "e" ) + (cons "ζ" "z" ) + (cons "Ζ" "z" ) + (cons "η" "i" ) + (cons "Η" "i" ) + (cons "Ή" "i" ) + (cons "ή" "i" ) + (cons "θ" "t" ) + (cons "Θ" "t" ) + (cons "ι" "i" ) + (cons "Ι" "i" ) + (cons "Ί" "i" ) + (cons "ί" "i" ) + (cons "κ" "k" ) + (cons "Κ" "k" ) + (cons "λ" "l" ) + (cons "Λ" "l" ) + (cons "μ" "m" ) + (cons "Μ" "m" ) + (cons "ν" "n" ) + (cons "Ν" "n" ) + (cons "ξ" "x" ) + (cons "Ξ" "x" ) + (cons "ο" "o" ) + (cons "Ο" "o" ) + (cons "Ό" "o" ) + (cons "ό" "o" ) + (cons "π" "p" ) + (cons "Π" "p" ) + (cons "ρ" "r" ) + (cons "Ρ" "r" ) + (cons "σ" "s" ) + (cons "Σ" "s" ) + (cons "ς" "s" ) + (cons "τ" "t" ) + (cons "Τ" "t" ) + (cons "υ" "i" ) + (cons "Υ" "i" ) + (cons "Ύ" "i" ) + (cons "ύ" "i" ) + (cons "φ" "f" ) + (cons "Φ" "f" ) + (cons "χ" "h" ) + (cons "Χ" "h" ) + (cons "ψ" "P" ) + (cons "Ψ" "P" ) + (cons "ω" "o" ) + (cons "Ω" "o" ) + (cons "Ώ" "o" ) + (cons "ώ" "o" ) + (cons "Ϊ" "i" ) + (cons "ΐ" "i" ) + (cons "Ϋ" "i" ) + (cons "ΰ" "i" ))) + +(define-public (ellinika:sounds-like str) + (let ((len (string-length str))) + (do ((i 0) + (sl '())) + ((= i len) (apply string-append (reverse sl))) + (set! sl (cons + (cond + ((and (<= (+ i 4) len) + (assoc (substring str i (+ i 4)) transcription-list)) => + (lambda (x) + (set! i (+ i 4)) + (cdr x))) + ((and (<= (+ i 2) len) + (assoc (substring str i (+ i 2)) transcription-list)) => + (lambda (x) + (set! i (+ i 2)) + (cdr x))) + (else + (set! i (1+ i)) + (substring str (- i 1) i))) + sl))))) + +;;;; End of ellinika.scm + diff --git a/scm/Makefile.am b/scm/Makefile.am new file mode 100644 index 0000000..1850e08 --- /dev/null +++ b/scm/Makefile.am @@ -0,0 +1,29 @@ +# This file is part of Ellinika project. +# Copyright (C) 2004 Sergey Poznyakoff +# +# Ellinika is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ellinika is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ellinika; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +bin_SCRIPTS=dictrans +CLEANFILES=dictrans.sed dictrans +EXTRA_DIST=dictrans.scm + +dictrans.sed: Makefile + echo 's,=GUILE_BINDIR=,$(GUILE_BINDIR),' > $@ + s=`echo $(pkgmoddir) | sed 's,\(.*\)/.*,\1,'` ; \ + echo "s,\;=UPDPATH=,@UPDPATH@," >> $@ + +dictrans: $(srcdir)/dictrans.scm dictrans.sed + sed -f dictrans.sed $(srcdir)/dictrans.scm > $@ + chmod +x $@ -- cgit v1.2.1