aboutsummaryrefslogtreecommitdiff
path: root/ellinika/xlat.scm
diff options
context:
space:
mode:
Diffstat (limited to 'ellinika/xlat.scm')
-rw-r--r--ellinika/xlat.scm308
1 files changed, 308 insertions, 0 deletions
diff --git a/ellinika/xlat.scm b/ellinika/xlat.scm
new file mode 100644
index 0000000..401a9a6
--- /dev/null
+++ b/ellinika/xlat.scm
@@ -0,0 +1,308 @@
+;;;; This file is part of Ellinika
+;;;; Copyright (C) 2004 Sergey Poznyakoff
+;;;;
+;;;; Ellinika is free software; you can redistribute it and/or modify
+;;;; it under the terms of the GNU General Public License as published by
+;;;; the Free Software Foundation; either version 2 of the License, or
+;;;; (at your option) any later version.
+;;;;
+;;;; Ellinika is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License
+;;;; along with Ellinika; if not, write to the Free Software
+;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+;;;;
+(define-module (ellinika xlat))
+
+(define greek-postfix-map
+ (list
+ (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ")
+ (cons "ί" "ΐ") (cons "ύ" "ΰ")
+ (cons "θ" "ϊ") (cons "θ" "ϋ")))
+ (cons #\' (list (cons "α" "ά") (cons "Α" "Ά")
+ (cons "ε" "έ") (cons "Ε" "Έ")
+ (cons "η" "ή") (cons "Η" "Ή")
+ (cons "ι" "ί") (cons "Ι" "Ί")
+ (cons "ϊ" "ΐ") (cons "Ϊ" "Ϊ")
+ (cons "ο" "ό") (cons "Ο" "Ό")
+ (cons "υ" "ύ") (cons "Υ" "Ύ")
+ (cons "θ" "ύ") (cons "Θ" "Ύ")
+ (cons "ϋ" "ΰ") (cons "Ϋ" "Ϋ")
+ (cons "ω" "ώ") (cons "Ω" "Ώ")))
+ (cons #\s (list (cons "κ" "ξ") (cons "π" "ψ")))))
+
+(define greek-kbd-map
+ (list (cons #\a "α")
+ (cons #\A "Α")
+ (cons #\b "β")
+ (cons #\B "Β")
+ (cons #\g "γ")
+ (cons #\G "Γ")
+ (cons #\d "δ")
+ (cons #\D "Δ")
+ (cons #\e "ε")
+ (cons #\E "Ε")
+ (cons #\z "ζ")
+ (cons #\Z "Ζ")
+ (cons #\h "η")
+ (cons #\H "Η")
+ (cons #\u "θ")
+ (cons #\U "Θ")
+ (cons #\i "ι")
+ (cons #\I "Ι")
+ (cons #\k "κ")
+ (cons #\K "Κ")
+ (cons #\l "λ")
+ (cons #\L "Λ")
+ (cons #\m "μ")
+ (cons #\M "Μ")
+ (cons #\n "ν")
+ (cons #\M "Ν")
+ (cons #\j "ξ")
+ (cons #\J "Ξ")
+ (cons #\o "ο")
+ (cons #\O "Ο")
+ (cons #\p "π")
+ (cons #\P "Π")
+ (cons #\r "ρ")
+ (cons #\R "Ρ")
+ (cons #\s "σ")
+ (cons #\S "Σ")
+ (cons #\w "ς")
+ (cons #\W "Σ")
+ (cons #\t "τ")
+ (cons #\T "Τ")
+ (cons #\y "υ")
+ (cons #\Y "Υ")
+ (cons #\f "φ")
+ (cons #\F "Φ")
+ (cons #\x "χ")
+ (cons #\X "Χ")
+ (cons #\c "ψ")
+ (cons #\C "Ψ")
+ (cons #\v "ω")
+ (cons #\V "Ω")))
+
+
+(define (after-thita? c)
+ (member c (list #\a #\e #\i #\o #\y #\v)))
+
+;;; Given input string in Greek transliteration, convert it to
+;;; an equivalent Greek word in UTF-8 encoding. The input string is
+;;; supposed to follow the traditional Greek keyboard layout:
+;;;
+;;; +----------------------------------------------------------------+
+;;; | 1! | 2@ | 3# | 4$ | 5% | 6^ | 7& | 8* | 9( | 0) | -_ | =+ | `~ |
+;;; +----------------------------------------------------------------+
+;;; | ·― | ςΣ | εΕ | ρΡ | τΤ | υΥ | θΘ | ιΙ | οΟ | πΠ | [{ | ]} |
+;;; +------------------------------------------------------------+
+;;; | αΑ | σΣ | δΔ | φΦ | γΓ | ηΗ | ξΞ | κΚ | λΛ | ΄¨ | '" | \| |
+;;; +-----------------------------------------------------------+
+;;; | ζΖ | χΧ | ψΨ | ωΩ | βΒ | νΝ | μΜ | ,; | .: | /? |
+;;; +-------------------------------------------------+
+;;; +-----------------------------+
+;;; | space bar |
+;;; +-----------------------------+
+;;;
+;;;
+;;; The followin escape sequences are recognized:
+;;;
+;;; '\ks' -> 'ξ'
+;;; '\ps' -> 'ψ'
+;;; '\th' -> 'θ'
+;;;
+;;; Additionally some spell fixing heuristics is applied:
+;;;
+;;; 's' at the end of the word -> 'ς'
+;;; 'w' anywhere but at the end of the word -> 'ω'
+;;; 'ks' -> 'ξ'
+;;; 'ps' -> 'ψ'
+;;; "th" -> 'θ' unless followed by a consonant
+;;; "u'" -> 'ύ'
+;;;
+;;; FIXME: The case of less obvious spelling errors, like e.g. 'ou' -> 'ου'
+;;; will be handled by later spelling corrections if fuzzy search is
+;;; enabled
+(define-public (ellinika:translate-kbd str)
+ (apply
+ string-append
+ (do ((sl (string->list str) (cdr sl))
+ (l '()))
+ ((null? sl) (reverse l))
+ (letrec ((decode-kbd-map
+ (lambda ()
+ (let ((g (assoc
+ (let ((c (car sl)))
+ (cond
+ ((and (char=? c #\w) (not (null? (cdr sl))))
+ #\v)
+ ((and (char=? c #\s) (null? (cdr sl)))
+ #\w)
+ (else
+ c)))
+ greek-kbd-map)))
+ (if g
+ (set! l (cons (cdr g) l))
+ (if (char=? (car sl) #\\)
+ (cond
+ ((> (length sl) 2)
+ (cond
+ ((char=? (car (cdr (cdr sl))) #\s)
+ (let ((c (car (cdr sl))))
+ (cond
+ ((char=? c #\k)
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "ξ" l)))
+ ((char=? c #\p)
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "ψ" l)))
+ (else
+ (set! sl (cdr sl))))))
+ ((and (char=? (car (cdr sl)))
+ (char=? (car (cdr (cdr sl))) #\h))
+ (set! sl (cdr (cdr sl)))
+ (set! l (cons "θ" l)))))
+
+ (else
+ (set! l (cons (string (car sl)) l))))))))))
+ (if (null? l)
+ (decode-kbd-map)
+ (cond
+ ((char=? (car sl) #\h)
+ (if (and (not (null? (cdr sl)))
+ (after-thita? (car (cdr sl))))
+ (set-car! l "θ")
+ (decode-kbd-map)))
+ ((assoc (car sl) greek-postfix-map) =>
+ (lambda (cmap)
+ (let ((x (assoc (car l) (cdr cmap))))
+ (if x
+ (set-car! l (cdr x))
+ (decode-kbd-map)))))
+ (else
+ (decode-kbd-map))))))))
+
+
+;; Translate the input string to UTF-8 if necessary.
+(define-public (ellinika:translate-input input)
+ (if (< (char->integer (string-ref input 0)) 127)
+ (ellinika:translate-kbd input)
+ input))
+
+
+
+(define transcription-list
+ (list
+ (cons "μπ" "b" )
+ (cons "γγ" "g" )
+ (cons "γκ" "g" )
+ (cons "γχ" "g" )
+ (cons "ντ" "d" )
+ (cons "αι" "e" )
+ (cons "αί" "e" )
+ (cons "αυ" "au")
+ (cons "αύ" "au")
+ (cons "ου" "ou")
+ (cons "ού" "ou")
+ (cons "ευ" "eu")
+ (cons "εύ" "eu")
+ (cons "οι" "i" )
+ (cons "ει" "i" )
+ (cons "εί" "i" )
+ (cons "υι" "i" )
+
+ (cons "α" "a" )
+ (cons "Α" "a" )
+ (cons "Ά" "a" )
+ (cons "ά" "a" )
+ (cons "β" "b" )
+ (cons "Β" "b" )
+ (cons "γ" "g" )
+ (cons "Γ" "g" )
+ (cons "δ" "d" )
+ (cons "Δ" "d" )
+ (cons "ε" "e" )
+ (cons "Ε" "e" )
+ (cons "Έ" "e" )
+ (cons "έ" "e" )
+ (cons "ζ" "z" )
+ (cons "Ζ" "z" )
+ (cons "η" "i" )
+ (cons "Η" "i" )
+ (cons "Ή" "i" )
+ (cons "ή" "i" )
+ (cons "θ" "t" )
+ (cons "Θ" "t" )
+ (cons "ι" "i" )
+ (cons "Ι" "i" )
+ (cons "Ί" "i" )
+ (cons "ί" "i" )
+ (cons "κ" "k" )
+ (cons "Κ" "k" )
+ (cons "λ" "l" )
+ (cons "Λ" "l" )
+ (cons "μ" "m" )
+ (cons "Μ" "m" )
+ (cons "ν" "n" )
+ (cons "Ν" "n" )
+ (cons "ξ" "x" )
+ (cons "Ξ" "x" )
+ (cons "ο" "o" )
+ (cons "Ο" "o" )
+ (cons "Ό" "o" )
+ (cons "ό" "o" )
+ (cons "π" "p" )
+ (cons "Π" "p" )
+ (cons "ρ" "r" )
+ (cons "Ρ" "r" )
+ (cons "σ" "s" )
+ (cons "Σ" "s" )
+ (cons "ς" "s" )
+ (cons "τ" "t" )
+ (cons "Τ" "t" )
+ (cons "υ" "i" )
+ (cons "Υ" "i" )
+ (cons "Ύ" "i" )
+ (cons "ύ" "i" )
+ (cons "φ" "f" )
+ (cons "Φ" "f" )
+ (cons "χ" "h" )
+ (cons "Χ" "h" )
+ (cons "ψ" "P" )
+ (cons "Ψ" "P" )
+ (cons "ω" "o" )
+ (cons "Ω" "o" )
+ (cons "Ώ" "o" )
+ (cons "ώ" "o" )
+ (cons "Ϊ" "i" )
+ (cons "ΐ" "i" )
+ (cons "Ϋ" "i" )
+ (cons "ΰ" "i" )))
+
+(define-public (ellinika:sounds-like str)
+ (let ((len (string-length str)))
+ (do ((i 0)
+ (sl '()))
+ ((= i len) (apply string-append (reverse sl)))
+ (set! sl (cons
+ (cond
+ ((and (<= (+ i 4) len)
+ (assoc (substring str i (+ i 4)) transcription-list)) =>
+ (lambda (x)
+ (set! i (+ i 4))
+ (cdr x)))
+ ((and (<= (+ i 2) len)
+ (assoc (substring str i (+ i 2)) transcription-list)) =>
+ (lambda (x)
+ (set! i (+ i 2))
+ (cdr x)))
+ (else
+ (set! i (1+ i))
+ (substring str (- i 1) i)))
+ sl)))))
+
+;;;; End of ellinika.scm
+

Return to:

Send suggestions and report system problems to the System administrator.