;;;; This file is part of Ellinika ;;;; Copyright (C) 2004, 2007 Sergey Poznyakoff ;;;; ;;;; Ellinika is free software; you can redistribute it and/or modify ;;;; it under the terms of the GNU General Public License as published by ;;;; the Free Software Foundation; either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; Ellinika is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program. If not, see . ;;;; (define-module (ellinika xlat)) (define greek-postfix-map (list (cons #\: (list (cons "ι" "ϊ") (cons "υ" "ϋ") (cons "ί" "ΐ") (cons "ύ" "ΰ") (cons "θ" "ϊ") (cons "θ" "ϋ"))) (cons #\' (list (cons "α" "ά") (cons "Α" "Ά") (cons "ε" "έ") (cons "Ε" "Έ") (cons "η" "ή") (cons "Η" "Ή") (cons "ι" "ί") (cons "Ι" "Ί") (cons "ϊ" "ΐ") (cons "Ϊ" "Ϊ") (cons "ο" "ό") (cons "Ο" "Ό") (cons "υ" "ύ") (cons "Υ" "Ύ") (cons "θ" "ύ") (cons "Θ" "Ύ") (cons "ϋ" "ΰ") (cons "Ϋ" "Ϋ") (cons "ω" "ώ") (cons "Ω" "Ώ"))) (cons #\s (list (cons "κ" "ξ") (cons "π" "ψ"))))) (define greek-kbd-map (list (cons #\a "α") (cons #\A "Α") (cons #\b "β") (cons #\B "Β") (cons #\g "γ") (cons #\G "Γ") (cons #\d "δ") (cons #\D "Δ") (cons #\e "ε") (cons #\E "Ε") (cons #\z "ζ") (cons #\Z "Ζ") (cons #\h "η") (cons #\H "Η") (cons #\u "θ") (cons #\U "Θ") (cons #\i "ι") (cons #\I "Ι") (cons #\k "κ") (cons #\K "Κ") (cons #\l "λ") (cons #\L "Λ") (cons #\m "μ") (cons #\M "Μ") (cons #\n "ν") (cons #\M "Ν") (cons #\j "ξ") (cons #\J "Ξ") (cons #\o "ο") (cons #\O "Ο") (cons #\p "π") (cons #\P "Π") (cons #\r "ρ") (cons #\R "Ρ") (cons #\s "σ") (cons #\S "Σ") (cons #\w "ς") (cons #\W "Σ") (cons #\t "τ") (cons #\T "Τ") (cons #\y "υ") (cons #\Y "Υ") (cons #\f "φ") (cons #\F "Φ") (cons #\x "χ") (cons #\X "Χ") (cons #\c "ψ") (cons #\C "Ψ") (cons #\v "ω") (cons #\V "Ω"))) (define (after-thita? c) (member c (list #\a #\e #\i #\o #\y #\v))) ;;; Given input string in Greek transliteration, convert it to ;;; an equivalent Greek word in UTF-8 encoding. The input string is ;;; supposed to follow the traditional Greek keyboard layout: ;;; ;;; +----------------------------------------------------------------+ ;;; | 1! | 2@ | 3# | 4$ | 5% | 6^ | 7& | 8* | 9( | 0) | -_ | =+ | `~ | ;;; +----------------------------------------------------------------+ ;;; | ·― | ςΣ | εΕ | ρΡ | τΤ | υΥ | θΘ | ιΙ | οΟ | πΠ | [{ | ]} | ;;; +------------------------------------------------------------+ ;;; | αΑ | σΣ | δΔ | φΦ | γΓ | ηΗ | ξΞ | κΚ | λΛ | ΄¨ | '" | \| | ;;; +-----------------------------------------------------------+ ;;; | ζΖ | χΧ | ψΨ | ωΩ | βΒ | νΝ | μΜ | ,; | .: | /? | ;;; +-------------------------------------------------+ ;;; +-----------------------------+ ;;; | space bar | ;;; +-----------------------------+ ;;; ;;; ;;; The followin escape sequences are recognized: ;;; ;;; '\ks' -> 'ξ' ;;; '\ps' -> 'ψ' ;;; '\th' -> 'θ' ;;; ;;; Additionally some spell fixing heuristics is applied: ;;; ;;; 's' at the end of the word -> 'ς' ;;; 'w' anywhere but at the end of the word -> 'ω' ;;; 'ks' -> 'ξ' ;;; 'ps' -> 'ψ' ;;; "th" -> 'θ' unless followed by a consonant ;;; "u'" -> 'ύ' ;;; ;;; FIXME: The case of less obvious spelling errors, like e.g. 'ou' -> 'ου' ;;; will be handled by later spelling corrections if fuzzy search is ;;; enabled (define-public (ellinika:translate-kbd str) (apply string-append (do ((sl (string->list str) (cdr sl)) (l '())) ((null? sl) (reverse l)) (letrec ((decode-kbd-map (lambda () (let ((g (assoc (let ((c (car sl))) (cond ((and (char=? c #\w) (not (null? (cdr sl)))) #\v) ((and (char=? c #\s) (null? (cdr sl))) #\w) (else c))) greek-kbd-map))) (if g (set! l (cons (cdr g) l)) (if (char=? (car sl) #\\) (cond ((> (length sl) 2) (cond ((char=? (car (cdr (cdr sl))) #\s) (let ((c (car (cdr sl)))) (cond ((char=? c #\k) (set! sl (cdr (cdr sl))) (set! l (cons "ξ" l))) ((char=? c #\p) (set! sl (cdr (cdr sl))) (set! l (cons "ψ" l))) (else (set! sl (cdr sl)))))) ((and (char=? (car (cdr sl))) (char=? (car (cdr (cdr sl))) #\h)) (set! sl (cdr (cdr sl))) (set! l (cons "θ" l))))) (else (set! l (cons (string (car sl)) l)))))))))) (if (null? l) (decode-kbd-map) (cond ((char=? (car sl) #\h) (if (and (not (null? (cdr sl))) (after-thita? (car (cdr sl)))) (set-car! l "θ") (decode-kbd-map))) ((assoc (car sl) greek-postfix-map) => (lambda (cmap) (let ((x (assoc (car l) (cdr cmap)))) (if x (set-car! l (cdr x)) (decode-kbd-map))))) (else (decode-kbd-map)))))))) ;; Translate the input string to UTF-8 if necessary. (define-public (ellinika:translate-input input) (if (and input (not (string-null? input)) (< (char->integer (string-ref input 0)) 127)) (ellinika:translate-kbd input) input)) (define transcription-list (list (cons "μπ" "b" ) (cons "γγ" "g" ) (cons "γκ" "g" ) (cons "γχ" "g" ) (cons "ντ" "d" ) (cons "αι" "e" ) (cons "αί" "e" ) (cons "αυ" "au") (cons "αύ" "au") (cons "ου" "ou") (cons "ού" "ou") (cons "ευ" "eu") (cons "εύ" "eu") (cons "οι" "i" ) (cons "ει" "i" ) (cons "εί" "i" ) (cons "υι" "i" ) (cons "α" "a" ) (cons "Α" "a" ) (cons "Ά" "a" ) (cons "ά" "a" ) (cons "β" "b" ) (cons "Β" "b" ) (cons "γ" "g" ) (cons "Γ" "g" ) (cons "δ" "d" ) (cons "Δ" "d" ) (cons "ε" "e" ) (cons "Ε" "e" ) (cons "Έ" "e" ) (cons "έ" "e" ) (cons "ζ" "z" ) (cons "Ζ" "z" ) (cons "η" "i" ) (cons "Η" "i" ) (cons "Ή" "i" ) (cons "ή" "i" ) (cons "θ" "t" ) (cons "Θ" "t" ) (cons "ι" "i" ) (cons "Ι" "i" ) (cons "Ί" "i" ) (cons "ί" "i" ) (cons "κ" "k" ) (cons "Κ" "k" ) (cons "λ" "l" ) (cons "Λ" "l" ) (cons "μ" "m" ) (cons "Μ" "m" ) (cons "ν" "n" ) (cons "Ν" "n" ) (cons "ξ" "x" ) (cons "Ξ" "x" ) (cons "ο" "o" ) (cons "Ο" "o" ) (cons "Ό" "o" ) (cons "ό" "o" ) (cons "π" "p" ) (cons "Π" "p" ) (cons "ρ" "r" ) (cons "Ρ" "r" ) (cons "σ" "s" ) (cons "Σ" "s" ) (cons "ς" "s" ) (cons "τ" "t" ) (cons "Τ" "t" ) (cons "υ" "i" ) (cons "Υ" "i" ) (cons "Ύ" "i" ) (cons "ύ" "i" ) (cons "φ" "f" ) (cons "Φ" "f" ) (cons "χ" "h" ) (cons "Χ" "h" ) (cons "ψ" "P" ) (cons "Ψ" "P" ) (cons "ω" "o" ) (cons "Ω" "o" ) (cons "Ώ" "o" ) (cons "ώ" "o" ) (cons "Ϊ" "i" ) (cons "ΐ" "i" ) (cons "Ϋ" "i" ) (cons "ΰ" "i" ))) (define-public (ellinika:sounds-like str) (let ((len (string-length str))) (do ((i 0) (sl '())) ((= i len) (apply string-append (reverse sl))) (set! sl (cons (cond ((and (<= (+ i 4) len) (assoc (substring str i (+ i 4)) transcription-list)) => (lambda (x) (set! i (+ i 4)) (cdr x))) ((and (<= (+ i 2) len) (assoc (substring str i (+ i 2)) transcription-list)) => (lambda (x) (set! i (+ i 2)) (cdr x))) (else (set! i (1+ i)) (substring str (- i 1) i))) sl))))) ;;;; End of ellinika.scm