From 9efa28af3c0ba603a8b0e296088211b16d03fecb Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Fri, 11 Jun 2004 13:48:56 +0000 Subject: Translations from greek to internal representation and vice-versa git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@109 941c8c0f-9102-463b-b60b-cd22ce0e6858 --- scm/xlat.scm | 281 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 scm/xlat.scm (limited to 'scm/xlat.scm') diff --git a/scm/xlat.scm b/scm/xlat.scm new file mode 100644 index 0000000..7548e54 --- /dev/null +++ b/scm/xlat.scm @@ -0,0 +1,281 @@ +(define-module (xlat)) + +(define xlist-latin + (list + (list #\a "α" "Α" "ά" "Ά") + (list #\b "β" "Β") + (list #\g "γ" "Γ") + (list #\d "δ" "Δ") + (list #\e "ε" "Ε" "έ" "Έ") + (list #\z "ζ" "Ζ") + (list #\% "η" "Η" "ή" "Ή") + (list #\@ "θ" "Θ") + (list #\i "ι" "Ι" "ί" "Ί") + (list #\k "κ" "Κ") + (list #\l "λ" "Λ") + (list #\m "μ" "Μ") + (list #\n "ν" "Ν") + (list #\x "ξ" "Ξ") + (list #\o "ο" "Ο" "ό" "Ό") + (list #\p "π" "Π") + (list #\r "ρ" "Ρ") + (list #\s "σ" "Σ" "ς") ; FIXME: Special case. + (list #\c "ς" "Σ") + (list #\t "τ" "Τ") + (list #\y "υ" "Υ" "ύ" "Ύ") + (list #\f "φ" "Φ") + (list #\h "χ" "Χ") + (list #\* "ψ" "Ψ") + (list #\w "ω" "Ω" "ώ" "Ώ") + (list #\I "ϊ" "Ϊ" "ΐ" "ΐ") + (list #\Y "ϋ" "Ϋ" "ΰ" "ΰ"))) + + +(define xlist-greek + (list + (cons "α" #\a ) + (cons "Α" #\a ) + (cons "Ά" (cons #\a #t)) + (cons "ά" (cons #\a #t)) + (cons "β" #\b ) + (cons "Β" #\b ) + (cons "γ" #\g ) + (cons "Γ" #\g ) + (cons "δ" #\d ) + (cons "Δ" #\d ) + (cons "ε" #\e ) + (cons "Ε" #\e ) + (cons "Έ" (cons #\e #t)) + (cons "έ" (cons #\e #t)) + (cons "ζ" #\z ) + (cons "Ζ" #\z ) + (cons "η" #\% ) + (cons "Η" #\% ) + (cons "Ή" (cons #\% #t)) + (cons "ή" (cons #\% #t)) + (cons "θ" #\@ ) + (cons "Θ" #\@ ) + (cons "ι" #\i ) + (cons "Ι" #\i ) + (cons "Ί" (cons #\i #t)) + (cons "ί" (cons #\i #t)) + (cons "κ" #\k ) + (cons "Κ" #\k ) + (cons "λ" #\l ) + (cons "Λ" #\l ) + (cons "μ" #\m ) + (cons "Μ" #\m ) + (cons "ν" #\n ) + (cons "Ν" #\n ) + (cons "ξ" #\x ) + (cons "Ξ" #\x ) + (cons "ο" #\o ) + (cons "Ο" #\o ) + (cons "Ό" (cons #\o #t)) + (cons "ό" (cons #\o #t)) + (cons "π" #\p ) + (cons "Π" #\p ) + (cons "ρ" #\r ) + (cons "Ρ" #\r ) + (cons "σ" #\s ) + (cons "Σ" #\s ) + (cons "ς" #\s ) + (cons "τ" #\t ) + (cons "Τ" #\t ) + (cons "υ" #\y ) + (cons "Υ" #\y ) + (cons "Ύ" (cons #\y #t)) + (cons "ύ" (cons #\y #t)) + (cons "φ" #\f ) + (cons "Φ" #\f ) + (cons "χ" #\h ) + (cons "Χ" #\h ) + (cons "ψ" #\* ) + (cons "Ψ" #\* ) + (cons "ω" #\w ) + (cons "Ω" #\w ) + (cons "Ώ" (cons #\w #t)) + (cons "ώ" (cons #\w #t)) + (cons "Ϊ" #\I ) + (cons "ΐ" (cons #\I #t)) + (cons "Ϋ" #\Y ) + (cons "ΰ" (cons #\Y #t)))) + +(define (greek->xlat0 str) + "Convert the greek STRing into its latin transliteration. Returns + + (list AP XLAT) + +where AP is the number of accented letter (not syllable!), counted from 0; + XLAT is the list of transliterated letters +Secondary accents are ignored" + (let ((accent-pos #f) + (syllable 0) + (len (string-length str))) + (do ((i 0) + (lcnt 0 (1+ lcnt)) + (sl '())) + ((= i len) (cons + (if accent-pos + (- (length sl) accent-pos 1) + 0) + (reverse sl))) + (letrec ((get-trans (lambda (x) + (let ((y (cdr x))) + (cond + ((pair? y) + (if (not accent-pos) + (set! accent-pos lcnt)) + (car y)) + (else + y)))))) + (set! sl (cons (cond + ((and (<= (+ i 4) len) + (assoc (substring str i (+ i 4)) xlist-greek)) => + (lambda (x) + (set! i (+ i 4)) + (get-trans x))) + ((and (<= (+ i 2) len) + (assoc (substring str i (+ i 2)) xlist-greek)) => + (lambda (x) + (set! i (+ i 2)) + (get-trans x))) + (else + (set! i (1+ i)) + (substring str (- i 1) i))) + sl)))))) + +(define-public (xlat->greek w) + (apply + string-append + (apply append + (let ((acc (car w)) + (n 0)) + + (reverse + (map + (lambda (syllable) + (set! n (1+ n)) + (reverse + (map + (lambda (x) + (let ((entry (assoc x xlist-latin))) + (cond + ((and acc (= n acc) (= (length entry) 5)) + (set! acc #f) + (list-ref entry 3)) + (else + (list-ref entry 1))))) + syllable))) + (let ((slist (cdr w))) + (if (char=? (caar slist) #\s) + ;; Special handling for terminal sigma + (cons (cons #\c (cdar slist)) (cdr slist)) + slist)))))))) + +;; α a +;; +;; ε e +;; αι ai +;; +;; ο o +;; ω w +;; +;; ι i +;; η % +;; υ y +;; οι oi +;; ει ei +;; υι ui +;; +;; ου oy +;; +;; αυ ay +;; +;; ευ ey +;; +;; a -> a, ai, au +;; e -> e, ei, ey +;; o -> o, oi, oy +;; y -> y, yi +;; i -> i, ia, ie, io, ioy +;; % +;; I +;; Y + +(define-public (vowel? x) + (member x (list #\a #\e #\o #\y #\i #\% #\I #\Y #\w))) + +(define (prosodia ap wl accented sl) + (let ((syl '())) + ;; Collect consonants + (do () + ((or (null? wl) + (vowel? (car wl)))) + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl))) + (cond + ((null? wl) + (cons accented + (if (null? sl) + (list syl) + (cons (append syl (car sl)) (cdr sl))))) + (else + (let ((a (car wl))) + (set! wl (cdr wl)) + (set! syl (cons a syl)) + (cond + ((= ap (length wl)) + (set! accented (length sl))) + (else + (case a + ((#\a #\e #\o) + (cond + ((and (not (null? wl)) + (or (char=? (car wl) #\i) (char=? (car wl) #\y))) + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl)) + (if (= ap (length wl)) + (set! accented (length sl)))))) + ((#\i) + (if (not (null? wl)) + (cond + ((or (char=? (car wl) #\e) + (char=? (car wl) #\a)) + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl)) + (if (= ap (length wl)) + (set! accented (length sl)))) + ((char=? (car wl) #\o) + ;; "ιο" ή "ιου" + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl)) + (cond + ((= ap (length wl)) + (set! accented (length sl))) + ((and (not (null? wl)) (char=? (car wl) #\y)) + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl)) + (if (= ap (length wl)) + (set! accented (length sl))))))))) + ((#\y) + (cond + ((and (not (null? wl)) + (char=? (car wl) #\i)) + (set! syl (cons (car wl) syl)) + (set! wl (cdr wl)) + (if (= ap (length wl)) + (set! accented (length sl)))))) + ((#\Y #\I) + (set! accented (length sl)))))) + (prosodia ap wl accented (cons syl sl))))))) + + +(define-public (greek->xlat str) + (let* ((wl (greek->xlat0 str)) + (sl (prosodia (car wl) (cdr wl) 0 '()))) + (cons + (- (length (cdr sl)) (car sl)) + (cdr sl)))) + +;;;; End of file \ No newline at end of file -- cgit v1.2.1