aboutsummaryrefslogtreecommitdiff
path: root/scm/dictrans.scm
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2004-10-07 16:00:25 +0000
committerSergey Poznyakoff <gray@gnu.org.ua>2004-10-07 16:00:25 +0000
commitb27976bda04ef7a2219c44dfc7150441b62b3a0a (patch)
treee003a51f0b829d1664358b6cc4dfac79ef67f7be /scm/dictrans.scm
parent21ee0a3c2fc48d51b9a3e29a61a661afdaee4fc9 (diff)
downloadellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.gz
ellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.bz2
Dictionary parser
git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@195 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'scm/dictrans.scm')
-rw-r--r--scm/dictrans.scm654
1 files changed, 654 insertions, 0 deletions
diff --git a/scm/dictrans.scm b/scm/dictrans.scm
new file mode 100644
index 0000000..5cfc50e
--- /dev/null
+++ b/scm/dictrans.scm
@@ -0,0 +1,654 @@
1;;;; This file is part of Ellinika
2;;;; Copyright (C) 2004 Sergey Poznyakoff
3;;;;
4;;;; Ellinika is free software; you can redistribute it and/or modify
5;;;; it under the terms of the GNU General Public License as published by
6;;;; the Free Software Foundation; either version 2 of the License, or
7;;;; (at your option) any later version.
8;;;;
9;;;; Ellinika is distributed in the hope that it will be useful,
10;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12;;;; GNU General Public License for more details.
13;;;;
14;;;; You should have received a copy of the GNU General Public License
15;;;; along with Ellinika; if not, write to the Free Software
16;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17;;;;
18
19;;;; Dictionary structure
20;;;; Internal representation:
21;;;;
22;;;; * Each dictionary entry is represented as a vector:
23;;;;
24;;;; #(KEY FORMS XREF P-LIST)
25;;;;
26;;;; KEY is list of strings
27;;;;
28;;;; * FORMS is either #f or a string describing forms of the key if they
29;;;; are formed in an irregular manner.
30;;;;
31;;;; * XREF is a list of cross-references
32;;;;
33;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector:
34;;;;
35;;;; #(POS ARTICLE AREF XREF TOPIC)
36;;;;
37;;;; Member Type Meaning
38;;;; POS string part of speech
39;;;; ARTICLE list(string) Dictionary article associated with this key/pos
40;;;; AREF list(string) List of antonyms
41;;;; XREF list(string) List of cross-references
42;;;; TOPIC list(string) List of topics this item pertains to
43;;;;
44;;;;
45;;;; External representation (XML):
46;;;;
47;;;; <NODE>
48;;;; <K>string</K>+
49;;;; [<F>string</F>]
50;;;; <P ID="string">
51;;;; <M>string</M>+
52;;;; <A>string</A>*
53;;;; <X>string</X>*
54;;;; <T ID="string" />*
55;;;; </P>+
56;;;; <X>string</X>*
57;;;; </NODE>
58;;;;
59;;;; If only one P entry is present, the following alternative forms
60;;;; are understood:
61;;;;
62;;;; <NODE>
63;;;; <K>string</K>+
64;;;; [<F>string</F>]
65;;;; <P>string</P>
66;;;; <M>string</M>*
67;;;; <A>string</A>*
68;;;; <X>string</X>*
69;;;; <T ID="string" />*
70;;;; </NODE>
71;;;;
72;;;; or
73;;;;
74;;;; <NODE>
75;;;; <K>string</K>+
76;;;; [<F>string</F>]
77;;;; <P>string</P>
78;;;; <X>string</X>*
79;;;; <T ID="string" />*
80;;;; </NODE>
81
82(set! %load-path (cons "/usr/local/share/guile-sql" %load-path))
83(use-modules (xmltools xmltrans)
84 (sql)
85 (ice-9 getopt-long))
86
87(use-syntax (ice-9 syncase))
88
89(define compile-only #f)
90(define cleanup-option #f)
91(define preserve-option #f)
92
93(define sql-iface "mysql")
94(define sql-host "localhost")
95(define sql-database "ellinika")
96(define sql-port 3306)
97(define sql-password #f)
98(define sql-user #f)
99(define verbose-option #f)
100(define debug-level 0)
101(define input-files '())
102
103(define (debug level . rest)
104 (if (>= debug-level level)
105 (begin
106 (for-each
107 (lambda (x)
108 (display x))
109 rest)
110 (newline))))
111
112
113;;;; XML definitions
114
115;;; Set the default handler
116(define tag-list '())
117
118(define (lingua:default-start tag attr)
119 (xmltrans:set-attr #f "__START__" 1)
120 #f)
121
122(xmltrans:set-default-start-handler lingua:default-start)
123
124(define (lingua:default-end tag attr text)
125 (if (xmltrans:attr attr "__START__")
126 (xmltrans:parse-error #f "Unhandled element " tag))
127 (set! tag-list
128 (cons
129 (xmltrans:make-tag tag attr text)
130 tag-list))
131 #f)
132
133(xmltrans:set-default-end-handler lingua:default-end)
134
135;;; Current node
136(define current-node #f)
137
138(define-syntax node-get
139 (syntax-rules ()
140 ((node-get #:key)
141 (vector-ref current-node 0))
142 ((node-get #:forms)
143 (vector-ref current-node 1))
144 ((node-get #:xref)
145 (vector-ref current-node 2))
146 ((node-get #:p-list)
147 (vector-ref current-node 3))))
148
149(define (mark-invalid)
150 (xmltrans:set-attr "NODE" "__INVALID__" 1))
151
152(define-syntax node-set
153 (syntax-rules ()
154 ((node-set #:key k)
155 (cond
156 ((node-get #:key) =>
157 (lambda (klist)
158 (vector-set! current-node 0 (append klist (list k)))))
159 (else
160 (vector-set! current-node 0 (list k)))))
161 ((node-set #:forms f)
162 (begin
163 (cond
164 ((node-get #:forms)
165 (xmltrans:parse-error #f "Forms already set")
166 (mark-invalid)))
167 (vector-set! current-node 1 f)))
168 ((node-set #:xref x)
169 (cond
170 ((node-get #:xref) =>
171 (lambda (xlist)
172 (vector-set! current-node 2 (append xlist (list x)))))
173 (else
174 (vector-set! current-node 2 (list x)))))
175 ((node-set! #:p-article p)
176 (cond
177 ((node-get #:p-list) =>
178 (lambda (plist)
179 (vector-set! current-node 3 (append plist (list p)))))
180 (else
181 (vector-set! current-node 3 (list p)))))
182 ((node-set #:topic t)
183 (for-each
184 (lambda (p)
185 ;; FIXME: Use p-article-set
186 (vector-set! p 4 (append (vector-ref p 4) (list t))))
187 (node-get #:p-list)))))
188
189(define p-article #f)
190
191(define-syntax p-article-get
192 (syntax-rules ()
193 ((p-article-get #:pos)
194 (vector-ref p-article 0))
195 ((p-article-get #:article)
196 (vector-ref p-article 1))
197 ((p-article-get #:aref)
198 (vector-ref p-article 2))
199 ((p-article-get #:xref)
200 (vector-ref p-article 3))
201 ((p-article-get #:topic)
202 (vector-ref p-article 4))))
203
204(define-syntax p-set
205 (syntax-rules ()
206 ((p-set key n val)
207 (vector-set! p-article n
208 (cond
209 ((p-article-get key) =>
210 (lambda (alst)
211 (append alst (list val))))
212 (else
213 (list val)))))))
214
215(define-syntax p-article-set
216 (syntax-rules ()
217 ((p-article-set #:pos val)
218 (vector-set! p-article 0 val))
219 ((p-article-set #:article val)
220 (p-set #:article 1 val))
221 ((p-article-set #:aref val)
222 (p-set #:aref 2 val))
223 ((p-article-set #:xref val)
224 (p-set #:xref 3 val))
225 ((p-article-set #:topic val)
226 (p-set #:topic 4 val))))
227
228
229;;; Node list
230(define node-list '())
231
232(define (push-node node)
233 (if (>= debug-level 100)
234 (begin
235 (write node)
236 (newline)))
237 (set! node-list (cons current-node node-list)))
238
239;;; Topic stack<