diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2004-10-07 16:00:25 +0000 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2004-10-07 16:00:25 +0000 |
commit | b27976bda04ef7a2219c44dfc7150441b62b3a0a (patch) | |
tree | e003a51f0b829d1664358b6cc4dfac79ef67f7be /scm/dictrans.scm | |
parent | 21ee0a3c2fc48d51b9a3e29a61a661afdaee4fc9 (diff) | |
download | ellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.gz ellinika-b27976bda04ef7a2219c44dfc7150441b62b3a0a.tar.bz2 |
Dictionary parser
git-svn-id: file:///home/puszcza/svnroot/ellinika/trunk@195 941c8c0f-9102-463b-b60b-cd22ce0e6858
Diffstat (limited to 'scm/dictrans.scm')
-rw-r--r-- | scm/dictrans.scm | 654 |
1 files changed, 654 insertions, 0 deletions
diff --git a/scm/dictrans.scm b/scm/dictrans.scm new file mode 100644 index 0000000..5cfc50e --- /dev/null +++ b/scm/dictrans.scm | |||
@@ -0,0 +1,654 @@ | |||
1 | ;;;; This file is part of Ellinika | ||
2 | ;;;; Copyright (C) 2004 Sergey Poznyakoff | ||
3 | ;;;; | ||
4 | ;;;; Ellinika is free software; you can redistribute it and/or modify | ||
5 | ;;;; it under the terms of the GNU General Public License as published by | ||
6 | ;;;; the Free Software Foundation; either version 2 of the License, or | ||
7 | ;;;; (at your option) any later version. | ||
8 | ;;;; | ||
9 | ;;;; Ellinika is distributed in the hope that it will be useful, | ||
10 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | ;;;; GNU General Public License for more details. | ||
13 | ;;;; | ||
14 | ;;;; You should have received a copy of the GNU General Public License | ||
15 | ;;;; along with Ellinika; if not, write to the Free Software | ||
16 | ;;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | ;;;; | ||
18 | |||
19 | ;;;; Dictionary structure | ||
20 | ;;;; Internal representation: | ||
21 | ;;;; | ||
22 | ;;;; * Each dictionary entry is represented as a vector: | ||
23 | ;;;; | ||
24 | ;;;; #(KEY FORMS XREF P-LIST) | ||
25 | ;;;; | ||
26 | ;;;; KEY is list of strings | ||
27 | ;;;; | ||
28 | ;;;; * FORMS is either #f or a string describing forms of the key if they | ||
29 | ;;;; are formed in an irregular manner. | ||
30 | ;;;; | ||
31 | ;;;; * XREF is a list of cross-references | ||
32 | ;;;; | ||
33 | ;;;; * P-LIST is a list of P-ARTICLES. Each P-ARTICLE is a vector: | ||
34 | ;;;; | ||
35 | ;;;; #(POS ARTICLE AREF XREF TOPIC) | ||
36 | ;;;; | ||
37 | ;;;; Member Type Meaning | ||
38 | ;;;; POS string part of speech | ||
39 | ;;;; ARTICLE list(string) Dictionary article associated with this key/pos | ||
40 | ;;;; AREF list(string) List of antonyms | ||
41 | ;;;; XREF list(string) List of cross-references | ||
42 | ;;;; TOPIC list(string) List of topics this item pertains to | ||
43 | ;;;; | ||
44 | ;;;; | ||
45 | ;;;; External representation (XML): | ||
46 | ;;;; | ||
47 | ;;;; <NODE> | ||
48 | ;;;; <K>string</K>+ | ||
49 | ;;;; [<F>string</F>] | ||
50 | ;;;; <P ID="string"> | ||
51 | ;;;; <M>string</M>+ | ||
52 | ;;;; <A>string</A>* | ||
53 | ;;;; <X>string</X>* | ||
54 | ;;;; <T ID="string" />* | ||
55 | ;;;; </P>+ | ||
56 | ;;;; <X>string</X>* | ||
57 | ;;;; </NODE> | ||
58 | ;;;; | ||
59 | ;;;; If only one P entry is present, the following alternative forms | ||
60 | ;;;; are understood: | ||
61 | ;;;; | ||
62 | ;;;; <NODE> | ||
63 | ;;;; <K>string</K>+ | ||
64 | ;;;; [<F>string</F>] | ||
65 | ;;;; <P>string</P> | ||
66 | ;;;; <M>string</M>* | ||
67 | ;;;; <A>string</A>* | ||
68 | ;;;; <X>string</X>* | ||
69 | ;;;; <T ID="string" />* | ||
70 | ;;;; </NODE> | ||
71 | ;;;; | ||
72 | ;;;; or | ||
73 | ;;;; | ||
74 | ;;;; <NODE> | ||
75 | ;;;; <K>string</K>+ | ||
76 | ;;;; [<F>string</F>] | ||
77 | ;;;; <P>string</P> | ||
78 | ;;;; <X>string</X>* | ||
79 | ;;;; <T ID="string" />* | ||
80 | ;;;; </NODE> | ||
81 | |||
82 | (set! %load-path (cons "/usr/local/share/guile-sql" %load-path)) | ||
83 | (use-modules (xmltools xmltrans) | ||
84 | (sql) | ||
85 | (ice-9 getopt-long)) | ||
86 | |||
87 | (use-syntax (ice-9 syncase)) | ||
88 | |||
89 | (define compile-only #f) | ||
90 | (define cleanup-option #f) | ||
91 | (define preserve-option #f) | ||
92 | |||
93 | (define sql-iface "mysql") | ||
94 | (define sql-host "localhost") | ||
95 | (define sql-database "ellinika") | ||
96 | (define sql-port 3306) | ||
97 | (define sql-password #f) | ||
98 | (define sql-user #f) | ||
99 | (define verbose-option #f) | ||
100 | (define debug-level 0) | ||
101 | (define input-files '()) | ||
102 | |||
103 | (define (debug level . rest) | ||
104 | (if (>= debug-level level) | ||
105 | (begin | ||
106 | (for-each | ||
107 | (lambda (x) | ||
108 | (display x)) | ||
109 | rest) | ||
110 | (newline)))) | ||
111 | |||
112 | |||
113 | ;;;; XML definitions | ||
114 | |||
115 | ;;; Set the default handler | ||
116 | (define tag-list '()) | ||
117 | |||
118 | (define (lingua:default-start tag attr) | ||
119 | (xmltrans:set-attr #f "__START__" 1) | ||
120 | #f) | ||
121 | |||
122 | (xmltrans:set-default-start-handler lingua:default-start) | ||
123 | |||
124 | (define (lingua:default-end tag attr text) | ||
125 | (if (xmltrans:attr attr "__START__") | ||
126 | (xmltrans:parse-error #f "Unhandled element " tag)) | ||
127 | (set! tag-list | ||
128 | (cons | ||
129 | (xmltrans:make-tag tag attr text) | ||
130 | tag-list)) | ||
131 | #f) | ||
132 | |||
133 | (xmltrans:set-default-end-handler lingua:default-end) | ||
134 | |||
135 | ;;; Current node | ||
136 | (define current-node #f) | ||
137 | |||
138 | (define-syntax node-get | ||
139 | (syntax-rules () | ||
140 | ((node-get #:key) | ||
141 | (vector-ref current-node 0)) | ||
142 | ((node-get #:forms) | ||
143 | (vector-ref current-node 1)) | ||
144 | ((node-get #:xref) | ||
145 | (vector-ref current-node 2)) | ||
146 | ((node-get #:p-list) | ||
147 | (vector-ref current-node 3)))) | ||
148 | |||
149 | (define (mark-invalid) | ||
150 | (xmltrans:set-attr "NODE" "__INVALID__" 1)) | ||
151 | |||
152 | (define-syntax node-set | ||
153 | (syntax-rules () | ||
154 | ((node-set #:key k) | ||
155 | (cond | ||
156 | ((node-get #:key) => | ||
157 | (lambda (klist) | ||
158 | (vector-set! current-node 0 (append klist (list k))))) | ||
159 | (else | ||
160 | (vector-set! current-node 0 (list k))))) | ||
161 | ((node-set #:forms f) | ||
162 | (begin | ||
163 | (cond | ||
164 | ((node-get #:forms) | ||
165 | (xmltrans:parse-error #f "Forms already set") | ||
166 | (mark-invalid))) | ||
167 | (vector-set! current-node 1 f))) | ||
168 | ((node-set #:xref x) | ||
169 | (cond | ||
170 | ((node-get #:xref) => | ||
171 | (lambda (xlist) | ||
172 | (vector-set! current-node 2 (append xlist (list x))))) | ||
173 | (else | ||
174 | (vector-set! current-node 2 (list x))))) | ||
175 | ((node-set! #:p-article p) | ||
176 | (cond | ||
177 | ((node-get #:p-list) => | ||
178 | (lambda (plist) | ||
179 | (vector-set! current-node 3 (append plist (list p))))) | ||
180 | (else | ||
181 | (vector-set! current-node 3 (list p))))) | ||
182 | ((node-set #:topic t) | ||
183 | (for-each | ||
184 | (lambda (p) | ||
185 | ;; FIXME: Use p-article-set | ||
186 | (vector-set! p 4 (append (vector-ref p 4) (list t)))) | ||
187 | (node-get #:p-list))))) | ||
188 | |||
189 | (define p-article #f) | ||
190 | |||
191 | (define-syntax p-article-get | ||
192 | (syntax-rules () | ||
193 | ((p-article-get #:pos) | ||
194 | (vector-ref p-article 0)) | ||
195 | ((p-article-get #:article) | ||
196 | (vector-ref p-article 1)) | ||
197 | ((p-article-get #:aref) | ||
198 | (vector-ref p-article 2)) | ||
199 | ((p-article-get #:xref) | ||
200 | (vector-ref p-article 3)) | ||
201 | ((p-article-get #:topic) | ||
202 | (vector-ref p-article 4)))) | ||
203 | |||
204 | (define-syntax p-set | ||
205 | (syntax-rules () | ||
206 | ((p-set key n val) | ||
207 | (vector-set! p-article n | ||
208 | (cond | ||
209 | ((p-article-get key) => | ||
210 | (lambda (alst) | ||
211 | (append alst (list val)))) | ||
212 | (else | ||
213 | (list val))))))) | ||
214 | |||
215 | (define-syntax p-article-set | ||
216 | (syntax-rules () | ||
217 | ((p-article-set #:pos val) | ||
218 | (vector-set! p-article 0 val)) | ||
219 | ((p-article-set #:article val) | ||
220 | (p-set #:article 1 val)) | ||
221 | ((p-article-set #:aref val) | ||
222 | (p-set #:aref 2 val)) | ||
223 | ((p-article-set #:xref val) | ||
224 | (p-set #:xref 3 val)) | ||
225 | ((p-article-set #:topic val) | ||
226 | (p-set #:topic 4 val)))) | ||
227 | |||
228 | |||
229 | ;;; Node list | ||
230 | (define node-list '()) | ||
231 | |||
232 | (define (push-node node) | ||
233 | (if (>= debug-level 100) | ||
234 | (begin | ||
235 | (write node) | ||
236 | (newline))) | ||
237 | (set! node-list (cons current-node node-list))) | ||
238 | |||
239 | ;;; Topic stack< |