summary refs log tree commit diff about
author Sergey Poznyakoff <gray@gnu.org> 2018-08-18 14:43:36 (GMT)
committer Sergey Poznyakoff <gray@gnu.org> 2018-08-18 14:43:36 (GMT)
commit 8c0de7a515aa9d6db7b3ce4110a4e29261851abb (patch) (unidiff)
tree 228c2bb1be5a907b373158c6c94cd8d51f23c419
parent 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (diff)
download wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.gz
wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.bz2
Minor stylistic fixes.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- README.rst 6
-rwxr-xr-x bin/wikitrans 3
-rw-r--r-- wikitrans/wiki2html.py 53
-rw-r--r-- wikitrans/wiki2texi.py 82
-rw-r--r-- wikitrans/wiki2text.py 66
-rw-r--r-- wikitrans/wikidump.py 20
-rw-r--r-- wikitrans/wikimarkup.py 46
-rw-r--r-- wikitrans/wikitoken.py 72
8 files changed, 205 insertions, 143 deletions
diff --git a/README.rst b/README.rst
index d189c98..7c53e3c 100644
--- a/README.rst
+++ b/README.rst
@@ -124,7 +124,7 @@ is ``http://%(lang)s.wikipedia.org/wiki/``.
124 124
125The ``wikitrans`` utility 125The ``wikitrans`` utility
126========================= 126=========================
127This command line utility converts the supplied text to a selected 127This command line utility converts the supplied text to selected
128output format. The usage syntax is:: 128output format. The usage syntax is::
129 129
130 wikitrans [OPTIONS] ARG 130 wikitrans [OPTIONS] ARG
@@ -160,11 +160,11 @@ Options are:
160 Set output document type (``html`` (the default), ``texi``, 160 Set output document type (``html`` (the default), ``texi``,
161 ``text``, or ``dump``). 161 ``text``, or ``dump``).
162``-l LANG``, ``--lang=LANG`` 162``-l LANG``, ``--lang=LANG``
163 Set input document language 163 Set input document language.
164``-o KW=VAL``, ``--option=KW=VAL`` 164``-o KW=VAL``, ``--option=KW=VAL``
165 Pass the keyword argument ``KW=VAL`` to the parser class construct. 165 Pass the keyword argument ``KW=VAL`` to the parser class construct.
166``-d DEBUG``, ``--debug=DEBUG`` 166``-d DEBUG``, ``--debug=DEBUG``
167 Set debug level (0..100) 167 Set debug level (0..100).
168``-D``, ``--dump`` 168``-D``, ``--dump``
169 Dump parse tree and exit; same as ``--type=dump``. 169 Dump parse tree and exit; same as ``--type=dump``.
170``-b URL``, ``--base-url=URL`` 170``-b URL``, ``--base-url=URL``
diff --git a/bin/wikitrans b/bin/wikitrans
index caaa885..87de020 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -59,7 +59,8 @@ handlers = {
59 'wiktionary': TextWiktionaryMarkup 59 'wiktionary': TextWiktionaryMarkup
60 }, 60 },
61 'texi': { 61 'texi': {
62 'default': TexiWikiMarkup 62 'default': TexiWikiMarkup,
63 'wiktionary': TextWikiMarkup
63 } 64 }
64} 65}
65 66
diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py
index fc6b142..0696dce 100644
--- a/wikitrans/wiki2html.py
+++ b/wikitrans/wiki2html.py
@@ -1,17 +1,17 @@
1#!/usr/bin/python 1#!/usr/bin/python
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3# Copyright (C) 2008-2018 Sergey Poznyakoff 3# Copyright (C) 2008-2018 Sergey Poznyakoff
4# 4#
5# This program is free software; you can redistribute it and/or modify 5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by 6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option) 7# the Free Software Foundation; either version 3, or (at your option)
8# any later version. 8# any later version.
9# 9#
10# This program is distributed in the hope that it will be useful, 10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details. 13# GNU General Public License for more details.
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
@@ -40,16 +40,18 @@ try:
40 from html import escape as html_escape 40 from html import escape as html_escape
41except ImportError: 41except ImportError:
42 from cgi import escape as html_escape 42 from cgi import escape as html_escape
43 43
44__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] 44__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
45 45
46
46class HtmlSeqNode(WikiSeqNode): 47class HtmlSeqNode(WikiSeqNode):
47 def format(self): 48 def format(self):
48 s = '' 49 s = ''
49 for x in self.content: 50 for x in self.content:
50 s += x.format() 51 s += x.format()
51 return s 52 return s
52 53
54
53class HtmlLinkNode(HtmlSeqNode): 55class HtmlLinkNode(HtmlSeqNode):
54 def format(self): 56 def format(self):
55 arg = self.content[0].format() 57 arg = self.content[0].format()
@@ -70,8 +72,8 @@ class HtmlLinkNode(HtmlSeqNode):
70 elif s[0] == "proto": 72 elif s[0] == "proto":
71 text = self.parser.tmpl_proto(s) 73 text = self.parser.tmpl_proto(s)
72 return text 74 return text
73 75
74 (qual,sep,tgt) = arg.partition(':') 76 (qual, sep, tgt) = arg.partition(':')
75 if tgt != '': 77 if tgt != '':
76 ns = self.parser.wiki_ns_name(qual) 78 ns = self.parser.wiki_ns_name(qual)
77 if ns: 79 if ns:
@@ -92,6 +94,7 @@ class HtmlLinkNode(HtmlSeqNode):
92 return "<a href=\"%s\">%s</a>" % (tgt, 94 return "<a href=\"%s\">%s</a>" % (tgt,
93 text if (text and text != '') else arg) 95 text if (text and text != '') else arg)
94 96
97
95class HtmlRefNode(WikiRefNode): 98class HtmlRefNode(WikiRefNode):
96 def format(self): 99 def format(self):
97 target = self.ref 100 target = self.ref
@@ -101,6 +104,7 @@ class HtmlRefNode(WikiRefNode):
101 text if (text and text != '') else target 104 text if (text and text != '') else target
102 ) 105 )
103 106
107
104class HtmlFontNode(HtmlSeqNode): 108class HtmlFontNode(HtmlSeqNode):
105 def format(self): 109 def format(self):
106 comm = { 'IT': 'i', 110 comm = { 'IT': 'i',
@@ -111,14 +115,16 @@ class HtmlFontNode(HtmlSeqNode):
111 s += '</%s>' % comm[self.type] 115 s += '</%s>' % comm[self.type]
112 return s 116 return s
113 117
118
114class HtmlTextNode(HtmlSeqNode): 119class HtmlTextNode(HtmlSeqNode):
115 def format(self): 120 def format(self):
116 if isinstance(self.content,list): 121 if isinstance(self.content, list):
117 s = ''.join(self.content) 122 s = ''.join(self.content)
118 else: 123 else:
119 s = html_escape(self.content, quote=False) 124 s = html_escape(self.content, quote=False)
120 return s 125 return s
121 126
127
122class HtmlHdrNode(WikiHdrNode): 128class HtmlHdrNode(WikiHdrNode):
123 def format(self): 129 def format(self):
124 level = self.level 130 level = self.level
@@ -126,10 +132,12 @@ class HtmlHdrNode(WikiHdrNode):
126 level = 6 132 level = 6
127 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) 133 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
128 134
135
129class HtmlBarNode(WikiNode): 136class HtmlBarNode(WikiNode):
130 def format(self): 137 def format(self):
131 return "<hr/>\n" 138 return "<hr/>\n"
132 139
140
133class HtmlEnvNode(WikiEnvNode): 141class HtmlEnvNode(WikiEnvNode):
134 def format(self): 142 def format(self):
135 type = self.envtype 143 type = self.envtype
@@ -146,7 +154,8 @@ class HtmlEnvNode(WikiEnvNode):
146 string, 154 string,
147 self.parser.envt[type]["hdr"]) 155 self.parser.envt[type]["hdr"])
148 return string 156 return string
149 157
158
150class HtmlTagNode(WikiTagNode): 159class HtmlTagNode(WikiTagNode):
151 def format(self): 160 def format(self):
152 if self.tag == 'code': 161 if self.tag == 'code':
@@ -156,13 +165,13 @@ class HtmlTagNode(WikiTagNode):
156 return '<pre><code>' + s + '</code></pre>' #FIXME 165 return '<pre><code>' + s + '</code></pre>' #FIXME
157 elif self.tag == 'ref': 166 elif self.tag == 'ref':
158 n = self.idx+1 167 n = self.idx+1
159 return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n) 168 return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n, n, n, n)
160 elif self.tag == 'references': 169 elif self.tag == 'references':
161 s = '<div class="references">\n' 170 s = '<div class="references">\n'
162 s += '<ol class="references">\n' 171 s += '<ol class="references">\n'
163 n = 0 172 n = 0
164 for ref in self.parser.references: 173 for ref in self.parser.references:
165 n += 1 174 n += 1
166 s += ('<li id="cite_note-%d">' 175 s += ('<li id="cite_note-%d">'
167 + '<span class="mw-cite-backlink">' 176 + '<span class="mw-cite-backlink">'
168 + '<b><a href="#cite_ref-%d">^</a></b>' 177 + '<b><a href="#cite_ref-%d">^</a></b>'
@@ -170,7 +179,7 @@ class HtmlTagNode(WikiTagNode):
170 + '<span class="reference-text">' 179 + '<span class="reference-text">'
171 + ref.content.format() 180 + ref.content.format()
172 + '</span>' 181 + '</span>'
173 + '</li>\n') % (n,n) 182 + '</li>\n') % (n, n)
174 s += '</ol>\n</div>\n' 183 s += '</ol>\n</div>\n'
175 return s 184 return s
176 else: 185 else:
@@ -180,11 +189,13 @@ class HtmlTagNode(WikiTagNode):
180 s += '>' 189 s += '>'
181 s += self.content.format() 190 s += self.content.format()
182 return s + '</' + self.tag + '>' 191 return s + '</' + self.tag + '>'
183 192
193
184class HtmlParaNode(HtmlSeqNode): 194class HtmlParaNode(HtmlSeqNode):
185 def format(self): 195 def format(self):
186 return "<p>" + super(HtmlParaNode, self).format() + "</p>\n" 196 return "<p>" + super(HtmlParaNode, self).format() + "</p>\n"
187 197
198
188class HtmlPreNode(HtmlSeqNode): 199class HtmlPreNode(HtmlSeqNode):
189 def format(self): 200 def format(self):
190 s = super(HtmlPreNode, self).format() 201 s = super(HtmlPreNode, self).format()
@@ -193,10 +204,11 @@ class HtmlPreNode(HtmlSeqNode):
193 else: 204 else:
194 return '<pre>' + s + '</pre>' 205 return '<pre>' + s + '</pre>'
195 206
207
196class HtmlIndNode(WikiIndNode): 208class HtmlIndNode(WikiIndNode):
197 def format(self): 209 def format(self):
198 return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level 210 return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
199 211
200 212
201class HtmlWikiMarkup(WikiMarkup): 213class HtmlWikiMarkup(WikiMarkup):
202 """A Wiki markup to HTML translator class. 214 """A Wiki markup to HTML translator class.
@@ -223,9 +235,9 @@ class HtmlWikiMarkup(WikiMarkup):
223 [html_base=URL],[image_base=URL],[media_base=URL]) 235 [html_base=URL],[image_base=URL],[media_base=URL])
224 236
225 The arguments have the same meaning as in the WikiMarkup constructor. 237 The arguments have the same meaning as in the WikiMarkup constructor.
226 238
227 """ 239 """
228 240
229 super(HtmlWikiMarkup, self).__init__(*args, **kwargs) 241 super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
230 self.token_class['LINK'] = HtmlLinkNode 242 self.token_class['LINK'] = HtmlLinkNode
231 self.token_class['TMPL'] = HtmlLinkNode 243 self.token_class['TMPL'] = HtmlLinkNode
@@ -249,14 +261,14 @@ class HtmlWikiMarkup(WikiMarkup):
249 for elt in wiki_ns_re[self.lang][str]: 261 for elt in wiki_ns_re[self.lang][str]:
250 if str.beginswith(elt[0]) and str.endswith(elt[1]): 262 if str.beginswith(elt[0]) and str.endswith(elt[1]):
251 return elt[2] 263 return elt[2]
252 return None 264 return None
253 265
254 envt = { "unnumbered": { "hdr": "ul", 266 envt = { "unnumbered": { "hdr": "ul",
255 "elt": ["li"] }, 267 "elt": ["li"] },
256 "numbered": { "hdr": "ol", 268 "numbered": { "hdr": "ol",
257 "elt": ["li"] }, 269 "elt": ["li"] },
258 "defn": { "hdr": "dl", 270 "defn": { "hdr": "dl",
259 "elt": ["dt","dd"] } } 271 "elt": ["dt","dd"] } }
260 272
261 def mktgt(self, tgt, lang = None): 273 def mktgt(self, tgt, lang = None):
262 if not lang: 274 if not lang:
@@ -291,13 +303,14 @@ class HtmlWikiMarkup(WikiMarkup):
291 text += ' <span class="proto">' + x + '</span>' 303 text += ' <span class="proto">' + x + '</span>'
292 text += ' <span class="meaning">(' + s[-2] + ')</span>' 304 text += ' <span class="meaning">(' + s[-2] + ')</span>'
293 return text 305 return text
294 306
295 def __str__(self): 307 def __str__(self):
296 str = "" 308 str = ""
297 for elt in self.tree: 309 for elt in self.tree:
298 str += elt.format() 310 str += elt.format()
299 return str 311 return str
300 312
313
301class HtmlWiktionaryMarkup(HtmlWikiMarkup): 314class HtmlWiktionaryMarkup(HtmlWikiMarkup):
302 """A class for translating Wiktionary articles into HTML. 315 """A class for translating Wiktionary articles into HTML.
303 316
diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py
index 55dffe2..936a133 100644
--- a/wikitrans/wiki2texi.py
+++ b/wikitrans/wiki2texi.py
@@ -1,17 +1,17 @@
1#!/usr/bin/python 1#!/usr/bin/python
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3# Copyright (C) 2015-2018 Sergey Poznyakoff 3# Copyright (C) 2015-2018 Sergey Poznyakoff
4# 4#
5# This program is free software; you can redistribute it and/or modify 5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by 6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option) 7# the Free Software Foundation; either version 3, or (at your option)
8# any later version. 8# any later version.
9# 9#
10# This program is distributed in the hope that it will be useful, 10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details. 13# GNU General Public License for more details.
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
@@ -30,16 +30,17 @@ from wikitrans.wikins import wiki_ns_re, wiki_ns
30import re 30import re
31import urllib 31import urllib
32 32
33
33class Acc(list): 34class Acc(list):
34 def prepend(self,x): 35 def prepend(self, x):
35 self.insert(0,x) 36 self.insert(0, x)
36 37
37 def is_empty(self): 38 def is_empty(self):
38 return len(self) == 0 39 return len(self) == 0
39 40
40 def clear(self): 41 def clear(self):
41 self = [] 42 self = []
42 43
43 def tail(self, n = 1): 44 def tail(self, n = 1):
44 s = Acc() 45 s = Acc()
45 i = len(self) 46 i = len(self)
@@ -65,33 +66,35 @@ class Acc(list):
65 self += elt[0:-n] 66 self += elt[0:-n]
66 break 67 break
67 n -= l 68 n -= l
68 69
69 def trimnl(self): 70 def trimnl(self):
70 if self.endswith('\n'): 71 if self.endswith('\n'):
71 self.trim(1) 72 self.trim(1)
72 73
73 def trimpara(self): 74 def trimpara(self):
74 if self.endswith('\n\n'): 75 if self.endswith('\n\n'):
75 self.trim(2) 76 self.trim(2)
76 77
77 def endswith(self, x): 78 def endswith(self, x):
78 return self.tail(len(x)) == x 79 return self.tail(len(x)) == x
79 80
80 def in_new_para(self): 81 def in_new_para(self):
81 return self.is_empty() or self.endswith('\n\n') 82 return self.is_empty() or self.endswith('\n\n')
82 83
83 def __str__(self): 84 def __str__(self):
84 return ''.join(self) 85 return ''.join(self)
85 86
87
86class TexiTextNode(WikiTextNode): 88class TexiTextNode(WikiTextNode):
87 def format(self): 89 def format(self):
88 parser = self.parser 90 parser = self.parser
89 if isinstance(self.content,list): 91 if isinstance(self.content, list):
90 for s in self.content: 92 for s in self.content:
91 parser._print(s) 93 parser._print(s)
92 else: 94 else:
93 parser._print(self.content) 95 parser._print(self.content)
94 96
97
95class TexiTagNode(WikiTagNode): 98class TexiTagNode(WikiTagNode):
96 def format(self): 99 def format(self):
97 parser = self.parser 100 parser = self.parser
@@ -125,9 +128,10 @@ class TexiTagNode(WikiTagNode):
125 parser._print('>'); 128 parser._print('>');
126 self.content.format() 129 self.content.format()
127 parser._print('</' + self.tag + '>') 130 parser._print('</' + self.tag + '>')
128 131
132
129class TexiParaNode(WikiSeqNode): 133class TexiParaNode(WikiSeqNode):
130 def format(self): 134 def format(self):
131 parser = self.parser 135 parser = self.parser
132 if not parser.acc.in_new_para(): 136 if not parser.acc.in_new_para():
133 parser._print('\n', nl=True) 137 parser._print('\n', nl=True)
@@ -135,7 +139,8 @@ class TexiParaNode(WikiSeqNode):
135 x.format() 139 x.format()
136 if not parser.acc.in_new_para(): 140 if not parser.acc.in_new_para():
137 parser._print('\n', nl=True) 141 parser._print('\n', nl=True)
138 142
143
139class TexiPreNode(WikiSeqNode): 144class TexiPreNode(WikiSeqNode):
140 def format(self): 145 def format(self):
141 parser = self.parser 146 parser = self.parser
@@ -146,6 +151,7 @@ class TexiPreNode(WikiSeqNode):
146 if not parser.nested: 151 if not parser.nested:
147 parser._print('@end example\n', nl=True, escape=False) 152 parser._print('@end example\n', nl=True, escape=False)
148 153
154
149class TexiFontNode(WikiSeqNode): 155class TexiFontNode(WikiSeqNode):
150 def format(self): 156 def format(self):
151 parser = self.parser 157 parser = self.parser
@@ -156,6 +162,7 @@ class TexiFontNode(WikiSeqNode):
156 x.format() 162 x.format()
157 parser._print('}', escape=False) 163 parser._print('}', escape=False)
158 164
165
159class TexiHdrNode(WikiHdrNode): 166class TexiHdrNode(WikiHdrNode):
160 def format(self): 167 def format(self):
161 parser = self.parser 168 parser = self.parser
@@ -174,10 +181,12 @@ class TexiHdrNode(WikiHdrNode):
174 parser._print('\n') 181 parser._print('\n')
175 parser._print(None, nl=True) 182 parser._print(None, nl=True)
176 183
184
177class TexiBarNode(WikiNode): 185class TexiBarNode(WikiNode):
178 def format(self): 186 def format(self):
179 self.parser._print("\n-----\n") 187 self.parser._print("\n-----\n")
180 188
189
181class TexiIndNode(WikiIndNode): 190class TexiIndNode(WikiIndNode):
182 def format(self): 191 def format(self):
183 parser = self.parser 192 parser = self.parser
@@ -185,6 +194,7 @@ class TexiIndNode(WikiIndNode):
185 self.content.format() 194 self.content.format()
186 parser._print(None, nl=True) 195 parser._print(None, nl=True)
187 196
197
188class TexiEnvNode(WikiEnvNode): 198class TexiEnvNode(WikiEnvNode):
189 def format(self): 199 def format(self):
190 parser = self.parser 200 parser = self.parser
@@ -216,7 +226,8 @@ class TexiEnvNode(WikiEnvNode):
216 parser._print(None, nl=True) 226 parser._print(None, nl=True)
217 parser._print('\n') 227 parser._print('\n')
218 parser._print('@end table\n', nl=True, escape=False) 228 parser._print('@end table\n', nl=True, escape=False)
219 229
230
220class TexiLinkNode(WikiSeqNode): 231class TexiLinkNode(WikiSeqNode):
221 def format(self): 232 def format(self):
222 parser = self.parser 233 parser = self.parser
@@ -242,12 +253,13 @@ class TexiLinkNode(WikiSeqNode):
242 if len(s) > 1 and s[1] == 'thumb': 253 if len(s) > 1 and s[1] == 'thumb':
243 return 254 return
244 255
245 (qual,sep,tgt) = arg.partition(':') 256 (qual, sep, tgt) = arg.partition(':')
246 if text: 257 if text:
247 parser._print("@ref{%s,%s}" % (qual, text), escape=False) 258 parser._print("@ref{%s,%s}" % (qual, text), escape=False)
248 else: 259 else:
249 parser._print("@ref{%s}" % qual, escape=False) 260 parser._print("@ref{%s}" % qual, escape=False)
250 261
262
251class TexiRefNode(WikiRefNode): 263class TexiRefNode(WikiRefNode):
252 def format(self): 264 def format(self):
253 parser = self.parser 265 parser = self.parser
@@ -259,10 +271,11 @@ class TexiRefNode(WikiRefNode):
259 parser._print("@uref{%s,%s}" % (target, text), escape=False) 271 parser._print("@uref{%s,%s}" % (target, text), escape=False)
260 else: 272 else:
261 parser._print("@uref{%s}" % target, escape=False) 273 parser._print("@uref{%s}" % target, escape=False)
262 274
275
263class TexiWikiMarkup(WikiMarkup): 276class TexiWikiMarkup(WikiMarkup):
264 """Wiki markup to Texinfo translator class. 277 """Wiki markup to Texinfo translator class.
265 278
266 Usage: 279 Usage:
267 280
268 x = TexiWikiMarkup(file="input.wiki") 281 x = TexiWikiMarkup(file="input.wiki")
@@ -270,16 +283,16 @@ class TexiWikiMarkup(WikiMarkup):
270 x.parse() 283 x.parse()
271 # Print it as Texi: 284 # Print it as Texi:
272 print(str(x)) 285 print(str(x))
273 286
274 """ 287 """
275 288
276 nested = 0 289 nested = 0
277 sectcomm = { 290 sectcomm = {
278 'numbered': [ 291 'numbered': [
279 '@top', 292 '@top',
280 '@chapter', 293 '@chapter',
281 '@section', 294 '@section',
282 '@subsection', 295 '@subsection',
283 '@subsubsection' 296 '@subsubsection'
284 ], 297 ],
285 'unnumbered': [ 298 'unnumbered': [
@@ -317,7 +330,7 @@ class TexiWikiMarkup(WikiMarkup):
317 330
318 For a discussion of generic arguments, see the constructor of 331 For a discussion of generic arguments, see the constructor of
319 the WikiMarkup class. 332 the WikiMarkup class.
320 333
321 Additional arguments: 334 Additional arguments:
322 335
323 sectioning_model=MODEL 336 sectioning_model=MODEL
@@ -342,9 +355,9 @@ class TexiWikiMarkup(WikiMarkup):
342 "@section A" on output. Now, if given "sectioning_start=1", this 355 "@section A" on output. Now, if given "sectioning_start=1", this
343 directive will produce "@subsection A" instead. 356 directive will produce "@subsection A" instead.
344 """ 357 """
345 358
346 super(TexiWikiMarkup, self).__init__(*args, **keywords) 359 super(TexiWikiMarkup, self).__init__(*args, **keywords)
347 360
348 self.token_class['TEXT'] = TexiTextNode 361 self.token_class['TEXT'] = TexiTextNode
349 self.token_class['TAG'] = TexiTagNode 362 self.token_class['TAG'] = TexiTagNode
350 self.token_class['PARA'] = TexiParaNode 363 self.token_class['PARA'] = TexiParaNode
@@ -357,7 +370,7 @@ class TexiWikiMarkup(WikiMarkup):
357 self.token_class['ENV'] = TexiEnvNode 370 self.token_class['ENV'] = TexiEnvNode
358 self.token_class['LINK'] = TexiLinkNode 371 self.token_class['LINK'] = TexiLinkNode
359 self.token_class['REF'] = TexiRefNode 372 self.token_class['REF'] = TexiRefNode
360 373
361 if "sectioning_model" in keywords: 374 if "sectioning_model" in keywords:
362 val = keywords["sectioning_model"] 375 val = keywords["sectioning_model"]
363 if val in self.sectcomm: 376 if val in self.sectcomm:
@@ -394,17 +407,10 @@ class TexiWikiMarkup(WikiMarkup):
394 s = self.acc 407 s = self.acc
395 self.acc = val 408 self.acc = val
396 return str(s) 409 return str(s)
397 410
398 def __str__(self): 411 def __str__(self):
399 self._begin_print() 412 self._begin_print()
400 for elt in self.tree: 413 for elt in self.tree:
401 elt.format() 414 elt.format()
402 self.acc.trimpara() 415 self.acc.trimpara()
403 return self._end_print() 416 return self._end_print()
404
405
406
407
408
409
410
diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py
index 88e7610..7585bff 100644
--- a/wikitrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -1,17 +1,17 @@
1#!/usr/bin/python 1#!/usr/bin/python
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3# Copyright (C) 2008-2018 Sergey Poznyakoff 3# Copyright (C) 2008-2018 Sergey Poznyakoff
4# 4#
5# This program is free software; you can redistribute it and/or modify 5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by 6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option) 7# the Free Software Foundation; either version 3, or (at your option)
8# any later version. 8# any later version.
9# 9#
10# This program is distributed in the hope that it will be useful, 10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details. 13# GNU General Public License for more details.
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
@@ -35,6 +35,7 @@ try:
35except ImportError: 35except ImportError:
36 from urllib.parse import quote as url_quote 36 from urllib.parse import quote as url_quote
37 37
38
38class TextSeqNode(WikiSeqNode): 39class TextSeqNode(WikiSeqNode):
39 def format(self): 40 def format(self):
40 string = "" 41 string = ""
@@ -43,10 +44,11 @@ class TextSeqNode(WikiSeqNode):
43 string += ' ' 44 string += ' '
44 string += x.format() 45 string += x.format()
45 return string 46 return string
46 47
48
47class TextTextNode(WikiTextNode): 49class TextTextNode(WikiTextNode):
48 def format(self): 50 def format(self):
49 if isinstance(self.content,list): 51 if isinstance(self.content, list):
50 string = "" 52 string = ""
51 for s in self.content: 53 for s in self.content:
52 if string: 54 if string:
@@ -59,6 +61,7 @@ class TextTextNode(WikiTextNode):
59 string = self.content 61 string = self.content
60 return string 62 return string
61 63
64
62class TextPreNode(WikiSeqNode): 65class TextPreNode(WikiSeqNode):
63 def format(self): 66 def format(self):
64 string = "" 67 string = ""
@@ -67,6 +70,7 @@ class TextPreNode(WikiSeqNode):
67 string += '\n' 70 string += '\n'
68 return string 71 return string
69 72
73
70class TextParaNode(WikiSeqNode): 74class TextParaNode(WikiSeqNode):
71 def format(self): 75 def format(self):
72 string = "" 76 string = ""
@@ -75,6 +79,7 @@ class TextParaNode(WikiSeqNode):
75 string = self.parser.fmtpara(string) + '\n\n' 79 string = self.parser.fmtpara(string) + '\n\n'
76 return string 80 return string
77 81
82
78class TextItNode(WikiSeqNode): 83class TextItNode(WikiSeqNode):
79 def format(self): 84 def format(self):
80 string = "" 85 string = ""
@@ -83,7 +88,8 @@ class TextItNode(WikiSeqNode):
83 if s: 88 if s:
84 string += " " + s 89 string += " " + s
85 return "_" + string.lstrip(" ") + "_" 90 return "_" + string.lstrip(" ") + "_"
86 91
92
87class TextBoldNode(WikiSeqNode): 93class TextBoldNode(WikiSeqNode):
88 def format(self): 94 def format(self):
89 string = "" 95 string = ""
@@ -95,6 +101,7 @@ class TextBoldNode(WikiSeqNode):
95 string += x.format() 101 string += x.format()
96 return string.upper() 102 return string.upper()
97 103
104
98class TextLinkNode(WikiSeqNode): 105class TextLinkNode(WikiSeqNode):
99 def format(self): 106 def format(self):
100 arg = self.content[0].format() 107 arg = self.content[0].format()
@@ -110,7 +117,7 @@ class TextLinkNode(WikiSeqNode):
110 return "" 117 return ""
111 if len(s) > 1 and s[1] == 'thumb': 118 if len(s) > 1 and s[1] == 'thumb':
112 return "" 119 return ""
113 (qual,sep,tgt) = arg.partition(':') 120 (qual, sep, tgt) = arg.partition(':')
114 if tgt != '': 121 if tgt != '':
115 ns = self.parser.wiki_ns_name(qual) 122 ns = self.parser.wiki_ns_name(qual)
116 if ns: 123 if ns:
@@ -138,11 +145,13 @@ class TextLinkNode(WikiSeqNode):
138 return arg 145 return arg
139 else: 146 else:
140 return text 147 return text
141 148
149
142class TextTmplNode(TextLinkNode): 150class TextTmplNode(TextLinkNode):
143 def format(self): 151 def format(self):
144 return '[' + super(TextTmplNode, self).format() + ']' 152 return '[' + super(TextTmplNode, self).format() + ']'
145 153
154
146class TextBarNode(WikiNode): 155class TextBarNode(WikiNode):
147 def format(self): 156 def format(self):
148 w = self.parser.width 157 w = self.parser.width
@@ -150,6 +159,7 @@ class TextBarNode(WikiNode):
150 w = 5 159 w = 5
151 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" 160 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
152 161
162
153class TextHdrNode(WikiHdrNode): 163class TextHdrNode(WikiHdrNode):
154 def format(self): 164 def format(self):
155 return ("\n" 165 return ("\n"
@@ -157,7 +167,8 @@ class TextHdrNode(WikiHdrNode):
157 + " " 167 + " "
158 + self.content.format().lstrip(" ") 168 + self.content.format().lstrip(" ")
159 + "\n\n") 169 + "\n\n")
160 170
171
161class TextRefNode(WikiRefNode): 172class TextRefNode(WikiRefNode):
162 def format(self): 173 def format(self):
163 text = self.content.format() 174 text = self.content.format()
@@ -166,6 +177,7 @@ class TextRefNode(WikiRefNode):
166 else: 177 else:
167 return "see " + self.ref 178 return "see " + self.ref
168 179
180
169class TextEnvNode(WikiEnvNode): 181class TextEnvNode(WikiEnvNode):
170 def format(self): 182 def format(self):
171 type = self.envtype 183 type = self.envtype
@@ -188,16 +200,16 @@ class TextEnvNode(WikiEnvNode):
188 string += self.parser.indent(lev-1, x) 200 string += self.parser.indent(lev-1, x)
189 else: 201 else:
190 string += self.parser.indent(lev+3, x) 202 string += self.parser.indent(lev+3, x)
191
192 if not string.endswith("\n"): 203 if not string.endswith("\n"):
193 string += "\n" 204 string += "\n"
194
195 return string 205 return string
196 206
207
197class TextIndNode(WikiIndNode): 208class TextIndNode(WikiIndNode):
198 def format(self): 209 def format(self):
199 return (" " * self.level) + self.content.format() + '\n' 210 return (" " * self.level) + self.content.format() + '\n'
200 211
212
201class TextTagNode(WikiTagNode): 213class TextTagNode(WikiTagNode):
202 def format(self): 214 def format(self):
203 if self.tag == 'code': 215 if self.tag == 'code':
@@ -215,8 +227,8 @@ class TextTagNode(WikiTagNode):
215 if self.args: 227 if self.args:
216 s += ' ' + str(self.args) 228 s += ' ' + str(self.args)
217 s += '>' + self.content.format() + '</' + self.tag + '>' 229 s += '>' + self.content.format() + '</' + self.tag + '>'
218 return s 230 return s
219 231
220 232
221class TextWikiMarkup(WikiMarkup): 233class TextWikiMarkup(WikiMarkup):
222 """A Wiki markup to plain text translator. 234 """A Wiki markup to plain text translator.
@@ -228,7 +240,7 @@ class TextWikiMarkup(WikiMarkup):
228 x.parse() 240 x.parse()
229 # Print it as plain text: 241 # Print it as plain text:
230 print(str(x)) 242 print(str(x))
231 243
232 """ 244 """
233 245
234 # Output width 246 # Output width
@@ -243,7 +255,7 @@ class TextWikiMarkup(WikiMarkup):
243 255
244 # Array of footnote references 256 # Array of footnote references
245 references = [] 257 references = []
246 258
247 def __init__(self, *args, **keywords): 259 def __init__(self, *args, **keywords):
248 """Create a TextWikiMarkup object. 260 """Create a TextWikiMarkup object.
249 261
@@ -254,16 +266,16 @@ class TextWikiMarkup(WikiMarkup):
254 Most arguments have the same meaning as in the WikiMarkup constructor. 266 Most arguments have the same meaning as in the WikiMarkup constructor.
255 267
256 Class-specific arguments: 268 Class-specific arguments:
257 269
258 width=N 270 width=N
259 Limit output width to N columns. Default is 78. 271 Limit output width to N columns. Default is 78.
260 show_urls=False 272 show_urls=False
261 By default, the link URLs are displayed in parentheses next to the 273 By default, the link URLs are displayed in parentheses next to the
262 link text. If this argument is given, only the link text will be 274 link text. If this argument is given, only the link text will be
263 displayed. 275 displayed.
264 """ 276 """
265 277
266 super(TextWikiMarkup,self).__init__(*args, **keywords) 278 super(TextWikiMarkup, self).__init__(*args, **keywords)
267 if 'width' in keywords: 279 if 'width' in keywords:
268 self.width = keywords['width'] 280 self.width = keywords['width']
269 if 'show_urls' in keywords: 281 if 'show_urls' in keywords:
@@ -283,7 +295,7 @@ class TextWikiMarkup(WikiMarkup):
283 self.token_class['ENV'] = TextEnvNode 295 self.token_class['ENV'] = TextEnvNode
284 self.token_class['IND'] = TextIndNode 296 self.token_class['IND'] = TextIndNode
285 self.token_class['TAG'] = TextTagNode 297 self.token_class['TAG'] = TextTagNode
286 298
287 def wiki_ns_name(self, str): 299 def wiki_ns_name(self, str):
288 if str in wiki_ns[self.lang]: 300 if str in wiki_ns[self.lang]:
289 return wiki_ns[self.lang][str] 301 return wiki_ns[self.lang][str]
@@ -292,15 +304,15 @@ class TextWikiMarkup(WikiMarkup):
292 if str.beginswith(elt[0]) and str.endswith(elt[1]): 304 if str.beginswith(elt[0]) and str.endswith(elt[1]):
293 return elt[2] 305 return elt[2]
294 return None 306 return None
295 307
296 def mktgt(self, tgt, lang = None): 308 def mktgt(self, tgt, lang = None):
297 if not lang: 309 if not lang:
298 lang = self.lang 310 lang = self.lang
299 return self.html_base % { 'lang' : lang } + url_quote(tgt) 311 return self.html_base % { 'lang' : lang } + url_quote(tgt)
300 312
301 def indent(self, lev, text): 313 def indent(self, lev, text):
302 if text.find('\n') == -1: 314 if text.find('\n') == -1:
303 s = (" " * lev) + text 315 s = (" " * lev) + text
304 else: 316 else:
305 s = "" 317 s = ""
306 for elt in text.split('\n'): 318 for elt in text.split('\n'):
@@ -309,7 +321,7 @@ class TextWikiMarkup(WikiMarkup):
309 if not text.endswith('\n'): 321 if not text.endswith('\n'):
310 s = s.rstrip('\n') 322 s = s.rstrip('\n')
311 return s 323 return s
312 324
313 def fmtpara(self, input): 325 def fmtpara(self, input):
314 output = "" 326 output = ""
315 linebuf = "" 327 linebuf = ""
@@ -331,13 +343,14 @@ class TextWikiMarkup(WikiMarkup):
331 linebuf += " " * wsc + s 343 linebuf += " " * wsc + s
332 length += wsc + wlen 344 length += wsc + wlen
333 return output + linebuf 345 return output + linebuf
334 346
335 def __str__(self): 347 def __str__(self):
336 str = "" 348 str = ""
337 for elt in self.tree: 349 for elt in self.tree:
338 str += elt.format() 350 str += elt.format()
339 return str 351 return str
340 352
353
341class TextWiktionaryMarkup(TextWikiMarkup): 354class TextWiktionaryMarkup(TextWikiMarkup):
342 """A class for translating Wiktionary articles into plain text. 355 """A class for translating Wiktionary articles into plain text.
343 356
@@ -345,4 +358,3 @@ class TextWiktionaryMarkup(TextWikiMarkup):
345 """ 358 """
346 359
347 html_base='http://%(lang)s.wiktionary.org/wiki/' 360 html_base='http://%(lang)s.wiktionary.org/wiki/'
348
diff --git a/wikitrans/wikidump.py b/wikitrans/wikidump.py
index d5f651c..bc71876 100644
--- a/wikitrans/wikidump.py
+++ b/wikitrans/wikidump.py
@@ -1,16 +1,16 @@
1# Wiki "dump" format. -*- coding: utf-8 -*- 1# Wiki "dump" format. -*- coding: utf-8 -*-
2# Copyright (C) 2015-2018 Sergey Poznyakoff 2# Copyright (C) 2015-2018 Sergey Poznyakoff
3# 3#
4# This program is free software; you can redistribute it and/or modify 4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by 5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3, or (at your option) 6# the Free Software Foundation; either version 3, or (at your option)
7# any later version. 7# any later version.
8# 8#
9# This program is distributed in the hope that it will be useful, 9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of 10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details. 12# GNU General Public License for more details.
13# 13#
14# You should have received a copy of the GNU General Public License 14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>. 15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16
@@ -28,6 +28,7 @@ from wikitrans.wikitoken import *
28import json 28import json
29from wikitrans.wikimarkup import WikiMarkup 29from wikitrans.wikimarkup import WikiMarkup
30 30
31
31class DumpReferences(object): 32class DumpReferences(object):
32 idx = 0 33 idx = 0
33 def __len__(self): 34 def __len__(self):
@@ -43,13 +44,14 @@ class DumpWikiMarkup(WikiMarkup):
43 x = DumpWikiMarkup(file="input.wiki") 44 x = DumpWikiMarkup(file="input.wiki")
44 # Parse the input: 45 # Parse the input:
45 x.parse() 46 x.parse()
46 # Print a JSON dump of the parse tree 47 # Print a JSON dump of the parse tree
47 print(str(x)) 48 print(str(x))
48 49
49 """ 50 """
50 51
51 indent = None 52 indent = None
52 references = DumpReferences() 53 references = DumpReferences()
54
53 def __init__(self, **kwarg): 55 def __init__(self, **kwarg):
54 """Create a DumpWikiMarkup object. 56 """Create a DumpWikiMarkup object.
55 57
@@ -64,14 +66,14 @@ class DumpWikiMarkup(WikiMarkup):
64 indent=N 66 indent=N
65 Basic indent offset for JSON objects. 67 Basic indent offset for JSON objects.
66 """ 68 """
67
68 n = kwarg.pop('indent', None) 69 n = kwarg.pop('indent', None)
69 if n != None: 70 if n != None:
70 self.indent = int(n) 71 self.indent = int(n)
71 super(DumpWikiMarkup,self).__init__(self, **kwarg) 72 super(DumpWikiMarkup, self).__init__(self, **kwarg)
73
72 def __str__(self): 74 def __str__(self):
73 return json.dumps(self.tree, 75 return json.dumps(self.tree,
74 cls=WikiNodeEncoder, 76 cls=WikiNodeEncoder,
75 indent=self.indent, 77 indent=self.indent,
76 separators=(',',': '), 78 separators=(',', ': '),
77 sort_keys=True) 79 sort_keys=True)
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index a3e17d5..d199335 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -38,16 +38,20 @@ from wikitrans.wikitoken import *
38__all__ = [ "WikiMarkupParser", "WikiMarkup", 38__all__ = [ "WikiMarkupParser", "WikiMarkup",
39 "TagAttributes", "TagAttributeSyntaxError" ] 39 "TagAttributes", "TagAttributeSyntaxError" ]
40 40
41
41class UnexpectedTokenError(Exception): 42class UnexpectedTokenError(Exception):
42 def __init__(self, value): 43 def __init__(self, value):
43 self.value = value 44 self.value = value
44 45
46
45class TagAttributeSyntaxError(Exception): 47class TagAttributeSyntaxError(Exception):
46 def __init__(self, value): 48 def __init__(self, value):
47 self.value = value 49 self.value = value
50
48 def __str__(self): 51 def __str__(self):
49 return repr(self.value) 52 return repr(self.value)
50 53
54
51class TagAttributes(object): 55class TagAttributes(object):
52 """A dictionary-like collection of tag attributes. 56 """A dictionary-like collection of tag attributes.
53 57
@@ -59,7 +63,7 @@ class TagAttributes(object):
59 for a in attr: 63 for a in attr:
60 ... 64 ...
61 """ 65 """
62 66
63 attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?") 67 attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?")
64 valseg = re.compile("^[^\\\"]+") 68 valseg = re.compile("^[^\\\"]+")
65 tab = {} 69 tab = {}
@@ -94,26 +98,36 @@ class TagAttributes(object):
94 self.tab[name] = val 98 self.tab[name] = val
95 else: 99 else:
96 raise TagAttributeSyntaxError(s) 100 raise TagAttributeSyntaxError(s)
101
97 def __len__(self): 102 def __len__(self):
98 return len(self.tab) 103 return len(self.tab)
104
99 def __getitem__(self, key): 105 def __getitem__(self, key):
100 return self.tab[key] 106 return self.tab[key]
107
101 def __contains__(self, key): 108 def __contains__(self, key):
102 return key in self.tab 109 return key in self.tab
110
103 def __iter__(self): 111 def __iter__(self):
104 for key in self.tab: 112 for key in self.tab:
105 yield(key) 113 yield(key)
114
106 def has_key(self, key): 115 def has_key(self, key):
107 return self.__contains__(key) 116 return self.__contains__(key)
117
108 def __setitem__(self, key, value): 118 def __setitem__(self, key, value):
109 self.tab[key] = value 119 self.tab[key] = value
120
110 def __delitem__(self, key): 121 def __delitem__(self, key):
111 del self.tab[key] 122 del self.tab[key]
123
112 def __str__(self): 124 def __str__(self):
113 return self.printable 125 return self.printable
126
114 def __repr__(self): 127 def __repr__(self):
115 return self.printable 128 return self.printable
116 129
130
117class WikiMarkupParser(object): 131class WikiMarkupParser(object):
118 """Parser for Wiki markup language. 132 """Parser for Wiki markup language.
119 133
@@ -195,7 +209,7 @@ class WikiMarkupParser(object):
195 'HDR': WikiHdrNode 209 'HDR': WikiHdrNode
196 } 210 }
197 211
198 def _new_node(self,**kwarg): 212 def _new_node(self, **kwarg):
199 return self.token_class[kwarg['type']](self, **kwarg) 213 return self.token_class[kwarg['type']](self, **kwarg)
200 214
201 def tokread(self): 215 def tokread(self):
@@ -266,7 +280,7 @@ class WikiMarkupParser(object):
266 content=m.group(0))) 280 content=m.group(0)))
267 continue 281 continue
268 else: 282 else:
269 yield(self._new_node(type='TEXT',content=m.group(0))) 283 yield(self._new_node(type='TEXT', content=m.group(0)))
270 continue 284 continue
271 else: 285 else:
272 m = self.ctag.match(line, pos) 286 m = self.ctag.match(line, pos)
@@ -306,13 +320,12 @@ class WikiMarkupParser(object):
306 if line: 320 if line:
307 if line[-1] == '\n': 321 if line[-1] == '\n':
308 if line[pos:-1] != '': 322 if line[pos:-1] != '':
309 yield(self._new_node(type='TEXT',content=line[pos:-1])) 323 yield(self._new_node(type='TEXT', content=line[pos:-1]))
310 yield(self._new_node(type='NL')) 324 yield(self._new_node(type='NL'))
311 else: 325 else:
312 yield(self._new_node(type='TEXT',content=line[pos:])) 326 yield(self._new_node(type='TEXT', content=line[pos:]))
313 line = None 327 line = None
314 328
315
316 def input(self): 329 def input(self):
317 """Return next physical line from the input. 330 """Return next physical line from the input.
318 331
@@ -349,7 +362,7 @@ class WikiMarkupParser(object):
349 # 3a. '''a b ''c d''''' 362 # 3a. '''a b ''c d'''''
350 # 3b. ''a b '''c d''''' 363 # 3b. ''a b '''c d'''''
351 stack = [] 364 stack = []
352 for i in range(0,len(self.toklist)): 365 for i in range(0, len(self.toklist)):
353 if (self.toklist[i].type == 'DELIM' 366 if (self.toklist[i].type == 'DELIM'
354 and (self.toklist[i].content == "''" 367 and (self.toklist[i].content == "''"
355 or self.toklist[i].content == "'''")): 368 or self.toklist[i].content == "'''")):
@@ -400,7 +413,7 @@ class WikiMarkupParser(object):
400 self.dprint(20, "lookahead(%s): %s", off, tok) 413 self.dprint(20, "lookahead(%s): %s", off, tok)
401 return tok 414 return tok
402 415
403 def setkn(self,val): 416 def setkn(self, val):
404 """Store token val at the current token index.""" 417 """Store token val at the current token index."""
405 self.toklist[self.tokind] = val 418 self.toklist[self.tokind] = val
406 419
@@ -471,7 +484,7 @@ class WikiMarkupParser(object):
471 acc['textlist'] = [] 484 acc['textlist'] = []
472 485
473 if (isinstance(tok, WikiContentNode) 486 if (isinstance(tok, WikiContentNode)
474 and isinstance(tok.content,str) 487 and isinstance(tok.content, str)
475 and re.match("^[ \t]", tok.content)): 488 and re.match("^[ \t]", tok.content)):
476 type = 'PRE' 489 type = 'PRE'
477 rx = re.compile("^\S") 490 rx = re.compile("^\S")
@@ -574,7 +587,7 @@ class WikiMarkupParser(object):
574 self.dprint(80, "LEAVE parse_indent=%s", x) 587 self.dprint(80, "LEAVE parse_indent=%s", x)
575 return x 588 return x
576 589
577 def parse_fontmod(self,delim,what): 590 def parse_fontmod(self, delim, what):
578 """Parse font modification directive (bold or italics). 591 """Parse font modification directive (bold or italics).
579 592
580 Arguments: 593 Arguments:
@@ -627,7 +640,7 @@ class WikiMarkupParser(object):
627 return None 640 return None
628 641
629 seq = [] 642 seq = []
630 (ref,sep,text) = tok.content.partition(' ') 643 (ref, sep, text) = tok.content.partition(' ')
631 if text: 644 if text:
632 seq.insert(0, self._new_node(type='TEXT', content=text)) 645 seq.insert(0, self._new_node(type='TEXT', content=text))
633 646
@@ -735,7 +748,7 @@ class WikiMarkupParser(object):
735 if od in self.close_delim: 748 if od in self.close_delim:
736 cd = self.close_delim[od] 749 cd = self.close_delim[od]
737 lev = 0 750 lev = 0
738 for i,tok in enumerate(self.toklist[self.tokind+1:]): 751 for i, tok in enumerate(self.toklist[self.tokind+1:]):
739 if tok.type == 'NIL': 752 if tok.type == 'NIL':
740 break 753 break
741 elif tok.type == 'DELIM': 754 elif tok.type == 'DELIM':
@@ -766,7 +779,7 @@ class WikiMarkupParser(object):
766 if tag.args: 779 if tag.args:
767 s += ' ' + str(tag.args) 780 s += ' ' + str(tag.args)
768 s += '>' 781 s += '>'
769 node = self._new_node(type='TEXT',content=s) 782 node = self._new_node(type='TEXT', content=s)
770 if tag.content: 783 if tag.content:
771 self.tree[self.tokind:self.tokind] = tag.content 784 self.tree[self.tokind:self.tokind] = tag.content
772 self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node) 785 self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node)
@@ -786,13 +799,13 @@ class WikiMarkupParser(object):
786 elif tok.type == 'NL': 799 elif tok.type == 'NL':
787 tok = self._new_node(type = 'TEXT', content = '\n') 800 tok = self._new_node(type = 'TEXT', content = '\n')
788 list.append(tok) 801 list.append(tok)
789
790 self.clear_mark() 802 self.clear_mark()
791 ret = self._new_node(type = 'TAG', 803 ret = self._new_node(type = 'TAG',
792 tag = tag.tag, 804 tag = tag.tag,
793 args = tag.args, 805 args = tag.args,
794 isblock = tag.isblock, 806 isblock = tag.isblock,
795 content = self._new_node(type = 'SEQ', content = list)) 807 content = self._new_node(type = 'SEQ',
808 content = list))
796 self.dprint(80, "LEAVE parse_tag = %s", ret) 809 self.dprint(80, "LEAVE parse_tag = %s", ret)
797 return ret 810 return ret
798 811
@@ -800,7 +813,7 @@ class WikiMarkupParser(object):
800 """Parse a block environment (numbered, unnumbered, or definition list).""" 813 """Parse a block environment (numbered, unnumbered, or definition list)."""
801 type = self.envtypes[tok.content[0]][0] 814 type = self.envtypes[tok.content[0]][0]
802 lev = len(tok.content) 815 lev = len(tok.content)
803 self.dprint(80, "ENTER parse_env(%s,%s)",type,lev) 816 self.dprint(80, "ENTER parse_env(%s,%s)", type, lev)
804 list = [] 817 list = []
805 while True: 818 while True:
806 if (tok.type == 'DELIM' 819 if (tok.type == 'DELIM'
@@ -831,7 +844,6 @@ class WikiMarkupParser(object):
831 break 844 break
832 845
833 tok = self.getkn() 846 tok = self.getkn()
834
835 ret = self._new_node(type='ENV', 847 ret = self._new_node(type='ENV',
836 envtype=type, 848 envtype=type,
837 level=lev, 849 level=lev,
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py
index 49c6c68..0678a75 100644
--- a/wikitrans/wikitoken.py
+++ b/wikitrans/wikitoken.py
@@ -41,21 +41,26 @@ from __future__ import print_function
41import re 41import re
42import json 42import json
43 43
44
44class WikiNodeEncoder(json.JSONEncoder): 45class WikiNodeEncoder(json.JSONEncoder):
45 """Custom JSONEncoder subclass for serializing WikiNode and its subclasses.""" 46 """Custom JSONEncoder subclass for serializing WikiNode and its subclasses."""
47
46 def default(self, obj): 48 def default(self, obj):
47 if isinstance(obj,WikiNode): 49 if isinstance(obj, WikiNode):
48 return obj.jsonEncode() 50 return obj.json_encode()
49 return json.JSONEncoder.default(self, obj) 51 return json.JSONEncoder.default(self, obj)
50 52
53
51def jsonencoder(func): 54def jsonencoder(func):
52 def _mkencoder(self): 55 def _mkencoder(self):
53 json = func(self) 56 json = func(self)
54 json['wikinode'] = self.__class__.__name__ 57 json['wikinode'] = self.__class__.__name__
55 json['type'] = self.type 58 json['type'] = self.type
56 return json 59 return json
60
57 return _mkencoder 61 return _mkencoder
58 62
63
59class WikiNode(object): 64class WikiNode(object):
60 """Generic parse tree node. 65 """Generic parse tree node.
61 66
@@ -71,7 +76,7 @@ class WikiNode(object):
71 def __init__(self, parser, **kwargs): 76 def __init__(self, parser, **kwargs):
72 self.parser = parser 77 self.parser = parser
73 for key in kwargs: 78 for key in kwargs:
74 if hasattr(self,key): 79 if hasattr(self, key):
75 self.__dict__[key] = kwargs[key] 80 self.__dict__[key] = kwargs[key]
76 else: 81 else:
77 raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key)) 82 raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key))
@@ -80,7 +85,7 @@ class WikiNode(object):
80 return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True) 85 return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)
81 86
82 @jsonencoder 87 @jsonencoder
83 def jsonEncode(self): 88 def json_encode(self):
84 ret = {} 89 ret = {}
85 for x in dir(self): 90 for x in dir(self):
86 if x == 'parser' or x.startswith('_') or type(x) == 'function': 91 if x == 'parser' or x.startswith('_') or type(x) == 'function':
@@ -96,6 +101,7 @@ class WikiNode(object):
96 """ 101 """
97 pass 102 pass
98 103
104
99class WikiContentNode(WikiNode): 105class WikiContentNode(WikiNode):
100 """Generic content node. 106 """Generic content node.
101 107
@@ -110,21 +116,22 @@ class WikiContentNode(WikiNode):
110 pass 116 pass
111 117
112 @jsonencoder 118 @jsonencoder
113 def jsonEncode(self): 119 def json_encode(self):
114 ret = {} 120 ret = {}
115 if self.content: 121 if self.content:
116 if self.type == 'TEXT': 122 if self.type == 'TEXT':
117 ret['content'] = self.content 123 ret['content'] = self.content
118 elif isinstance(self.content,list): 124 elif isinstance(self.content, list):
119 ret['content'] = map(lambda x: x.jsonEncode(), self.content) 125 ret['content'] = map(lambda x: x.json_encode(), self.content)
120 elif isinstance(self.content,WikiNode): 126 elif isinstance(self.content, WikiNode):
121 ret['content'] = self.content.jsonEncode() 127 ret['content'] = self.content.json_encode()
122 else: 128 else:
123 ret['content'] = self.content 129 ret['content'] = self.content
124 else: 130 else:
125 ret['content'] = None 131 ret['content'] = None
126 return ret 132 return ret
127 133
134
128class WikiSeqNode(WikiContentNode): 135class WikiSeqNode(WikiContentNode):
129 """Generic sequence of nodes. 136 """Generic sequence of nodes.
130 137
@@ -138,14 +145,14 @@ class WikiSeqNode(WikiContentNode):
138 x.format() 145 x.format()
139 146
140 @jsonencoder 147 @jsonencoder
141 def jsonEncode(self): 148 def json_encode(self):
142 ret = {} 149 ret = {}
143 if not self.content: 150 if not self.content:
144 ret['content'] = None 151 ret['content'] = None
145 elif isinstance(self.content,list): 152 elif isinstance(self.content, list):
146 ret['content'] = map(lambda x: x.jsonEncode(), self.content) 153 ret['content'] = map(lambda x: x.json_encode(), self.content)
147 elif isinstance(self.content,WikiNode): 154 elif isinstance(self.content, WikiNode):
148 ret['content'] = self.content.jsonEncode() 155 ret['content'] = self.content.json_encode()
149 else: 156 else:
150 ret['content'] = self.content 157 ret['content'] = self.content
151 return ret 158 return ret
@@ -165,11 +172,12 @@ class WikiTextNode(WikiContentNode):
165 type = 'TEXT' 172 type = 'TEXT'
166 173
167 @jsonencoder 174 @jsonencoder
168 def jsonEncode(self): 175 def json_encode(self):
169 return { 176 return {
170 'content': self.content 177 'content': self.content
171 } 178 }
172 179
180
173class WikiDelimNode(WikiContentNode): 181class WikiDelimNode(WikiContentNode):
174 """Delimiter node. 182 """Delimiter node.
175 183
@@ -185,6 +193,7 @@ class WikiDelimNode(WikiContentNode):
185 isblock=False 193 isblock=False
186 continuation = False 194 continuation = False
187 195
196
188class WikiTagNode(WikiContentNode): 197class WikiTagNode(WikiContentNode):
189 """A Wiki tag. 198 """A Wiki tag.
190 199
@@ -205,20 +214,23 @@ class WikiTagNode(WikiContentNode):
205 214
206 def __init__(self, *args, **keywords): 215 def __init__(self, *args, **keywords):
207 super(WikiTagNode, self).__init__(*args, **keywords) 216 super(WikiTagNode, self).__init__(*args, **keywords)
208 if self.type == 'TAG' and self.tag == 'ref' and hasattr(self.parser,'references'): 217 if (self.type == 'TAG'
218 and self.tag == 'ref'
219 and hasattr(self.parser, 'references')):
209 self.idx = len(self.parser.references) 220 self.idx = len(self.parser.references)
210 self.parser.references.append(self) 221 self.parser.references.append(self)
211 222
212 @jsonencoder 223 @jsonencoder
213 def jsonEncode(self): 224 def json_encode(self):
214 return { 225 return {
215 'tag': self.tag, 226 'tag': self.tag,
216 'isblock': self.isblock, 227 'isblock': self.isblock,
217 'args': self.args.tab if self.args else None, 228 'args': self.args.tab if self.args else None,
218 'content': self.content.jsonEncode() if self.content else None, 229 'content': self.content.json_encode() if self.content else None,
219 'idx': self.idx 230 'idx': self.idx
220 } 231 }
221 232
233
222class WikiRefNode(WikiContentNode): 234class WikiRefNode(WikiContentNode):
223 """Reference node. 235 """Reference node.
224 236
@@ -233,12 +245,13 @@ class WikiRefNode(WikiContentNode):
233 type = 'REF' 245 type = 'REF'
234 ref = None 246 ref = None
235 @jsonencoder 247 @jsonencoder
236 def jsonEncode(self): 248 def json_encode(self):
237 return { 249 return {
238 'ref': self.ref, 250 'ref': self.ref,
239 'content': self.content.jsonEncode() 251 'content': self.content.json_encode()
240 } 252 }
241 253
254
242class WikiHdrNode(WikiContentNode): 255class WikiHdrNode(WikiContentNode):
243 """A wiki markup header class. 256 """A wiki markup header class.
244 257
@@ -252,12 +265,13 @@ class WikiHdrNode(WikiContentNode):
252 level = None 265 level = None
253 266
254 @jsonencoder 267 @jsonencoder
255 def jsonEncode(self): 268 def json_encode(self):
256 return { 269 return {
257 'level': self.level, 270 'level': self.level,
258 'content': self.content.jsonEncode() 271 'content': self.content.json_encode()
259 } 272 }
260 273
274
261class WikiEltNode(WikiContentNode): 275class WikiEltNode(WikiContentNode):
262 """Environment element node. 276 """Environment element node.
263 277
@@ -271,12 +285,13 @@ class WikiEltNode(WikiContentNode):
271 subtype = None 285 subtype = None
272 286
273 @jsonencoder 287 @jsonencoder
274 def jsonEncode(self): 288 def json_encode(self):
275 return { 289 return {
276 'subtype': self.subtype, 290 'subtype': self.subtype,
277 'content': self.content.jsonEncode() 291 'content': self.content.json_encode()
278 } 292 }
279 293
294
280class WikiEnvNode(WikiContentNode): 295class WikiEnvNode(WikiContentNode):
281 """Wiki Environment Node 296 """Wiki Environment Node
282 297
@@ -291,13 +306,14 @@ class WikiEnvNode(WikiContentNode):
291 level = None 306 level = None
292 307
293 @jsonencoder 308 @jsonencoder
294 def jsonEncode(self): 309 def json_encode(self):
295 return { 310 return {
296 'envtype': self.envtype, 311 'envtype': self.envtype,
297 'level': self.level, 312 'level': self.level,
298 'content': map(lambda x: x.jsonEncode(), self.content) 313 'content': map(lambda x: x.json_encode(), self.content)
299 } 314 }
300 315
316
301class WikiIndNode(WikiContentNode): 317class WikiIndNode(WikiContentNode):
302 """Indented block node. 318 """Indented block node.
303 319
@@ -311,8 +327,8 @@ class WikiIndNode(WikiContentNode):
311 level = None 327 level = None
312 328
313 @jsonencoder 329 @jsonencoder
314 def jsonEncode(self): 330 def json_encode(self):
315 return { 331 return {
316 'level': self.level, 332 'level': self.level,
317 'content': self.content.jsonEncode() 333 'content': self.content.json_encode()
318 } 334 }

Return to:

Send suggestions and report system problems to the System administrator.