summaryrefslogtreecommitdiffabout
authorSergey Poznyakoff <gray@gnu.org>2018-08-16 12:45:00 (GMT)
committer Sergey Poznyakoff <gray@gnu.org>2018-08-17 10:17:11 (GMT)
commit7186dbab7f1c1227e9229866e086bc417e3e4e52 (patch) (unidiff)
treef29114e9ff7a7b023dd3d611a9bc8808f5cf5bbd
parentd9e26129527ce84f626eb44ff95e4ecfbc5bc92a (diff)
downloadwikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.gz
wikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.bz2
Fix PEP 8 issues.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--WikiTrans/wikitoken.py188
-rw-r--r--tests/test_html.py8
-rw-r--r--tests/test_texi.py6
-rw-r--r--tests/test_text.py8
-rw-r--r--tests/wikitest.py (renamed from tests/WikiTest.py)6
-rw-r--r--wikitrans/__init__.py (renamed from WikiTrans/__init__.py)0
-rw-r--r--wikitrans/wiki2html.py (renamed from WikiTrans/wiki2html.py)115
-rw-r--r--wikitrans/wiki2texi.py (renamed from WikiTrans/wiki2texi.py)63
-rw-r--r--wikitrans/wiki2text.py (renamed from WikiTrans/wiki2text.py)74
-rw-r--r--wikitrans/wikidump.py (renamed from WikiTrans/wikidump.py)41
-rw-r--r--wikitrans/wikimarkup.py (renamed from WikiTrans/wikimarkup.py)784
-rw-r--r--wikitrans/wikins.py (renamed from WikiTrans/wikins.py)0
-rw-r--r--wikitrans/wikitoken.py318
13 files changed, 978 insertions, 633 deletions
diff --git a/WikiTrans/wikitoken.py b/WikiTrans/wikitoken.py
deleted file mode 100644
index 2238a66..0000000
--- a/WikiTrans/wikitoken.py
+++ b/dev/null
@@ -1,188 +0,0 @@
1# Wiki tokens. -*- coding: utf-8 -*-
2# Copyright (C) 2015-2018 Sergey Poznyakoff
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3, or (at your option)
7# any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17from __future__ import print_function
18import re
19import json
20
class WikiNodeEncoder(json.JSONEncoder):
    """Custom JSON encoder that knows how to serialize WikiNode objects."""

    def default(self, obj):
        # WikiNode instances know how to encode themselves; anything else
        # is handed to the stock encoder (which raises TypeError).
        if isinstance(obj, WikiNode):
            return obj.jsonEncode()
        return super(WikiNodeEncoder, self).default(obj)
26
def jsonencoder(func):
    """Decorator for jsonEncode methods.

    Wraps *func* so that the dictionary it returns is augmented with two
    bookkeeping keys: 'wikinode' (the class name of the encoded object)
    and 'type' (its token type attribute).
    """
    def _mkencoder(self):
        # Renamed from 'json' — the original local shadowed the imported
        # 'json' module inside this function.
        ret = func(self)
        ret['wikinode'] = self.__class__.__name__
        ret['type'] = self.type
        return ret
    return _mkencoder
34
class WikiNode(object):
    """Base class for the nodes of a Wiki markup parse tree.

    Attributes:

    type   -- string identifying the token type ('TEXT', 'HDR', etc.)
    parser -- the parser object this node belongs to
    """

    type = 'UNDEF'
    parser = None

    def __init__(self, parser, **kwargs):
        """Initialize the node.

        Each keyword argument must name an existing attribute of the
        class; unknown keywords raise AttributeError.
        """
        self.parser = parser
        for key in kwargs:
            if hasattr(self, key):
                self.__dict__[key] = kwargs[key]
            else:
                raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key))

    def __str__(self):
        return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)

    @jsonencoder
    def jsonEncode(self):
        """Return a JSON-serializable dict of this node's data attributes.

        The parser back-reference, private names and callables are skipped.
        """
        ret = {}
        for x in dir(self):
            # Fixed: the original tested "type(x) == 'function'", which
            # compared a str's type to a string literal and was always
            # False; test the attribute value itself to skip methods.
            if x == 'parser' or x.startswith('_') or callable(getattr(self, x)):
                continue
            if x in self.__dict__:
                ret[x] = self.__dict__[x]
        return ret

    def format(self):
        """Produce output for this node; overridden by subclasses."""
        pass
61
class WikiContentNode(WikiNode):
    """A parse-tree node carrying content.

    Attributes:

    content -- the node's payload: a string, a WikiNode, or a list of
               WikiNode objects, depending on the node type
    """

    content = None

    def format(self):
        pass

    @jsonencoder
    def jsonEncode(self):
        ret = {}
        if self.content:
            if self.type == 'TEXT':
                ret['content'] = self.content
            elif isinstance(self.content, list):
                # List comprehension instead of map(): under Python 3,
                # map() returns an iterator, which json.dumps cannot
                # serialize.
                ret['content'] = [x.jsonEncode() for x in self.content]
            elif isinstance(self.content, WikiNode):
                ret['content'] = self.content.jsonEncode()
            else:
                ret['content'] = self.content
        else:
            ret['content'] = None
        return ret
81
class WikiSeqNode(WikiContentNode):
    """A content node whose content is a sequence of other nodes."""

    def format(self):
        for x in self.content:
            x.format()

    @jsonencoder
    def jsonEncode(self):
        ret = {}
        if not self.content:
            ret['content'] = None
        elif isinstance(self.content, list):
            # List comprehension instead of map(): Python 3 map objects
            # are not JSON-serializable.
            ret['content'] = [x.jsonEncode() for x in self.content]
        elif isinstance(self.content, WikiNode):
            ret['content'] = self.content.jsonEncode()
        else:
            ret['content'] = self.content
        return ret
98
99
100# ##############
101
class WikiTextNode(WikiContentNode):
    """A leaf node holding literal text."""

    type = 'TEXT'

    @jsonencoder
    def jsonEncode(self):
        return {'content': self.content}
109
class WikiDelimNode(WikiContentNode):
    """A node representing a markup delimiter token.

    Attributes:

    isblock      -- block-level delimiter flag (semantics set by the
                    parser — TODO confirm against wikimarkup.py)
    continuation -- continuation flag for the delimiter
    """

    type = 'DELIM'
    isblock = False
    continuation = False
114
class WikiTagNode(WikiContentNode):
    """A node representing a <tag>...</tag> markup construct."""

    tag = None
    isblock = False
    args = None
    # For 'ref' tags: this node's index in parser.references.
    idx = None

    def __init__(self, *args, **keywords):
        super(WikiTagNode, self).__init__(*args, **keywords)
        # Register 'ref' tags with the parser (when it keeps a reference
        # list) so a references section can be produced later.
        is_ref_tag = self.type == 'TAG' and self.tag == 'ref'
        if is_ref_tag and hasattr(self.parser, 'references'):
            self.idx = len(self.parser.references)
            self.parser.references.append(self)

    @jsonencoder
    def jsonEncode(self):
        ret = {
            'tag': self.tag,
            'isblock': self.isblock,
            'idx': self.idx,
        }
        ret['args'] = self.args.tab if self.args else None
        ret['content'] = self.content.jsonEncode() if self.content else None
        return ret
134
class WikiRefNode(WikiContentNode):
    """A reference node: a link target plus its display content."""

    type = 'REF'
    ref = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'ref': self.ref,
            # Guard against None content, consistently with WikiTagNode;
            # the original called self.content.jsonEncode() unconditionally
            # and raised AttributeError for empty content.
            'content': self.content.jsonEncode() if self.content else None
        }
144
class WikiHdrNode(WikiContentNode):
    """A heading node; level gives the heading depth."""

    type = 'HDR'
    level = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'level': self.level,
            # Guard against None content, consistently with WikiTagNode;
            # the original raised AttributeError for empty content.
            'content': self.content.jsonEncode() if self.content else None
        }
154
class WikiEltNode(WikiContentNode):
    """An element node; subtype distinguishes the element kind."""

    type = 'ELT'
    subtype = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'subtype': self.subtype,
            # Guard against None content, consistently with WikiTagNode;
            # the original raised AttributeError for empty content.
            'content': self.content.jsonEncode() if self.content else None
        }
164
class WikiEnvNode(WikiContentNode):
    """An environment node; envtype and level describe the environment.

    NOTE(review): exact envtype values are defined by the parser —
    confirm against wikimarkup.py.
    """

    type = 'ENV'
    envtype = None
    level = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'envtype': self.envtype,
            'level': self.level,
            # List comprehension instead of map(): Python 3 map objects
            # are not JSON-serializable.
            'content': [x.jsonEncode() for x in self.content]
        }
176
class WikiIndNode(WikiContentNode):
    """An indented-block node; level is the indentation depth."""

    type = 'IND'
    level = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'level': self.level,
            # Guard against None content, consistently with WikiTagNode;
            # the original raised AttributeError for empty content.
            'content': self.content.jsonEncode() if self.content else None
        }
186
187
188
diff --git a/tests/test_html.py b/tests/test_html.py
index 3da57f6..5a15cb8 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -2,13 +2,13 @@
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3from __future__ import print_function 3from __future__ import print_function
4import unittest 4import unittest
5from WikiTrans.wiki2html import HtmlWiktionaryMarkup 5from wikitrans.wiki2html import HtmlWikiMarkup
6from WikiTest import populateMethods 6from wikitest import populate_methods
7 7
8class TestWiktionaryMarkup (unittest.TestCase): 8class TestWikiMarkup (unittest.TestCase):
9 pass 9 pass
10 10
11populateMethods(TestWiktionaryMarkup, HtmlWiktionaryMarkup, '.html') 11populate_methods(TestWikiMarkup, HtmlWikiMarkup, '.html')
12 12
13if __name__ == '__main__': 13if __name__ == '__main__':
14 unittest.main() 14 unittest.main()
diff --git a/tests/test_texi.py b/tests/test_texi.py
index 75314c9..ddd26c7 100644
--- a/tests/test_texi.py
+++ b/tests/test_texi.py
@@ -2,13 +2,13 @@
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3from __future__ import print_function 3from __future__ import print_function
4import unittest 4import unittest
5from WikiTrans.wiki2texi import TexiWikiMarkup 5from wikitrans.wiki2texi import TexiWikiMarkup
6from WikiTest import populateMethods 6from wikitest import populate_methods
7 7
8class TestTexiWikiMarkup (unittest.TestCase): 8class TestTexiWikiMarkup (unittest.TestCase):
9 pass 9 pass
10 10
11populateMethods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi') 11populate_methods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi')
12 12
13if __name__ == '__main__': 13if __name__ == '__main__':
14 unittest.main() 14 unittest.main()
diff --git a/tests/test_text.py b/tests/test_text.py
index a06f519..b3d0a12 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -2,13 +2,13 @@
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3from __future__ import print_function 3from __future__ import print_function
4import unittest 4import unittest
5from WikiTrans.wiki2text import TextWiktionaryMarkup 5from wikitrans.wiki2text import TextWikiMarkup
6from WikiTest import populateMethods 6from wikitest import populate_methods
7 7
8class TestTextWiktionaryMarkup (unittest.TestCase): 8class TestTextWikiMarkup (unittest.TestCase):
9 pass 9 pass
10 10
11populateMethods(TestTextWiktionaryMarkup, TextWiktionaryMarkup, '.text') 11populate_methods(TestTextWikiMarkup, TextWikiMarkup, '.text')
12 12
13if __name__ == '__main__': 13if __name__ == '__main__':
14 unittest.main() 14 unittest.main()
diff --git a/tests/WikiTest.py b/tests/wikitest.py
index 1429f5e..ff26227 100644
--- a/tests/WikiTest.py
+++ b/tests/wikitest.py
@@ -4,7 +4,7 @@ from __future__ import print_function
4from glob import glob 4from glob import glob
5import os.path 5import os.path
6 6
7def MarkupTest(classname, name_in, name_out): 7def wiki_markup_test(classname, name_in, name_out):
8 fh = open(name_out) 8 fh = open(name_out)
9 buf = ''.join(fh.readlines()).strip() 9 buf = ''.join(fh.readlines()).strip()
10 fh.close() 10 fh.close()
@@ -19,10 +19,10 @@ def MarkupTest(classname, name_in, name_out):
19 print(">>>%s<<<" % str(hwm).strip()) 19 print(">>>%s<<<" % str(hwm).strip())
20 return False 20 return False
21 21
22def populateMethods(cls, wcls, suffix): 22def populate_methods(cls, wcls, suffix):
23 def settest(self, base, wiki_name, pat_name): 23 def settest(self, base, wiki_name, pat_name):
24 def dyntest(self): 24 def dyntest(self):
25 self.assertTrue(MarkupTest(wcls, wiki_name, pat_name)) 25 self.assertTrue(wiki_markup_test(wcls, wiki_name, pat_name))
26 meth = 'test_' + wcls.__name__ + '_' + base 26 meth = 'test_' + wcls.__name__ + '_' + base
27 dyntest.__name__ = meth 27 dyntest.__name__ = meth
28 setattr(cls, meth, dyntest) 28 setattr(cls, meth, dyntest)
diff --git a/WikiTrans/__init__.py b/wikitrans/__init__.py
index 5832e38..5832e38 100644
--- a/WikiTrans/__init__.py
+++ b/wikitrans/__init__.py
diff --git a/WikiTrans/wiki2html.py b/wikitrans/wiki2html.py
index 6147642..ce65bae 100644
--- a/WikiTrans/wiki2html.py
+++ b/wikitrans/wiki2html.py
@@ -15,10 +15,21 @@
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18"""
19Wiki markup to HTML translator.
20
21Classes:
22
23HtmlWikiMarkup -- Converts Wiki material to HTML.
24HtmlWiktionaryMarkup -- Reserved for future use. Currently does the same as
25 HtmlWikiMarkup.
26
27"""
28
18from __future__ import print_function 29from __future__ import print_function
19from WikiTrans.wikimarkup import * 30from wikitrans.wikimarkup import *
20from WikiTrans.wikitoken import * 31from wikitrans.wikitoken import *
21from WikiTrans.wikins import wiki_ns_re, wiki_ns 32from wikitrans.wikins import wiki_ns_re, wiki_ns
22import re 33import re
23try: 34try:
24 from urllib import quote as url_quote 35 from urllib import quote as url_quote
@@ -79,16 +90,16 @@ class HtmlLinkNode(HtmlSeqNode):
79 else: 90 else:
80 tgt = self.parser.mktgt(arg) 91 tgt = self.parser.mktgt(arg)
81 return "<a href=\"%s\">%s</a>" % (tgt, 92 return "<a href=\"%s\">%s</a>" % (tgt,
82 text if (text and text != '') \ 93 text if (text and text != '') else arg)
83 else arg)
84 94
85class HtmlRefNode(WikiRefNode): 95class HtmlRefNode(WikiRefNode):
86 def format(self): 96 def format(self):
87 target = self.ref 97 target = self.ref
88 text = self.content.format() 98 text = self.content.format()
89 return "<a href=\"%s\">%s</a>" % (target, 99 return "<a href=\"%s\">%s</a>" % (
90 text if (text and text != '') \ 100 target,
91 else target) 101 text if (text and text != '') else target
102 )
92 103
93class HtmlFontNode(HtmlSeqNode): 104class HtmlFontNode(HtmlSeqNode):
94 def format(self): 105 def format(self):
@@ -152,14 +163,14 @@ class HtmlTagNode(WikiTagNode):
152 n = 0 163 n = 0
153 for ref in self.parser.references: 164 for ref in self.parser.references:
154 n += 1 165 n += 1
155 s += ('<li id="cite_note-%d">' + \ 166 s += ('<li id="cite_note-%d">'
156 '<span class="mw-cite-backlink">' + \ 167 + '<span class="mw-cite-backlink">'
157 '<b><a href="#cite_ref-%d">^</a></b>' + \ 168 + '<b><a href="#cite_ref-%d">^</a></b>'
158 '</span>' + \ 169 + '</span>'
159 '<span class="reference-text">' + \ 170 + '<span class="reference-text">'
160 ref.content.format() + \ 171 + ref.content.format()
161 '</span>' + \ 172 + '</span>'
162 '</li>\n') % (n,n) 173 + '</li>\n') % (n,n)
163 s += '</ol>\n</div>\n' 174 s += '</ol>\n</div>\n'
164 return s 175 return s
165 else: 176 else:
@@ -187,17 +198,49 @@ class HtmlIndNode(WikiIndNode):
187 return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level 198 return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
188 199
189 200
190class HtmlWikiMarkup (WikiMarkup): 201class HtmlWikiMarkup(WikiMarkup):
191 """ 202 """A Wiki markup to HTML translator class.
192 A (hopefully) general-purpose Wiki->HTML translator class. 203
193 FIXME: 1. See WikiMarkup for a list 204 Usage:
194 2. [[official position]]s : final 's' gets after closing </a> tag. 205
195 Should be before. 206 x = HtmlWikiMarkup(file="input.wiki")
207 # Parse the input:
208 x.parse()
209 # Print it as HTML:
210 print(str(x))
211
212 Known bugs:
213 * [[official position]]s
214 Final 's' gets after closing </a> tag. Should be before.
196 """ 215 """
197 216
198 nested = 0 217 nested = 0
199 references = [] 218 references = []
200 def __init__(self, *args, **kwargs): 219 def __init__(self, *args, **kwargs):
220 """Create a HtmlWikiMarkup object.
221
222 Arguments:
223
224 filename=FILE
225 Read Wiki material from the file named FILE.
226 file=FD
227 Read Wiki material from file object FD.
228 text=STRING
229 Read Wiki material from STRING.
230 lang=CODE
231 Specifies source language. Default is 'en'. This variable can be
232 referred to as '%(lang)s' in the keyword arguments below.
233 html_base=URL
234 Base URL for cross-references. Default is
235 'http://%(lang)s.wiktionary.org/wiki/'
236 image_base=URL
237 Base URL for images. Default is
238 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf'
239 media_base=URL
240 Base URL for media files. Default is
241 'http://www.mediawiki.org/xml/export-0.3'
242 """
243
201 super(HtmlWikiMarkup, self).__init__(*args, **kwargs) 244 super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
202 self.token_class['LINK'] = HtmlLinkNode 245 self.token_class['LINK'] = HtmlLinkNode
203 self.token_class['TMPL'] = HtmlLinkNode 246 self.token_class['TMPL'] = HtmlLinkNode
@@ -270,30 +313,8 @@ class HtmlWikiMarkup (WikiMarkup):
270 str += elt.format() 313 str += elt.format()
271 return str 314 return str
272 315
273class HtmlWiktionaryMarkup (HtmlWikiMarkup): 316class HtmlWiktionaryMarkup(HtmlWikiMarkup):
274 """ 317 """A class for translating Wiktionary articles into HTML.
275 A class for translating Wiktionary articles into HTML.
276 This version does not do much, except that it tries to correctly
277 format templates. But "tries" does not mean "does". The heuristics
278 used here is clearly not enough to cope with it.
279
280 1. FIXME:
281 The right solution would be to have a database of templates with their
282 semantics and to decide on their rendering depending on that. E.g.
283 {{term}} in en.wiktionary means "replace this with the search term".
284 This, however, does not work in other wiktionaries. There are
285 also more complex templates, e.g.: {{t+|bg|врата|n|p|tr=vrata|sc=Cyrl}}
286 I don't know what it means. Couldn't find any documentation either.
287 Again, this template does not work in other dictionaries.
288 318
289 2. Capitulation notice: 319 Reserved for future use. Currently does the same as HtmlWikiMarkup.
290 Given the:
291 1. vast amount of wiktionaries available,
292 2. abundance of various templates for each wictionary,
293 3. apparent lack of documentation thereof,
294 4. the lack of standardized language-independent templates,
295 I dont see any way to cope with the template-rendering task within a
296 reasonable amount of time.
297
298 Faeci quod potui, faciant meliora potentes.
299 """ 320 """
diff --git a/WikiTrans/wiki2texi.py b/wikitrans/wiki2texi.py
index 7297195..d9e5f52 100644
--- a/WikiTrans/wiki2texi.py
+++ b/wikitrans/wiki2texi.py
@@ -15,9 +15,18 @@
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18from WikiTrans.wikimarkup import * 18"""
19from WikiTrans.wikitoken import * 19Wiki markup to Texinfo translator.
20from WikiTrans.wikins import wiki_ns_re, wiki_ns 20
21Classes:
22
23TexiWikiMarkup -- Converts Wiki material to Texinfo.
24
25"""
26
27from wikitrans.wikimarkup import *
28from wikitrans.wikitoken import *
29from wikitrans.wikins import wiki_ns_re, wiki_ns
21import re 30import re
22import urllib 31import urllib
23 32
@@ -251,7 +260,19 @@ class TexiRefNode(WikiRefNode):
251 else: 260 else:
252 parser._print("@uref{%s}" % target, escape=False) 261 parser._print("@uref{%s}" % target, escape=False)
253 262
254class TexiWikiMarkup (WikiMarkup): 263class TexiWikiMarkup(WikiMarkup):
264 """Wiki markup to Texinfo translator class.
265
266 Usage:
267
268 x = TexiWikiMarkup(file="input.wiki")
269 # Parse the input:
270 x.parse()
271 # Print it as Texi:
272 print(str(x))
273
274 """
275
255 nested = 0 276 nested = 0
256 sectcomm = { 277 sectcomm = {
257 'numbered': [ 278 'numbered': [
@@ -288,6 +309,40 @@ class TexiWikiMarkup (WikiMarkup):
288 sectioning_start = 0 309 sectioning_start = 0
289 310
290 def __init__(self, *args, **keywords): 311 def __init__(self, *args, **keywords):
312 """Create a TexiWikiMarkup object.
313
314 Arguments:
315
316 filename=FILE
317 Read Wiki material from the file named FILE.
318 file=FD
319 Read Wiki material from file object FD.
320 text=STRING
321 Read Wiki material from STRING.
322
323 sectioning_model=MODEL
324 Select the Texinfo sectioning model for the output document. Possible
325 values are:
326
327 'numbered'
328 Top of document is marked with "@top". Headings ("=", "==",
329 "===", etc) produce "@chapter", "@section", "@subsection", etc.
330 'unnumbered'
331 Unnumbered sectioning: "@top", "@unnumbered", "@unnumberedsec",
332 "@unnumberedsubsec".
333 'appendix'
334 Sectioning suitable for appendix entries: "@top", "@appendix",
335 "@appendixsec", "@appendixsubsec", etc.
336 'heading'
337 Use heading directives to reflect sectioning: "@majorheading",
338 "@chapheading", "@heading", "@subheading", etc.
339 sectioning_start=N
340 Shift resulting heading level by N positions. For example, supposing
341 "sectioning_model='numbered'", "== A ==" normally produces
342 "@section A" on output. Now, if given "sectioning_start=1", this
343 directive will produce "@subsection A" instead.
344 """
345
291 super(TexiWikiMarkup, self).__init__(*args, **keywords) 346 super(TexiWikiMarkup, self).__init__(*args, **keywords)
292 347
293 self.token_class['TEXT'] = TexiTextNode 348 self.token_class['TEXT'] = TexiTextNode
diff --git a/WikiTrans/wiki2text.py b/wikitrans/wiki2text.py
index cb3a183..1fbc61b 100644
--- a/WikiTrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -15,9 +15,20 @@
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18from WikiTrans.wikitoken import * 18"""
19from WikiTrans.wikimarkup import * 19Wiki markup to plain text translator.
20from WikiTrans.wikins import wiki_ns_re, wiki_ns 20
21Classes:
22
23TextWikiMarkup -- Converts Wiki material to plain text.
24TextWiktionaryMarkup -- Reserved for future use. Currently does the same as
25 TextWikiMarkup.
26
27"""
28
29from wikitrans.wikitoken import *
30from wikitrans.wikimarkup import *
31from wikitrans.wikins import wiki_ns_re, wiki_ns
21import re 32import re
22try: 33try:
23 from urllib import quote as url_quote 34 from urllib import quote as url_quote
@@ -107,9 +118,9 @@ class TextLinkNode(WikiSeqNode):
107 if not self.parser.show_urls: 118 if not self.parser.show_urls:
108 return "" 119 return ""
109 text = "[%s: %s]" % (qual, text if text else arg) 120 text = "[%s: %s]" % (qual, text if text else arg)
110 tgt = self.image_base + '/' + \ 121 tgt = "%s/%s/250px-%s" % (self.image_base,
111 url_quote(tgt) + \ 122 url_quote(tgt),
112 '/250px-' + url_quote(tgt) 123 url_quote(tgt))
113 elif ns == 'NS_MEDIA': 124 elif ns == 'NS_MEDIA':
114 text = "[%s]" % (qual) 125 text = "[%s]" % (qual)
115 else: 126 else:
@@ -141,8 +152,11 @@ class TextBarNode(WikiNode):
141 152
142class TextHdrNode(WikiHdrNode): 153class TextHdrNode(WikiHdrNode):
143 def format(self): 154 def format(self):
144 return "\n" + ("*" * self.level) + " " + \ 155 return ("\n"
145 self.content.format().lstrip(" ") + "\n\n" 156 + ("*" * self.level)
157 + " "
158 + self.content.format().lstrip(" ")
159 + "\n\n")
146 160
147class TextRefNode(WikiRefNode): 161class TextRefNode(WikiRefNode):
148 def format(self): 162 def format(self):
@@ -204,9 +218,17 @@ class TextTagNode(WikiTagNode):
204 return s 218 return s
205 219
206 220
207class TextWikiMarkup (WikiMarkup): 221class TextWikiMarkup(WikiMarkup):
208 """ 222 """A Wiki markup to plain text translator.
209 A (general-purpose Wiki->Text translator class. 223
224 Usage:
225
226 x = TextWikiMarkup(file="input.wiki")
227 # Parse the input:
228 x.parse()
229 # Print it as plain text:
230 print(str(x))
231
210 """ 232 """
211 233
212 # Output width 234 # Output width
@@ -223,6 +245,25 @@ class TextWikiMarkup (WikiMarkup):
223 references = [] 245 references = []
224 246
225 def __init__(self, *args, **keywords): 247 def __init__(self, *args, **keywords):
248 """Create a TextWikiMarkup object.
249
250 Arguments:
251
252 filename=FILE
253 Read Wiki material from the file named FILE.
254 file=FD
255 Read Wiki material from file object FD.
256 text=STRING
257 Read Wiki material from STRING.
258
259 width=N
260 Limit output width to N columns. Default is 78.
261 show_urls=False
262 By default, the link URLs are displayed in parentheses next to the
263 link text. If this argument is given, only the link text will be
264 displayed.
265 """
266
226 super(TextWikiMarkup,self).__init__(*args, **keywords) 267 super(TextWikiMarkup,self).__init__(*args, **keywords)
227 if 'width' in keywords: 268 if 'width' in keywords:
228 self.width = keywords['width'] 269 self.width = keywords['width']
@@ -258,7 +299,7 @@ class TextWikiMarkup (WikiMarkup):
258 lang = self.lang 299 lang = self.lang
259 return self.html_base % { 'lang' : lang } + url_quote(tgt) 300 return self.html_base % { 'lang' : lang } + url_quote(tgt)
260 301
261 def indent (self, lev, text): 302 def indent(self, lev, text):
262 if text.find('\n') == -1: 303 if text.find('\n') == -1:
263 s = (" " * lev) + text 304 s = (" " * lev) + text
264 else: 305 else:
@@ -298,9 +339,10 @@ class TextWikiMarkup (WikiMarkup):
298 str += elt.format() 339 str += elt.format()
299 return str 340 return str
300 341
301class TextWiktionaryMarkup (TextWikiMarkup): 342class TextWiktionaryMarkup(TextWikiMarkup):
302 """ 343 """A class for translating Wiktionary articles into plain text.
303 See documentation for HtmlWiktionaryMarkup 344
345 Reserved for future use. Currently does the same as TextWikiMarkup.
304 """ 346 """
305 # FIXME: It is supposed to do something about templates 347
306 348
diff --git a/WikiTrans/wikidump.py b/wikitrans/wikidump.py
index 7457dfa..d5f651c 100644
--- a/WikiTrans/wikidump.py
+++ b/wikitrans/wikidump.py
@@ -14,10 +14,19 @@
14# You should have received a copy of the GNU General Public License 14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>. 15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16 16
17"""
18Print Wiki parse tree as JSON.
19
20Classes:
21
22DumpWikiMarkup
23
24"""
25
17from __future__ import print_function 26from __future__ import print_function
18from WikiTrans.wikitoken import * 27from wikitrans.wikitoken import *
19import json 28import json
20from WikiTrans.wikimarkup import WikiMarkup 29from wikitrans.wikimarkup import WikiMarkup
21 30
22class DumpReferences(object): 31class DumpReferences(object):
23 idx = 0 32 idx = 0
@@ -27,13 +36,39 @@ class DumpReferences(object):
27 self.idx += 1 36 self.idx += 1
28 37
29class DumpWikiMarkup(WikiMarkup): 38class DumpWikiMarkup(WikiMarkup):
39 """Produce a JSON dump of the Wiki markup parse tree.
40
41 Usage:
42
43 x = DumpWikiMarkup(file="input.wiki")
44 # Parse the input:
45 x.parse()
46 # Print a JSON dump of the parse tree
47 print(str(x))
48
49 """
50
30 indent = None 51 indent = None
31 references = DumpReferences() 52 references = DumpReferences()
32 def __init__(self, **kwarg): 53 def __init__(self, **kwarg):
54 """Create a DumpWikiMarkup object.
55
56 Arguments:
57
58 filename=FILE
59 Read Wiki material from the file named FILE.
60 file=FD
61 Read Wiki material from file object FD.
62 text=STRING
63 Read Wiki material from STRING.
64 indent=N
65 Basic indent offset for JSON objects.
66 """
67
33 n = kwarg.pop('indent', None) 68 n = kwarg.pop('indent', None)
34 if n != None: 69 if n != None:
35 self.indent = int(n) 70 self.indent = int(n)
36 WikiMarkup.__init__(self, **kwarg) 71 super(DumpWikiMarkup,self).__init__(self, **kwarg)
37 def __str__(self): 72 def __str__(self):
38 return json.dumps(self.tree, 73 return json.dumps(self.tree,
39 cls=WikiNodeEncoder, 74 cls=WikiNodeEncoder,
diff --git a/WikiTrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 6cbf5de..77c3b30 100644
--- a/WikiTrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -1,40 +1,65 @@
1#!/usr/bin/python 1#!/usr/bin/python
2# -*- coding: utf-8 -*- 2# -*- coding: utf-8 -*-
3# Copyright (C) 2008-2018 Sergey Poznyakoff 3# Copyright (C) 2008-2018 Sergey Poznyakoff
4# 4#
5# This program is free software; you can redistribute it and/or modify 5# This program is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by 6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option) 7# the Free Software Foundation; either version 3, or (at your option)
8# any later version. 8# any later version.
9# 9#
10# This program is distributed in the hope that it will be useful, 10# This program is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13# GNU General Public License for more details. 13# GNU General Public License for more details.
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18"""
19Wiki markup parser.
20
21This module provides two classes:
22
23WikiMarkupParser:
24 An abstract parser class, which serves as a base class for all markup
25 classes in this package.
26
27WikiMarkup
28 A subclass of the above, providing basic input method.
29
30"""
31
18from __future__ import print_function 32from __future__ import print_function
19import sys 33import sys
20import re 34import re
21from types import * 35from types import *
22from WikiTrans.wikitoken import * 36from wikitrans.wikitoken import *
23 37
24__all__ = [ "BaseWikiMarkup", "WikiMarkup", 38__all__ = [ "WikiMarkupParser", "WikiMarkup",
25 "TagAttributes", "TagAttributeSyntax" ] 39 "TagAttributes", "TagAttributeSyntaxError" ]
26 40
27class UnexpectedToken(Exception): 41class UnexpectedTokenError(Exception):
28 def __init__(self, value): 42 def __init__(self, value):
29 self.value = value 43 self.value = value
30 44
31class TagAttributeSyntax(Exception): 45class TagAttributeSyntaxError(Exception):
32 def __init__(self, value): 46 def __init__(self, value):
33 self.value = value 47 self.value = value
34 def __str__(self): 48 def __str__(self):
35 return repr(self.value) 49 return repr(self.value)
36 50
37class TagAttributes(object): 51class TagAttributes(object):
52 """A dictionary-like collection of tag attributes.
53
54 Example:
55
56 attr = TagAttributes('href="foo" length=2')
57 if 'href' in attr:
58 print(x['href']) # returns "foo"
59 for a in attr:
60 ...
61 """
62
38 attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?") 63 attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?")
39 valseg = re.compile("^[^\\\"]+") 64 valseg = re.compile("^[^\\\"]+")
40 tab = {} 65 tab = {}
@@ -68,7 +93,7 @@ class TagAttributes(object):
68 val = 1 93 val = 1
69 self.tab[name] = val 94 self.tab[name] = val
70 else: 95 else:
71 raise TagAttributeSyntax(s) 96 raise TagAttributeSyntaxError(s)
72 def __len__(self): 97 def __len__(self):
73 return len(self.tab) 98 return len(self.tab)
74 def __getitem__(self, key): 99 def __getitem__(self, key):
@@ -89,13 +114,32 @@ class TagAttributes(object):
89 def __repr__(self): 114 def __repr__(self):
90 return self.printable 115 return self.printable
91 116
92class BaseWikiMarkup(object): 117class WikiMarkupParser(object):
118 """Parser for Wiki markup language.
119
120 Given input in Wiki markup language creates an abstract parse tree for it.
121 This is a base class for actual parsers. The subclasses must provide the
122 input method.
123
124 Public methods:
125
126 parse() -- parse the input.
127
128 Abstract methods (must be overridden by the subclass):
129
130 input() -- returns next physical line from the input material.
131
132 Public attributes:
133
134 tree -- constructed parse tree (a subclass of WikiNode)
135
136 """
93 137
94 delim = re.compile("^==+[ \t]*|[ \t]*==+[ \t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<") 138 delim = re.compile("^==+[ \t]*|[ \t]*==+[ \t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
95 otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^/][^>]+))?\s*(?P<closed>/)?>") 139 otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^/][^>]+))?\s*(?P<closed>/)?>")
96 ctag = re.compile("</(?P<tag>[a-zA-Z0-9_]+)\s*>") 140 ctag = re.compile("</(?P<tag>[a-zA-Z0-9_]+)\s*>")
97 refstart = re.compile("^https?://") 141 refstart = re.compile("^https?://")
98 142
99 close_delim = { 143 close_delim = {
100 '[': ']', 144 '[': ']',
101 '[[': ']]', 145 '[[': ']]',
@@ -115,10 +159,13 @@ class BaseWikiMarkup(object):
115 tree = None 159 tree = None
116 160
117 tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ] 161 tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ]
118 162
119 debug_level = 0 163 debug_level = 0
120 164
121 def dprint(self, lev, fmt, *argv): 165 def dprint(self, lev, fmt, *argv):
166 """If current debug level is greater than or equal to lev, print *argv
167 according to format.
168 """
122 if self.debug_level >= lev: 169 if self.debug_level >= lev:
123 for l in (fmt % argv).split('\n'): 170 for l in (fmt % argv).split('\n'):
124 print("[DEBUG] %s" % l) 171 print("[DEBUG] %s" % l)
@@ -135,7 +182,7 @@ class BaseWikiMarkup(object):
135 'TEXT': WikiTextNode, 182 'TEXT': WikiTextNode,
136 'PRE': WikiContentNode, 183 'PRE': WikiContentNode,
137 'PARA': WikiSeqNode, 184 'PARA': WikiSeqNode,
138 'BAR': WikiNode, 185 'BAR': WikiNode,
139 'SEQ': WikiSeqNode, 186 'SEQ': WikiSeqNode,
140 'IND': WikiIndNode, 187 'IND': WikiIndNode,
141 'REF': WikiRefNode, 188 'REF': WikiRefNode,
@@ -148,10 +195,11 @@ class BaseWikiMarkup(object):
148 'HDR': WikiHdrNode 195 'HDR': WikiHdrNode
149 } 196 }
150 197
151 def __createWikiNode(self,**kwarg): 198 def _new_node(self,**kwarg):
152 return self.token_class[kwarg['type']](self, **kwarg) 199 return self.token_class[kwarg['type']](self, **kwarg)
153 200
154 def tokread(self): 201 def tokread(self):
202 """Read next token from the input. Return it as a subclass of WikiNode."""
155 line = None 203 line = None
156 pos = 0 204 pos = 0
157 while 1: 205 while 1:
@@ -161,23 +209,23 @@ class BaseWikiMarkup(object):
161 pos = 0 209 pos = 0
162 except StopIteration: 210 except StopIteration:
163 line = u'' 211 line = u''
164 212
165 if not line or line == "": 213 if not line or line == "":
166 yield(self.__createWikiNode(type='NIL')) 214 yield(self._new_node(type='NIL'))
167 break 215 break
168 216
169 if line == '\n': 217 if line == '\n':
170 yield(self.__createWikiNode(type='NL')) 218 yield(self._new_node(type='NL'))
171 line = None 219 line = None
172 continue 220 continue
173 221
174 self.dprint(100, "LINE: %s", line[pos:]) 222 self.dprint(100, "LINE: %s", line[pos:])
175 m = self.delim.search(line, pos) 223 m = self.delim.search(line, pos)
176 224
177 if m: 225 if m:
178 if (pos < m.start(0)): 226 if (pos < m.start(0)):
179 yield(self.__createWikiNode(type='TEXT', 227 yield(self._new_node(type='TEXT',
180 content=line[pos:m.start(0)])) 228 content=line[pos:m.start(0)]))
181 pos = m.start(0) 229 pos = m.start(0)
182 t = None 230 t = None
183 231
@@ -191,13 +239,13 @@ class BaseWikiMarkup(object):
191 try: 239 try:
192 m = self.ctag.search(line, pos) 240 m = self.ctag.search(line, pos)
193 if m and m.group('tag') == 'nowiki': 241 if m and m.group('tag') == 'nowiki':
194 yield(self.__createWikiNode(type='TEXT', 242 yield(self._new_node(type='TEXT',
195 content=line[pos:m.start(0)] )) 243 content=line[pos:m.start(0)] ))
196 pos = m.end(0) 244 pos = m.end(0)
197 break 245 break
198 246
199 yield(self.__createWikiNode(type='TEXT', 247 yield(self._new_node(type='TEXT',
200 content=line[pos:])) 248 content=line[pos:]))
201 249
202 line = self.input() 250 line = self.input()
203 pos = 0 251 pos = 0
@@ -206,40 +254,41 @@ class BaseWikiMarkup(object):
206 continue 254 continue
207 elif m.group('tag') in self.tags: 255 elif m.group('tag') in self.tags:
208 try: 256 try:
209 yield(self.__createWikiNode(type='OTAG', 257 yield(self._new_node(type='OTAG',
210 tag=m.group('tag'), 258 tag=m.group('tag'),
211 isblock=(line[pos] == '\n'), 259 isblock=(line[pos] == '\n'),
212 args=TagAttributes(m.group('args')))) 260 args=TagAttributes(m.group('args'))))
213 if m.group('closed'): 261 if m.group('closed'):
214 yield(self.__createWikiNode(type='CTAG', 262 yield(self._new_node(type='CTAG',
215 tag=m.group('tag'))) 263 tag=m.group('tag')))
216 except TagAttributeSyntax: 264 except TagAttributeSyntaxError:
217 yield(self.__createWikiNode(type='TEXT',content=m.group(0))) 265 yield(self._new_node(type='TEXT',
266 content=m.group(0)))
218 continue 267 continue
219 else: 268 else:
220 yield(self.__createWikiNode(type='TEXT',content=m.group(0))) 269 yield(self._new_node(type='TEXT',content=m.group(0)))
221 continue 270 continue
222 else: 271 else:
223 m = self.ctag.match(line, pos) 272 m = self.ctag.match(line, pos)
224 if m: 273 if m:
225 if m.group('tag') in self.tags: 274 if m.group('tag') in self.tags:
226 yield(self.__createWikiNode(type='CTAG', 275 yield(self._new_node(type='CTAG',
227 tag=m.group('tag'))) 276 tag=m.group('tag')))
228 pos = m.end(0) 277 pos = m.end(0)
229 continue 278 continue
230 else: 279 else:
231 yield(self.__createWikiNode(type='TEXT', 280 yield(self._new_node(type='TEXT',
232 content=line[pos:pos+1])) 281 content=line[pos:pos+1]))
233 pos += 1 282 pos += 1
234 continue 283 continue
235 else: 284 else:
236 pos = m.end(0) 285 pos = m.end(0)
237 content = m.group(0) 286 content = m.group(0)
238 if content[0] in self.envtypes: 287 if content[0] in self.envtypes:
239 node = self.__createWikiNode(type='DELIM', 288 node = self._new_node(type='DELIM',
240 content=content, 289 content=content,
241 isblock=True, 290 isblock=True,
242 continuation=pos < len(line) and line[pos] == ":") 291 continuation=pos < len(line) and line[pos] == ":")
243 if node.continuation: 292 if node.continuation:
244 node.content += node.content[0] 293 node.content += node.content[0]
245 pos += 1 294 pos += 1
@@ -247,33 +296,43 @@ class BaseWikiMarkup(object):
247 yield(node) 296 yield(node)
248 297
249 while pos < len(line) and line[pos] in [' ', '\t']: 298 while pos < len(line) and line[pos] in [' ', '\t']:
250 pos += 1 299 pos += 1
251 else: 300 else:
252 yield(self.__createWikiNode(type='DELIM', 301 yield(self._new_node(type='DELIM',
253 isblock=(content.strip() not in self.inline_delims), 302 isblock=(content.strip() not in self.inline_delims),
254 content=content.strip())) 303 content=content.strip()))
255 continue 304 continue
256 305
257 if line: 306 if line:
258 if line[-1] == '\n': 307 if line[-1] == '\n':
259 if line[pos:-1] != '': 308 if line[pos:-1] != '':
260 yield(self.__createWikiNode(type='TEXT',content=line[pos:-1])) 309 yield(self._new_node(type='TEXT',content=line[pos:-1]))
261 yield(self.__createWikiNode(type='NL')) 310 yield(self._new_node(type='NL'))
262 else: 311 else:
263 yield(self.__createWikiNode(type='TEXT',content=line[pos:])) 312 yield(self._new_node(type='TEXT',content=line[pos:]))
264 line = None 313 line = None
265 314
266 315
267 def input(self): 316 def input(self):
317 """Return next physical line from the input.
318
319 This method must be overridden by the subclass.
320 """
268 return None 321 return None
269 322
270 def swaptkn(self, i, j): 323 def swaptkn(self, i, j):
324 """Swap tokens at indices i and j in toklist."""
271 self.dprint(80, "SWAPPING %s <-> %s", i, j) 325 self.dprint(80, "SWAPPING %s <-> %s", i, j)
272 x = self.toklist[i] 326 x = self.toklist[i]
273 self.toklist[i] = self.toklist[j] 327 self.toklist[i] = self.toklist[j]
274 self.toklist[j] = x 328 self.toklist[j] = x
275 329
276 def tokenize(self): 330 def tokenize(self):
331 """Tokenize the input.
332
333 Read tokens from the input (supplied by the input() method). Place the
334 obtained tokens in the toklist array.
335 """
277 self.toklist = [] 336 self.toklist = []
278 for tok in self.tokread(): 337 for tok in self.tokread():
279 self.dprint(100, "TOK: %s", tok) 338 self.dprint(100, "TOK: %s", tok)
@@ -286,14 +345,14 @@ class BaseWikiMarkup(object):
286 # 345 #
287 # 2a. '''''a b'' c d''' 346 # 2a. '''''a b'' c d'''
288 # 2b. '''''a b''' c d'' 347 # 2b. '''''a b''' c d''
289 # 348 #
290 # 3a. '''a b ''c d''''' 349 # 3a. '''a b ''c d'''''
291 # 3b. ''a b '''c d''''' 350 # 3b. ''a b '''c d'''''
292 stack = [] 351 stack = []
293 for i in range(0,len(self.toklist)): 352 for i in range(0,len(self.toklist)):
294 if self.toklist[i].type == 'DELIM' \ 353 if (self.toklist[i].type == 'DELIM'
295 and (self.toklist[i].content == "''" \ 354 and (self.toklist[i].content == "''"
296 or self.toklist[i].content == "'''"): 355 or self.toklist[i].content == "'''")):
297 if len(stack) > 0: 356 if len(stack) > 0:
298 if self.toklist[stack[-1]].content == self.toklist[i].content: 357 if self.toklist[stack[-1]].content == self.toklist[i].content:
299 # Case 1: just pop the matching delimiter off the stack 358 # Case 1: just pop the matching delimiter off the stack
@@ -303,12 +362,13 @@ class BaseWikiMarkup(object):
303 self.swaptkn(stack[-2], stack[-1]) 362 self.swaptkn(stack[-2], stack[-1])
304 # and pop off the matching one 363 # and pop off the matching one
305 stack.pop() 364 stack.pop()
306 elif i < len(self.toklist) \ 365 elif (i < len(self.toklist)
307 and self.toklist[i+1].type == 'DELIM' \ 366 and self.toklist[i+1].type == 'DELIM'
308 and self.toklist[stack[-1]].content == self.toklist[i+1].content: 367 and self.toklist[stack[-1]].content
368 == self.toklist[i+1].content):
309 # Case 3: swap current and next tokens 369 # Case 3: swap current and next tokens
310 self.swaptkn(i, i+1) 370 self.swaptkn(i, i+1)
311 # and pop off the matching one 371 # and pop off the matching one
312 stack.pop() 372 stack.pop()
313 else: 373 else:
314 # Push the token on stack 374 # Push the token on stack
@@ -321,34 +381,46 @@ class BaseWikiMarkup(object):
321 self.toklist[i].type = 'TEXT' # FIXME 381 self.toklist[i].type = 'TEXT' # FIXME
322 382
323 mark = [] 383 mark = []
324 384
325 def push_mark(self): 385 def push_mark(self):
386 """Save the current token index on stack."""
326 self.mark.append(self.tokind) 387 self.mark.append(self.tokind)
327 388
328 def pop_mark(self): 389 def pop_mark(self):
390 """Restore the token index from top of stack."""
329 self.tokind = self.mark.pop() 391 self.tokind = self.mark.pop()
330 392
331 def clear_mark(self): 393 def clear_mark(self):
394 """Forget the last mark."""
332 self.mark.pop() 395 self.mark.pop()
333 396
334 def lookahead(self, off=0): 397 def lookahead(self, off=0):
398 """Peek a token at index (tokind+off)."""
335 tok = self.toklist[self.tokind+off] 399 tok = self.toklist[self.tokind+off]
336 self.dprint(20, "lookahead(%s): %s", off, tok) 400 self.dprint(20, "lookahead(%s): %s", off, tok)
337 return tok 401 return tok
338 402
339 def setkn(self,val): 403 def setkn(self,val):
404 """Store token val at the current token index."""
340 self.toklist[self.tokind] = val 405 self.toklist[self.tokind] = val
341 406
342 def getkn(self): 407 def getkn(self):
408 """Get next token from the toklist. Advance tokind."""
343 self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' 409 self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL'
344 if self.tokind == len(self.toklist): 410 if self.tokind == len(self.toklist):
345 return self.__createWikiNode(type='NIL') 411 return self._new_node(type='NIL')
346 tok = self.toklist[self.tokind] 412 tok = self.toklist[self.tokind]
347 self.tokind = self.tokind + 1 413 self.tokind = self.tokind + 1
348 self.dprint(20, "getkn: %s", tok) 414 self.dprint(20, "getkn: %s", tok)
349 return tok 415 return tok
350 416
351 def ungetkn(self, tok=None): 417 def ungetkn(self, tok=None):
418 """Unget the last read token.
419
420 Decrease the tokind by one, so the last read token will be read again.
421 If optional argument is supplied and is not None, store it in the toklist
422 in place of the current token.
423 """
352 self.tokind = self.tokind - 1 424 self.tokind = self.tokind - 1
353 self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' 425 self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL'
354 if tok: 426 if tok:
@@ -357,17 +429,20 @@ class BaseWikiMarkup(object):
357 return self.toklist[self.tokind] 429 return self.toklist[self.tokind]
358 430
359 def fixuptkn(self, tok): 431 def fixuptkn(self, tok):
432 """Replace the recently read token by tok."""
360 if self.tokind == 0: 433 if self.tokind == 0:
361 raise IndexError('wikimarkup.fixuptkn called at start of input') 434 raise IndexError('WikiMarkupParser.fixuptkn called at start of input')
362 self.toklist[self.tokind-1] = tok 435 self.toklist[self.tokind-1] = tok
363 return tok 436 return tok
364 437
365 def dump(self, tree, file=sys.stdout): 438 def dump(self, tree, file=sys.stdout):
439 """Dump the tree to file, node by node."""
366 for node in tree: 440 for node in tree:
367 file.write(str(node)) 441 file.write(str(node))
368 file.write('\n') 442 file.write('\n')
369 443
370 def is_block_end(self, tok): 444 def is_block_end(self, tok):
445 """Return True if tok ends a block environment."""
371 if tok.type == 'NIL': 446 if tok.type == 'NIL':
372 return True 447 return True
373 elif tok.type == 'NL': 448 elif tok.type == 'NL':
@@ -383,20 +458,21 @@ class BaseWikiMarkup(object):
383 return False 458 return False
384 459
385 def parse_para(self, tok): 460 def parse_para(self, tok):
461 """Read paragraph starting at tok."""
386 self.dprint(80, "ENTER parse_para: %s", tok) 462 self.dprint(80, "ENTER parse_para: %s", tok)
387 463
388 acc = { 'seq': [], 464 acc = { 'seq': [],
389 'textlist': [] } 465 'textlist': [] }
390 466
391 def flush(): 467 def flush():
392 if acc['textlist']: 468 if acc['textlist']:
393 acc['seq'].append(self.__createWikiNode(type='TEXT', 469 acc['seq'].append(self._new_node(type='TEXT',
394 content=''.join(acc['textlist']))) 470 content=''.join(acc['textlist'])))
395 acc['textlist'] = [] 471 acc['textlist'] = []
396 472
397 if isinstance(tok, WikiContentNode) \ 473 if (isinstance(tok, WikiContentNode)
398 and isinstance(tok.content,str) \ 474 and isinstance(tok.content,str)
399 and re.match("^[ \t]", tok.content): 475 and re.match("^[ \t]", tok.content)):
400 type = 'PRE' 476 type = 'PRE'
401 rx = re.compile("^\S") 477 rx = re.compile("^\S")
402 else: 478 else:
@@ -418,26 +494,27 @@ class BaseWikiMarkup(object):
418 flush() 494 flush()
419 acc['seq'].append(self.parse_inline_delim(tok)) 495 acc['seq'].append(self.parse_inline_delim(tok))
420 else: 496 else:
421 raise UnexpectedToken(tok) 497 raise UnexpectedTokenError(tok)
422 tok = self.getkn() 498 tok = self.getkn()
423 flush() 499 flush()
424 if acc['seq']: 500 if acc['seq']:
425 tok = self.__createWikiNode(type=type, content=acc['seq']) 501 tok = self._new_node(type=type, content=acc['seq'])
426 else: 502 else:
427 tok = None 503 tok = None
428 self.dprint(80, "LEAVE parse_para=%s", tok) 504 self.dprint(80, "LEAVE parse_para=%s", tok)
429 return tok 505 return tok
430 506
431 def parse_block_delim(self, tok): 507 def parse_block_delim(self, tok):
508 """Parse block environment starting at tok."""
432 self.dprint(80, "ENTER parse_block_delim") 509 self.dprint(80, "ENTER parse_block_delim")
433 assert(tok.type == 'DELIM') 510 assert(tok.type == 'DELIM')
434 if tok.content == "----": 511 if tok.content == "----":
435 node = self.__createWikiNode(type = 'BAR') 512 node = self._new_node(type = 'BAR')
436 elif tok.content[0:2] == "==": 513 elif tok.content[0:2] == "==":
437 node = self.parse_header(tok) 514 node = self.parse_header(tok)
438 if not node: 515 if not node:
439 tok = self.ungetkn(self.__createWikiNode(type='TEXT', 516 tok = self.ungetkn(self._new_node(type='TEXT',
440 content=tok.content)) 517 content=tok.content))
441 elif tok.content[0] in self.envtypes: 518 elif tok.content[0] in self.envtypes:
442 node = None 519 node = None
443 if tok.content[0] == ':': 520 if tok.content[0] == ':':
@@ -451,8 +528,9 @@ class BaseWikiMarkup(object):
451 node = None 528 node = None
452 self.dprint(80, "LEAVE parse_block_delim=%s", node) 529 self.dprint(80, "LEAVE parse_block_delim=%s", node)
453 return node 530 return node
454 531
455 def parse_line(self): 532 def parse_line(self):
533 """Parse the input line."""
456 self.dprint(80, "ENTER parse_line") 534 self.dprint(80, "ENTER parse_line")
457 list = [] 535 list = []
458 while True: 536 while True:
@@ -463,8 +541,7 @@ class BaseWikiMarkup(object):
463 list.append(tok) 541 list.append(tok)
464 elif tok.type == 'DELIM': 542 elif tok.type == 'DELIM':
465 if tok.isblock: 543 if tok.isblock:
466 tok = self.__createWikiNode(type = 'TEXT', 544 tok = self._new_node(type = 'TEXT', content = tok.content)
467 content = tok.content)
468 self.fixuptkn(tok) 545 self.fixuptkn(tok)
469 list.append(tok) 546 list.append(tok)
470 elif tok.content[0] == ":": 547 elif tok.content[0] == ":":
@@ -476,7 +553,8 @@ class BaseWikiMarkup(object):
476 if x: 553 if x:
477 list.append(x) 554 list.append(x)
478 else: 555 else:
479 list.append(self.fixuptkn(self.__createWikiNode(type = 'TEXT', content = tok.content))) 556 list.append(self.fixuptkn(self._new_node(type = 'TEXT',
557 content = tok.content)))
480 elif tok.type == 'OTAG': 558 elif tok.type == 'OTAG':
481 if tok.isblock: 559 if tok.isblock:
482 self.ungetkn() 560 self.ungetkn()
@@ -484,18 +562,26 @@ class BaseWikiMarkup(object):
484 list.append(self.parse_tag(tok)) 562 list.append(self.parse_tag(tok))
485 else: 563 else:
486 list.append(tok) 564 list.append(tok)
487 ret = self.__createWikiNode(type='SEQ', content=list) 565 ret = self._new_node(type='SEQ', content=list)
488 self.dprint(80, "LEAVE parse_line=%s", ret) 566 self.dprint(80, "LEAVE parse_line=%s", ret)
489 return ret 567 return ret
490 568
491 def parse_indent(self, tok): 569 def parse_indent(self, tok):
570 """Parse indented block starting at tok."""
492 lev = len(tok.content) 571 lev = len(tok.content)
493 self.dprint(80, "ENTER parse_indent(%s)", lev) 572 self.dprint(80, "ENTER parse_indent(%s)", lev)
494 x = self.__createWikiNode(type='IND', level=lev, content=self.parse_line()) 573 x = self._new_node(type='IND', level=lev, content=self.parse_line())
495 self.dprint(80, "LEAVE parse_indent=%s", x) 574 self.dprint(80, "LEAVE parse_indent=%s", x)
496 return x 575 return x
497 576
498 def parse_fontmod(self,delim,what): 577 def parse_fontmod(self,delim,what):
578 """Parse font modification directive (bold or italics).
579
580 Arguments:
581
582 delim -- starting delimiter ("''" or "'''")
583 what -- 'IT' or 'BOLD'
584 """
499 self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s", 585 self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s",
500 delim, what, self.lookahead()) 586 delim, what, self.lookahead())
501 seq = [] 587 seq = []
@@ -513,7 +599,7 @@ class BaseWikiMarkup(object):
513 break 599 break
514 else: 600 else:
515 if text: 601 if text:
516 seq.append(self.__createWikiNode(type='TEXT', content=text)) 602 seq.append(self._new_node(type='TEXT', content=text))
517 text = '' 603 text = ''
518 x = self.parse_inline_delim(tok) 604 x = self.parse_inline_delim(tok)
519 if x: 605 if x:
@@ -522,17 +608,18 @@ class BaseWikiMarkup(object):
522 self.dprint(80, "LEAVE parse_fontmod=%s", "None") 608 self.dprint(80, "LEAVE parse_fontmod=%s", "None")
523 return None 609 return None
524 elif tok.type == 'NL': 610 elif tok.type == 'NL':
525 seq.append(self.__createWikiNode(type='TEXT', content='\n')) 611 seq.append(self._new_node(type='TEXT', content='\n'))
526 else: 612 else:
527 self.dprint(80, "LEAVE parse_fontmod=None") 613 self.dprint(80, "LEAVE parse_fontmod=None")
528 return None 614 return None
529 if text: 615 if text:
530 seq.append(self.__createWikiNode(type='TEXT', content=text)) 616 seq.append(self._new_node(type='TEXT', content=text))
531 res = self.__createWikiNode(type=what, content=seq) 617 res = self._new_node(type=what, content=seq)
532 self.dprint(80, "LEAVE parse_fontmod=%s", res) 618 self.dprint(80, "LEAVE parse_fontmod=%s", res)
533 return res 619 return res
534 620
535 def parse_ref(self): 621 def parse_ref(self):
622 """Parse a reference block ([...])"""
536 self.dprint(80, "ENTER parse_ref") 623 self.dprint(80, "ENTER parse_ref")
537 tok = self.getkn() 624 tok = self.getkn()
538 if not (tok.type == 'TEXT' and self.refstart.match(tok.content)): 625 if not (tok.type == 'TEXT' and self.refstart.match(tok.content)):
@@ -542,7 +629,7 @@ class BaseWikiMarkup(object):
542 seq = [] 629 seq = []
543 (ref,sep,text) = tok.content.partition(' ') 630 (ref,sep,text) = tok.content.partition(' ')
544 if text: 631 if text:
545 seq.insert(0, self.__createWikiNode(type='TEXT', content=text)) 632 seq.insert(0, self._new_node(type='TEXT', content=text))
546 633
547 while True: 634 while True:
548 tok = self.getkn() 635 tok = self.getkn()
@@ -567,13 +654,22 @@ class BaseWikiMarkup(object):
567 else: 654 else:
568 seq.append(tok) 655 seq.append(tok)
569 656
570 ret = self.__createWikiNode(type='REF', 657 ret = self._new_node(type='REF', ref=ref,
571 ref=ref, 658 content=self._new_node(type='SEQ', content=seq))
572 content=self.__createWikiNode(type='SEQ', content=seq))
573 self.dprint(80, "LEAVE parse_ref= %s", ret) 659 self.dprint(80, "LEAVE parse_ref= %s", ret)
574 return ret 660 return ret
575 661
576 def parse_link(self, type, delim): 662 def parse_link(self, type, delim):
663 """Parse an external link ([[...]]).
664
665 In this implementation, it is also used to parse template
666 references ({{...}}).
667
668 Arguments:
669
670 type -- 'LINK' or 'TMPL'
671 delim -- expected closing delimiter.
672 """
577 self.dprint(80, "ENTER parse_link(%s,%s)", type, delim) 673 self.dprint(80, "ENTER parse_link(%s,%s)", type, delim)
578 subtree = [] 674 subtree = []
579 list = [] 675 list = []
@@ -585,13 +681,13 @@ class BaseWikiMarkup(object):
585 if tok.type == 'DELIM': 681 if tok.type == 'DELIM':
586 if tok.content == delim: 682 if tok.content == delim:
587 if list: 683 if list:
588 subtree.append(self.__createWikiNode(type='SEQ', 684 subtree.append(self._new_node(type='SEQ',
589 content=list)) 685 content=list))
590 break 686 break
591 elif tok.content == "|": 687 elif tok.content == "|":
592 if len(list) > 1: 688 if len(list) > 1:
593 subtree.append(self.__createWikiNode(type='SEQ', 689 subtree.append(self._new_node(type='SEQ',
594 content=list)) 690 content=list))
595 elif list: 691 elif list:
596 subtree.append(list[0]) 692 subtree.append(list[0])
597 list = [] 693 list = []
@@ -607,11 +703,12 @@ class BaseWikiMarkup(object):
607 else: 703 else:
608 self.dprint(80, "LEAVE parse_link=None [unexpected token]") 704 self.dprint(80, "LEAVE parse_link=None [unexpected token]")
609 return None 705 return None
610 ret = self.__createWikiNode(type=type, content=subtree) 706 ret = self._new_node(type=type, content=subtree)
611 self.dprint(80, "LEAVE parse_link=%s", ret) 707 self.dprint(80, "LEAVE parse_link=%s", ret)
612 return ret 708 return ret
613 709
614 def parse_inline_delim(self, tok): 710 def parse_inline_delim(self, tok):
711 """Parse an inline block."""
615 self.dprint(80, "ENTER parse_inline_delim") 712 self.dprint(80, "ENTER parse_inline_delim")
616 assert(tok.type == 'DELIM') 713 assert(tok.type == 'DELIM')
617 self.push_mark() 714 self.push_mark()
@@ -633,8 +730,7 @@ class BaseWikiMarkup(object):
633 else: 730 else:
634 self.dprint(80, "BEGIN DELIMITER RECOVERY: %s", tok) 731 self.dprint(80, "BEGIN DELIMITER RECOVERY: %s", tok)
635 self.pop_mark() 732 self.pop_mark()
636 x = self.fixuptkn(self.__createWikiNode(type='TEXT', 733 x = self.fixuptkn(self._new_node(type='TEXT', content=tok.content))
637 content=tok.content))
638 od = tok.content 734 od = tok.content
639 if od in self.close_delim: 735 if od in self.close_delim:
640 cd = self.close_delim[od] 736 cd = self.close_delim[od]
@@ -647,8 +743,8 @@ class BaseWikiMarkup(object):
647 lev += 1 743 lev += 1
648 elif tok.content == cd: 744 elif tok.content == cd:
649 if lev == 0: 745 if lev == 0:
650 tok = self.__createWikiNode(type='TEXT', 746 tok = self._new_node(type='TEXT',
651 content=tok.content) 747 content=tok.content)
652 self.toklist[self.tokind+1+i] = tok 748 self.toklist[self.tokind+1+i] = tok
653 lev -= 1 749 lev -= 1
654 break 750 break
@@ -656,8 +752,9 @@ class BaseWikiMarkup(object):
656 752
657 self.dprint(80, "LEAVE parse_inline_delim=%s", x) 753 self.dprint(80, "LEAVE parse_inline_delim=%s", x)
658 return x 754 return x
659 755
660 def parse_tag(self, tag): 756 def parse_tag(self, tag):
757 """Parse an xml-like tag (such as, e.g. "<tt>...</tt>")."""
661 self.dprint(80, "ENTER parse_tag") 758 self.dprint(80, "ENTER parse_tag")
662 list = [] 759 list = []
663 self.push_mark() 760 self.push_mark()
@@ -669,7 +766,7 @@ class BaseWikiMarkup(object):
669 if tag.args: 766 if tag.args:
670 s += ' ' + str(tag.args) 767 s += ' ' + str(tag.args)
671 s += '>' 768 s += '>'
672 node = self.__createWikiNode(type='TEXT',content=s) 769 node = self._new_node(type='TEXT',content=s)
673 if tag.content: 770 if tag.content:
674 self.tree[self.tokind:self.tokind] = tag.content 771 self.tree[self.tokind:self.tokind] = tag.content
675 self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node) 772 self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node)
@@ -685,30 +782,30 @@ class BaseWikiMarkup(object):
685 if tag.tag == tok.tag: 782 if tag.tag == tok.tag:
686 break 783 break
687 s = '</' + tag.tag + '>' 784 s = '</' + tag.tag + '>'
688 tok = self.fixuptkn(self.__createWikiNode(type='TEXT', 785 tok = self.fixuptkn(self._new_node(type='TEXT', content=s))
689 content=s))
690 elif tok.type == 'NL': 786 elif tok.type == 'NL':
691 tok = self.__createWikiNode(type = 'TEXT', content = '\n') 787 tok = self._new_node(type = 'TEXT', content = '\n')
692 list.append(tok) 788 list.append(tok)
693 789
694 self.clear_mark() 790 self.clear_mark()
695 ret = self.__createWikiNode(type = 'TAG', 791 ret = self._new_node(type = 'TAG',
696 tag = tag.tag, 792 tag = tag.tag,
697 args = tag.args, 793 args = tag.args,
698 isblock = tag.isblock, 794 isblock = tag.isblock,
699 content = self.__createWikiNode(type = 'SEQ', content = list)) 795 content = self._new_node(type = 'SEQ', content = list))
700 self.dprint(80, "LEAVE parse_tag = %s", ret) 796 self.dprint(80, "LEAVE parse_tag = %s", ret)
701 return ret 797 return ret
702 798
703 def parse_env(self, tok): 799 def parse_env(self, tok):
800 """Parse a block environment (numbered, unnumbered, or definition list)."""
704 type = self.envtypes[tok.content[0]][0] 801 type = self.envtypes[tok.content[0]][0]
705 lev = len(tok.content) 802 lev = len(tok.content)
706 self.dprint(80, "ENTER parse_env(%s,%s)",type,lev) 803 self.dprint(80, "ENTER parse_env(%s,%s)",type,lev)
707 list = [] 804 list = []
708 while True: 805 while True:
709 if tok.type == 'DELIM' \ 806 if (tok.type == 'DELIM'
710 and tok.content[0] in self.envtypes \ 807 and tok.content[0] in self.envtypes
711 and type == self.envtypes[tok.content[0]][0]: 808 and type == self.envtypes[tok.content[0]][0]):
712 if len(tok.content) < lev: 809 if len(tok.content) < lev:
713 self.ungetkn() 810 self.ungetkn()
714 break 811 break
@@ -717,9 +814,9 @@ class BaseWikiMarkup(object):
717 else: 814 else:
718 elt = self.parse_line() 815 elt = self.parse_line()
719 if not tok.continuation: 816 if not tok.continuation:
720 list.append(self.__createWikiNode(type='ELT', 817 list.append(self._new_node(type='ELT',
721 subtype=self.envtypes[tok.content[0]][1], 818 subtype=self.envtypes[tok.content[0]][1],
722 content=elt)) 819 content=elt))
723 tok = self.getkn() 820 tok = self.getkn()
724 continue 821 continue
725 822
@@ -727,7 +824,7 @@ class BaseWikiMarkup(object):
727 if list[-1].content.type != 'SEQ': 824 if list[-1].content.type != 'SEQ':
728 x = list[-1].content.content 825 x = list[-1].content.content
729 # FIXME: 826 # FIXME:
730 list[-1].content = self.__createWikiNode(type='SEQ', content=[x]) 827 list[-1].content = self._new_node(type='SEQ', content=[x])
731 list[-1].content.content.append(elt) 828 list[-1].content.content.append(elt)
732 else: 829 else:
733 self.ungetkn() 830 self.ungetkn()
@@ -735,21 +832,21 @@ class BaseWikiMarkup(object):
735 832
736 tok = self.getkn() 833 tok = self.getkn()
737 834
738 ret = self.__createWikiNode(type='ENV', 835 ret = self._new_node(type='ENV',
739 envtype=type, 836 envtype=type,
740 level=lev, 837 level=lev,
741 content=list) 838 content=list)
742 self.dprint(80, "LEAVE parse_env=%s", ret) 839 self.dprint(80, "LEAVE parse_env=%s", ret)
743 return ret 840 return ret
744 841
745 def parse_header(self, tok): 842 def parse_header(self, tok):
843 """Parse a Wiki header."""
746 self.dprint(80, "ENTER parse_header") 844 self.dprint(80, "ENTER parse_header")
747 self.push_mark() 845 self.push_mark()
748 list = [] 846 list = []
749 delim = tok.content 847 delim = tok.content
750 while True: 848 while True:
751 tok = self.getkn() 849 tok = self.getkn()
752
753 if tok.type == 'NL': 850 if tok.type == 'NL':
754 self.pop_mark() 851 self.pop_mark()
755 self.dprint(80, "LEAVE parse_header=None") 852 self.dprint(80, "LEAVE parse_header=None")
@@ -779,17 +876,15 @@ class BaseWikiMarkup(object):
779 self.dprint(80, "LEAVE parse_header=None") 876 self.dprint(80, "LEAVE parse_header=None")
780 return None 877 return None
781 list.append(self.parse_tag(tok)) 878 list.append(self.parse_tag(tok))
782
783
784 self.clear_mark() 879 self.clear_mark()
785 ret = self.__createWikiNode(type='HDR', 880 ret = self._new_node(type='HDR',
786 level = len(delim), 881 level=len(delim),
787 content = self.__createWikiNode(type='SEQ', 882 content=self._new_node(type='SEQ', content=list))
788 content=list))
789 self.dprint(80, "LEAVE parse_header=%s", ret) 883 self.dprint(80, "LEAVE parse_header=%s", ret)
790 return ret 884 return ret
791 885
792 def parse_block(self): 886 def parse_block(self):
887 """Parse next block: newline, delimiter, tag, or paragraph."""
793 tok = self.getkn() 888 tok = self.getkn()
794 while tok.type == 'NL': 889 while tok.type == 'NL':
795 tok = self.getkn() 890 tok = self.getkn()
@@ -805,8 +900,12 @@ class BaseWikiMarkup(object):
805 return self.parse_tag(tok) 900 return self.parse_tag(tok)
806 901
807 return self.parse_para(tok) 902 return self.parse_para(tok)
808 903
809 def parse(self): 904 def parse(self):
905 """Parse Wiki material supplied by the input() method.
906
907 Store the resulting abstract parsing tree in the tree attribute.
908 """
810 if not self.toklist: 909 if not self.toklist:
811 self.tokenize() 910 self.tokenize()
812 if self.debug_level >= 90: 911 if self.debug_level >= 90:
@@ -829,10 +928,10 @@ class BaseWikiMarkup(object):
829 return str(self.tree) 928 return str(self.tree)
830 929
831 930
832class WikiMarkup (BaseWikiMarkup): 931class WikiMarkup(WikiMarkupParser):
833 """ 932 """
834 A derived class, that supplies a basic input method. 933 A derived parser class that supplies a basic input method.
835 934
836 Three types of inputs are available: 935 Three types of inputs are available:
837 936
838 1. filename=<file> 937 1. filename=<file>
@@ -849,13 +948,14 @@ class WikiMarkup (BaseWikiMarkup):
849 ... Do whatever you need with obj.tree ... 948 ... Do whatever you need with obj.tree ...
850 949
851 """ 950 """
951
852 file = None 952 file = None
853 text = None 953 text = None
854 lang = 'en' 954 lang = 'en'
855 html_base = 'http://%(lang)s.wiktionary.org/wiki/' 955 html_base = 'http://%(lang)s.wiktionary.org/wiki/'
856 image_base = 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' 956 image_base = 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf'
857 media_base = 'http://www.mediawiki.org/xml/export-0.3' 957 media_base = 'http://www.mediawiki.org/xml/export-0.3'
858 958
859 def __init__(self, *args, **keywords): 959 def __init__(self, *args, **keywords):
860 for kw in keywords: 960 for kw in keywords:
861 if kw == 'file': 961 if kw == 'file':
@@ -885,339 +985,301 @@ class WikiMarkup (BaseWikiMarkup):
885 else: 985 else:
886 return None 986 return None
887 987
888 def is_lang_link(self, elt): 988 # ISO 639
889 if elt.type == 'LINK' \
890 and isinstance(elt.content, list) \
891 and len(elt.content) == 1:
892 if elt.content[0].type == TEXT:
893 m = re.match('([\w-]+):', elt.content[0].content)
894 if m: # and m.group(1) in self.langtab:
895 return True
896 elif elt.content[0].type == 'SEQ' \
897 and len(elt.content[0].content) == 1 and\
898 elt.content[0].content[0].type == TEXT:
899 m = re.match('([\w-]+):',elt.content[0].content[0].content)
900 if m: # and m.group(1) in self.langtab:
901 return True
902 return False
903
904 def is_empty_text(self, elt):
905 if elt.type == 'TEXT':
906 if re.search('\w', elt.content):
907 return False
908 return True
909 return False
910
911 def is_empty_para(self, seq):
912 for x in seq:
913 if not (self.is_lang_link(x) or self.is_empty_text(x)):
914 return False
915 return True
916
917 # ISO 639
918 langtab = { 989 langtab = {
919 "aa": "Afar", # Afar 990 "aa": "Afar", # Afar
920 "ab": "Аҧсуа", # Abkhazian 991 "ab": "Аҧсуа", # Abkhazian
921 "ae": None, # Avestan 992 "ae": None, # Avestan
922 "af": "Afrikaans", # Afrikaans 993 "af": "Afrikaans", # Afrikaans
923 "ak": "Akana", # Akan 994 "ak": "Akana", # Akan
924 "als": "Alemannisch", 995 "als": "Alemannisch",
925 "am": "አማርኛ", # Amharic 996 "am": "አማርኛ", # Amharic
926 "an": "Aragonés", # Aragonese 997 "an": "Aragonés", # Aragonese
927 "ang": "Englisc", 998 "ang": "Englisc",
928 "ar": "العربية" , # Arabic 999 "ar": "العربية" , # Arabic
929 "arc": "ܐܪܡܝܐ", 1000 "arc": "ܐܪܡܝܐ",
930 "as": "অসমীয়া", # Assamese 1001 "as": "অসমীয়া", # Assamese
931 "ast": "Asturian", 1002 "ast": "Asturian",
932 "av": "Авар", # Avaric 1003 "av": "Авар", # Avaric
933 "ay": "Aymara", # Aymara 1004 "ay": "Aymara", # Aymara
934 "az": "Azərbaycan" , # Azerbaijani 1005 "az": "Azərbaycan" , # Azerbaijani
935 1006
936 "ba": "Башҡорт", # Bashkir 1007 "ba": "Башҡорт", # Bashkir
937 "bar": "Boarisch", 1008 "bar": "Boarisch",
938 "bat-smg": "Žemaitėška", 1009 "bat-smg": "Žemaitėška",
939 "bcl": "Bikol", 1010 "bcl": "Bikol",
940 "be": "Беларуская", # Byelorussian; Belarusian 1011 "be": "Беларуская", # Byelorussian; Belarusian
941 "be-x-old": "Беларуская (тарашкевіца)", 1012 "be-x-old": "Беларуская (тарашкевіца)",
942 "bg": "Български", # Bulgarian 1013 "bg": "Български", # Bulgarian
943 "bh": "भोजपुरी", # Bihari 1014 "bh": "भोजपुरी", # Bihari
944 "bi": "Bislama", # Bislama 1015 "bi": "Bislama", # Bislama
945 "bm": "Bamanankan", # Bambara 1016 "bm": "Bamanankan", # Bambara
946 "bn": "বাংলা" , # Bengali; Bangla 1017 "bn": "বাংলা" , # Bengali; Bangla
947 "bo": "བོད་སྐད", # Tibetan 1018 "bo": "བོད་སྐད", # Tibetan
948 "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" , 1019 "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" ,
949 "br": "Brezhoneg" , # Breton 1020 "br": "Brezhoneg" , # Breton
950 "bs": "Bosanski" , # Bosnian 1021 "bs": "Bosanski" , # Bosnian
951 "bug": "Basa Ugi", 1022 "bug": "Basa Ugi",
952 "bxr": "Буряад", 1023 "bxr": "Буряад",
953 1024
954 "ca": "Català" , # Catalan 1025 "ca": "Català" , # Catalan
955 "cbk-zam": "Chavacano de Zamboanga", 1026 "cbk-zam": "Chavacano de Zamboanga",
956 "cdo": "Mìng-dĕ̤ng-ngṳ̄", 1027 "cdo": "Mìng-dĕ̤ng-ngṳ̄",
957 "cho": "Choctaw", 1028 "cho": "Choctaw",
958 "ce": "Нохчийн", # Chechen 1029 "ce": "Нохчийн", # Chechen
959 "ceb": "Sinugboanong Binisaya" , # Cebuano 1030 "ceb": "Sinugboanong Binisaya" , # Cebuano
960 "ch": "Chamor", # Chamorro 1031 "ch": "Chamor", # Chamorro
961 "chr": "ᏣᎳᎩ", 1032 "chr": "ᏣᎳᎩ",
962 "chy": "Tsetsêhestâhese", 1033 "chy": "Tsetsêhestâhese",
963 "co": "Cors", # Corsican 1034 "co": "Cors", # Corsican
964 "cr": "Nehiyaw", # Cree 1035 "cr": "Nehiyaw", # Cree
965 "crh": "Qırımtatarca", 1036 "crh": "Qırımtatarca",
966 "cs": "Česky" , # Czech 1037 "cs": "Česky" , # Czech
967 "csb": "Kaszëbsczi", 1038 "csb": "Kaszëbsczi",
968 "c": "Словѣньскъ", # Church Slavic 1039 "c": "Словѣньскъ", # Church Slavic
969 "cv": "Чăваш", # Chuvash 1040 "cv": "Чăваш", # Chuvash
970 "cy": "Cymraeg" , # Welsh 1041 "cy": "Cymraeg" , # Welsh
971 1042
972 "da": "Dansk" , # Danish 1043 "da": "Dansk" , # Danish
973 "de": "Deutsch" , # German 1044 "de": "Deutsch" , # German
974 "diq": "Zazaki", # Dimli (Southern Zazaki) 1045 "diq": "Zazaki", # Dimli (Southern Zazaki)
975 "dsb": "Dolnoserbski", 1046 "dsb": "Dolnoserbski",
976 "dv": "ދިވެހިބަސް", # Divehi 1047 "dv": "ދިވެހިބަސް", # Divehi
977 "dz": "ཇོང་ཁ", # Dzongkha; Bhutani 1048 "dz": "ཇོང་ཁ", # Dzongkha; Bhutani
978 1049
979 "ee": "Eʋegbe", # Ewe 1050 "ee": "Eʋegbe", # Ewe
980 "el": "Ελληνικά" , # Greek 1051 "el": "Ελληνικά" , # Greek
981 "eml": "Emiliàn e rumagnòl", 1052 "eml": "Emiliàn e rumagnòl",
982 "en": "English" , # English 1053 "en": "English" , # English
983 "eo": "Esperanto" , 1054 "eo": "Esperanto" ,
984 "es": "Español" , # Spanish 1055 "es": "Español" , # Spanish
985 "et": "Eesti" , # Estonian 1056 "et": "Eesti" , # Estonian
986 "eu": "Euskara" , # Basque 1057 "eu": "Euskara" , # Basque
987 "ext": "Estremeñ", 1058 "ext": "Estremeñ",
988 1059
989 "fa": "فارسی" , # Persian 1060 "fa": "فارسی" , # Persian
990 "ff": "Fulfulde", # Fulah 1061 "ff": "Fulfulde", # Fulah
991 "fi": "Suomi" , # Finnish 1062 "fi": "Suomi" , # Finnish
992 "fiu-vro": "Võro", 1063 "fiu-vro": "Võro",
993 "fj": "Na Vosa Vakaviti",# Fijian; Fiji 1064 "fj": "Na Vosa Vakaviti",# Fijian; Fiji
994 "fo": "Føroyskt" , # Faroese 1065 "fo": "Føroyskt" , # Faroese
995 "fr": "Français" , # French 1066 "fr": "Français" , # French
996 "frp": "Arpitan", 1067 "frp": "Arpitan",
997 "fur": "Furlan", 1068 "fur": "Furlan",
998 "fy": "Frysk", # Frisian 1069 "fy": "Frysk", # Frisian
999 1070
1000 "ga": "Gaeilge", # Irish 1071 "ga": "Gaeilge", # Irish
1001 "gan": "贛語 (Gànyŭ)", 1072 "gan": "贛語 (Gànyŭ)",
1002 "gd": "Gàidhlig", # Scots; Gaelic 1073 "gd": "Gàidhlig", # Scots; Gaelic
1003 "gl": "Gallego" , # Gallegan; Galician 1074 "gl": "Gallego" , # Gallegan; Galician
1004 "glk": "گیلکی", 1075 "glk": "گیلکی",
1005 "got": "𐌲𐌿𐍄𐌹𐍃𐌺𐍉𐍂𐌰𐌶𐌳𐌰", 1076 "got": "𐌲𐌿𐍄𐌹𐍃𐌺𐍉𐍂𐌰𐌶𐌳𐌰",
1006 "gn": "Avañe'ẽ", # Guarani 1077 "gn": "Avañe'ẽ", # Guarani
1007 "g": "ગુજરાતી", # Gujarati 1078 "g": "ગુજરાતી", # Gujarati
1008 "gv": "Gaelg", # Manx 1079 "gv": "Gaelg", # Manx
1009 1080
1010 "ha": "هَوُسَ", # Hausa 1081 "ha": "هَوُسَ", # Hausa
1011 "hak": "Hak-kâ-fa / 客家話", 1082 "hak": "Hak-kâ-fa / 客家話",
1012 "haw": "Hawai`i", 1083 "haw": "Hawai`i",
1013 "he": "עברית" , # Hebrew (formerly iw) 1084 "he": "עברית" , # Hebrew (formerly iw)
1014 "hi": "हिन्दी" , # Hindi 1085 "hi": "हिन्दी" , # Hindi
1015 "hif": "Fiji Hindi", 1086 "hif": "Fiji Hindi",
1016 "ho": "Hiri Mot", # Hiri Motu 1087 "ho": "Hiri Mot", # Hiri Motu
1017 "hr": "Hrvatski" , # Croatian 1088 "hr": "Hrvatski" , # Croatian
1018 "hsb": "Hornjoserbsce", 1089 "hsb": "Hornjoserbsce",
1019 "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole 1090 "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole
1020 "hu": "Magyar" , # Hungarian 1091 "hu": "Magyar" , # Hungarian
1021 "hy": "Հայերեն", # Armenian 1092 "hy": "Հայերեն", # Armenian
1022 "hz": "Otsiherero", # Herero 1093 "hz": "Otsiherero", # Herero
1023 1094
1024 "ia": "Interlingua", 1095 "ia": "Interlingua",
1025 "ie": "Interlingue", 1096 "ie": "Interlingue",
1026 "id": "Bahasa Indonesia",# Indonesian (formerly in) 1097 "id": "Bahasa Indonesia",# Indonesian (formerly in)
1027 "ig": "Igbo", # Igbo 1098 "ig": "Igbo", # Igbo
1028 "ii": "ꆇꉙ ", # Sichuan Yi 1099 "ii": "ꆇꉙ ", # Sichuan Yi
1029 "ik": "Iñupiak", # Inupiak 1100 "ik": "Iñupiak", # Inupiak
1030 "ilo": "Ilokano", 1101 "ilo": "Ilokano",
1031 "io": "Ido" , 1102 "io": "Ido" ,
1032 "is": "Íslenska" , # Icelandic 1103 "is": "Íslenska" , # Icelandic
1033 "it": "Italiano" , # Italian 1104 "it": "Italiano" , # Italian
1034 "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut 1105 "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut
1035 1106
1036 "ja": "日本語", # Japanese 1107 "ja": "日本語", # Japanese
1037 "jbo": "Lojban", 1108 "jbo": "Lojban",
1038 "jv": "Basa Jawa", # Javanese 1109 "jv": "Basa Jawa", # Javanese
1039 1110
1040 "ka": "ქართული" , # Georgian 1111 "ka": "ქართული" , # Georgian
1041 "kaa": "Qaraqalpaqsha", 1112 "kaa": "Qaraqalpaqsha",
1042 "kab": "Taqbaylit", 1113 "kab": "Taqbaylit",
1043 "kg": "KiKongo", # Kongo 1114 "kg": "KiKongo", # Kongo
1044 "ki": "Gĩkũyũ", # Kikuyu 1115 "ki": "Gĩkũyũ", # Kikuyu
1045 "kj": "Kuanyama", # Kuanyama 1116 "kj": "Kuanyama", # Kuanyama
1046 "kk": "Қазақша", # Kazakh 1117 "kk": "Қазақша", # Kazakh
1047 "kl": "Kalaallisut", # Kalaallisut; Greenlandic 1118 "kl": "Kalaallisut", # Kalaallisut; Greenlandic
1048 "km": "ភាសាខ្មែរ", # Khmer; Cambodian 1119 "km": "ភាសាខ្មែរ", # Khmer; Cambodian
1049 "kn": "ಕನ್ನಡ", # Kannada 1120 "kn": "ಕನ್ನಡ", # Kannada
1050 "ko": "한국어" , # Korean 1121 "ko": "한국어" , # Korean
1051 "kr": "Kanuri", # Kanuri 1122 "kr": "Kanuri", # Kanuri
1052 "ks": "कश्मीरी / كشميري", # Kashmiri 1123 "ks": "कश्मीरी / كشميري", # Kashmiri
1053 "ksh": "Ripoarisch", 1124 "ksh": "Ripoarisch",
1054 "ku": "Kurdî / كوردی", # Kurdish 1125 "ku": "Kurdî / كوردی", # Kurdish
1055 "kv": "Коми", # Komi 1126 "kv": "Коми", # Komi
1056 "kw": "Kernewek/Karnuack", # Cornish 1127 "kw": "Kernewek/Karnuack", # Cornish
1057 "ky": "Кыргызча", # Kirghiz 1128 "ky": "Кыргызча", # Kirghiz
1058 1129
1059 "la": "Latina" , # Latin 1130 "la": "Latina" , # Latin
1060 "lad": "Dzhudezmo", 1131 "lad": "Dzhudezmo",
1061 "lb": "Lëtzebuergesch" , # Letzeburgesch 1132 "lb": "Lëtzebuergesch" , # Letzeburgesch
1062 "lbe": "Лакку", 1133 "lbe": "Лакку",
1063 "lg": "Luganda", # Ganda 1134 "lg": "Luganda", # Ganda
1064 "li": "Limburgs", # Limburgish; Limburger; Limburgan 1135 "li": "Limburgs", # Limburgish; Limburger; Limburgan
1065 "lij": "Lígur", 1136 "lij": "Lígur",
1066 "ln": "Lingala", # Lingala 1137 "ln": "Lingala", # Lingala
1067 "lmo": "Lumbaart", 1138 "lmo": "Lumbaart",
1068 "lo": "ລາວ", # Lao; Laotian 1139 "lo": "ລາວ", # Lao; Laotian
1069 "lt": "Lietuvių" , # Lithuanian 1140 "lt": "Lietuvių" , # Lithuanian
1070 "lua": "Luba", # Luba 1141 "lua": "Luba", # Luba
1071 "lv": "Latvieš" , # Latvian; Lettish 1142 "lv": "Latvieš" , # Latvian; Lettish
1072 1143
1073 "map-bms": "Basa Banyumasan", 1144 "map-bms": "Basa Banyumasan",
1074 "mdf": "Мокшень (Mokshanj Kälj)", 1145 "mdf": "Мокшень (Mokshanj Kälj)",
1075 "mg": "Malagasy", # Malagasy 1146 "mg": "Malagasy", # Malagasy
1076 "mh": "Ebon", # Marshall 1147 "mh": "Ebon", # Marshall
1077 "mi": "Māori", # Maori 1148 "mi": "Māori", # Maori
1078 "mk": "Македонски" , # Macedonian 1149 "mk": "Македонски" , # Macedonian
1079 "ml": None, # Malayalam 1150 "ml": None, # Malayalam
1080 "mn": "Монгол", # Mongolian 1151 "mn": "Монгол", # Mongolian
1081 "mo": "Молдовеняскэ", # Moldavian 1152 "mo": "Молдовеняскэ", # Moldavian
1082 "mr": "मराठी" , # Marathi 1153 "mr": "मराठी" , # Marathi
1083 "ms": "Bahasa Melay" , # Malay 1154 "ms": "Bahasa Melay" , # Malay
1084 "mt": "Malti", # Maltese 1155 "mt": "Malti", # Maltese
1085 "mus": "Muskogee", 1156 "mus": "Muskogee",
1086 "my": "မ္ရန္‌မာစာ", # Burmese 1157 "my": "မ္ရန္‌မာစာ", # Burmese
1087 "myv": "Эрзянь (Erzjanj Kelj)", 1158 "myv": "Эрзянь (Erzjanj Kelj)",
1088 "mzn": "مَزِروني", 1159 "mzn": "مَزِروني",
1089 1160
1090 "na": "dorerin Naoero", # Nauru 1161 "na": "dorerin Naoero", # Nauru
1091 "nah": "Nāhuatl", 1162 "nah": "Nāhuatl",
1092 "nap": "Nnapulitano", 1163 "nap": "Nnapulitano",
1093 "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l 1164 "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l
1094 "nd": None, # Ndebele, North 1165 "nd": None, # Ndebele, North
1095 "nds": "Plattdüütsch", 1166 "nds": "Plattdüütsch",
1096 "nds-nl": "Nedersaksisch", 1167 "nds-nl": "Nedersaksisch",
1097 "ne": "नेपाली", # Nepali 1168 "ne": "नेपाली", # Nepali
1098 "new": "नेपाल भाषा" , # Nepal Bhasa 1169 "new": "नेपाल भाषा" , # Nepal Bhasa
1099 "ng": "Oshiwambo", # Ndonga 1170 "ng": "Oshiwambo", # Ndonga
1100 "nl": "Nederlands" , # Dutch 1171 "nl": "Nederlands" , # Dutch
1101 "nn": "Nynorsk", # Norwegian Nynorsk 1172 "nn": "Nynorsk", # Norwegian Nynorsk
1102 "no": "Norsk (Bokmål)" , # Norwegian 1173 "no": "Norsk (Bokmål)" , # Norwegian
1103 "nov": "Novial", 1174 "nov": "Novial",
1104 "nr": None, # Ndebele, South 1175 "nr": None, # Ndebele, South
1105 "nrm": "Nouormand/Normaund", 1176 "nrm": "Nouormand/Normaund",
1106 "nv": "Diné bizaad", # Navajo 1177 "nv": "Diné bizaad", # Navajo
1107 "ny": "Chi-Chewa", # Chichewa; Nyanja 1178 "ny": "Chi-Chewa", # Chichewa; Nyanja
1108 1179
1109 "oc": "Occitan", # Occitan; Proven@,{c}al 1180 "oc": "Occitan", # Occitan; Proven@,{c}al
1110 "oj": None, # Ojibwa 1181 "oj": None, # Ojibwa
1111 "om": "Oromoo", # (Afan) Oromo 1182 "om": "Oromoo", # (Afan) Oromo
1112 "or": "ଓଡ଼ିଆ", # Oriya 1183 "or": "ଓଡ଼ିଆ", # Oriya
1113 "os": "Иронау", # Ossetian; Ossetic 1184 "os": "Иронау", # Ossetian; Ossetic
1114 1185
1115 "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi 1186 "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi
1116 "pag": "Pangasinan", 1187 "pag": "Pangasinan",
1117 "pam": "Kapampangan", 1188 "pam": "Kapampangan",
1118 "pap": "Papiament", 1189 "pap": "Papiament",
1119 "pdc": "Deitsch", 1190 "pdc": "Deitsch",
1120 "pi": "पाऴि", # Pali 1191 "pi": "पाऴि", # Pali
1121 "pih": "Norfuk", 1192 "pih": "Norfuk",
1122 "pl": "Polski" , # Polish 1193 "pl": "Polski" , # Polish
1123 "pms": "Piemontèis" , 1194 "pms": "Piemontèis" ,
1124 "ps": "پښتو", # Pashto, Pushto 1195 "ps": "پښتو", # Pashto, Pushto
1125 "pt": "Português" , # Portuguese 1196 "pt": "Português" , # Portuguese
1126 1197
1127 "q": "Runa Simi" , # Quechua 1198 "q": "Runa Simi" , # Quechua
1128 1199
1129 "rm": "Rumantsch", # Rhaeto-Romance 1200 "rm": "Rumantsch", # Rhaeto-Romance
1130 "rmy": "romani - रोमानी", 1201 "rmy": "romani - रोमानी",
1131 "rn": "Kirundi", # Rundi; Kirundi 1202 "rn": "Kirundi", # Rundi; Kirundi
1132 "ro": "Română" , # Romanian 1203 "ro": "Română" , # Romanian
1133 "roa-rup": "Armãneashce", 1204 "roa-rup": "Armãneashce",
1134 "roa-tara": "Tarandíne", 1205 "roa-tara": "Tarandíne",
1135 "ru": "Русский" , # Russian 1206 "ru": "Русский" , # Russian
1136 "rw": "Ikinyarwanda", # Kinyarwanda 1207 "rw": "Ikinyarwanda", # Kinyarwanda
1137 1208
1138 "sa": "संस्कृतम्", # Sanskrit 1209 "sa": "संस्कृतम्", # Sanskrit
1139 "sah": "Саха тыла (Saxa Tyla)", 1210 "sah": "Саха тыла (Saxa Tyla)",
1140 "sc": "Sardu", # Sardinian 1211 "sc": "Sardu", # Sardinian
1141 "scn": "Sicilian", 1212 "scn": "Sicilian",
1142 "sco": "Scots", 1213 "sco": "Scots",
1143 "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi 1214 "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi
1144 "se": "Sámegiella", # Northern Sami 1215 "se": "Sámegiella", # Northern Sami
1145 "sg": "Sängö", # Sango; Sangro 1216 "sg": "Sängö", # Sango; Sangro
1146 "sh": "Srpskohrvatski / Српскохрватски" , 1217 "sh": "Srpskohrvatski / Српскохрватски" ,
1147 "si": "සිංහල", 1218 "si": "සිංහල",
1148 "simple": "Simple English" , 1219 "simple": "Simple English" ,
1149 "sk": "Slovenčina" , # Slovak 1220 "sk": "Slovenčina" , # Slovak
1150 "sl": "Slovenščina" , # Slovenian 1221 "sl": "Slovenščina" , # Slovenian
1151 "sm": "Gagana Samoa", # Samoan 1222 "sm": "Gagana Samoa", # Samoan
1152 "sn": "chiShona", # Shona 1223 "sn": "chiShona", # Shona
1153 "so": "Soomaaliga", # Somali 1224 "so": "Soomaaliga", # Somali
1154 "sr": "Српски / Srpski", # Serbian 1225 "sr": "Српски / Srpski", # Serbian
1155 "srn": "Sranantongo", 1226 "srn": "Sranantongo",
1156 "ss": "SiSwati", # Swati; Siswati 1227 "ss": "SiSwati", # Swati; Siswati
1157 "st": "Sesotho", # Sesotho; Sotho, Southern 1228 "st": "Sesotho", # Sesotho; Sotho, Southern
1158 "stk": "Seeltersk", 1229 "stk": "Seeltersk",
1159 "s": "Basa Sunda", # Sundanese 1230 "s": "Basa Sunda", # Sundanese
1160 "sq": "Shqip" , # Albanian 1231 "sq": "Shqip" , # Albanian
1161 "szl": "Ślůnski", 1232 "szl": "Ślůnski",
1162 "sv": "Svenska" , # Swedish 1233 "sv": "Svenska" , # Swedish
1163 "sw": "Kiswahili", # Swahili 1234 "sw": "Kiswahili", # Swahili
1164 1235
1165 "ta": "தமிழ்" , # Tamil 1236 "ta": "தமிழ்" , # Tamil
1166 "te": "తెలుగు" , # Telugu 1237 "te": "తెలుగు" , # Telugu
1167 "tet": "Tetun", 1238 "tet": "Tetun",
1168 "tg": "Тоҷикӣ", # Tajik 1239 "tg": "Тоҷикӣ", # Tajik
1169 "th": "ไทย" , # Thai 1240 "th": "ไทย" , # Thai
1170 "ti": "ትግርኛ", # Tigrinya 1241 "ti": "ትግርኛ", # Tigrinya
1171 "tk": "تركمن / Туркмен", # Turkmen 1242 "tk": "تركمن / Туркмен", # Turkmen
1172 "tl": "Tagalog" , # Tagalog 1243 "tl": "Tagalog" , # Tagalog
1173 "tn": "Setswana", # Tswana; Setswana 1244 "tn": "Setswana", # Tswana; Setswana
1174 "to": "faka Tonga", # Tonga (?) # Also ZW ; MW 1245 "to": "faka Tonga", # Tonga (?) # Also ZW ; MW
1175 "tokipona": "Tokipona", 1246 "tokipona": "Tokipona",
1176 "tpi": "Tok Pisin", 1247 "tpi": "Tok Pisin",
1177 "tr": "Türkçe" , # Turkish 1248 "tr": "Türkçe" , # Turkish
1178 "ts": "Xitsonga", # Tsonga 1249 "ts": "Xitsonga", # Tsonga
1179 "tt": "Tatarça / Татарча", # Tatar 1250 "tt": "Tatarça / Татарча", # Tatar
1180 "tum": "chiTumbuka", 1251 "tum": "chiTumbuka",
1181 "tw": "Twi", # Twi 1252 "tw": "Twi", # Twi
1182 "ty": "Reo Mā`ohi", # Tahitian 1253 "ty": "Reo Mā`ohi", # Tahitian
1183 1254
1184 "udm": "Удмурт кыл", 1255 "udm": "Удмурт кыл",
1185 "ug": "Oyghurque", # Uighur 1256 "ug": "Oyghurque", # Uighur
1186 "uk": "Українська" , # Ukrainian 1257 "uk": "Українська" , # Ukrainian
1187 "ur": "اردو", # Urdu 1258 "ur": "اردو", # Urdu
1188 "uz": "O‘zbek", # Uzbek 1259 "uz": "O‘zbek", # Uzbek
1189 1260
1190 "ve": "Tshivenda", # Venda 1261 "ve": "Tshivenda", # Venda
1191 "vec": "Vèneto", 1262 "vec": "Vèneto",
1192 "vi": "Tiếng Việt" , # Vietnamese 1263 "vi": "Tiếng Việt" , # Vietnamese
1193 "vls": "West-Vlams", 1264 "vls": "West-Vlams",
1194 "vo": "Volapük" , 1265 "vo": "Volapük" ,
1195 1266
1196 "wa": "Walon", # Walloon 1267 "wa": "Walon", # Walloon
1197 "war": "Winaray", 1268 "war": "Winaray",
1198 "wo": "Wolof", # Wolof 1269 "wo": "Wolof", # Wolof
1199 "w": "吴语", 1270 "w": "吴语",
1200 1271
1201 "xal": "Хальмг", 1272 "xal": "Хальмг",
1202 "xh": "isiXhosa", # Xhosa 1273 "xh": "isiXhosa", # Xhosa
1203 1274
1204 "yi": "ייִדיש", # Yiddish 1275 "yi": "ייִדיש", # Yiddish
1205 "yo": "Yorùbá", # Yoruba 1276 "yo": "Yorùbá", # Yoruba
1206 1277
1207 "za": "Cuengh", # Zhuang 1278 "za": "Cuengh", # Zhuang
1208 "zea": "Zeêuws", 1279 "zea": "Zeêuws",
1209 "zh": "中文" , # Chinese 1280 "zh": "中文" , # Chinese
1210 "zh-classical": "古文 / 文言文", 1281 "zh-classical": "古文 / 文言文",
1211 "zm-min-nan": "Bân-lâm-gú", 1282 "zm-min-nan": "Bân-lâm-gú",
1212 "zh-yue": "粵語", 1283 "zh-yue": "粵語",
1213 "zu": "isiZulu" # Zulu 1284 "zu": "isiZulu" # Zulu
1214 } 1285 }
1215
1216
1217
1218
1219
1220
1221
1222
1223
diff --git a/WikiTrans/wikins.py b/wikitrans/wikins.py
index 4fb5315..4fb5315 100644
--- a/WikiTrans/wikins.py
+++ b/wikitrans/wikins.py
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py
new file mode 100644
index 0000000..49c6c68
--- a/dev/null
+++ b/wikitrans/wikitoken.py
@@ -0,0 +1,318 @@
1# Wiki tokens. -*- coding: utf-8 -*-
2# Copyright (C) 2015-2018 Sergey Poznyakoff
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3, or (at your option)
7# any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17"""
18Wiki markup tokens and associated classes.
19
20This module defines classes for the basic nodes of the Wiki markup parse tree:
21
22WikiNode -- Abstract parse tree node.
23WikiContentNode -- A node associated with some content.
24WikiSeqNode -- A sequence of nodes.
25WikiTextNode -- Textual content.
26WikiDelimNode -- Delimiter.
27WikiTagNode -- Tag (e.g. <tt>, </tt>, <tt />, etc.)
28WikiRefNode -- Wiki reference (e.g. [target|name])
29WikiHdrNode -- Heading (e.g. == Section ==)
30WikiEltNode -- Environment element.
31WikiEnvNode -- Environment (numbered or unnumbered list, definition, etc.)
32WikiIndNode -- Indent node.
33
34Auxiliary classes:
35
36WikiNodeEncoder -- Custom JSONEncoder subclass for serializing objects of the
37 above classes.
38"""
39
40from __future__ import print_function
41import re
42import json
43
class WikiNodeEncoder(json.JSONEncoder):
    """JSONEncoder that knows how to serialize WikiNode objects."""

    def default(self, obj):
        # WikiNode instances provide their own serialization; anything
        # else falls through to the stock JSONEncoder behaviour.
        if not isinstance(obj, WikiNode):
            return json.JSONEncoder.default(self, obj)
        return obj.jsonEncode()
50
def jsonencoder(func):
    """Decorator: tag the dict returned by *func* with node metadata.

    The wrapped method's result is augmented with the implementing
    class name (key 'wikinode') and the node's 'type' attribute.
    """
    def _mkencoder(self):
        encoded = func(self)
        encoded['wikinode'] = self.__class__.__name__
        encoded['type'] = self.type
        return encoded
    return _mkencoder
58
class WikiNode(object):
    """Generic parse tree node.

    Attributes:

    type -- actual type of this object (string)
    parser -- parser instance that owns this node
    """

    type = 'UNDEF'
    parser = None

    def __init__(self, parser, **kwargs):
        """Initialize the node owned by *parser*.

        Keyword arguments set existing class attributes; an unknown
        keyword raises AttributeError.
        """
        self.parser = parser
        for key in kwargs:
            if hasattr(self, key):
                self.__dict__[key] = kwargs[key]
            else:
                raise AttributeError(
                    "'%s' has no attribute '%s'"
                    % (self.__class__.__name__, key))

    def __str__(self):
        return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)

    @jsonencoder
    def jsonEncode(self):
        """Return a JSON-serializable dict of this node's public attributes."""
        ret = {}
        for x in dir(self):
            # Skip the owning parser and private members.  Also skip
            # callables: the original test "type(x) == 'function'" could
            # never be true, since x is always a string (dir() yields
            # attribute names, not values).
            if x == 'parser' or x.startswith('_'):
                continue
            if x in self.__dict__ and not callable(self.__dict__[x]):
                ret[x] = self.__dict__[x]
        return ret

    def format(self):
        """Abstract formatting function.

        Derived classes must override it.
        """
        pass
98
class WikiContentNode(WikiNode):
    """Generic content node.

    Attributes:

    content -- Actual content
    """

    content = None

    def format(self):
        pass

    @jsonencoder
    def jsonEncode(self):
        """Return a JSON-serializable dict with the node's content."""
        ret = {}
        if self.content:
            if self.type == 'TEXT':
                ret['content'] = self.content
            elif isinstance(self.content, list):
                # Use a list comprehension, not map(): under Python 3
                # map() returns an iterator, which json cannot serialize.
                ret['content'] = [x.jsonEncode() for x in self.content]
            elif isinstance(self.content, WikiNode):
                ret['content'] = self.content.jsonEncode()
            else:
                ret['content'] = self.content
        else:
            ret['content'] = None
        return ret
127
class WikiSeqNode(WikiContentNode):
    """Generic sequence of nodes.

    Attributes:

    content -- list of nodes.
    """

    def format(self):
        for x in self.content:
            x.format()

    @jsonencoder
    def jsonEncode(self):
        """Return a JSON-serializable dict with the node's content."""
        ret = {}
        if not self.content:
            ret['content'] = None
        elif isinstance(self.content, list):
            # Use a list comprehension, not map(): under Python 3 map()
            # returns an iterator, which json cannot serialize.
            ret['content'] = [x.jsonEncode() for x in self.content]
        elif isinstance(self.content, WikiNode):
            ret['content'] = self.content.jsonEncode()
        else:
            ret['content'] = self.content
        return ret
152
153
154# ##############
155
class WikiTextNode(WikiContentNode):
    """Plain text node.

    Attributes:

    type -- 'TEXT'
    content -- string
    """

    type = 'TEXT'

    @jsonencoder
    def jsonEncode(self):
        return {'content': self.content}
172
class WikiDelimNode(WikiContentNode):
    """Delimiter node.

    Attributes:

    type -- 'DELIM'
    content -- actual delimiter string
    isblock -- boolean indicating whether it is a block delimiter
    continuation -- True if continuation is expected
    """

    type = 'DELIM'
    isblock = False
    continuation = False
187
class WikiTagNode(WikiContentNode):
    """A Wiki tag.

    Attributes:

    tag -- actual tag name (with '<', '>', and eventual '/' stripped)
    isblock -- True if this is a block tag
    args -- List of tag arguments
    idx -- If this is a "see also" reference, index of this ref in the
           list of references.
           FIXME: Perhaps this merits a subclass?
    """

    tag = None
    isblock = False
    args = None
    idx = None

    def __init__(self, *args, **keywords):
        super(WikiTagNode, self).__init__(*args, **keywords)
        # Register 'ref' tags with the owning parser, so that they can
        # be numbered and collected in order of appearance.
        is_ref = (self.type == 'TAG' and self.tag == 'ref')
        if is_ref and hasattr(self.parser, 'references'):
            self.idx = len(self.parser.references)
            self.parser.references.append(self)

    @jsonencoder
    def jsonEncode(self):
        return {
            'tag': self.tag,
            'isblock': self.isblock,
            'args': self.args.tab if self.args else None,
            'content': self.content.jsonEncode() if self.content else None,
            'idx': self.idx
        }
221
class WikiRefNode(WikiContentNode):
    """Reference node.

    This class represents a wiki reference, such as "[ref|content]".

    Attributes:

    ref -- actual reference
    content -- content string
    """

    type = 'REF'
    ref = None

    @jsonencoder
    def jsonEncode(self):
        return {
            'ref': self.ref,
            'content': self.content.jsonEncode()
        }
241
class WikiHdrNode(WikiContentNode):
    """A wiki markup header node.

    Attributes:

    level -- header level
    content -- header content (WikiNode subclass object)
    """

    type = 'HDR'
    level = None

    @jsonencoder
    def jsonEncode(self):
        return {'level': self.level, 'content': self.content.jsonEncode()}
260
class WikiEltNode(WikiContentNode):
    """Environment element node.

    Attributes:

    subtype -- type of the environment (numbered, unnumbered, defn)
    content -- content of the element (WikiNode subclass object)
    """

    type = 'ELT'
    subtype = None

    @jsonencoder
    def jsonEncode(self):
        return {'subtype': self.subtype, 'content': self.content.jsonEncode()}
279
class WikiEnvNode(WikiContentNode):
    """Wiki environment node.

    Attributes:

    envtype -- type of the environment (numbered, unnumbered, defn)
    level -- nesting level of the environment
    """

    type = 'ENV'
    envtype = None
    level = None

    @jsonencoder
    def jsonEncode(self):
        """Return a JSON-serializable dict for this environment."""
        return {
            'envtype': self.envtype,
            'level': self.level,
            # Use a list comprehension, not map(): under Python 3 map()
            # returns an iterator, which json cannot serialize.
            'content': [x.jsonEncode() for x in self.content]
        }
300
class WikiIndNode(WikiContentNode):
    """Indented block node.

    Attributes:

    level -- Indentation level.
    content -- Indented content (WikiNode subclass object).
    """

    type = 'IND'
    level = None

    @jsonencoder
    def jsonEncode(self):
        return {'level': self.level, 'content': self.content.jsonEncode()}

Return to:

Send suggestions and report system problems to the System administrator.