summary | refs | log | tree | commit | diff
path: root/wikitrans
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2018-09-01 22:10:01 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2018-09-01 22:24:02 +0300
commit: 5320bea15e388200e613e6a2bdac3c1449030986 (patch)
tree: 7d8571c63f72cc690cea8323e43be09c18527c45 /wikitrans
parent: 0aae19835045bac0be0f22ecd0e84527cdaee21c (diff)
download: wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.gz
          wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.bz2
Bugfixes
* README.rst: Describe new options.
* bin/wikitrans: Change handling of the --debug option.
* wikitrans/wikimarkup.py (WikiMarkupParser): New attribute - strict.
(parse_para): Don't throw UnexpectedTokenError if self.strict is False; instead, ignore the invalid token.
(WikiMarkup): Fix Python 3 compatibility.
* wikitrans/wikitoken.py: Fix Python 3 compatibility.
Diffstat (limited to 'wikitrans')
-rw-r--r-- wikitrans/wikimarkup.py 35
-rw-r--r-- wikitrans/wikitoken.py 2
2 files changed, 31 insertions, 6 deletions
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 19f69e6..1e2429f 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -145,6 +145,13 @@ class WikiMarkupParser(object):
145 145
146 Public attributes: 146 Public attributes:
147 147
148 Input:
149 debug_level -- debug verbosity level (0 - no debug info, 100 - excessively
150 copious debug messages). Default is 0.
151 strict -- if True, parser will throw exception upon encountering
152 invalid markup tag (mostly for future use)
153
154 Output:
148 tree -- constructed parse tree (a subclass of WikiNode) 155 tree -- constructed parse tree (a subclass of WikiNode)
149 156
150 """ 157 """
@@ -175,6 +182,7 @@ class WikiMarkupParser(object):
175 tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ] 182 tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ]
176 183
177 debug_level = 0 184 debug_level = 0
185 strict = False
178 186
179 def dprint(self, lev, fmt, *argv): 187 def dprint(self, lev, fmt, *argv):
180 """If current debug level is greater than or equal to lev, print *argv 188 """If current debug level is greater than or equal to lev, print *argv
@@ -187,7 +195,7 @@ class WikiMarkupParser(object):
187 inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] 195 inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ]
188 196
189 token_class = {} 197 token_class = {}
190 198
191 def _new_node(self, **kwarg): 199 def _new_node(self, **kwarg):
192 return self.token_class[kwarg['type']](self, **kwarg) 200 return self.token_class[kwarg['type']](self, **kwarg)
193 201
@@ -374,7 +382,7 @@ class WikiMarkupParser(object):
374 stack.append(i) 382 stack.append(i)
375 # Redefine all non-matched tokens as TEXT 383 # Redefine all non-matched tokens as TEXT
376 for i in stack: 384 for i in stack:
377 # FIXME 385 # FIXME: How to convert node to TEXT?
378 self.toklist[i] = self._new_node(type='TEXT', 386 self.toklist[i] = self._new_node(type='TEXT',
379 content=str(self.toklist[i])) 387 content=str(self.toklist[i]))
380 388
@@ -492,7 +500,10 @@ class WikiMarkupParser(object):
492 flush() 500 flush()
493 acc['seq'].append(self.parse_inline_delim(tok)) 501 acc['seq'].append(self.parse_inline_delim(tok))
494 else: 502 else:
495 raise UnexpectedTokenError(tok) 503 if self.strict:
504 raise UnexpectedTokenError(tok)
505 # FIXME: Another possible variant of handling this case is to
506 # convert tok to TEXT node and append it to acc['seq']
496 tok = self.getkn() 507 tok = self.getkn()
497 flush() 508 flush()
498 if acc['seq']: 509 if acc['seq']:
@@ -976,6 +987,13 @@ class WikiMarkup(WikiMarkupParser):
976 media_base=URL 987 media_base=URL
977 Base URL for media files. Default is 988 Base URL for media files. Default is
978 'http://www.mediawiki.org/xml/export-0.3' 989 'http://www.mediawiki.org/xml/export-0.3'
990
991 debug_level=INT
992 debug verbosity level (0 - no debug info, 100 - excessively
993 copious debug messages). Default is 0.
994 strict=BOOL
995 Strict parsing mode. Throw exceptions on syntax errors. Default
996 is False.
979 """ 997 """
980 self.token_class = { 998 self.token_class = {
981 'NIL': WikiNode, 999 'NIL': WikiNode,
@@ -999,14 +1017,17 @@ class WikiMarkup(WikiMarkupParser):
999 'LINK': WikiSeqNode, 1017 'LINK': WikiSeqNode,
1000 'HDR': WikiHdrNode 1018 'HDR': WikiHdrNode
1001 } 1019 }
1002 1020
1003 for kw in keywords: 1021 for kw in keywords:
1004 if kw == 'file': 1022 if kw == 'file':
1005 self.file = keywords[kw] 1023 self.file = keywords[kw]
1006 elif kw == 'filename': 1024 elif kw == 'filename':
1007 self.file = open(keywords[kw]) 1025 self.file = open(keywords[kw])
1008 elif kw == 'text': 1026 elif kw == 'text':
1009 self.text = keywords[kw].split("\n") 1027 if sys.version_info[0] > 2:
1028 self.text = keywords[kw].decode('utf-8').split("\n")
1029 else:
1030 self.text = keywords[kw].split("\n")
1010 elif kw == 'lang': 1031 elif kw == 'lang':
1011 self.lang = keywords[kw] 1032 self.lang = keywords[kw]
1012 elif kw == 'html_base': 1033 elif kw == 'html_base':
@@ -1015,6 +1036,10 @@ class WikiMarkup(WikiMarkupParser):
1015 self.image_base = keywords[kw] 1036 self.image_base = keywords[kw]
1016 elif kw == 'media_base': 1037 elif kw == 'media_base':
1017 self.media_base = keywords[kw] 1038 self.media_base = keywords[kw]
1039 elif kw == 'strict':
1040 self.strict = keywords[kw]
1041 elif kw == 'debug_level':
1042 self.debug_level = keywords[kw]
1018 1043
1019 def __del__(self): 1044 def __del__(self):
1020 if self.file: 1045 if self.file:
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py
index deedea8..1f81092 100644
--- a/wikitrans/wikitoken.py
+++ b/wikitrans/wikitoken.py
@@ -311,7 +311,7 @@ class WikiEnvNode(WikiContentNode):
311 return { 311 return {
312 'envtype': self.envtype, 312 'envtype': self.envtype,
313 'level': self.level, 313 'level': self.level,
314 'content': map(lambda x: x.json_encode(), self.content) 314 'content': [x for x in map(lambda x: x.json_encode(), self.content)]
315 } 315 }
316 316
317 317

Return to:

Send suggestions and report system problems to the System administrator.