diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-09-01 22:10:01 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-09-01 22:24:02 +0300 |
commit | 5320bea15e388200e613e6a2bdac3c1449030986 (patch) | |
tree | 7d8571c63f72cc690cea8323e43be09c18527c45 /wikitrans | |
parent | 0aae19835045bac0be0f22ecd0e84527cdaee21c (diff) | |
download | wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.gz wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.bz2 |
Bugfixes
* README.rst: Describe new options.
* bin/wikitrans: Change handling of the --debug option.
* wikitrans/wikimarkup.py (WikiMarkupParser): New attribute - strict.
(parse_para): Don't throw UnexpectedTokenError if self.strict is False;
instead, ignore the invalid token.
(WikiMarkup): Fix Python 3 compatibility.
* wikitrans/wikitoken.py: Fix Python 3 compatibility.
Diffstat (limited to 'wikitrans')
-rw-r--r-- | wikitrans/wikimarkup.py | 35 | ||||
-rw-r--r-- | wikitrans/wikitoken.py | 2 |
2 files changed, 31 insertions, 6 deletions
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py index 19f69e6..1e2429f 100644 --- a/wikitrans/wikimarkup.py +++ b/wikitrans/wikimarkup.py | |||
@@ -145,6 +145,13 @@ class WikiMarkupParser(object): | |||
145 | 145 | ||
146 | Public attributes: | 146 | Public attributes: |
147 | 147 | ||
148 | Input: | ||
149 | debug_level -- debug verbosity level (0 - no debug info, 100 - excessively | ||
150 | copious debug messages). Default is 0. | ||
151 | strict -- if True, parser will throw exception upon encountering | ||
152 | invalid markup tag (mostly for future use) | ||
153 | |||
154 | Output: | ||
148 | tree -- constructed parse tree (a subclass of WikiNode) | 155 | tree -- constructed parse tree (a subclass of WikiNode) |
149 | 156 | ||
150 | """ | 157 | """ |
@@ -175,6 +182,7 @@ class WikiMarkupParser(object): | |||
175 | tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ] | 182 | tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ] |
176 | 183 | ||
177 | debug_level = 0 | 184 | debug_level = 0 |
185 | strict = False | ||
178 | 186 | ||
179 | def dprint(self, lev, fmt, *argv): | 187 | def dprint(self, lev, fmt, *argv): |
180 | """If current debug level is greater than or equal to lev, print *argv | 188 | """If current debug level is greater than or equal to lev, print *argv |
@@ -187,7 +195,7 @@ class WikiMarkupParser(object): | |||
187 | inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] | 195 | inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] |
188 | 196 | ||
189 | token_class = {} | 197 | token_class = {} |
190 | 198 | ||
191 | def _new_node(self, **kwarg): | 199 | def _new_node(self, **kwarg): |
192 | return self.token_class[kwarg['type']](self, **kwarg) | 200 | return self.token_class[kwarg['type']](self, **kwarg) |
193 | 201 | ||
@@ -374,7 +382,7 @@ class WikiMarkupParser(object): | |||
374 | stack.append(i) | 382 | stack.append(i) |
375 | # Redefine all non-matched tokens as TEXT | 383 | # Redefine all non-matched tokens as TEXT |
376 | for i in stack: | 384 | for i in stack: |
377 | # FIXME | 385 | # FIXME: How to convert node to TEXT? |
378 | self.toklist[i] = self._new_node(type='TEXT', | 386 | self.toklist[i] = self._new_node(type='TEXT', |
379 | content=str(self.toklist[i])) | 387 | content=str(self.toklist[i])) |
380 | 388 | ||
@@ -492,7 +500,10 @@ class WikiMarkupParser(object): | |||
492 | flush() | 500 | flush() |
493 | acc['seq'].append(self.parse_inline_delim(tok)) | 501 | acc['seq'].append(self.parse_inline_delim(tok)) |
494 | else: | 502 | else: |
495 | raise UnexpectedTokenError(tok) | 503 | if self.strict: |
504 | raise UnexpectedTokenError(tok) | ||
505 | # FIXME: Another possible variant of handling this case is to | ||
506 | # convert tok to TEXT node and append it to acc['seq'] | ||
496 | tok = self.getkn() | 507 | tok = self.getkn() |
497 | flush() | 508 | flush() |
498 | if acc['seq']: | 509 | if acc['seq']: |
@@ -976,6 +987,13 @@ class WikiMarkup(WikiMarkupParser): | |||
976 | media_base=URL | 987 | media_base=URL |
977 | Base URL for media files. Default is | 988 | Base URL for media files. Default is |
978 | 'http://www.mediawiki.org/xml/export-0.3' | 989 | 'http://www.mediawiki.org/xml/export-0.3' |
990 | |||
991 | debug_level=INT | ||
992 | debug verbosity level (0 - no debug info, 100 - excessively | ||
993 | copious debug messages). Default is 0. | ||
994 | strict=BOOL | ||
995 | Strict parsing mode. Throw exceptions on syntax errors. Default | ||
996 | is False. | ||
979 | """ | 997 | """ |
980 | self.token_class = { | 998 | self.token_class = { |
981 | 'NIL': WikiNode, | 999 | 'NIL': WikiNode, |
@@ -999,14 +1017,17 @@ class WikiMarkup(WikiMarkupParser): | |||
999 | 'LINK': WikiSeqNode, | 1017 | 'LINK': WikiSeqNode, |
1000 | 'HDR': WikiHdrNode | 1018 | 'HDR': WikiHdrNode |
1001 | } | 1019 | } |
1002 | 1020 | ||
1003 | for kw in keywords: | 1021 | for kw in keywords: |
1004 | if kw == 'file': | 1022 | if kw == 'file': |
1005 | self.file = keywords[kw] | 1023 | self.file = keywords[kw] |
1006 | elif kw == 'filename': | 1024 | elif kw == 'filename': |
1007 | self.file = open(keywords[kw]) | 1025 | self.file = open(keywords[kw]) |
1008 | elif kw == 'text': | 1026 | elif kw == 'text': |
1009 | self.text = keywords[kw].split("\n") | 1027 | if sys.version_info[0] > 2: |
1028 | self.text = keywords[kw].decode('utf-8').split("\n") | ||
1029 | else: | ||
1030 | self.text = keywords[kw].split("\n") | ||
1010 | elif kw == 'lang': | 1031 | elif kw == 'lang': |
1011 | self.lang = keywords[kw] | 1032 | self.lang = keywords[kw] |
1012 | elif kw == 'html_base': | 1033 | elif kw == 'html_base': |
@@ -1015,6 +1036,10 @@ class WikiMarkup(WikiMarkupParser): | |||
1015 | self.image_base = keywords[kw] | 1036 | self.image_base = keywords[kw] |
1016 | elif kw == 'media_base': | 1037 | elif kw == 'media_base': |
1017 | self.media_base = keywords[kw] | 1038 | self.media_base = keywords[kw] |
1039 | elif kw == 'strict': | ||
1040 | self.strict = keywords[kw] | ||
1041 | elif kw == 'debug_level': | ||
1042 | self.debug_level = keywords[kw] | ||
1018 | 1043 | ||
1019 | def __del__(self): | 1044 | def __del__(self): |
1020 | if self.file: | 1045 | if self.file: |
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py index deedea8..1f81092 100644 --- a/wikitrans/wikitoken.py +++ b/wikitrans/wikitoken.py | |||
@@ -311,7 +311,7 @@ class WikiEnvNode(WikiContentNode): | |||
311 | return { | 311 | return { |
312 | 'envtype': self.envtype, | 312 | 'envtype': self.envtype, |
313 | 'level': self.level, | 313 | 'level': self.level, |
314 | 'content': map(lambda x: x.json_encode(), self.content) | 314 | 'content': [x for x in map(lambda x: x.json_encode(), self.content)] |
315 | } | 315 | } |
316 | 316 | ||
317 | 317 | ||