summary | refs | log | tree | commit | diff | about
author Sergey Poznyakoff <gray@gnu.org>2018-08-27 08:05:10 (GMT)
committer Sergey Poznyakoff <gray@gnu.org>2018-08-27 08:05:10 (GMT)
commit 6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c (patch) (side-by-side diff)
tree eaaf5a26a0c2de2ca412b260288c27c87fd14d80
parent 2cc7e51e64c923f04e64c89e1c3e4ff09c97c4c9 (diff)
download wikitrans-6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c.tar.gz
wikitrans-6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c.tar.bz2
Fix tokenizer
* wikitrans/wikimarkup.py (tokread): Convert unrecognized closing tags to TEXT.
(tokenize): Use str() when forcing nodes to TEXT.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- wikitrans/wikimarkup.py 14
1 files changed, 10 insertions, 4 deletions
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 0ce0e15..19f69e6 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -259,7 +259,8 @@ class WikiMarkupParser(object):
content=m.group(0)))
continue
else:
- yield(self._new_node(type='TEXT', content=m.group(0)))
+ yield(self._new_node(type='TEXT',
+ content=m.group(0)))
continue
else:
m = self.ctag.match(line, pos)
@@ -267,8 +268,11 @@ class WikiMarkupParser(object):
if m.group('tag') in self.tags:
yield(self._new_node(type='CTAG',
tag=m.group('tag')))
- pos = m.end(0)
- continue
+ else:
+ yield(self._new_node(type='TEXT',
+ content=m.group(0)))
+ pos = m.end(0)
+ continue
else:
yield(self._new_node(type='TEXT',
content=line[pos:pos+1]))
@@ -370,7 +374,9 @@ class WikiMarkupParser(object):
stack.append(i)
# Redefine all non-matched tokens as TEXT
for i in stack:
- self.toklist[i].type = 'TEXT' # FIXME
+ # FIXME
+ self.toklist[i] = self._new_node(type='TEXT',
+ content=str(self.toklist[i]))
mark = []

Return to:

Send suggestions and report system problems to the System administrator.