diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-27 11:05:10 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-27 11:05:10 +0300 |
commit | 6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c (patch) | |
tree | eaaf5a26a0c2de2ca412b260288c27c87fd14d80 | |
parent | 2cc7e51e64c923f04e64c89e1c3e4ff09c97c4c9 (diff) | |
download | wikitrans-6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c.tar.gz wikitrans-6d65c3591b3d8b2dc9b833ac7222c8cba8b0911c.tar.bz2 |
Fix tokenizer
* wikitrans/wikimarkup.py (tokread): Convert unrecognized closing tags
to TEXT
(tokenize): Use str() when forcing nodes to TEXT
-rw-r--r-- | wikitrans/wikimarkup.py | 14 |
1 file changed, 10 insertions, 4 deletions
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 0ce0e15..19f69e6 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -256,22 +256,26 @@ class WikiMarkupParser(object):
                                                          tag=m.group('tag')))
                             except TagAttributeSyntaxError:
                                 yield(self._new_node(type='TEXT',
                                                      content=m.group(0)))
                             continue
                         else:
-                            yield(self._new_node(type='TEXT', content=m.group(0)))
+                            yield(self._new_node(type='TEXT',
+                                                 content=m.group(0)))
                             continue
                     else:
                         m = self.ctag.match(line, pos)
                         if m:
                             if m.group('tag') in self.tags:
                                 yield(self._new_node(type='CTAG',
                                                      tag=m.group('tag')))
-                                pos = m.end(0)
-                                continue
+                            else:
+                                yield(self._new_node(type='TEXT',
+                                                     content=m.group(0)))
+                            pos = m.end(0)
+                            continue
                         else:
                             yield(self._new_node(type='TEXT',
                                                  content=line[pos:pos+1]))
                             pos += 1
                             continue
                 else:
@@ -367,13 +371,15 @@ class WikiMarkupParser(object):
                         stack.append(i)
                 else:
                     # Push the token on stack
                     stack.append(i)
         # Redefine all non-matched tokens as TEXT
         for i in stack:
-            self.toklist[i].type = 'TEXT' # FIXME
+            # FIXME
+            self.toklist[i] = self._new_node(type='TEXT',
+                                             content=str(self.toklist[i]))
 
     mark = []
 
     def push_mark(self):
         """Save the current token index on stack."""
         self.mark.append(self.tokind)