diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
commit | 28072898f1bd9a925d73ac187d560198d6345524 (patch) | |
tree | a46d781fb85d9dda61fc8f68e0ba6ec43d60ce55 /wiki2text.py | |
parent | 75672b57a2d63f01d00795fe8d661d1efe7b6e8d (diff) | |
download | wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.gz wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.bz2 |
Improve tag handling and debugging
* wikimarkup.py: Rewrite tag recognition.
Implement dump method.
* wikicvt.py: New options -D (--dump), and -t dump
* wiki2html.py (input_tag): Remove method
(str_tag): Change handling of tags
* wiki2texi.py: Likewise.
* wiki2text.py: Likewise.
Diffstat (limited to 'wiki2text.py')
-rw-r--r-- | wiki2text.py | 27 |
1 file changed, 11 insertions, 16 deletions
```diff
diff --git a/wiki2text.py b/wiki2text.py
index 27a7051..d4cab81 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -139,31 +139,26 @@ class TextWikiMarkup (WikiMarkup):
                     length = 0
                     linebuf = ""
                 linebuf += " " * wsc + s
                 length += wsc + wlen
         return output + linebuf
 
-    supported_tags = [ 'nowiki', 'code' ]
-    def input_tag(self, tag):
-        return tag['tag'] in self.supported_tags
-
     def str_tag(self, elt):
         if elt['tag'] == 'nowiki':
-            return elt['content']
+            return self.format(elt['content'])
         elif elt['tag'] == 'code':
-            kwdict = {
-                'nested': self.nested + 1,
-                'lang': self.lang,
-                'text': elt['content'],
-                'html_base': self.html_base,
-                'image_base': self.image_base,
-                'media_base': self.media_base }
-            markup = TextWiktionaryMarkup(**kwdict)
-            markup.debug_level = self.debug_level
-            markup.parse()
-            return str(markup)
+            self.nested += 1
+            s = self.format(elt['content'])
+            self.nested -= 1
+            return s #FIXME
+        else:
+            s = '<' + elt['tag']
+            if elt['args']:
+                s += ' ' + elt['args']
+            s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
+            return s
 
     def format(self, elt):
         if elt['type'] == 'TEXT':
             if isinstance(elt['content'],list):
                 string = ""
                 for s in elt['content']:
```