diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2015-07-22 12:16:38 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2015-07-22 12:16:38 +0300 |
commit | 32be559549aab3d71bee6be566782eef6594442d (patch) | |
tree | 71bde2c14190a742832b9a2591c3fb0aa4c2f75d | |
parent | 64cf5fdb880815ff21652ddb74e48490dd2f56fe (diff) | |
download | wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.gz wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.bz2 |
Major rewrite
Use dedicated classes instead of dictionaries to represent markup tokens.
* WikiTrans/wikitoken.py: New file. Defines Wiki markup tokens.
* WikiTrans/wikimarkup.py: Rewrite.
* WikiTrans/wiki2html.py: Update.
* WikiTrans/wiki2texi.py: Update.
* WikiTrans/wiki2text.py: Update.
* bin/wikitrans: Update.
-rw-r--r-- | WikiTrans/wiki2html.py | 88 | ||||
-rw-r--r-- | WikiTrans/wiki2texi.py | 112 | ||||
-rw-r--r-- | WikiTrans/wiki2text.py | 81 | ||||
-rw-r--r-- | WikiTrans/wikimarkup.py | 923 | ||||
-rw-r--r-- | WikiTrans/wikitoken.py | 166 | ||||
-rwxr-xr-x | bin/wikitrans | 2 | ||||
-rw-r--r-- | testdata/colon.html | 5 | ||||
-rw-r--r-- | testdata/headings.html | 11 | ||||
-rw-r--r-- | testdata/hz.html | 5 | ||||
-rw-r--r-- | testdata/nowiki-ind.html | 7 | ||||
-rw-r--r-- | testdata/nowiki.html | 1 | ||||
-rw-r--r-- | testdata/numlist.html | 4 | ||||
-rw-r--r-- | testdata/para.html | 4 | ||||
-rw-r--r-- | testdata/unlist.html | 4 |
14 files changed, 770 insertions, 643 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 754fa9b..122c91c 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -82,16 +82,16 @@ class HtmlWikiMarkup (WikiMarkup): text += ' <span class="proto">' + x + '</span>' text += ' <span class="meaning">(' + s[-2] + ')</span>' return text def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) + arg = self.format(elt.content[0]) text = None - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" elif len(s) > 1 and s[1] == 'thumb': return "" text = '<span class="template">' + s[1] + '</span>' if istmpl: @@ -130,122 +130,122 @@ class HtmlWikiMarkup (WikiMarkup): return self.fmtlink(elt, False) def str_tmpl(self, elt): return self.fmtlink(elt, True) def str_ref(self, elt): - target = elt['ref'] - text = self.format(elt['content']) + target = elt.ref + text = self.format(elt.content) return "<a href=\"%s\">%s</a>" % (target, text if (text and text != '') \ else target) def concat(self, eltlist): string = "" for x in eltlist: string += self.format(x) return string def str_it(self, elt): - return "<i>" + self.concat(elt['content']) + "</i>" + return "<i>" + self.concat(elt.content) + "</i>" def str_bold(self, elt): - return "<b>" + self.concat(elt['content']) + "</b>" + return "<b>" + self.concat(elt.content) + "</b>" def str_hdr(self, elt): - level = elt['level'] + 1 + level = elt.level if level > 4: level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) + return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level) def str_bar(self): - return "<hr/>" + return "<hr/>\n" def str_env(self, elt): - type = elt['envtype'] - lev = elt['level'] + type = elt.envtype + lev = elt.level if lev > 4: lev = 2 string = "" - for s in elt['content']: - n = s['subtype']; + for s in elt.content: + n = 
s.subtype; string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s['content']), + self.format(s.content), self.envt[type]["elt"][n]) return "<%s>%s</%s>" % (self.envt[type]["hdr"], string, self.envt[type]["hdr"]) return string def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return '<pre><code>' + s + '</code></pre>' #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) s += '>' - s += self.format(elt['content']) - return s + '</' + elt['tag'] + '>' + s += self.format(elt.content) + return s + '</' + elt.tag + '>' def str_para(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) - return "<p>" + string + "</p>" + return "<p>" + string + "</p>\n" def str_pre(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) if self.nested: return string return '<pre>' + string + '</pre>' def str_ind(self, elt): - return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level'] + return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: string += s else: - string = elt['content'] + string = elt.content return string - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': return self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': return self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': return self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': return self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': return 
self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': return self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': return self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': return self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': return self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': return self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': return self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': return self.str_ind(elt) - elif elt['type'] == 'SEQ': + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) return string else: return str(elt) def __str__(self): diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index f36c0a1..dfde565 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -98,164 +98,164 @@ class TexiWikiMarkup (WikiMarkup): self._begin_print() for elt in self.tree: self.format(elt) return self._end_print() def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): - for s in elt['content']: + if elt.type == 'TEXT': + if isinstance(elt.content,list): + for s in elt.content: self._print(s) else: - self._print(elt['content']) - elif elt['type'] == 'TAG': + self._print(elt.content) + elif elt.type == 'TAG': self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': 
self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': self.str_ind(elt) - elif elt['type'] == 'SEQ': - for x in elt['content']: + elif elt.type == 'SEQ': + for x in elt.content: self.format(x) else: self._print(str(elt)) def str_tag(self, elt): - if elt['tag'] in ['code', 'tt']: + if elt.tag in ['code', 'tt']: save = self._begin_print() self.nested += 1 - self.format(elt['content']) + self.format(elt.content) self.nested -= 1 s = self._end_print(save) - if elt['isblock']: + if elt.isblock: self._print('@example', nl=True, escape=False) self._print(s, escape=False) self._print('@end example\n', nl=True, escape=False) else: self._print('@code{%s}' % s, escape=False) - elif elt['tag'] == 'div': - if 'args' in elt and 'id' in elt['args']: - self._print("@anchor{%s}\n" % elt['args']['id'], + elif elt.tag == 'div': + if elt.args and 'id' in elt.args: + self._print("@anchor{%s}\n" % elt.args['id'], nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: - self._print('<' + elt['tag']) - if elt['args']: - self._print(' ' + elt['args']) + self._print('<' + elt.tag) + if elt.args: + self._print(' ' + elt.args) self._print('>'); - self.format(elt['content']); - self._print('</' + elt['tag'] + '>') + self.format(elt.content); + self._print('</' + elt.tag + '>') def str_para(self, elt): if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) - for x in elt['content']: + for x in elt.content: self.format(x) if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) def str_pre(self, elt): if not self.nested: self._print('@example\n', nl=True, escape=False) - for x in elt['content']: + for x in elt.content: self.format(x) if not self.nested: self._print('@end example\n', nl=True, escape=False) def concat(self, eltlist): for x in eltlist: self.format(x) def 
str_it(self, elt): self._print('@i{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_bold(self, elt): self._print('@b{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_hdr(self, elt): - level = elt['level'] + level = elt.level if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start: self._print("@* ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print(None, nl=True) if self.sectcomm[self.sectioning_model][0] == '@top': self._print('@node ', nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print('\n') self._print(None, nl=True) def str_bar(self): self._print("\n-----\n") def str_ind(self, elt): - self._print("@w{ }" * elt['level'], nl=True, escape=False) - self.format(elt['content']) + self._print("@w{ }" * elt.level, nl=True, escape=False) + self.format(elt.content) self._print(None, nl=True) def str_env(self, elt): - if elt['envtype'] == 'unnumbered': + if elt.envtype == 'unnumbered': self._print('@itemize @bullet\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end itemize\n', nl=True, escape=False) - elif elt['envtype'] == 'numbered': + elif elt.envtype == 'numbered': self._print('@enumerate\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end enumerate\n', nl=True, escape=False) - elif elt['envtype'] 
== 'defn': + elif elt.envtype == 'defn': self._print('@table @asis\n', nl=True, escape=False) - for s in elt['content']: - if s['subtype'] == 0: + for s in elt.content: + if s.subtype == 0: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) else: - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end table\n', nl=True, escape=False) def str_link(self, elt): save = self._begin_print() - self.format(elt['content'][0]) + self.format(elt.content[0]) arg = self._end_print() - if len(elt['content']) > 1: + if len(elt.content) > 1: s = [] - for x in elt['content'][0:2]: + for x in elt.content[0:2]: self._begin_print() self.format(x) s.append(self._end_print()) text = s[1] else: s = None @@ -276,15 +276,15 @@ class TexiWikiMarkup (WikiMarkup): self._print("@ref{%s}" % qual, escape=False) def str_tmpl(self, elt): self._print("FIXME: str_tmpl not implemented\n") def str_ref(self, elt): - target = elt['ref'] + target = elt.ref save = self._begin_print() - self.format(elt['content']) + self.format(elt.content) text = self._end_print(save) if text and text != '': self._print("@uref{%s,%s}" % (target, text), escape=False) else: self._print("@uref{%s}" % target, escape=False) diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index 916391e..ee1748c 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py @@ -62,15 +62,15 @@ class TextWikiMarkup (WikiMarkup): def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + urllib.quote(tgt) def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + arg = self.format(elt.content[0]) + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] text = s[1] else: s = None text = None if s: @@ -139,115 +139,114 @@ class 
TextWikiMarkup (WikiMarkup): linebuf = "" linebuf += " " * wsc + s length += wsc + wlen return output + linebuf def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return s #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) - s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) + s += '>' + self.format(elt.content) + '</' + elt.tag + '>' return s def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: if string: if string.endswith("."): string += " " else: string += " " string += s else: - string = elt['content'] - elif elt['type'] == 'PRE': + string = elt.content + elif elt.type == 'PRE': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) string += '\n' - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': string = self.str_tag(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: string += " " + s string = "_" + string.lstrip(" ") + "_" - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: if string.endswith("."): string += " " else: string += " " string += s string = string.upper() - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': string = self.fmtlink(elt, False) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': s = self.fmtlink(elt, True) if s: string = '[' + s + ']' 
else: string = s - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': w = self.width if w < 5: w = 5 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt['type'] == 'HDR': - level = elt['level'] - string = "\n" + ("*" * level) + " " + \ - self.format(elt['content']).lstrip(" ") + "\n\n" - elif elt['type'] == 'REF': - string = self.xref(self.format(elt['content']), elt['ref']) - elif elt['type'] == 'ENV': - type = elt['envtype'] - lev = elt['level'] + elif elt.type == 'HDR': + string = "\n" + ("*" * elt.level) + " " + \ + self.format(elt.content).lstrip(" ") + "\n\n" + elif elt.type == 'REF': + string = self.xref(self.format(elt.content), elt.ref) + elif elt.type == 'ENV': + type = elt.envtype + lev = elt.level if lev > self.width - 4: lev = 1 string = "" n = 1 - for s in elt['content']: + for s in elt.content: if not string.endswith("\n"): string += "\n" - x = self.format(s['content']) + x = self.format(s.content) if type == "unnumbered": string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) elif type == "numbered": string += self.fmtpara(self.indent(lev, "%d. %s" % (n, x))) n += 1 elif type == "defn": - if s['subtype'] == 0: + if s.subtype == 0: string += self.indent(lev-1, x) else: string += self.indent(lev+3, x) if not string.endswith("\n"): string += "\n" - elif elt['type'] == 'IND': - string = (" " * elt['level']) + self.format(elt['content']) + '\n' - elif elt['type'] == 'SEQ': + elif elt.type == 'IND': + string = (" " * elt.level) + self.format(elt.content) + '\n' + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: if len(string) > 1 and not string[-1].isspace(): string += ' ' string += self.format(x) else: string = str(elt) return string diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index 2fad0af..f971347 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -16,16 +16,21 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. 
from __future__ import print_function import sys import re from types import * +from wikitoken import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "TagAttributes", "TagAttributeSyntax" ] +class UnexpectedToken(Exception): + def __init__(self, value): + self.value = value + class TagAttributeSyntax(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) @@ -113,143 +118,43 @@ class BaseWikiMarkup(object): nested = 0 debug_level = 0 def dprint(self, lev, fmt, *argv): if self.debug_level >= lev: - print("[DEBUG]", fmt % argv) - - def print_dump_prefix(self, level, file): - file.write("[DUMP]" + ' ' * (2*level + 1)) + for l in (fmt % argv).split('\n'): + print("[DEBUG] %s" % l) - def dump_nil(self, node, level, file): - pass - - def dump_text(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("CONTENT: \"%s\"\n" % node['content']) - - def dump_delim(self, node, level, file): - file.write("'%s'" % node['content']) - if 'continuation' in node and node['continuation']: - file.write(" (cont)") - file.write("\n") - - def dump_tag(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("TAG: %s\n" % node['tag']) - if 'isblock' in node: - self.print_dump_prefix(level, file) - file.write("PLACEMENT: %s\n" % ('BLOCK' if node['isblock'] else 'INLINE')) - if 'args' in node: - self.print_dump_prefix(level, file) - file.write("ARGS: %s\n" % node['args']) - if 'content' in node: - self.dump_node(node['content'], level + 1, file) - - def dump_seq(self, node, level, file): - self.dump(node['content'], level + 1, file) - - def dump_ref(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("REF: %s\n" % node['ref']) - self.dump_node(node['content'], level + 1, file) - - def dump_hdr(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_elt(self, node, 
level, file): - self.print_dump_prefix(level, file) - file.write("SUBTYPE: %s\n" % node['subtype']) - self.dump_node(node['content'], level + 1, file) - - def dump_env(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("ENVTYPE: %s\n" % node['envtype']) - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump(node['content'], level + 1, file) - - def dump_ind(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_link(self, node, level, file): - self.dump(node['content'], level + 1, file) - - dump_type = { - 'NIL': dump_nil, - 'NL': dump_nil, - 'TEXT': dump_text, - 'DELIM': dump_delim, - 'OTAG': dump_tag, - 'CTAG': dump_tag, - 'TAG': dump_tag, - 'SEQ': dump_seq, - 'REF': dump_ref, - 'HDR': dump_hdr, - 'ELT': dump_elt, - 'ENV': dump_env, - 'IND': dump_ind, - 'BAR': dump_nil, - 'PARA': dump_seq, - 'PRE': dump_text, - 'BOLD': dump_seq, - 'IT': dump_seq, - 'LINK': dump_link, - } + inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] - def dump_node(self, node, level, file): - if type(node) != dict: - file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node)) - return - - self.print_dump_prefix(level, file) - file.write("NODE " + node['type'] + ":\n") - if node['type'] in self.dump_type: - self.dump_type[node['type']](self, node, level, file) - else: - self.print_dump_prefix(level, file) - file.write("(UNHANDLED) ") - file.write("%s\n" % node) - self.print_dump_prefix(level, file) - file.write("END NODE " + node['type'] + "\n") - - def dump(self, tree, level=0, file=sys.stdout): - for node in tree: - self.dump_node(node, level, file) - def tokread(self): line = None pos = 0 while 1: if (not line or pos == len(line)): try: line = self.input() pos = 0 except StopIteration: line = u'' if not line or line == "": - yield({ 'type': 'NIL' }) + yield(WikiNode(type='NIL')) break if 
line == '\n': - yield({ 'type': 'NL', 'content': line }) + yield(WikiNode(type='NL')) line = None continue self.dprint(100, "LINE: %s", line[pos:]) m = self.delim.search(line, pos) if m: if (pos < m.start(0)): - yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) + yield(WikiTextNode(content=line[pos:m.start(0)])) pos = m.start(0) t = None if line[m.start(0)] == '<': m = self.otag.match(line, pos) if m: @@ -257,86 +162,79 @@ class BaseWikiMarkup(object): if m.group('tag') == 'nowiki': if not m.group('closed'): while 1: try: m = self.ctag.search(line, pos) if m and m.group('tag') == 'nowiki': - yield({ 'type': 'TEXT', - 'content': line[pos:m.start(0)] }) + yield(WikiTextNode(content=line[pos:m.start(0)] )) pos = m.end(0) break - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = self.input() pos = 0 except StopIteration: break continue elif m.group('tag') in self.tags: try: - t = { 'type': 'OTAG', - 'tag': m.group('tag'), - 'args': TagAttributes(m.group('args')) } - yield(t) + yield(WikiTagNode(type='OTAG', + tag=m.group('tag'), + isblock=(line[pos] == '\n'), + args=TagAttributes(m.group('args')))) if m.group('closed'): - t['type'] = 'CTAG' - yield(t) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) except TagAttributeSyntax: - yield({'type': 'TEXT', - 'content': m.group(0)}) + yield(WikiTextNode(content=m.group(0))) continue else: - yield({ 'type': 'TEXT', - 'content': m.group(0) }) + yield(WikiTextNode(content=m.group(0))) continue else: m = self.ctag.match(line, pos) if m: if m.group('tag') in self.tags: - yield( { 'type': 'CTAG', - 'tag': m.group('tag') } ) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) pos = m.end(0) continue else: - yield( { 'type': 'TEXT', - 'content': line[pos:pos+1] }) + yield(WikiTextNode(content=line[pos:pos+1])) pos += 1 continue else: pos = m.end(0) content = m.group(0) if content[0] in self.envtypes: - t = { 'type': 'DELIM', - 'content': content, - 'continuation': 
pos < len(line) and line[pos] == ":" } - if t['continuation']: - t['content'] += t['content'][0] + node = WikiDelimNode(type='DELIM', + content=content, + isblock=True, + continuation=pos < len(line) and line[pos] == ":") + if node.continuation: + node.content += node.content[0] pos += 1 - yield(t) + yield(node) while pos < len(line) and line[pos] in [' ', '\t']: pos += 1 else: - yield({ 'type': 'DELIM', - 'content': content.strip(), - 'continuation': False}) + yield(WikiDelimNode(type='DELIM', + isblock=(content.strip() not in self.inline_delims), + content=content.strip())) continue |