diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2015-07-22 12:16:38 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2015-07-22 12:16:38 +0300 |
commit | 32be559549aab3d71bee6be566782eef6594442d (patch) | |
tree | 71bde2c14190a742832b9a2591c3fb0aa4c2f75d | |
parent | 64cf5fdb880815ff21652ddb74e48490dd2f56fe (diff) | |
download | wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.gz wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.bz2 |
Major rewrite
Use dedicated classes instead of dictionaries to represent markup tokens.
* WikiTrans/wikitoken.py: New file. Defines Wiki markup tokens.
* WikiTrans/wikimarkup.py: Rewrite.
* WikiTrans/wiki2html.py: Update.
* WikiTrans/wiki2texi.py: Update.
* WikiTrans/wiki2text.py: Update.
* bin/wikitrans: Update.
-rw-r--r-- | WikiTrans/wiki2html.py | 88 | ||||
-rw-r--r-- | WikiTrans/wiki2texi.py | 112 | ||||
-rw-r--r-- | WikiTrans/wiki2text.py | 81 | ||||
-rw-r--r-- | WikiTrans/wikimarkup.py | 923 | ||||
-rw-r--r-- | WikiTrans/wikitoken.py | 166 | ||||
-rwxr-xr-x | bin/wikitrans | 2 | ||||
-rw-r--r-- | testdata/colon.html | 5 | ||||
-rw-r--r-- | testdata/headings.html | 11 | ||||
-rw-r--r-- | testdata/hz.html | 5 | ||||
-rw-r--r-- | testdata/nowiki-ind.html | 7 | ||||
-rw-r--r-- | testdata/nowiki.html | 1 | ||||
-rw-r--r-- | testdata/numlist.html | 4 | ||||
-rw-r--r-- | testdata/para.html | 4 | ||||
-rw-r--r-- | testdata/unlist.html | 4 |
14 files changed, 770 insertions, 643 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 754fa9b..122c91c 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -87,6 +87,6 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) + arg = self.format(elt.content[0]) text = None - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] if s[0] == 'disambigR' or s[0] == 'wikiquote': @@ -135,4 +135,4 @@ class HtmlWikiMarkup (WikiMarkup): def str_ref(self, elt): - target = elt['ref'] - text = self.format(elt['content']) + target = elt.ref + text = self.format(elt.content) return "<a href=\"%s\">%s</a>" % (target, @@ -148,19 +148,19 @@ class HtmlWikiMarkup (WikiMarkup): def str_it(self, elt): - return "<i>" + self.concat(elt['content']) + "</i>" + return "<i>" + self.concat(elt.content) + "</i>" def str_bold(self, elt): - return "<b>" + self.concat(elt['content']) + "</b>" + return "<b>" + self.concat(elt.content) + "</b>" def str_hdr(self, elt): - level = elt['level'] + 1 + level = elt.level if level > 4: level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) + return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level) def str_bar(self): - return "<hr/>" + return "<hr/>\n" def str_env(self, elt): - type = elt['envtype'] - lev = elt['level'] + type = elt.envtype + lev = elt.level if lev > 4: @@ -168,6 +168,6 @@ class HtmlWikiMarkup (WikiMarkup): string = "" - for s in elt['content']: - n = s['subtype']; + for s in elt.content: + n = s.subtype; string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s['content']), + self.format(s.content), self.envt[type]["elt"][n]) @@ -179,5 +179,5 @@ class HtmlWikiMarkup (WikiMarkup): def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) 
self.nested -= 1 @@ -185,8 +185,8 @@ class HtmlWikiMarkup (WikiMarkup): else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) s += '>' - s += self.format(elt['content']) - return s + '</' + elt['tag'] + '>' + s += self.format(elt.content) + return s + '</' + elt.tag + '>' @@ -194,5 +194,5 @@ class HtmlWikiMarkup (WikiMarkup): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) - return "<p>" + string + "</p>" + return "<p>" + string + "</p>\n" @@ -200,3 +200,3 @@ class HtmlWikiMarkup (WikiMarkup): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) @@ -207,40 +207,40 @@ class HtmlWikiMarkup (WikiMarkup): def str_ind(self, elt): - return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level'] + return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: string += s else: - string = elt['content'] + string = elt.content return string - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': return self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': return self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': return self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': return self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': return self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': return self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': return self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': return self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': return self.str_hdr(elt) 
- elif elt['type'] == 'REF': + elif elt.type == 'REF': return self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': return self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': return self.str_ind(elt) - elif elt['type'] == 'SEQ': + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index f36c0a1..dfde565 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -103,34 +103,34 @@ class TexiWikiMarkup (WikiMarkup): def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): - for s in elt['content']: + if elt.type == 'TEXT': + if isinstance(elt.content,list): + for s in elt.content: self._print(s) else: - self._print(elt['content']) - elif elt['type'] == 'TAG': + self._print(elt.content) + elif elt.type == 'TAG': self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': self.str_ind(elt) - elif elt['type'] == 'SEQ': - for x in elt['content']: + elif elt.type == 'SEQ': + for x in elt.content: self.format(x) @@ -140,9 +140,9 @@ class TexiWikiMarkup (WikiMarkup): def str_tag(self, elt): - if elt['tag'] in ['code', 'tt']: + if elt.tag in 
['code', 'tt']: save = self._begin_print() self.nested += 1 - self.format(elt['content']) + self.format(elt.content) self.nested -= 1 s = self._end_print(save) - if elt['isblock']: + if elt.isblock: self._print('@example', nl=True, escape=False) @@ -152,14 +152,14 @@ class TexiWikiMarkup (WikiMarkup): self._print('@code{%s}' % s, escape=False) - elif elt['tag'] == 'div': - if 'args' in elt and 'id' in elt['args']: - self._print("@anchor{%s}\n" % elt['args']['id'], + elif elt.tag == 'div': + if elt.args and 'id' in elt.args: + self._print("@anchor{%s}\n" % elt.args['id'], nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: - self._print('<' + elt['tag']) - if elt['args']: - self._print(' ' + elt['args']) + self._print('<' + elt.tag) + if elt.args: + self._print(' ' + elt.args) self._print('>'); - self.format(elt['content']); - self._print('</' + elt['tag'] + '>') + self.format(elt.content); + self._print('</' + elt.tag + '>') @@ -168,3 +168,3 @@ class TexiWikiMarkup (WikiMarkup): self._print('\n', nl=True) - for x in elt['content']: + for x in elt.content: self.format(x) @@ -176,3 +176,3 @@ class TexiWikiMarkup (WikiMarkup): self._print('@example\n', nl=True, escape=False) - for x in elt['content']: + for x in elt.content: self.format(x) @@ -187,3 +187,3 @@ class TexiWikiMarkup (WikiMarkup): self._print('@i{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) @@ -192,3 +192,3 @@ class TexiWikiMarkup (WikiMarkup): self._print('@b{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) @@ -196,9 +196,9 @@ class TexiWikiMarkup (WikiMarkup): def str_hdr(self, elt): - level = elt['level'] + level = elt.level if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start: self._print("@* ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: 
self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print(None, nl=True) @@ -206,3 +206,3 @@ class TexiWikiMarkup (WikiMarkup): self._print('@node ', nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print('\n') @@ -214,4 +214,4 @@ class TexiWikiMarkup (WikiMarkup): def str_ind(self, elt): - self._print("@w{ }" * elt['level'], nl=True, escape=False) - self.format(elt['content']) + self._print("@w{ }" * elt.level, nl=True, escape=False) + self.format(elt.content) self._print(None, nl=True) @@ -219,7 +219,7 @@ class TexiWikiMarkup (WikiMarkup): def str_env(self, elt): - if elt['envtype'] == 'unnumbered': + if elt.envtype == 'unnumbered': self._print('@itemize @bullet\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) @@ -227,7 +227,7 @@ class TexiWikiMarkup (WikiMarkup): self._print('@end itemize\n', nl=True, escape=False) - elif elt['envtype'] == 'numbered': + elif elt.envtype == 'numbered': self._print('@enumerate\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) @@ -235,11 +235,11 @@ class TexiWikiMarkup (WikiMarkup): self._print('@end enumerate\n', nl=True, escape=False) - elif elt['envtype'] == 'defn': + elif elt.envtype == 'defn': self._print('@table @asis\n', nl=True, escape=False) - for s in elt['content']: - if s['subtype'] == 0: + for s in elt.content: + if s.subtype == 0: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) else: - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) @@ -250,7 +250,7 @@ 
class TexiWikiMarkup (WikiMarkup): save = self._begin_print() - self.format(elt['content'][0]) + self.format(elt.content[0]) arg = self._end_print() - if len(elt['content']) > 1: + if len(elt.content) > 1: s = [] - for x in elt['content'][0:2]: + for x in elt.content[0:2]: self._begin_print() @@ -281,5 +281,5 @@ class TexiWikiMarkup (WikiMarkup): def str_ref(self, elt): - target = elt['ref'] + target = elt.ref save = self._begin_print() - self.format(elt['content']) + self.format(elt.content) text = self._end_print(save) diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index 916391e..ee1748c 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py @@ -67,5 +67,5 @@ class TextWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + arg = self.format(elt.content[0]) + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] text = s[1] @@ -144,5 +144,5 @@ class TextWikiMarkup (WikiMarkup): def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 @@ -150,6 +150,6 @@ class TextWikiMarkup (WikiMarkup): else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) - s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) + s += '>' + self.format(elt.content) + '</' + elt.tag + '>' return s @@ -157,6 +157,6 @@ class TextWikiMarkup (WikiMarkup): def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: if string: @@ -168,18 +168,18 @@ class TextWikiMarkup (WikiMarkup): else: - string = elt['content'] - elif elt['type'] == 'PRE': + string = elt.content + elif 
elt.type == 'PRE': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) string += '\n' - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': string = self.str_tag(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) @@ -188,5 +188,5 @@ class TextWikiMarkup (WikiMarkup): string = "_" + string.lstrip(" ") + "_" - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) @@ -199,5 +199,5 @@ class TextWikiMarkup (WikiMarkup): string = string.upper() - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': string = self.fmtlink(elt, False) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': s = self.fmtlink(elt, True) @@ -207,3 +207,3 @@ class TextWikiMarkup (WikiMarkup): string = s - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': w = self.width @@ -212,11 +212,10 @@ class TextWikiMarkup (WikiMarkup): string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt['type'] == 'HDR': - level = elt['level'] - string = "\n" + ("*" * level) + " " + \ - self.format(elt['content']).lstrip(" ") + "\n\n" - elif elt['type'] == 'REF': - string = self.xref(self.format(elt['content']), elt['ref']) - elif elt['type'] == 'ENV': - type = elt['envtype'] - lev = elt['level'] + elif elt.type == 'HDR': + string = "\n" + ("*" * elt.level) + " " + \ + self.format(elt.content).lstrip(" ") + "\n\n" + elif elt.type == 'REF': + string = self.xref(self.format(elt.content), elt.ref) + elif elt.type == 'ENV': + type = elt.envtype + lev = elt.level if lev > self.width - 4: @@ -225,6 +224,6 @@ class TextWikiMarkup (WikiMarkup): n = 1 - for s in elt['content']: + for s in elt.content: if not 
string.endswith("\n"): string += "\n" - x = self.format(s['content']) + x = self.format(s.content) if type == "unnumbered": @@ -235,3 +234,3 @@ class TextWikiMarkup (WikiMarkup): elif type == "defn": - if s['subtype'] == 0: + if s.subtype == 0: string += self.indent(lev-1, x) @@ -242,7 +241,7 @@ class TextWikiMarkup (WikiMarkup): string += "\n" - elif elt['type'] == 'IND': - string = (" " * elt['level']) + self.format(elt['content']) + '\n' - elif elt['type'] == 'SEQ': + elif elt.type == 'IND': + string = (" " * elt.level) + self.format(elt.content) + '\n' + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: if len(string) > 1 and not string[-1].isspace(): diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index 2fad0af..f971347 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -21,2 +21,3 @@ import re from types import * +from wikitoken import * @@ -25,2 +26,6 @@ __all__ = [ "BaseWikiMarkup", "WikiMarkup", +class UnexpectedToken(Exception): + def __init__(self, value): + self.value = value + class TagAttributeSyntax(Exception): @@ -118,107 +123,7 @@ class BaseWikiMarkup(object): if self.debug_level >= lev: - print("[DEBUG]", fmt % argv) - - def print_dump_prefix(self, level, file): - file.write("[DUMP]" + ' ' * (2*level + 1)) + for l in (fmt % argv).split('\n'): + print("[DEBUG] %s" % l) - def dump_nil(self, node, level, file): - pass - - def dump_text(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("CONTENT: \"%s\"\n" % node['content']) - - def dump_delim(self, node, level, file): - file.write("'%s'" % node['content']) - if 'continuation' in node and node['continuation']: - file.write(" (cont)") - file.write("\n") - - def dump_tag(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("TAG: %s\n" % node['tag']) - if 'isblock' in node: - self.print_dump_prefix(level, file) - file.write("PLACEMENT: %s\n" % ('BLOCK' if node['isblock'] else 
'INLINE')) - if 'args' in node: - self.print_dump_prefix(level, file) - file.write("ARGS: %s\n" % node['args']) - if 'content' in node: - self.dump_node(node['content'], level + 1, file) - - def dump_seq(self, node, level, file): - self.dump(node['content'], level + 1, file) - - def dump_ref(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("REF: %s\n" % node['ref']) - self.dump_node(node['content'], level + 1, file) - - def dump_hdr(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_elt(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("SUBTYPE: %s\n" % node['subtype']) - self.dump_node(node['content'], level + 1, file) - - def dump_env(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("ENVTYPE: %s\n" % node['envtype']) - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump(node['content'], level + 1, file) - - def dump_ind(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_link(self, node, level, file): - self.dump(node['content'], level + 1, file) - - dump_type = { - 'NIL': dump_nil, - 'NL': dump_nil, - 'TEXT': dump_text, - 'DELIM': dump_delim, - 'OTAG': dump_tag, - 'CTAG': dump_tag, - 'TAG': dump_tag, - 'SEQ': dump_seq, - 'REF': dump_ref, - 'HDR': dump_hdr, - 'ELT': dump_elt, - 'ENV': dump_env, - 'IND': dump_ind, - 'BAR': dump_nil, - 'PARA': dump_seq, - 'PRE': dump_text, - 'BOLD': dump_seq, - 'IT': dump_seq, - 'LINK': dump_link, - } + inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] - def dump_node(self, node, level, file): - if type(node) != dict: - file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node)) - return - - self.print_dump_prefix(level, file) - file.write("NODE " + 
node['type'] + ":\n") - if node['type'] in self.dump_type: - self.dump_type[node['type']](self, node, level, file) - else: - self.print_dump_prefix(level, file) - file.write("(UNHANDLED) ") - file.write("%s\n" % node) - self.print_dump_prefix(level, file) - file.write("END NODE " + node['type'] + "\n") - - def dump(self, tree, level=0, file=sys.stdout): - for node in tree: - self.dump_node(node, level, file) - def tokread(self): @@ -235,3 +140,3 @@ class BaseWikiMarkup(object): if not line or line == "": - yield({ 'type': 'NIL' }) + yield(WikiNode(type='NIL')) break @@ -239,3 +144,3 @@ class BaseWikiMarkup(object): if line == '\n': - yield({ 'type': 'NL', 'content': line }) + yield(WikiNode(type='NL')) line = None @@ -248,3 +153,3 @@ class BaseWikiMarkup(object): if (pos < m.start(0)): - yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) + yield(WikiTextNode(content=line[pos:m.start(0)])) pos = m.start(0) @@ -262,4 +167,3 @@ class BaseWikiMarkup(object): if m and m.group('tag') == 'nowiki': - yield({ 'type': 'TEXT', - 'content': line[pos:m.start(0)] }) + yield(WikiTextNode(content=line[pos:m.start(0)] )) pos = m.end(0) @@ -267,4 +171,3 @@ class BaseWikiMarkup(object): - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) @@ -277,16 +180,14 @@ class BaseWikiMarkup(object): try: - t = { 'type': 'OTAG', - 'tag': m.group('tag'), - 'args': TagAttributes(m.group('args')) } - yield(t) + yield(WikiTagNode(type='OTAG', + tag=m.group('tag'), + isblock=(line[pos] == '\n'), + args=TagAttributes(m.group('args')))) if m.group('closed'): - t['type'] = 'CTAG' - yield(t) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) except TagAttributeSyntax: - yield({'type': 'TEXT', - 'content': m.group(0)}) + yield(WikiTextNode(content=m.group(0))) continue else: - yield({ 'type': 'TEXT', - 'content': m.group(0) }) + yield(WikiTextNode(content=m.group(0))) continue @@ -296,4 +197,4 @@ class BaseWikiMarkup(object): if m.group('tag') in 
self.tags: - yield( { 'type': 'CTAG', - 'tag': m.group('tag') } ) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) pos = m.end(0) @@ -301,4 +202,3 @@ class BaseWikiMarkup(object): else: - yield( { 'type': 'TEXT', - 'content': line[pos:pos+1] }) + yield(WikiTextNode(content=line[pos:pos+1])) pos += 1 @@ -309,10 +209,11 @@ class BaseWikiMarkup(object): if content[0] in self.envtypes: - t = { 'type': 'DELIM', - 'content': content, - 'continuation': pos < len(line) and line[pos] == ":" } - if t['continuation']: - t['content'] += t['content'][0] + node = WikiDelimNode(type='DELIM', + content=content, + isblock=True, + continuation=pos < len(line) and line[pos] == ":") + if node.continuation: + node.content += node.content[0] pos += 1 - yield(t) + yield(node) @@ -321,5 +222,5 @@ class BaseWikiMarkup(object): else: - yield({ 'type': 'DELIM', - 'content': content.strip(), - 'continuation': False}) + yield(WikiDelimNode(type='DELIM', + isblock=(content.strip() not in self.inline_delims), + content=content.strip())) continue @@ -329,9 +230,6 @@ class BaseWikiMarkup(object): if line[pos:-1] != '': - yield({ 'type': 'TEXT', - 'content': line[pos:-1] }) - yield({ 'type': 'NL', - 'content': '\n' }) + yield(WikiTextNode(content=line[pos:-1])) + yield(WikiNode(type='NL')) else: - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = None @@ -366,7 +264,7 @@ class BaseWikiMarkup(object): for i in range(0,len(self.toklist)): - if self.toklist[i]['type'] == 'DELIM' \ - and (self.toklist[i]['content'] == "''" \ - or self.toklist[i]['content'] == "'''"): + if self.toklist[i].type == 'DELIM' \ + and (self.toklist[i].content == "''" \ + or self.toklist[i].content == "'''"): if len(stack) > 0: - if self.toklist[stack[-1]]['content'] == self.toklist[i]['content']: + if self.toklist[stack[-1]].content == self.toklist[i].content: # Case 1: just pop the matching delimiter off the stack @@ -379,4 +277,4 @@ class BaseWikiMarkup(object): elif 
i < len(self.toklist) \ - and self.toklist[i+1]['type'] == 'DELIM' \ - and self.toklist[stack[-1]]['content'] == self.toklist[i+1]['content']: + and self.toklist[i+1].type == 'DELIM' \ + and self.toklist[stack[-1]].content == self.toklist[i+1].content: # Case 3: swap current and next tokens @@ -393,6 +291,19 @@ class BaseWikiMarkup(object): for i in stack: - self.toklist[i]['type'] = 'TEXT' + self.toklist[i].type = 'TEXT' # FIXME + + mark = [] + + def push_mark(self): + self.mark.append(self.tokind) - def peektkn(self, off=0): - return self.toklist[self.tokind-off] + def pop_mark(self): + self.tokind = self.mark.pop() + + def clear_mark(self): + self.mark.pop() + + def lookahead(self, off=0): + tok = self.toklist[self.tokind+off] + self.dprint(20, "lookahead(%s): %s", off, tok) + return tok @@ -402,31 +313,176 @@ class BaseWikiMarkup(object): def getkn(self): - self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' + self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' if self.tokind == len(self.toklist): - return { 'type': 'NIL' } + return WikiNode(type='NIL') tok = self.toklist[self.tokind] self.tokind = self.tokind + 1 + self.dprint(20, "getkn: %s", tok) return tok - def ungetkn(self): + def ungetkn(self, tok=None): self.tokind = self.tokind - 1 - self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' + self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' + if tok: + self.toklist[self.tokind] = tok + self.dprint(20, "ungetkn: %s", tok) return self.toklist[self.tokind] + def fixuptkn(self, tok): + if self.tokind == 0: + raise IndexError('wikimarkup.fixuptkn called at start of input') + self.toklist[self.tokind-1] = tok + return tok + + def dump(self, tree, file=sys.stdout): + for node in tree: + file.write(str(node)) + file.write('\n') + + def is_block_end(self, tok): + if tok.type == 'NIL': + return True + elif tok.type == 'NL': + if self.lookahead().type == 'NIL': + 
return True + elif self.lookahead().type == 'NL': + self.getkn() + return True + elif tok.type in ['DELIM', 'CTAG', 'TAG']: + if tok.isblock: + self.ungetkn(tok) + return True + return False + + def parse_para(self, tok): + self.dprint(80, "ENTER parse_para: %s", tok) + + acc = { 'seq': [], + 'textlist': [] } + + def flush(): + if acc['textlist']: + acc['seq'].append(WikiContentNode(type='TEXT', + content=''.join(acc['textlist']))) + acc['textlist'] = [] + + if isinstance(tok, WikiContentNode) \ + and isinstance(tok.content,str) \ + and re.match("^[ \t]", tok.content): + type = 'PRE' + rx = re.compile("^\S") + else: + type = 'PARA' + rx = re.compile("^[ \t]") + + while not self.is_block_end(tok): + if tok.type == 'TEXT': + if rx and self.newline and rx.match(tok.content): + self.ungetkn() + break + acc['textlist'].append(tok.content) + elif tok.type == 'NL': + acc['textlist'].append('\n') + elif tok.type == 'OTAG': + flu |