From 32be559549aab3d71bee6be566782eef6594442d Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Wed, 22 Jul 2015 12:16:38 +0300 Subject: Major rewrite Use dedicated classes, instead of dictionaries, to represent markup tokens * WikiTrans/wikitoken.py: New file. Defines Wiki markup tokens. * WikiTrans/wikimarkup.py: Rewrite. * WikiTrans/wiki2html.py: Update. * WikiTrans/wiki2texi.py: Update. * WikiTrans/wiki2text.py: Update. * bin/wikitrans: Update --- WikiTrans/wiki2html.py | 88 ++--- WikiTrans/wiki2texi.py | 112 +++--- WikiTrans/wiki2text.py | 81 ++--- WikiTrans/wikimarkup.py | 923 ++++++++++++++++++++++------------------------- WikiTrans/wikitoken.py | 166 +++++++++ bin/wikitrans | 2 +- testdata/colon.html | 5 +- testdata/headings.html | 11 +- testdata/hz.html | 5 +- testdata/nowiki-ind.html | 7 +- testdata/nowiki.html | 1 + testdata/numlist.html | 4 +- testdata/para.html | 4 +- testdata/unlist.html | 4 +- 14 files changed, 770 insertions(+), 643 deletions(-) create mode 100644 WikiTrans/wikitoken.py diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 754fa9b..122c91c 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -85,10 +85,10 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) + arg = self.format(elt.content[0]) text = None - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" elif len(s) > 1 and s[1] == 'thumb': @@ -133,8 +133,8 @@ class HtmlWikiMarkup (WikiMarkup): return self.fmtlink(elt, True) def str_ref(self, elt): - target = elt['ref'] - text = self.format(elt['content']) + target = elt.ref + text = self.format(elt.content) return "%s" % (target, text if (text and text != '') \ else target) @@ -146,30 +146,30 @@ class HtmlWikiMarkup (WikiMarkup): return string def str_it(self, elt): - return "" + self.concat(elt['content']) + "" + return "" + self.concat(elt.content) + "" def str_bold(self, elt): - return "" + self.concat(elt['content']) + "" + return "" + self.concat(elt.content) + "" def str_hdr(self, elt): - level = elt['level'] + 1 + level = elt.level if level > 4: level = 4 - return "%s" % (level, self.format(elt['content']), level) + return "%s\n\n" % (level, self.format(elt.content), level) def str_bar(self): - return "
" + return "
\n" def str_env(self, elt): - type = elt['envtype'] - lev = elt['level'] + type = elt.envtype + lev = elt.level if lev > 4: lev = 2 string = "" - for s in elt['content']: - n = s['subtype']; + for s in elt.content: + n = s.subtype; string += "<%s>%s" % (self.envt[type]["elt"][n], - self.format(s['content']), + self.format(s.content), self.envt[type]["elt"][n]) return "<%s>%s" % (self.envt[type]["hdr"], string, @@ -177,72 +177,72 @@ class HtmlWikiMarkup (WikiMarkup): return string def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return '
' + s + '
' #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) s += '>' - s += self.format(elt['content']) - return s + '' + s += self.format(elt.content) + return s + '' def str_para(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) - return "

" + string + "

" + return "

" + string + "

\n" def str_pre(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) if self.nested: return string return '
' + string + '
' def str_ind(self, elt): - return ("
" * elt['level']) + self.format(elt['content']) + "
" * elt['level'] + return ("
" * elt.level) + self.format(elt.content) + "
" * elt.level def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: string += s else: - string = elt['content'] + string = elt.content return string - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': return self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': return self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': return self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': return self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': return self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': return self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': return self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': return self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': return self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': return self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': return self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': return self.str_ind(elt) - elif elt['type'] == 'SEQ': + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) return string else: diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index f36c0a1..dfde565 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -101,72 +101,72 @@ class TexiWikiMarkup (WikiMarkup): return self._end_print() def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): - for s in elt['content']: + if elt.type == 'TEXT': + if isinstance(elt.content,list): + for s in elt.content: self._print(s) else: - self._print(elt['content']) - elif elt['type'] == 'TAG': + self._print(elt.content) + elif elt.type == 'TAG': self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': self.str_ind(elt) - elif elt['type'] == 'SEQ': - for x in elt['content']: + elif elt.type == 'SEQ': + for x in elt.content: self.format(x) else: self._print(str(elt)) def str_tag(self, elt): - if elt['tag'] in ['code', 'tt']: + if elt.tag in ['code', 'tt']: save = self._begin_print() self.nested += 1 - self.format(elt['content']) + self.format(elt.content) self.nested -= 1 s = self._end_print(save) - if elt['isblock']: + if elt.isblock: self._print('@example', nl=True, escape=False) self._print(s, escape=False) self._print('@end example\n', nl=True, escape=False) else: self._print('@code{%s}' % s, escape=False) - elif elt['tag'] == 'div': - if 'args' in elt and 'id' in elt['args']: - self._print("@anchor{%s}\n" % elt['args']['id'], + elif elt.tag == 'div': + if elt.args and 'id' in elt.args: + self._print("@anchor{%s}\n" % elt.args['id'], nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: - self._print('<' + elt['tag']) - if elt['args']: - self._print(' ' + elt['args']) + self._print('<' + elt.tag) + if elt.args: + self._print(' ' + elt.args) self._print('>'); - self.format(elt['content']); - self._print('') + self.format(elt.content); + self._print('') def str_para(self, elt): if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) - for x in elt['content']: + for x in elt.content: self.format(x) if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) @@ -174,7 +174,7 @@ class TexiWikiMarkup (WikiMarkup): def str_pre(self, elt): if not self.nested: self._print('@example\n', nl=True, escape=False) - for x in elt['content']: + for x in elt.content: self.format(x) if not self.nested: self._print('@end example\n', nl=True, escape=False) @@ -185,26 +185,26 @@ class TexiWikiMarkup (WikiMarkup): def str_it(self, elt): self._print('@i{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_bold(self, elt): self._print('@b{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_hdr(self, elt): - level = elt['level'] + level = elt.level if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start: self._print("@* ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print(None, nl=True) if self.sectcomm[self.sectioning_model][0] == '@top': self._print('@node ', nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print('\n') self._print(None, nl=True) @@ -212,47 +212,47 @@ class TexiWikiMarkup (WikiMarkup): self._print("\n-----\n") def str_ind(self, elt): - self._print("@w{ }" * elt['level'], nl=True, escape=False) - self.format(elt['content']) + self._print("@w{ }" * elt.level, nl=True, escape=False) + self.format(elt.content) self._print(None, nl=True) def str_env(self, elt): - if elt['envtype'] == 'unnumbered': + if elt.envtype == 'unnumbered': self._print('@itemize @bullet\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end itemize\n', nl=True, escape=False) - elif elt['envtype'] == 'numbered': + elif elt.envtype == 'numbered': self._print('@enumerate\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end enumerate\n', nl=True, escape=False) - elif elt['envtype'] == 'defn': + elif elt.envtype == 'defn': self._print('@table @asis\n', nl=True, escape=False) - for s in elt['content']: - if s['subtype'] == 0: + for s in elt.content: + if s.subtype == 0: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) else: - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end table\n', nl=True, escape=False) def str_link(self, elt): save = self._begin_print() - self.format(elt['content'][0]) + self.format(elt.content[0]) arg = self._end_print() - if len(elt['content']) > 1: + if len(elt.content) > 1: s = [] - for x in elt['content'][0:2]: + for x in elt.content[0:2]: self._begin_print() self.format(x) s.append(self._end_print()) @@ -279,9 +279,9 @@ class TexiWikiMarkup (WikiMarkup): self._print("FIXME: str_tmpl not implemented\n") def str_ref(self, elt): - target = elt['ref'] + target = elt.ref save = self._begin_print() - self.format(elt['content']) + self.format(elt.content) text = self._end_print(save) if text and text != '': self._print("@uref{%s,%s}" % (target, text), escape=False) diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index 916391e..ee1748c 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py @@ -65,9 +65,9 @@ class TextWikiMarkup (WikiMarkup): return self.html_base % { 'lang' : lang } + urllib.quote(tgt) def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + arg = self.format(elt.content[0]) + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] text = s[1] else: s = None @@ -142,23 +142,23 @@ class TextWikiMarkup (WikiMarkup): return output + linebuf def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return s #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) - s += '>' + self.format(elt['content']) + '' + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) + s += '>' + self.format(elt.content) + '' return s def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: if string: if string.endswith("."): string += " " @@ -166,29 +166,29 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s else: - string = elt['content'] - elif elt['type'] == 'PRE': + string = elt.content + elif elt.type == 'PRE': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) string += '\n' - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': string = self.str_tag(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: string += " " + s string = "_" + string.lstrip(" ") + "_" - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: if string.endswith("."): @@ -197,54 +197,53 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s string = string.upper() - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': string = self.fmtlink(elt, False) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': s = self.fmtlink(elt, True) if s: string = '[' + s + ']' else: string = s - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': w = self.width if w < 5: w = 5 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt['type'] == 'HDR': - level = elt['level'] - string = "\n" + ("*" * level) + " " + \ - self.format(elt['content']).lstrip(" ") + "\n\n" - elif elt['type'] == 'REF': - string = self.xref(self.format(elt['content']), elt['ref']) - elif elt['type'] == 'ENV': - type = elt['envtype'] - lev = elt['level'] + elif elt.type == 'HDR': + string = "\n" + ("*" * elt.level) + " " + \ + self.format(elt.content).lstrip(" ") + "\n\n" + elif elt.type == 'REF': + string = self.xref(self.format(elt.content), elt.ref) + elif elt.type == 'ENV': + type = elt.envtype + lev = elt.level if lev > self.width - 4: lev = 1 string = "" n = 1 - for s in elt['content']: + for s in elt.content: if not string.endswith("\n"): string += "\n" - x = self.format(s['content']) + x = self.format(s.content) if type == "unnumbered": string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) elif type == "numbered": string += self.fmtpara(self.indent(lev, "%d. %s" % (n, x))) n += 1 elif type == "defn": - if s['subtype'] == 0: + if s.subtype == 0: string += self.indent(lev-1, x) else: string += self.indent(lev+3, x) if not string.endswith("\n"): string += "\n" - elif elt['type'] == 'IND': - string = (" " * elt['level']) + self.format(elt['content']) + '\n' - elif elt['type'] == 'SEQ': + elif elt.type == 'IND': + string = (" " * elt.level) + self.format(elt.content) + '\n' + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: if len(string) > 1 and not string[-1].isspace(): string += ' ' string += self.format(x) diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index 2fad0af..f971347 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -19,10 +19,15 @@ from __future__ import print_function import sys import re from types import * +from wikitoken import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "TagAttributes", "TagAttributeSyntax" ] +class UnexpectedToken(Exception): + def __init__(self, value): + self.value = value + class TagAttributeSyntax(Exception): def __init__(self, value): self.value = value @@ -116,111 +121,11 @@ class BaseWikiMarkup(object): def dprint(self, lev, fmt, *argv): if self.debug_level >= lev: - print("[DEBUG]", fmt % argv) - - def print_dump_prefix(self, level, file): - file.write("[DUMP]" + ' ' * (2*level + 1)) + for l in (fmt % argv).split('\n'): + print("[DEBUG] %s" % l) - def dump_nil(self, node, level, file): - pass - - def dump_text(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("CONTENT: \"%s\"\n" % node['content']) - - def dump_delim(self, node, level, file): - file.write("'%s'" % node['content']) - if 'continuation' in node and node['continuation']: - file.write(" (cont)") - file.write("\n") - - def dump_tag(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("TAG: %s\n" % node['tag']) - if 'isblock' in node: - self.print_dump_prefix(level, file) - file.write("PLACEMENT: %s\n" % ('BLOCK' if node['isblock'] else 'INLINE')) - if 'args' in node: - self.print_dump_prefix(level, file) - file.write("ARGS: %s\n" % node['args']) - if 'content' in node: - self.dump_node(node['content'], level + 1, file) - - def dump_seq(self, node, level, file): - self.dump(node['content'], level + 1, file) - - def dump_ref(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("REF: %s\n" % node['ref']) - self.dump_node(node['content'], level + 1, file) - - def dump_hdr(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_elt(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("SUBTYPE: %s\n" % node['subtype']) - self.dump_node(node['content'], level + 1, file) - - def dump_env(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("ENVTYPE: %s\n" % node['envtype']) - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump(node['content'], level + 1, file) - - def dump_ind(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_link(self, node, level, file): - self.dump(node['content'], level + 1, file) - - dump_type = { - 'NIL': dump_nil, - 'NL': dump_nil, - 'TEXT': dump_text, - 'DELIM': dump_delim, - 'OTAG': dump_tag, - 'CTAG': dump_tag, - 'TAG': dump_tag, - 'SEQ': dump_seq, - 'REF': dump_ref, - 'HDR': dump_hdr, - 'ELT': dump_elt, - 'ENV': dump_env, - 'IND': dump_ind, - 'BAR': dump_nil, - 'PARA': dump_seq, - 'PRE': dump_text, - 'BOLD': dump_seq, - 'IT': dump_seq, - 'LINK': dump_link, - } + inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] - def dump_node(self, node, level, file): - if type(node) != dict: - file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node)) - return - - self.print_dump_prefix(level, file) - file.write("NODE " + node['type'] + ":\n") - if node['type'] in self.dump_type: - self.dump_type[node['type']](self, node, level, file) - else: - self.print_dump_prefix(level, file) - file.write("(UNHANDLED) ") - file.write("%s\n" % node) - self.print_dump_prefix(level, file) - file.write("END NODE " + node['type'] + "\n") - - def dump(self, tree, level=0, file=sys.stdout): - for node in tree: - self.dump_node(node, level, file) - def tokread(self): line = None pos = 0 @@ -233,11 +138,11 @@ class BaseWikiMarkup(object): line = u'' if not line or line == "": - yield({ 'type': 'NIL' }) + yield(WikiNode(type='NIL')) break if line == '\n': - yield({ 'type': 'NL', 'content': line }) + yield(WikiNode(type='NL')) line = None continue @@ -246,7 +151,7 @@ class BaseWikiMarkup(object): if m: if (pos < m.start(0)): - yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) + yield(WikiTextNode(content=line[pos:m.start(0)])) pos = m.start(0) t = None @@ -260,13 +165,11 @@ class BaseWikiMarkup(object): try: m = self.ctag.search(line, pos) if m and m.group('tag') == 'nowiki': - yield({ 'type': 'TEXT', - 'content': line[pos:m.start(0)] }) + yield(WikiTextNode(content=line[pos:m.start(0)] )) pos = m.end(0) break - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = self.input() pos = 0 @@ -275,65 +178,60 @@ class BaseWikiMarkup(object): continue elif m.group('tag') in self.tags: try: - t = { 'type': 'OTAG', - 'tag': m.group('tag'), - 'args': TagAttributes(m.group('args')) } - yield(t) + yield(WikiTagNode(type='OTAG', + tag=m.group('tag'), + isblock=(line[pos] == '\n'), + args=TagAttributes(m.group('args')))) if m.group('closed'): - t['type'] = 'CTAG' - yield(t) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) except TagAttributeSyntax: - yield({'type': 'TEXT', - 'content': m.group(0)}) + yield(WikiTextNode(content=m.group(0))) continue else: - yield({ 'type': 'TEXT', - 'content': m.group(0) }) + yield(WikiTextNode(content=m.group(0))) continue else: m = self.ctag.match(line, pos) if m: if m.group('tag') in self.tags: - yield( { 'type': 'CTAG', - 'tag': m.group('tag') } ) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) pos = m.end(0) continue else: - yield( { 'type': 'TEXT', - 'content': line[pos:pos+1] }) + yield(WikiTextNode(content=line[pos:pos+1])) pos += 1 continue else: pos = m.end(0) content = m.group(0) if content[0] in self.envtypes: - t = { 'type': 'DELIM', - 'content': content, - 'continuation': pos < len(line) and line[pos] == ":" } - if t['continuation']: - t['content'] += t['content'][0] + node = WikiDelimNode(type='DELIM', + content=content, + isblock=True, + continuation=pos < len(line) and line[pos] == ":") + if node.continuation: + node.content += node.content[0] pos += 1 - yield(t) + yield(node) while pos < len(line) and line[pos] in [' ', '\t']: pos += 1 else: - yield({ 'type': 'DELIM', - 'content': content.strip(), - 'continuation': False}) + yield(WikiDelimNode(type='DELIM', + isblock=(content.strip() not in self.inline_delims), + content=content.strip())) continue if line: if line[-1] == '\n': if line[pos:-1] != '': - yield({ 'type': 'TEXT', - 'content': line[pos:-1] }) - yield({ 'type': 'NL', - 'content': '\n' }) + yield(WikiTextNode(content=line[pos:-1])) + yield(WikiNode(type='NL')) else: - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = None @@ -364,11 +262,11 @@ class BaseWikiMarkup(object): # 3b. ''a b '''c d''''' stack = [] for i in range(0,len(self.toklist)): - if self.toklist[i]['type'] == 'DELIM' \ - and (self.toklist[i]['content'] == "''" \ - or self.toklist[i]['content'] == "'''"): + if self.toklist[i].type == 'DELIM' \ + and (self.toklist[i].content == "''" \ + or self.toklist[i].content == "'''"): if len(stack) > 0: - if self.toklist[stack[-1]]['content'] == self.toklist[i]['content']: + if self.toklist[stack[-1]].content == self.toklist[i].content: # Case 1: just pop the matching delimiter off the stack stack.pop() elif len(stack) == 2 and stack[-2] + 1 == stack[-1]: @@ -377,8 +275,8 @@ class BaseWikiMarkup(object): # and pop off the matching one stack.pop() elif i < len(self.toklist) \ - and self.toklist[i+1]['type'] == 'DELIM' \ - and self.toklist[stack[-1]]['content'] == self.toklist[i+1]['content']: + and self.toklist[i+1].type == 'DELIM' \ + and self.toklist[stack[-1]].content == self.toklist[i+1].content: # Case 3: swap current and next tokens self.swaptkn(i, i+1) # and pop off the matching one @@ -391,440 +289,487 @@ class BaseWikiMarkup(object): stack.append(i) # Redefine all non-matched tokens as TEXT for i in stack: - self.toklist[i]['type'] = 'TEXT' + self.toklist[i].type = 'TEXT' # FIXME + + mark = [] + + def push_mark(self): + self.mark.append(self.tokind) - def peektkn(self, off=0): - return self.toklist[self.tokind-off] + def pop_mark(self): + self.tokind = self.mark.pop() + + def clear_mark(self): + self.mark.pop() + + def lookahead(self, off=0): + tok = self.toklist[self.tokind+off] + self.dprint(20, "lookahead(%s): %s", off, tok) + return tok def setkn(self,val): self.toklist[self.tokind] = val def getkn(self): - self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' + self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' if self.tokind == len(self.toklist): - return { 'type': 'NIL' } + return WikiNode(type='NIL') tok = self.toklist[self.tokind] self.tokind = self.tokind + 1 + self.dprint(20, "getkn: %s", tok) return tok - def ungetkn(self): + def ungetkn(self, tok=None): self.tokind = self.tokind - 1 - self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' + self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' + if tok: + self.toklist[self.tokind] = tok + self.dprint(20, "ungetkn: %s", tok) return self.toklist[self.tokind] + def fixuptkn(self, tok): + if self.tokind == 0: + raise IndexError('wikimarkup.fixuptkn called at start of input') + self.toklist[self.tokind-1] = tok + return tok + + def dump(self, tree, file=sys.stdout): + for node in tree: + file.write(str(node)) + file.write('\n') + + def is_block_end(self, tok): + if tok.type == 'NIL': + return True + elif tok.type == 'NL': + if self.lookahead().type == 'NIL': + return True + elif self.lookahead().type == 'NL': + self.getkn() + return True + elif tok.type in ['DELIM', 'CTAG', 'TAG']: + if tok.isblock: + self.ungetkn(tok) + return True + return False + + def parse_para(self, tok): + self.dprint(80, "ENTER parse_para: %s", tok) + + acc = { 'seq': [], + 'textlist': [] } + + def flush(): + if acc['textlist']: + acc['seq'].append(WikiContentNode(type='TEXT', + content=''.join(acc['textlist']))) + acc['textlist'] = [] + + if isinstance(tok, WikiContentNode) \ + and isinstance(tok.content,str) \ + and re.match("^[ \t]", tok.content): + type = 'PRE' + rx = re.compile("^\S") + else: + type = 'PARA' + rx = re.compile("^[ \t]") + + while not self.is_block_end(tok): + if tok.type == 'TEXT': + if rx and self.newline and rx.match(tok.content): + self.ungetkn() + break + acc['textlist'].append(tok.content) + elif tok.type == 'NL': + acc['textlist'].append('\n') + elif tok.type == 'OTAG': + flush() + acc['seq'].append(self.parse_tag(tok)) + elif tok.type == 'DELIM': + flush() + acc['seq'].append(self.parse_inline_delim(tok)) + else: + raise UnexpectedToken(tok) + tok = self.getkn() + flush() + if acc['seq']: + tok = WikiSeqNode(type=type, content=acc['seq']) + else: + tok = None + self.dprint(80, "LEAVE parse_para=%s", tok) + return tok + + def parse_block_delim(self, tok): + self.dprint(80, "ENTER parse_block_delim") + assert(tok.type == 'DELIM') + if tok.content == "----": + node = WikiNode(type = 'BAR') + elif tok.content[0:2] == "==": + node = self.parse_header(tok) + if not node: + tok = self.ungetkn(WikiTextNode(content=tok.content)) + elif tok.content[0] in self.envtypes: + node = None + if tok.content[0] == ':': + t = self.lookahead(-2) + if not (t.type == 'DELIM' and t.content == ';'): + node = self.parse_indent(tok) + if not node: + node = self.parse_env(tok) + else: + self.ungetkn(tok) + node = None + self.dprint(80, "LEAVE parse_block_delim=%s", node) + return node + + def parse_line(self): + self.dprint(80, "ENTER parse_line") + list = [] + while True: + tok = self.getkn() + if tok.type == 'NL' or tok.type == 'NIL': + break + elif tok.type == 'TEXT': + list.append(tok) + elif tok.type == 'DELIM': + if tok.isblock: + tok = WikiContentNode(type = 'TEXT', content = tok.content) + self.fixuptkn(tok) + list.append(tok) + elif tok.content[0] == ":": + # FIXME + list.append(self.parse_indent(tok)) + break + else: + x = self.parse_inline_delim(tok) + if x: + list.append(x) + else: + list.append(self.fixuptkn(WikiContentNode(type = 'TEXT', content = tok.content))) + elif tok.type == 'OTAG': + if tok.isblock: + self.ungetkn() + break + list.append(self.parse_tag(tok)) + else: + list.append(tok) + ret = WikiSeqNode(type='SEQ', content=list) + self.dprint(80, "LEAVE parse_line=%s", ret) + return ret + + def parse_indent(self, tok): + lev = len(tok.content) + self.dprint(80, "ENTER parse_indent(%s)", lev) + x = WikiIndNode(type='IND', level=lev, content=self.parse_line()) + self.dprint(80, "LEAVE parse_indent=%s", x) + return x + def parse_fontmod(self,delim,what): self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s", - delim, what, self.peektkn()) + delim, what, self.lookahead()) seq = [] text = '' - while 1: + while True: tok = self.getkn() - if tok['type'] == 'TEXT': - text += tok['content'] - elif tok['type'] == 'DELIM': - if tok['content'] == delim: + if tok.type == 'TEXT': + text += tok.content + elif self.is_block_end(tok): + self.dprint(80, "LEAVE parse_fontmod=%s", "None") + return None + elif tok.type == 'DELIM': +# self.dprint(80, "got %s, want %s", tok.content, delim) + if tok.content == delim: break - elif self.is_inline_delim(tok): + else: if text: - seq.append({ 'type': 'TEXT', 'content': text }) + seq.append(WikiContentNode(type='TEXT', content=text)) text = '' - x = self.parse_inline(tok) + x = self.parse_inline_delim(tok) if x: seq.append(x) else: self.dprint(80, "LEAVE parse_fontmod=%s", "None") return None - else: - self.dprint(80, "LEAVE parse_fontmod=None") - return None - elif tok['type'] == 'NL': - if self.peektkn()['type'] == 'NL': - self.dprint(80, "LEAVE parse_fontmod=None") - return None - seq.append({ 'type': 'TEXT', 'content': '\n' }) + elif tok.type == 'NL': + seq.append(WikiContentNode(type='TEXT', content='\n')) else: self.dprint(80, "LEAVE parse_fontmod=None") return None if text: - seq.append({ 'type': 'TEXT', 'content': text }) - res = { 'type': what, 'content': seq } + seq.append(WikiContentNode(type='TEXT', content=text)) + res = WikiSeqNode(type=what, content=seq) self.dprint(80, "LEAVE parse_fontmod=%s", res) return res - def parse_link(self, type, delim): - self.dprint(80, "ENTER parse_link(%s,%s), tok %s", - type, delim, self.peektkn()) - subtree = [] - list = [] - while 1: - tok = self.getkn() - if tok['type'] == 'DELIM': - if tok['content'] == delim: - if list: - subtree.append({ 'type': 'SEQ', 'content': list }) - break - elif tok['content'] == "|": - if len(list) > 1: - subtree.append({ 'type': 'SEQ', 'content': list }) - elif list: - subtree.append(list[0]) - list = [] - else: - x = self.parse_inline(tok) - if x: - list.append(x) - else: - self.dprint(80, "LEAVE parse_link=%s", "None") - return None - elif tok['type'] == 'TEXT': - list.append(tok) - else: - self.dprint(80, "LEAVE parse_link=%s", "None") - return None - self.dprint(80, "LEAVE parse_link=(%s,%s)", type, subtree) - return { 'type': type, 'content': subtree } - def parse_ref(self): + self.dprint(80, "ENTER parse_ref") tok = self.getkn() - self.dprint(80, "ENTER parse_ref, tok %s", tok) - if not (tok['type'] == 'TEXT' and self.refstart.match(tok['content'])): + if not (tok.type == 'TEXT' and self.refstart.match(tok.content)): self.dprint(80, "LEAVE parse_ref=None") return None seq = [] - (ref,sep,text) = tok['content'].partition(' ') + (ref,sep,text) = tok.content.partition(' ') if text: - seq.insert(0, {'type': 'TEXT', 'content': text }) + seq.insert(0, WikiContentNode(type='TEXT', content=text)) - while 1: + while True: tok = self.getkn() - if tok == None or tok['type'] == 'NIL': + if tok.type == 'NIL': self.dprint(80, "LEAVE parse_ref=None") return None - if tok['type'] == 'DELIM': - if tok['content'] == ']': + elif self.is_block_end(tok): + self.dprint(80, "LEAVE parse_ref=None") + return None + elif tok.type == 'DELIM': + if tok.content == ']': break else: - tok = self.parse_inline(tok) + tok = self.parse_inline_delim(tok) if tok: seq.append(tok) else: self.dprint(80, "LEAVE parse_ref=None") return None - elif tok['type'] == 'OTAG': + elif tok.type == 'OTAG': list.append(self.parse_tag(tok)) else: seq.append(tok) - ret = { 'type': 'REF', - 'ref': ref, - 'content': { 'type': 'SEQ', 'content': seq } } + ret = WikiRefNode(type='REF', + ref=ref, + content=WikiSeqNode(type='SEQ', content=seq)) self.dprint(80, "LEAVE parse_ref= %s", ret) return ret - inline_delims = [ "''", "'''", "[", "[[", "{{", "|" ] - - def is_inline_delim(self, tok): - return tok['type'] == 'DELIM' and tok['content'] in self.inline_delims - def is_block_delim(self, tok): - return tok['type'] == 'DELIM' and tok['content'] not in self.inline_delims + def parse_link(self, type, delim): + self.dprint(80, "ENTER parse_link(%s,%s)", type, delim) + subtree = [] + list = [] + while True: + tok = self.getkn() + if tok.type == 'NIL': + self.dprint(80, "LEAVE parse_link=None [EOF]") + return None + if tok.type == 'DELIM': + if tok.content == delim: + if list: + subtree.append(WikiSeqNode(type='SEQ', + content=list)) + break + elif tok.content == "|": + if len(list) > 1: + subtree.append(WikiSeqNode(type='SEQ', + content=list)) + elif list: + subtree.append(list[0]) + list = [] + else: + x = self.parse_inline_delim(tok) + if x: + list.append(x) + else: + self.dprint(80, "LEAVE parse_link=None [bad inline]") + return None + elif tok.type == 'TEXT': + list.append(tok) + else: + self.dprint(80, "LEAVE parse_link=None [unexpected token]") + return None + ret = WikiSeqNode(type=type, content=subtree) + self.dprint(80, "LEAVE parse_link=%s", ret) + return ret - def parse_inline(self, tok): - self.dprint(80, "ENTER parse_inline(%s), tok %s", tok, self.peektkn()) - tokind = self.tokind - if tok['content'] == "''": - x = self.parse_fontmod(tok['content'], 'IT') - elif tok['content'] == "'''": - x = self.parse_fontmod(tok['content'], 'BOLD') - elif tok['content'] == "[": + def parse_inline_delim(self, tok): + self.dprint(80, "ENTER parse_inline_delim") + assert(tok.type == 'DELIM') + self.push_mark() + if tok.content == "''": + x = self.parse_fontmod(tok.content, 'IT') + elif tok.content == "'''": + x = self.parse_fontmod(tok.content, 'BOLD') + elif tok.content == "[": x = self.parse_ref() - elif tok['content'] == "[[": + elif tok.content == "[[": x = self.parse_link('LINK', "]]") - elif tok['content'] == "{{": + elif tok.content == "{{": x = self.parse_link('TMPL', "}}") else: - self.dprint(80, "LEAVE parse_inline=%s (unhandled delimiter)", "None") x = None - if not x: - self.tokind = tokind - tok['type'] = 'TEXT' + + if x: + self.clear_mark() + else: self.dprint(80, "BEGIN DELIMITER RECOVERY: %s", tok) - od = tok['content'] + self.pop_mark() + x = self.fixuptkn(WikiTextNode(content=tok.content)) + od = tok.content if od in self.close_delim: cd = self.close_delim[od] lev = 0 - for tok in self.toklist[self.tokind+1:]: - if tok['type'] == 'NIL': + for i,tok in enumerate(self.toklist[self.tokind+1:]): + if tok.type == 'NIL': break - elif tok['type'] == 'DELIM': - if tok['content'] == od: + elif tok.type == 'DELIM': + if tok.content == od: lev += 1 - elif tok['content'] == cd: + elif tok.content == cd: if lev == 0: - tok['type'] = 'TEXT' + tok = WikiTextNode(content=tok.content) + self.toklist[self.tokind+1+i] = tok lev -= 1 break self.dprint(80, "END DELIMITER RECOVERY: %s", tok) - self.dprint(80, "LEAVE parse_inline=%s", x) + self.dprint(80, "LEAVE parse_inline_delim=%s", x) return x - - def parse_para(self): - self.dprint(80, "ENTER parse_para, tok %s", self.peektkn()) - seq = [] - textlist = [] - tok = self.peektkn() - - if self.newline: - if 'content' in tok and re.match("^\s", tok['content']): - type = 'PRE' - rx = re.compile("^\S") - else: - type = 'PARA' - rx = re.compile("^\s") - else: - type = 'SEQ' - rx = None - self.dprint(80, "IN parse_para, type %s", type) - while 1: - tok = self.getkn() - if tok['type'] == 'TEXT': - if rx and self.newline and rx.match(tok['content']): - self.ungetkn() - break - textlist.append(tok['content']) - elif tok['type'] == 'NL': - tok = self.getkn() - if tok['type'] == 'NL' or tok['type'] == 'NIL': - break - else: - self.ungetkn() - if self.is_block_delim(tok): - break - textlist.append('\n') - elif tok['type'] == 'NIL': - break - elif tok['type'] == 'OTAG': - save = (self.tokind,self.newline) - t = self.parse_tag(tok) - if t['type'] == 'TAG' and t['isblock']: - del self.toklist[save[0]:self.tokind] - self.tokind = save[0] - self.toklist[self.tokind] = t - self.newline = save[1] - break - else: - if textlist: - seq.append({ 'type': 'TEXT', - 'content': ''.join(textlist) }) - textlist = [] - seq.append(t) - elif tok['type'] == 'TAG': - if tok['isblock']: - break - else: - if textlist: - seq.append({ 'type': 'TEXT', - 'content': ''.join(textlist) }) - textlist = [] - seq.append(tok) - elif tok['type'] == 'CTAG': - self.ungetkn() - break - elif tok['type'] == 'DELIM': - if self.is_inline_delim(tok): - if textlist: - seq.append({ 'type': 'TEXT', - 'content': ''.join(textlist) }) - textlist = [] - x = self.parse_inline(tok) - if x: - seq.append(x) - else: - self.ungetkn() - # restart - else: - seq.append({ 'type': 'TEXT', 'content': tok['content'] }) - # self.ungetkn() - break - if textlist: - seq.append({ 'type': 'TEXT', 'content': ''.join(textlist) }) - self.dprint(80, "LEAVE parse_para=%s", seq) - return { 'type': type, 'content': seq } - - def parse_header(self, delim): - self.dprint(80, "ENTER parse_header(%s), tok %s", delim, self.peektkn()) + + def parse_tag(self, tag): + self.dprint(80, "ENTER parse_tag") list = [] - while 1: + self.push_mark() + while True: tok = self.getkn() - if tok['type'] == 'NIL': - self.dprint(80, "LEAVE parse_header=%s", "None") - return None - elif tok['type'] == 'TEXT': - list.append(tok) - elif tok['type'] == 'DELIM': - if tok['content'] == delim: - if self.peektkn()['type'] == 'NL': - break - else: - self.dprint(80, "LEAVE parse_header=%s, tok=%s", - "None", self.peektkn()) - return None + if tok.type == 'NIL': + self.pop_mark() + s = '<' + tag.tag + if tag.args: + s += ' ' + str(tag.args) + s += '>' + node = WikiTextNode(content=s) + if tag.content: + self.tree[self.tokind:self.tokind] = tag.content + self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node) + return node + elif tok.type == 'DELIM': + if tok.isblock: + tok = self.parse_block_delim(tok) else: - x = self.parse_inline(tok) - if x: - list.append(x) - else: - self.ungetkn() - self.dprint(80, "LEAVE parse_header=%s", "None") - return None #FIXME? - else: - self.dprint(80, "LEAVE parse_header=%s", "None") - return None - self.dprint(80, "LEAVE parse_header=(HDR, %s, (SEQ,%s))",len(delim)-1,list) - return { 'type': 'HDR', - 'level': len(delim)-1, - 'content': { 'type': 'SEQ', 'content': list } } - - - def parse_line(self): - self.dprint(80, "ENTER parse_line, tok %s", self.peektkn()) - list = [] - while 1: - tok = self.getkn() - if tok['type'] == 'NL' or tok['type'] == 'NIL': - break - elif tok['type'] == 'TEXT': - list.append(tok) - elif tok['type'] == 'DELIM': - if tok['content'][0] == ":": - list.append(self.parse_indent(len(tok['content']))) + tok = self.parse_inline_delim(tok) + if not tok: + tok = self.getkn() + elif tok.type == 'CTAG': + if tag.tag == tok.tag: break - else: - x = self.parse_inline(tok) - if x: - list.append(x) - else: - list.append(tok) - elif tok['type'] == 'OTAG': - list.append(self.parse_tag(tok)) - else: - list.append(tok) - self.dprint(80, "LEAVE parse_line=(SEQ, %s)", list) - return { 'type': 'SEQ', 'content': list } - - def parse_env(self, type, lev): - self.dprint(80, "ENTER parse_env(%s,%s), tok %s",type,lev,self.peektkn()) + s = '' + tok = self.fixuptkn(WikiTextNode(content=s)) + elif tok.type == 'NL': + tok = WikiContentNode(type = 'TEXT', content = '\n') + list.append(tok) + + self.clear_mark() + ret = WikiTagNode(type = 'TAG', + tag = tag.tag, + args = tag.args, + isblock = tag.isblock, + content = WikiSeqNode(type = 'SEQ', content = list)) + self.dprint(80, "LEAVE parse_tag = %s", ret) + return ret + + def parse_env(self, tok): + type = self.envtypes[tok.content[0]][0] + lev = len(tok.content) + self.dprint(80, "ENTER parse_env(%s,%s)",type,lev) list = [] - while 1: - tok = self.getkn() - if tok['type'] == 'DELIM' \ - and tok['content'][0] in self.envtypes \ - and type == self.envtypes[tok['content'][0]][0]: - if len(tok['content']) < lev: + while True: + if tok.type == 'DELIM' \ + and tok.content[0] in self.envtypes \ + and type == self.envtypes[tok.content[0]][0]: + if len(tok.content) < lev: self.ungetkn() break - elif len(tok['content']) > lev: - self.ungetkn() - elt = self.parse_env(type, len(tok['content'])) + elif len(tok.content) > lev: + elt = self.parse_env(tok) else: elt = self.parse_line() - if not tok['continuation']: - list.append({ 'type': 'ELT', - 'subtype': self.envtypes[tok['content'][0]][1], - 'content': elt }) + if not tok.continuation: + list.append(WikiEltNode(type='ELT', + subtype=self.envtypes[tok.content[0]][1], + content=elt)) + tok = self.getkn() continue if list: - if list[-1]['content']['type'] != 'SEQ': - x = list[-1]['content']['content'] + if list[-1].content.type != 'SEQ': + x = list[-1].content.content # FIXME: - list[-1]['content'] = { 'type': 'SEQ', 'content': [x] } - list[-1]['content']['content'].append(elt) + list[-1].content = WikiNode(type='SEQ', content=[x]) + list[-1].content.content.append(elt) else: self.ungetkn() break - self.dprint(80, "LEAVE parse_env=(ENV, %s, %s, %s)", type, lev, list) - return { 'type': 'ENV', 'envtype': type, 'level': lev, 'content': list } - def parse_indent(self, lev): - self.dprint(80, "ENTER parse_indent(%s), tok %s", lev, self.peektkn()) - x = { 'type': 'IND', 'level': lev, 'content': self.parse_line() } - self.dprint(80, "LEAVE parse_indent=%s", x) - return x + tok = self.getkn() - def parse_tag(self, tag): - self.dprint(80, "ENTER parse_tag(%s)", tag) - seq = [] - save = self.tokind - t = self.peektkn() - isblock = t['type'] == 'NL' - while 1: - t = self.parse0() - if t == None or t['type'] == 'NIL': - self.tokind = save - s = '<' + tag['tag'] - if 'args' in tag and tag['args']: - s += ' ' + str(tag['args']) - del tag['args'] - s += '>' - if 'content' in tag: - subtree = tag['content'] - else: - subtree = None - tag['type'] = 'TEXT' - tag['content'] = s - if subtree: - self.tree[self.tokind:self.tokind] = subtree - self.dprint(80, "LEAVE parse_tag = %s (tree modified)", tag) - self.ungetkn() - return self.parse0() + ret = WikiEnvNode(type='ENV', + envtype=type, + level=lev, + content=list) + self.dprint(80, "LEAVE parse_env=%s", ret) + return ret + + def parse_header(self, tok): + self.dprint(80, "ENTER parse_header") + self.push_mark() + list = [] + delim = tok.content + while True: + tok = self.getkn() - if t['type'] == 'CTAG' and tag['tag'] == t['tag']: - break - seq.append(t) + if tok.type == 'NL': + self.pop_mark() + self.dprint(80, "LEAVE parse_header=None") + return None + elif tok.type == 'TEXT': + list.append(tok) + elif tok.type == 'DELIM': + if tok.content == delim: + if self.lookahead().type == 'NL': + self.getkn() + if self.lookahead().type == 'NL': + self.getkn() + break + else: + self.pop_mark() + self.dprint(80, "LEAVE parse_header=None") + return None + elif tok.isblock: + self.pop_mark() + self.dprint(80, "LEAVE parse_header=None") + return None + else: + list.append(self.parse_inline_delim(tok)) + elif tok.type == 'OTAG': + if tok.isblock: + self.pop_mark() + self.dprint(80, "LEAVE parse_header=None") + return None + list.append(self.parse_tag(tok)) + - ret = { 'type': 'TAG', - 'tag': tag['tag'], - 'args': tag['args'], - 'isblock': isblock, - 'content': { 'type': 'SEQ', 'content': seq } } - self.dprint(80, "LEAVE parse_tag = %s", ret) + self.clear_mark() + ret = WikiHdrNode(level = len(delim), + content = WikiSeqNode(type='SEQ', content=list)) +