diff options
-rw-r--r-- | WikiTrans/wiki2html.py | 88 | ||||
-rw-r--r-- | WikiTrans/wiki2texi.py | 112 | ||||
-rw-r--r-- | WikiTrans/wiki2text.py | 81 | ||||
-rw-r--r-- | WikiTrans/wikimarkup.py | 923 | ||||
-rw-r--r-- | WikiTrans/wikitoken.py | 166 | ||||
-rwxr-xr-x | bin/wikitrans | 2 | ||||
-rw-r--r-- | testdata/colon.html | 5 | ||||
-rw-r--r-- | testdata/headings.html | 11 | ||||
-rw-r--r-- | testdata/hz.html | 5 | ||||
-rw-r--r-- | testdata/nowiki-ind.html | 7 | ||||
-rw-r--r-- | testdata/nowiki.html | 1 | ||||
-rw-r--r-- | testdata/numlist.html | 4 | ||||
-rw-r--r-- | testdata/para.html | 4 | ||||
-rw-r--r-- | testdata/unlist.html | 4 |
14 files changed, 770 insertions, 643 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 754fa9b..122c91c 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -85,10 +85,10 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) + arg = self.format(elt.content[0]) text = None - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" elif len(s) > 1 and s[1] == 'thumb': @@ -133,8 +133,8 @@ class HtmlWikiMarkup (WikiMarkup): return self.fmtlink(elt, True) def str_ref(self, elt): - target = elt['ref'] - text = self.format(elt['content']) + target = elt.ref + text = self.format(elt.content) return "<a href=\"%s\">%s</a>" % (target, text if (text and text != '') \ else target) @@ -146,30 +146,30 @@ class HtmlWikiMarkup (WikiMarkup): return string def str_it(self, elt): - return "<i>" + self.concat(elt['content']) + "</i>" + return "<i>" + self.concat(elt.content) + "</i>" def str_bold(self, elt): - return "<b>" + self.concat(elt['content']) + "</b>" + return "<b>" + self.concat(elt.content) + "</b>" def str_hdr(self, elt): - level = elt['level'] + 1 + level = elt.level if level > 4: level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) + return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level) def str_bar(self): - return "<hr/>" + return "<hr/>\n" def str_env(self, elt): - type = elt['envtype'] - lev = elt['level'] + type = elt.envtype + lev = elt.level if lev > 4: lev = 2 string = "" - for s in elt['content']: - n = s['subtype']; + for s in elt.content: + n = s.subtype; string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s['content']), + self.format(s.content), self.envt[type]["elt"][n]) return "<%s>%s</%s>" % (self.envt[type]["hdr"], string, @@ -177,72 +177,72 @@ class HtmlWikiMarkup (WikiMarkup): return string def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return '<pre><code>' + s + '</code></pre>' #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) s += '>' - s += self.format(elt['content']) - return s + '</' + elt['tag'] + '>' + s += self.format(elt.content) + return s + '</' + elt.tag + '>' def str_para(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) - return "<p>" + string + "</p>" + return "<p>" + string + "</p>\n" def str_pre(self, elt): string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) if self.nested: return string return '<pre>' + string + '</pre>' def str_ind(self, elt): - return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level'] + return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: string += s else: - string = elt['content'] + string = elt.content return string - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': return self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': return self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': return self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': return self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': return self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': return self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': return self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': return self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': return self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': return self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': return self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': return self.str_ind(elt) - elif elt['type'] == 'SEQ': + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) return string else: diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index f36c0a1..dfde565 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -101,72 +101,72 @@ class TexiWikiMarkup (WikiMarkup): return self._end_print() def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): - for s in elt['content']: + if elt.type == 'TEXT': + if isinstance(elt.content,list): + for s in elt.content: self._print(s) else: - self._print(elt['content']) - elif elt['type'] == 'TAG': + self._print(elt.content) + elif elt.type == 'TAG': self.str_tag(elt) - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': self.str_para(elt) - elif elt['type'] == 'PRE': + elif elt.type == 'PRE': self.str_pre(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': self.str_it(elt) - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': self.str_bold(elt) - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': self.str_link(elt) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': self.str_tmpl(elt) - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': self.str_bar() - elif elt['type'] == 'HDR': + elif elt.type == 'HDR': self.str_hdr(elt) - elif elt['type'] == 'REF': + elif elt.type == 'REF': self.str_ref(elt) - elif elt['type'] == 'ENV': + elif elt.type == 'ENV': self.str_env(elt) - elif elt['type'] == 'IND': + elif elt.type == 'IND': self.str_ind(elt) - elif elt['type'] == 'SEQ': - for x in elt['content']: + elif elt.type == 'SEQ': + for x in elt.content: self.format(x) else: self._print(str(elt)) def str_tag(self, elt): - if elt['tag'] in ['code', 'tt']: + if elt.tag in ['code', 'tt']: save = self._begin_print() self.nested += 1 - self.format(elt['content']) + self.format(elt.content) self.nested -= 1 s = self._end_print(save) - if elt['isblock']: + if elt.isblock: self._print('@example', nl=True, escape=False) self._print(s, escape=False) self._print('@end example\n', nl=True, escape=False) else: self._print('@code{%s}' % s, escape=False) - elif elt['tag'] == 'div': - if 'args' in elt and 'id' in elt['args']: - self._print("@anchor{%s}\n" % elt['args']['id'], + elif elt.tag == 'div': + if elt.args and 'id' in elt.args: + self._print("@anchor{%s}\n" % elt.args['id'], nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: - self._print('<' + elt['tag']) - if elt['args']: - self._print(' ' + elt['args']) + self._print('<' + elt.tag) + if elt.args: + self._print(' ' + elt.args) self._print('>'); - self.format(elt['content']); - self._print('</' + elt['tag'] + '>') + self.format(elt.content); + self._print('</' + elt.tag + '>') def str_para(self, elt): if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) - for x in elt['content']: + for x in elt.content: self.format(x) if self.acc and not self.acc.endswith('\n\n'): self._print('\n', nl=True) @@ -174,7 +174,7 @@ class TexiWikiMarkup (WikiMarkup): def str_pre(self, elt): if not self.nested: self._print('@example\n', nl=True, escape=False) - for x in elt['content']: + for x in elt.content: self.format(x) if not self.nested: self._print('@end example\n', nl=True, escape=False) @@ -185,26 +185,26 @@ class TexiWikiMarkup (WikiMarkup): def str_it(self, elt): self._print('@i{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_bold(self, elt): self._print('@b{', escape=False) - self.concat(elt['content']) + self.concat(elt.content) self._print('}', escape=False) def str_hdr(self, elt): - level = elt['level'] + level = elt.level if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start: self._print("@* ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) else: self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print(None, nl=True) if self.sectcomm[self.sectioning_model][0] == '@top': self._print('@node ', nl=True, escape=False) - self.format(elt['content']) + self.format(elt.content) self._print('\n') self._print(None, nl=True) @@ -212,47 +212,47 @@ class TexiWikiMarkup (WikiMarkup): self._print("\n-----\n") def str_ind(self, elt): - self._print("@w{ }" * elt['level'], nl=True, escape=False) - self.format(elt['content']) + self._print("@w{ }" * elt.level, nl=True, escape=False) + self.format(elt.content) self._print(None, nl=True) def str_env(self, elt): - if elt['envtype'] == 'unnumbered': + if elt.envtype == 'unnumbered': self._print('@itemize @bullet\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end itemize\n', nl=True, escape=False) - elif elt['envtype'] == 'numbered': + elif elt.envtype == 'numbered': self._print('@enumerate\n', nl=True, escape=False) - for s in elt['content']: + for s in elt.content: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end enumerate\n', nl=True, escape=False) - elif elt['envtype'] == 'defn': + elif elt.envtype == 'defn': self._print('@table @asis\n', nl=True, escape=False) - for s in elt['content']: - if s['subtype'] == 0: + for s in elt.content: + if s.subtype == 0: self._print('@item ', nl=True, escape=False) - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) else: - self.format(s['content']) + self.format(s.content) self._print(None, nl=True) self._print('\n') self._print('@end table\n', nl=True, escape=False) def str_link(self, elt): save = self._begin_print() - self.format(elt['content'][0]) + self.format(elt.content[0]) arg = self._end_print() - if len(elt['content']) > 1: + if len(elt.content) > 1: s = [] - for x in elt['content'][0:2]: + for x in elt.content[0:2]: self._begin_print() self.format(x) s.append(self._end_print()) @@ -279,9 +279,9 @@ class TexiWikiMarkup (WikiMarkup): self._print("FIXME: str_tmpl not implemented\n") def str_ref(self, elt): - target = elt['ref'] + target = elt.ref save = self._begin_print() - self.format(elt['content']) + self.format(elt.content) text = self._end_print(save) if text and text != '': self._print("@uref{%s,%s}" % (target, text), escape=False) diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index 916391e..ee1748c 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py @@ -65,9 +65,9 @@ class TextWikiMarkup (WikiMarkup): return self.html_base % { 'lang' : lang } + urllib.quote(tgt) def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + arg = self.format(elt.content[0]) + if len(elt.content) > 1: + s = [x for x in map(self.format, elt.content)] text = s[1] else: s = None @@ -142,23 +142,23 @@ class TextWikiMarkup (WikiMarkup): return output + linebuf def str_tag(self, elt): - if elt['tag'] == 'code': + if elt.tag == 'code': self.nested += 1 - s = self.format(elt['content']) + s = self.format(elt.content) self.nested -= 1 return s #FIXME else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) - s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' + s = '<' + elt.tag + if elt.args: + s += ' ' + str(elt.args) + s += '>' + self.format(elt.content) + '</' + elt.tag + '>' return s def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): + if elt.type == 'TEXT': + if isinstance(elt.content,list): string = "" - for s in elt['content']: + for s in elt.content: if string: if string.endswith("."): string += " " @@ -166,29 +166,29 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s else: - string = elt['content'] - elif elt['type'] == 'PRE': + string = elt.content + elif elt.type == 'PRE': string = "" - for x in elt['content']: + for x in elt.content: string += self.format(x) string += '\n' - elif elt['type'] == 'PARA': + elif elt.type == 'PARA': string = ""; - for x in elt['content']: + for x in elt.content: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt['type'] == 'TAG': + elif elt.type == 'TAG': string = self.str_tag(elt) - elif elt['type'] == 'IT': + elif elt.type == 'IT': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: string += " " + s string = "_" + string.lstrip(" ") + "_" - elif elt['type'] == 'BOLD': + elif elt.type == 'BOLD': string = "" - for x in elt['content']: + for x in elt.content: s = self.format(x) if s: if string.endswith("."): @@ -197,54 +197,53 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s string = string.upper() - elif elt['type'] == 'LINK': + elif elt.type == 'LINK': string = self.fmtlink(elt, False) - elif elt['type'] == 'TMPL': + elif elt.type == 'TMPL': s = self.fmtlink(elt, True) if s: string = '[' + s + ']' else: string = s - elif elt['type'] == 'BAR': + elif elt.type == 'BAR': w = self.width if w < 5: w = 5 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt['type'] == 'HDR': - level = elt['level'] - string = "\n" + ("*" * level) + " " + \ - self.format(elt['content']).lstrip(" ") + "\n\n" - elif elt['type'] == 'REF': - string = self.xref(self.format(elt['content']), elt['ref']) - elif elt['type'] == 'ENV': - type = elt['envtype'] - lev = elt['level'] + elif elt.type == 'HDR': + string = "\n" + ("*" * elt.level) + " " + \ + self.format(elt.content).lstrip(" ") + "\n\n" + elif elt.type == 'REF': + string = self.xref(self.format(elt.content), elt.ref) + elif elt.type == 'ENV': + type = elt.envtype + lev = elt.level if lev > self.width - 4: lev = 1 string = "" n = 1 - for s in elt['content']: + for s in elt.content: if not string.endswith("\n"): string += "\n" - x = self.format(s['content']) + x = self.format(s.content) if type == "unnumbered": string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) elif type == "numbered": string += self.fmtpara(self.indent(lev, "%d. %s" % (n, x))) n += 1 elif type == "defn": - if s['subtype'] == 0: + if s.subtype == 0: string += self.indent(lev-1, x) else: string += self.indent(lev+3, x) if not string.endswith("\n"): string += "\n" - elif elt['type'] == 'IND': - string = (" " * elt['level']) + self.format(elt['content']) + '\n' - elif elt['type'] == 'SEQ': + elif elt.type == 'IND': + string = (" " * elt.level) + self.format(elt.content) + '\n' + elif elt.type == 'SEQ': string = "" - for x in elt['content']: + for x in elt.content: if len(string) > 1 and not string[-1].isspace(): string += ' ' string += self.format(x) diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index 2fad0af..f971347 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -19,10 +19,15 @@ from __future__ import print_function import sys import re from types import * +from wikitoken import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "TagAttributes", "TagAttributeSyntax" ] +class UnexpectedToken(Exception): + def __init__(self, value): + self.value = value + class TagAttributeSyntax(Exception): def __init__(self, value): self.value = value @@ -116,111 +121,11 @@ class BaseWikiMarkup(object): def dprint(self, lev, fmt, *argv): if self.debug_level >= lev: - print("[DEBUG]", fmt % argv) - - def print_dump_prefix(self, level, file): - file.write("[DUMP]" + ' ' * (2*level + 1)) + for l in (fmt % argv).split('\n'): + print("[DEBUG] %s" % l) - def dump_nil(self, node, level, file): - pass - - def dump_text(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("CONTENT: \"%s\"\n" % node['content']) - - def dump_delim(self, node, level, file): - file.write("'%s'" % node['content']) - if 'continuation' in node and node['continuation']: - file.write(" (cont)") - file.write("\n") - - def dump_tag(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("TAG: %s\n" % node['tag']) - if 'isblock' in node: - self.print_dump_prefix(level, file) - file.write("PLACEMENT: %s\n" % ('BLOCK' if node['isblock'] else 'INLINE')) - if 'args' in node: - self.print_dump_prefix(level, file) - file.write("ARGS: %s\n" % node['args']) - if 'content' in node: - self.dump_node(node['content'], level + 1, file) - - def dump_seq(self, node, level, file): - self.dump(node['content'], level + 1, file) - - def dump_ref(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("REF: %s\n" % node['ref']) - self.dump_node(node['content'], level + 1, file) - - def dump_hdr(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_elt(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("SUBTYPE: %s\n" % node['subtype']) - self.dump_node(node['content'], level + 1, file) - - def dump_env(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("ENVTYPE: %s\n" % node['envtype']) - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump(node['content'], level + 1, file) - - def dump_ind(self, node, level, file): - self.print_dump_prefix(level, file) - file.write("LEVEL: %s\n" % node['level']) - self.dump_node(node['content'], level + 1, file) - - def dump_link(self, node, level, file): - self.dump(node['content'], level + 1, file) - - dump_type = { - 'NIL': dump_nil, - 'NL': dump_nil, - 'TEXT': dump_text, - 'DELIM': dump_delim, - 'OTAG': dump_tag, - 'CTAG': dump_tag, - 'TAG': dump_tag, - 'SEQ': dump_seq, - 'REF': dump_ref, - 'HDR': dump_hdr, - 'ELT': dump_elt, - 'ENV': dump_env, - 'IND': dump_ind, - 'BAR': dump_nil, - 'PARA': dump_seq, - 'PRE': dump_text, - 'BOLD': dump_seq, - 'IT': dump_seq, - 'LINK': dump_link, - } + inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] - def dump_node(self, node, level, file): - if type(node) != dict: - file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node)) - return - - self.print_dump_prefix(level, file) - file.write("NODE " + node['type'] + ":\n") - if node['type'] in self.dump_type: - self.dump_type[node['type']](self, node, level, file) - else: - self.print_dump_prefix(level, file) - file.write("(UNHANDLED) ") - file.write("%s\n" % node) - self.print_dump_prefix(level, file) - file.write("END NODE " + node['type'] + "\n") - - def dump(self, tree, level=0, file=sys.stdout): - for node in tree: - self.dump_node(node, level, file) - def tokread(self): line = None pos = 0 @@ -233,11 +138,11 @@ class BaseWikiMarkup(object): line = u'' if not line or line == "": - yield({ 'type': 'NIL' }) + yield(WikiNode(type='NIL')) break if line == '\n': - yield({ 'type': 'NL', 'content': line }) + yield(WikiNode(type='NL')) line = None continue @@ -246,7 +151,7 @@ class BaseWikiMarkup(object): if m: if (pos < m.start(0)): - yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) + yield(WikiTextNode(content=line[pos:m.start(0)])) pos = m.start(0) t = None @@ -260,13 +165,11 @@ class BaseWikiMarkup(object): try: m = self.ctag.search(line, pos) if m and m.group('tag') == 'nowiki': - yield({ 'type': 'TEXT', - 'content': line[pos:m.start(0)] }) + yield(WikiTextNode(content=line[pos:m.start(0)] )) pos = m.end(0) break - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = self.input() pos = 0 @@ -275,65 +178,60 @@ class BaseWikiMarkup(object): continue elif m.group('tag') in self.tags: try: - t = { 'type': 'OTAG', - 'tag': m.group('tag'), - 'args': TagAttributes(m.group('args')) } - yield(t) + yield(WikiTagNode(type='OTAG', + tag=m.group('tag'), + isblock=(line[pos] == '\n'), + args=TagAttributes(m.group('args')))) if m.group('closed'): - t['type'] = 'CTAG' - yield(t) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) except TagAttributeSyntax: - yield({'type': 'TEXT', - 'content': m.group(0)}) + yield(WikiTextNode(content=m.group(0))) continue else: - yield({ 'type': 'TEXT', - 'content': m.group(0) }) + yield(WikiTextNode(content=m.group(0))) continue else: m = self.ctag.match(line, pos) if m: if m.group('tag') in self.tags: - yield( { 'type': 'CTAG', - 'tag': m.group('tag') } ) + yield(WikiTagNode(type='CTAG', + tag=m.group('tag'))) pos = m.end(0) continue else: - yield( { 'type': 'TEXT', - 'content': line[pos:pos+1] }) + yield(WikiTextNode(content=line[pos:pos+1])) pos += 1 continue else: pos = m.end(0) content = m.group(0) if content[0] in self.envtypes: - t = { 'type': 'DELIM', - 'content': content, - 'continuation': pos < len(line) and line[pos] == ":" } - if t['continuation']: - t['content'] += t['content'][0] + node = WikiDelimNode(type='DELIM', + content=content, + isblock=True, + continuation=pos < len(line) and line[pos] == ":") + if node.continuation: + node.content += node.content[0] pos += 1 - yield(t) + yield(node) while pos < len(line) and line[pos] in [' ', '\t']: pos += 1 else: - yield({ 'type': 'DELIM', - 'content': content.strip(), - 'continuation': False}) + yield(WikiDelimNode(type='DELIM', + isblock=(content.strip() not in self.inline_delims), + content=content.strip())) continue if line: if line[-1] == '\n': if line[pos:-1] != '': - yield({ 'type': 'TEXT', - 'content': line[pos:-1] }) - yield({ 'type': 'NL', - 'content': '\n' }) + yield(WikiTextNode(content=line[pos:-1])) + yield(WikiNode(type='NL')) else: - yield({ 'type': 'TEXT', - 'content': line[pos:] }) + yield(WikiTextNode(content=line[pos:])) line = None @@ -364,11 +262,11 @@ class BaseWikiMarkup(object): # 3b. ''a b '''c d''''' stack = [] for i in range(0,len(self.toklist)): - if self.toklist[i]['type'] == 'DELIM' \ - and (self.toklist[i]['content'] == "''" \ - or self.toklist[i]['content'] == "'''"): + if self.toklist[i].type == 'DELIM' \ + and (self.toklist[i].content == "''" \ + or self.toklist[i].content == "'''"): if len(stack) > 0: - if self.toklist[stack[-1]]['content'] == self.toklist[i]['content']: + if self.toklist[stack[-1]].content == self.toklist[i].content: # Case 1: just pop the matching delimiter off the stack stack.pop() elif len(stack) == 2 and stack[-2] + 1 == stack[-1]: @@ -377,8 +275,8 @@ class BaseWikiMarkup(object): # and pop off the matching one stack.pop() elif i < len(self.toklist) \ - and self.toklist[i+1]['type'] == 'DELIM' \ - and self.toklist[stack[-1]]['content'] == self.toklist[i+1]['content']: + and self.toklist[i+1].type == 'DELIM' \ + and self.toklist[stack[-1]].content == self.toklist[i+1].content: # Case 3: swap current and next tokens self.swaptkn(i, i+1) # and pop off the matching one @@ -391,440 +289,487 @@ class BaseWikiMarkup(object): stack.append(i) # Redefine all non-matched tokens as TEXT for i in stack: - self.toklist[i]['type'] = 'TEXT' + self.toklist[i].type = 'TEXT' # FIXME + + mark = [] + + def push_mark(self): + self.mark.append(self.tokind) - def peektkn(self, off=0): - return self.toklist[self.tokind-off] + def pop_mark(self): + self.tokind = self.mark.pop() + + def clear_mark(self): + self.mark.pop() + + def lookahead(self, off=0): + tok = self.toklist[self.tokind+off] + self.dprint(20, "lookahead(%s): %s", off, tok) + return tok def setkn(self,val): self.toklist[self.tokind] = val def getkn(self): - self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' + self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL' if self.tokind == len(self.toklist): - return { 'type': 'NIL' } + return WikiNode(type='NIL') tok = self.toklist[self.tokind] self.tokind = self.tokind + 1 + self.dprint(20, "getkn: %s", tok) return tok |