diff options
-rw-r--r-- | wiki2html.py | 68 | ||||
-rw-r--r-- | wiki2text.py | 60 | ||||
-rw-r--r-- | wikimarkup.py | 257 |
3 files changed, 192 insertions, 193 deletions
diff --git a/wiki2html.py b/wiki2html.py index 77cb97a..eee592d 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -42,5 +42,5 @@ class HtmlWikiMarkup (WikiMarkup): "elt": ["li"] }, - "numbered": { "hdr": "ol", + "numbered": { "hdr": "ol", "elt": ["li"] }, - "defn": { "hdr": "dl", + "defn": { "hdr": "dl", "elt": ["dt","dd"] } } @@ -83,6 +83,6 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt[1][0]) + arg = self.format(elt['content'][0]) text = None - if len(elt[1]) > 1: - s = map(self.format, elt[1]) + if len(elt['content']) > 1: + s = map(self.format, elt['content']) if s[0] == 'disambigR' or s[0] == 'wikiquote': @@ -131,4 +131,4 @@ class HtmlWikiMarkup (WikiMarkup): def str_ref(self, elt): - target = elt[1] - text = self.format(elt[2]) + target = elt['ref'] + text = self.format(elt['content']) return "<a href=\"%s\">%s</a>" % (target, @@ -144,12 +144,12 @@ class HtmlWikiMarkup (WikiMarkup): def str_it(self, elt): - return "<i>" + self.concat(elt[1]) + "</i>" + return "<i>" + self.concat(elt['content']) + "</i>" def str_bold(self, elt): - return "<b>" + self.concat(elt[1]) + "</b>" + return "<b>" + self.concat(elt['content']) + "</b>" def str_hdr(self, elt): - level = elt[1] + 1 + level = elt['level'] + 1 if level > 4: level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt[2]), level) + return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) @@ -159,4 +159,4 @@ class HtmlWikiMarkup (WikiMarkup): def str_env(self, elt): - type = elt[1] - lev = elt[2] + type = elt['envtype'] + lev = elt['level'] if lev > 4: @@ -164,6 +164,6 @@ class HtmlWikiMarkup (WikiMarkup): string = "" - for s in elt[3]: - n = s[1]; + for s in elt['content']: + n = s['subtype']; string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s[2]), + self.format(s['content']), self.envt[type]["elt"][n]) @@ -176,3 +176,3 @@ class HtmlWikiMarkup (WikiMarkup): string = ""; - for x in elt[1]: + for x in elt['content']: string += self.format(x) @@ -181,36 +181,36 @@ class HtmlWikiMarkup (WikiMarkup): def str_ind(self, elt): - return (" " * 2 * elt[1]) + self.format(elt[2]) + return (" " * 2 * elt['level']) + self.format(elt['content']) def format(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): string = "" - for s in elt[1]: + for s in elt['content']: string += s else: - string = elt[1] + string = elt['content'] return string - elif elt[0] == PARA: + elif elt['type'] == 'PARA': return self.str_para(elt) - elif elt[0] == IT: + elif elt['type'] == 'IT': return self.str_it(elt) - elif elt[0] == BOLD: + elif elt['type'] == 'BOLD': return self.str_bold(elt) - elif elt[0] == LINK: + elif elt['type'] == 'LINK': return self.str_link(elt) - elif elt[0] == TMPL: + elif elt['type'] == 'TMPL': return self.str_tmpl(elt) - elif elt[0] == BAR: + elif elt['type'] == 'BAR': return self.str_bar() - elif elt[0] == HDR: + elif elt['type'] == 'HDR': return self.str_hdr(elt) - elif elt[0] == REF: + elif elt['type'] == 'REF': return self.str_ref(elt) - elif elt[0] == ENV: + elif elt['type'] == 'ENV': return self.str_env(elt) - elif elt[0] == IND: + elif elt['type'] == 'IND': return self.str_ind(elt) - elif elt[0] == SEQ: + elif elt['type'] == 'SEQ': string = "" - for x in elt[1]: + for x in elt['content']: string += self.format(x) diff --git a/wiki2text.py b/wiki2text.py index 005e551..c94ae51 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -68,5 +68,5 @@ class TextWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt[1][0]) - if len(elt[1]) > 1: - s = map(self.format, elt[1]) + arg = self.format(elt['content'][0]) + if len(elt['content']) > 1: + s = map(self.format, elt['content']) text = s[1] @@ -145,6 +145,6 @@ class TextWikiMarkup (WikiMarkup): def format(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): string = "" - for s in elt[1]: + for s in elt['content']: if string: @@ -156,11 +156,11 @@ class TextWikiMarkup (WikiMarkup): else: - string = elt[1] - elif elt[0] == PARA: + string = elt['content'] + elif elt['type'] == 'PARA': string = ""; - for x in elt[1]: + for x in elt['content']: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt[0] == IT: + elif elt['type'] == 'IT': string = "" - for x in elt[1]: + for x in elt['content']: s = self.format(x) @@ -169,5 +169,5 @@ class TextWikiMarkup (WikiMarkup): string = "_" + string.lstrip(" ") + "_" - elif elt[0] == BOLD: + elif elt['type'] == 'BOLD': string = "" - for x in elt[1]: + for x in elt['content']: s = self.format(x) @@ -180,5 +180,5 @@ class TextWikiMarkup (WikiMarkup): string = string.upper() - elif elt[0] == LINK: + elif elt['type'] == 'LINK': string = self.fmtlink(elt, False) - elif elt[0] == TMPL: + elif elt['type'] == 'TMPL': s = self.fmtlink(elt, True) @@ -188,3 +188,3 @@ class TextWikiMarkup (WikiMarkup): string = s - elif elt[0] == BAR: + elif elt['type'] == 'BAR': w = self.width @@ -193,11 +193,11 @@ class TextWikiMarkup (WikiMarkup): string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt[0] == HDR: - level = elt[1] + elif elt['type'] == 'HDR': + level = elt['level'] string = "\n" + ("*" * level) + " " + \ - self.format(elt[2]).lstrip(" ") + "\n\n" - elif elt[0] == REF: - string = self.xref(self.format(elt[2]), elt[1]) - elif elt[0] == ENV: - type = elt[1] - lev = elt[2] + self.format(elt['content']).lstrip(" ") + "\n\n" + elif elt['type'] == 'REF': + string = self.xref(self.format(elt['content']), elt['ref']) + elif elt['type'] == 'ENV': + type = elt['envtype'] + lev = elt['level'] if lev > self.width - 4: @@ -206,6 +206,6 @@ class TextWikiMarkup (WikiMarkup): n = 1 - for s in elt[3]: + for s in elt['content']: if not string.endswith("\n"): string += "\n" - x = self.format(s[2]) + x = self.format(s['content']) if type == "unnumbered": @@ -223,7 +223,7 @@ class TextWikiMarkup (WikiMarkup): string += "\n" - elif elt[0] == IND: - string = (" " * elt[1]) + self.format(elt[2]) + '\n' - elif elt[0] == SEQ: + elif elt['type'] == 'IND': + string = (" " * elt['level']) + self.format(elt['content']) + '\n' + elif elt['type'] == 'SEQ': string = "" - for x in elt[1]: + for x in elt['content']: if len(string) > 1 and not string[-1].isspace(): diff --git a/wikimarkup.py b/wikimarkup.py index 060b7eb..09c48eb 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -22,5 +22,2 @@ from types import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", - "NIL", "TEXT", "DELIM", "NL", "PARA", - "IT", "BOLD", "LINK", "TMPL", - "BAR", "HDR", "REF", "ENV", "IND", "SEQ", "envtypes" ] @@ -29,20 +26,2 @@ delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{ -NIL = 0 -TEXT = 1 -DELIM = 2 -NL = 3 - -PARA = 4 -IT = 5 -BOLD = 6 -LINK = 7 -TMPL = 8 -BAR = 9 -HDR = 10 -REF = 11 -ENV = 12 -IND = 13 -SEQ = 14 -ELT = 15 - # Environment types: @@ -79,3 +58,3 @@ class BaseWikiMarkup: self.dprint(100, "YIELD: NIL") - yield(NIL,) + yield({ 'type': 'NIL' }) break @@ -84,3 +63,3 @@ class BaseWikiMarkup: self.dprint(100, "YIELD: NL") - yield(NL,line) + yield({ 'type': 'NL', 'content': line }) line = None @@ -94,3 +73,3 @@ class BaseWikiMarkup: self.dprint(100, "YIELD: TEXT %s", line[pos:m.start(0)]) - yield(TEXT, line[pos:m.start(0)]) + yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) pos = m.end(0) @@ -99,3 +78,6 @@ class BaseWikiMarkup: self.dprint(100, "YIELD: DELIM %s, True", m.group(0)) - yield(DELIM, m.group(0), True) + # FIXME: What's "extra"? + yield({ 'type': 'DELIM', + 'content': m.group(0), + 'extra': True }) pos += 1 @@ -103,3 +85,4 @@ class BaseWikiMarkup: self.dprint(100, "YIELD: DELIM %s", m.group(0)) - yield(DELIM, m.group(0)) + yield({ 'type': 'DELIM', + 'content': m.group(0) }) else: @@ -108,8 +91,11 @@ class BaseWikiMarkup: if line[pos:-1] != '': - yield(TEXT, line[pos:-1]) + yield({ 'type': 'TEXT', + 'content': line[pos:-1] }) self.dprint(100, "YIELD: NL") - yield(NL,'\n') + yield({ 'type': 'NL', + 'content': '\n' }) else: self.dprint(100, "YIELD: TEXT %s", line[pos:]) - yield(TEXT, line[pos:]) + yield({ 'type': 'TEXT', + 'content': line[pos:] }) line = None @@ -128,7 +114,7 @@ class BaseWikiMarkup: for i in range(0,len(self.toklist)): - if self.toklist[i][0] == DELIM \ - and (self.toklist[i][1] == "''" \ - or self.toklist[i][1] == "'''"): + if self.toklist[i]['type'] == 'DELIM' \ + and (self.toklist[i]['content'] == "''" \ + or self.toklist[i]['content'] == "'''"): if len(stack) > 0 \ - and self.toklist[stack[-1]][1] == self.toklist[i][1]: + and self.toklist[stack[-1]]['content'] == self.toklist[i]['content']: stack.pop() @@ -150,3 +136,3 @@ class BaseWikiMarkup: tok = self.toklist[self.tokind] - if tok[0] != NIL: + if tok['type'] != 'NIL': self.tokind = self.tokind + 1 @@ -165,6 +151,6 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == TEXT: - textlist.append(tok[1]) - elif tok[0] == DELIM: - if tok[1] == delim: + if tok['type'] == 'TEXT': + textlist.append(tok['content']) + elif tok['type'] == 'DELIM': + if tok['content'] == delim: break @@ -172,3 +158,3 @@ class BaseWikiMarkup: if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) textlist = [] @@ -183,7 +169,7 @@ class BaseWikiMarkup: return None - elif tok[0] == NL: - if self.peektkn()[0] == NL: + elif tok['type'] == 'NL': + if self.peektkn()['type'] == 'NL': self.dprint(80, "LEAVE parse_fontmod=None") return None - seq.append((TEXT, '\n')) + seq.append({ 'type': 'TEXT', 'content': '\n' }) else: @@ -192,4 +178,4 @@ class BaseWikiMarkup: if textlist: - seq.append((TEXT, textlist)) - res = (what, seq) + seq.append({ 'type': 'TEXT', 'content': textlist }) + res = { 'type': what, 'content': seq } self.dprint(80, "LEAVE parse_fontmod=%s", res) @@ -204,10 +190,10 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == DELIM: - if tok[1] == delim: + if tok['type'] == 'DELIM': + if tok['content'] == delim: if list: - subtree.append((SEQ,list)) + subtree.append({ 'type': 'SEQ', 'content': list }) break - elif tok[1] == "|": + elif tok['content'] == "|": if len(list) > 1: - subtree.append((SEQ,list)) + subtree.append({ 'type': 'SEQ', 'content': list }) elif list: @@ -222,3 +208,3 @@ class BaseWikiMarkup: return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) @@ -228,3 +214,3 @@ class BaseWikiMarkup: self.dprint(80, "LEAVE parse_link=(%s,%s)", type, subtree) - return (type, subtree) + return { 'type': type, 'content': subtree } @@ -235,4 +221,4 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == DELIM: - if tok[1] == "]": + if tok['type'] == 'DELIM': + if tok['content'] == "]": break @@ -245,6 +231,6 @@ class BaseWikiMarkup: return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == NL: - list.append((TEXT, '\n')) + elif tok['type'] == 'NL': + list.append({ 'type': 'TEXT', 'content': '\n' }) continue @@ -253,7 +239,10 @@ class BaseWikiMarkup: return None - if len(list) == 0 or list[0][0] != TEXT: + if len(list) == 0 or list[0]['type'] != 'TEXT': self.dprint(80, "LEAVE parse_ref=%s", "None") return None - (ref,sep,text) = list[0][1].partition(' ') - ret = (REF, ref, (SEQ, [(TEXT, text)] + list[1:])) + (ref,sep,text) = list[0]['content'].partition(' ') + ret = { 'type': 'REF', + 'ref': ref, + 'content': { 'type': 'SEQ', + 'content': [{ 'type': 'TEXT', 'content': text }] + list[1:] } } self.dprint(80, "LEAVE parse_ref= %s", ret) @@ -264,5 +253,5 @@ class BaseWikiMarkup: def is_inline_delim(self, tok): - return tok[0] == DELIM and tok[1] in self.inline_delims + return tok['type'] == 'DELIM' and tok['content'] in self.inline_delims def is_block_delim(self, tok): - return tok[0] == DELIM and tok[1] not in self.inline_delims + return tok['type'] == 'DELIM' and tok['content'] not in self.inline_delims @@ -271,12 +260,12 @@ class BaseWikiMarkup: tokind = self.tokind - if tok[1] == "''": - x = self.parse_fontmod(tok[1], IT) - elif tok[1] == "'''": - x = self.parse_fontmod(tok[1], BOLD) - elif tok[1] == "[": + if tok['content'] == "''": + x = self.parse_fontmod(tok['content'], 'IT') + elif tok['content'] == "'''": + x = self.parse_fontmod(tok['content'], 'BOLD') + elif tok['content'] == "[": x = self.parse_ref() - elif tok[1] == "[[": - x = self.parse_link(LINK, "]]") - elif tok[1] == "{{": - x = self.parse_link(TMPL, "}}") + elif tok['content'] == "[[": + x = self.parse_link('LINK', "]]") + elif tok['content'] == "{{": + x = self.parse_link('TMPL', "}}") else: # FIXME @@ -295,7 +284,7 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == TEXT: - textlist.append(tok[1]) - elif tok[0] == NL: + if tok['type'] == 'TEXT': + textlist.append(tok['content']) + elif tok['type'] == 'NL': tok = self.getkn() - if tok[0] == NL or tok[0] == NIL: + if tok['type'] == 'NL' or tok['type'] == 'NIL': break @@ -306,8 +295,8 @@ class BaseWikiMarkup: textlist.append('\n') - elif tok[0] == NIL: + elif tok['type'] == 'NIL': break - elif tok[0] == DELIM: + elif tok['type'] == 'DELIM': if self.is_inline_delim(tok): if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) textlist = [] @@ -320,3 +309,3 @@ class BaseWikiMarkup: else: - seq.append((TEXT,tok[1])) + seq.append({ 'type': 'TEXT', 'content': tok['content'] }) # self.ungetkn() @@ -324,5 +313,5 @@ class BaseWikiMarkup: if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) self.dprint(80, "LEAVE parse_para=%s", seq) - return (PARA, seq) + return { 'type': 'PARA', 'content': seq } @@ -333,10 +322,10 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == NIL: + if tok['type'] == 'NIL': self.dprint(80, "LEAVE parse_header=%s", "None") return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == DELIM: - if tok[1] == delim: - if self.peektkn()[0] == NL: + elif tok['type'] == 'DELIM': + if tok['content'] == delim: + if self.peektkn()['type'] == 'NL': break @@ -356,3 +345,5 @@ class BaseWikiMarkup: self.dprint(80, "LEAVE parse_header=(HDR, %s, (SEQ,%s))",len(delim)-1,list) - return (HDR,len(delim)-1,(SEQ,list)) + return { 'type': 'HDR', + 'level': len(delim)-1, + 'content': { 'type': 'SEQ', 'content': list } } @@ -364,8 +355,8 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == NL or tok[0] == NIL: + if tok['type'] == 'NL' or tok['type'] == 'NIL': break - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == DELIM and tok[1][0] == ":": - list.append(self.parse_indent(len(tok[1]))) + elif tok['type'] == 'DELIM' and tok['content'][0] == ":": + list.append(self.parse_indent(len(tok['content']))) break @@ -378,3 +369,3 @@ class BaseWikiMarkup: self.dprint(80, "LEAVE parse_line=(SEQ, %s)", list) - return (SEQ, list) + return { 'type': 'SEQ', 'content': list } @@ -385,19 +376,24 @@ class BaseWikiMarkup: tok = self.getkn() - if tok[0] == DELIM and envtypes.has_key(tok[1][0]) and type == envtypes[tok[1][0]][0]: - if len(tok[1]) < lev: + if tok['type'] == 'DELIM' \ + and envtypes.has_key(tok['content'][0]) \ + and type == envtypes[tok['content'][0]][0]: + if len(tok['content']) < lev: self.ungetkn() break - elif len(tok[1]) > lev: + elif len(tok['content']) > lev: self.ungetkn() - elt = self.parse_env(type, len(tok[1])) + elt = self.parse_env(type, len(tok['content'])) else: elt = self.parse_line() - if len(tok) == 2: - list.append((ELT, envtypes[tok[1][0]][1], elt)) + if len(tok.keys()) == 2: + list.append({ 'type': 'ELT', + 'subtype': envtypes[tok['content'][0]][1], + 'content': elt }) continue - - if list[-1][2][0] != SEQ: - x = list[-1][2][1] - list[-1][2] = (SEQ, [x]) - list[-1][2][1].append(elt) + + if list[-1]['content']['type'] != 'SEQ': + x = list[-1]['content']['content'] + # FIXME: + list[-1]['content'] = { 'type': 'SEQ', 'content': [x] } + list[-1]['content']['content'].append(elt) else: @@ -406,3 +402,3 @@ class BaseWikiMarkup: self.dprint(80, "LEAVE parse_env=(ENV, %s, %s, %s)", type, lev, list) - return (ENV, type, lev, list) + return { 'type': 'ENV', 'envtype': type, 'level': lev, 'content': list } @@ -410,3 +406,3 @@ class BaseWikiMarkup: self.dprint(80, "ENTER parse_indent(%s), tok %s", lev, self.peektkn()) - x = (IND, lev, self.parse_line()) + x = { 'type': 'IND', 'level': lev, 'content': self.parse_line() } self.dprint(80, "LEAVE parse_indent=%s", x) @@ -416,20 +412,20 @@ class BaseWikiMarkup: tok = self.getkn() - toktype = tok[0] - if toktype == NIL: + toktype = tok['type'] + if toktype == 'NIL': return None - elif toktype == TEXT: + elif toktype == 'TEXT': self.ungetkn() return self.parse_para() - elif toktype == DELIM: - if tok[1] == "----": - return (BAR,) - elif tok[1][0:2] == "==": - return self.parse_header(tok[1]) - elif envtypes.has_key(tok[1][0]): - type = envtypes[tok[1][0]][0] - lev = len(tok[1]) + elif toktype == 'DELIM': + if tok['content'] == "----": + return { 'type': 'BAR' } + elif tok['content'][0:2] == "==": + return self.parse_header(tok['content']) + elif envtypes.has_key(tok['content'][0]): + type = envtypes[tok['content'][0]][0] + lev = len(tok['content']) self.ungetkn() return self.parse_env(type, lev) - elif tok[1][0] == ":": - return self.parse_indent(len(tok[1])) + elif tok['content'][0] == ":": + return self.parse_indent(len(tok['content'])) else: @@ -437,4 +433,4 @@ class BaseWikiMarkup: return self.parse_para() - elif toktype == NL: - return (TEXT, '\n') + elif toktype == 'NL': + return { 'type': 'TEXT', 'content': '\n' } # return self.parse0() @@ -515,10 +511,13 @@ class WikiMarkup (BaseWikiMarkup): def is_lang_link(self, elt): - if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1: - if elt[1][0][0] == TEXT: - m = re.match('([\w-]+):', elt[1][0][1]) + if elt['type'] == 'LINK' \ + and isinstance(elt['content'], list) \ + and len(elt['content']) == 1: + if elt['content'][0]['type'] == TEXT: + m = re.match('([\w-]+):', elt['content'][0]['content']) if m: # and m.group(1) in self.langtab: return True - elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\ - elt[1][0][1][0][0] == TEXT: - m = re.match('([\w-]+):',elt[1][0][1][0][1]) + elif elt['content'][0]['type'] == 'SEQ' \ + and len(elt['content'][0]['content']) == 1 and\ + elt['content'][0]['content'][0]['type'] == TEXT: + m = re.match('([\w-]+):',elt['content'][0]['content'][0]['content']) if m: # and m.group(1) in self.langtab: @@ -528,8 +527,8 @@ class WikiMarkup (BaseWikiMarkup): def is_empty_text(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): - for s in elt[1]: + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): + for s in elt['content']: if re.search('\w', s): return False - elif re.search('\w', elt[1]): + elif re.search('\w', elt['content']): return False |