summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2015-07-22 12:16:38 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2015-07-22 12:16:38 +0300
commit 32be559549aab3d71bee6be566782eef6594442d (patch)
tree 71bde2c14190a742832b9a2591c3fb0aa4c2f75d
parent 64cf5fdb880815ff21652ddb74e48490dd2f56fe (diff)
download wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.gz
wikitrans-32be559549aab3d71bee6be566782eef6594442d.tar.bz2
Major rewrite

Use dedicated classes, instead of dictionaries, to represent markup tokens.

* WikiTrans/wikitoken.py: New file. Defines Wiki markup tokens.
* WikiTrans/wikimarkup.py: Rewrite.
* WikiTrans/wiki2html.py: Update.
* WikiTrans/wiki2texi.py: Update.
* WikiTrans/wiki2text.py: Update.
* bin/wikitrans: Update.
-rw-r--r-- WikiTrans/wiki2html.py 88
-rw-r--r-- WikiTrans/wiki2texi.py 112
-rw-r--r-- WikiTrans/wiki2text.py 81
-rw-r--r-- WikiTrans/wikimarkup.py 923
-rw-r--r-- WikiTrans/wikitoken.py 166
-rwxr-xr-x bin/wikitrans 2
-rw-r--r-- testdata/colon.html 5
-rw-r--r-- testdata/headings.html 11
-rw-r--r-- testdata/hz.html 5
-rw-r--r-- testdata/nowiki-ind.html 7
-rw-r--r-- testdata/nowiki.html 1
-rw-r--r-- testdata/numlist.html 4
-rw-r--r-- testdata/para.html 4
-rw-r--r-- testdata/unlist.html 4
14 files changed, 770 insertions, 643 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 754fa9b..122c91c 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -87,6 +87,6 @@ class HtmlWikiMarkup (WikiMarkup):
def fmtlink(self, elt, istmpl):
- arg = self.format(elt['content'][0])
+ arg = self.format(elt.content[0])
text = None
- if len(elt['content']) > 1:
- s = [x for x in map(self.format, elt['content'])]
+ if len(elt.content) > 1:
+ s = [x for x in map(self.format, elt.content)]
if s[0] == 'disambigR' or s[0] == 'wikiquote':
@@ -135,4 +135,4 @@ class HtmlWikiMarkup (WikiMarkup):
def str_ref(self, elt):
- target = elt['ref']
- text = self.format(elt['content'])
+ target = elt.ref
+ text = self.format(elt.content)
return "<a href=\"%s\">%s</a>" % (target,
@@ -148,19 +148,19 @@ class HtmlWikiMarkup (WikiMarkup):
def str_it(self, elt):
- return "<i>" + self.concat(elt['content']) + "</i>"
+ return "<i>" + self.concat(elt.content) + "</i>"
def str_bold(self, elt):
- return "<b>" + self.concat(elt['content']) + "</b>"
+ return "<b>" + self.concat(elt.content) + "</b>"
def str_hdr(self, elt):
- level = elt['level'] + 1
+ level = elt.level
if level > 4:
level = 4
- return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level)
+ return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level)
def str_bar(self):
- return "<hr/>"
+ return "<hr/>\n"
def str_env(self, elt):
- type = elt['envtype']
- lev = elt['level']
+ type = elt.envtype
+ lev = elt.level
if lev > 4:
@@ -168,6 +168,6 @@ class HtmlWikiMarkup (WikiMarkup):
string = ""
- for s in elt['content']:
- n = s['subtype'];
+ for s in elt.content:
+ n = s.subtype;
string += "<%s>%s</%s>" % (self.envt[type]["elt"][n],
- self.format(s['content']),
+ self.format(s.content),
self.envt[type]["elt"][n])
@@ -179,5 +179,5 @@ class HtmlWikiMarkup (WikiMarkup):
def str_tag(self, elt):
- if elt['tag'] == 'code':
+ if elt.tag == 'code':
self.nested += 1
- s = self.format(elt['content'])
+ s = self.format(elt.content)
self.nested -= 1
@@ -185,8 +185,8 @@ class HtmlWikiMarkup (WikiMarkup):
else:
- s = '<' + elt['tag']
- if elt['args']:
- s += ' ' + str(elt['args'])
+ s = '<' + elt.tag
+ if elt.args:
+ s += ' ' + str(elt.args)
s += '>'
- s += self.format(elt['content'])
- return s + '</' + elt['tag'] + '>'
+ s += self.format(elt.content)
+ return s + '</' + elt.tag + '>'
@@ -194,5 +194,5 @@ class HtmlWikiMarkup (WikiMarkup):
string = "";
- for x in elt['content']:
+ for x in elt.content:
string += self.format(x)
- return "<p>" + string + "</p>"
+ return "<p>" + string + "</p>\n"
@@ -200,3 +200,3 @@ class HtmlWikiMarkup (WikiMarkup):
string = "";
- for x in elt['content']:
+ for x in elt.content:
string += self.format(x)
@@ -207,40 +207,40 @@ class HtmlWikiMarkup (WikiMarkup):
def str_ind(self, elt):
- return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level']
+ return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level
def format(self, elt):
- if elt['type'] == 'TEXT':
- if isinstance(elt['content'],list):
+ if elt.type == 'TEXT':
+ if isinstance(elt.content,list):
string = ""
- for s in elt['content']:
+ for s in elt.content:
string += s
else:
- string = elt['content']
+ string = elt.content
return string
- elif elt['type'] == 'TAG':
+ elif elt.type == 'TAG':
return self.str_tag(elt)
- elif elt['type'] == 'PARA':
+ elif elt.type == 'PARA':
return self.str_para(elt)
- elif elt['type'] == 'PRE':
+ elif elt.type == 'PRE':
return self.str_pre(elt)
- elif elt['type'] == 'IT':
+ elif elt.type == 'IT':
return self.str_it(elt)
- elif elt['type'] == 'BOLD':
+ elif elt.type == 'BOLD':
return self.str_bold(elt)
- elif elt['type'] == 'LINK':
+ elif elt.type == 'LINK':
return self.str_link(elt)
- elif elt['type'] == 'TMPL':
+ elif elt.type == 'TMPL':
return self.str_tmpl(elt)
- elif elt['type'] == 'BAR':
+ elif elt.type == 'BAR':
return self.str_bar()
- elif elt['type'] == 'HDR':
+ elif elt.type == 'HDR':
return self.str_hdr(elt)
- elif elt['type'] == 'REF':
+ elif elt.type == 'REF':
return self.str_ref(elt)
- elif elt['type'] == 'ENV':
+ elif elt.type == 'ENV':
return self.str_env(elt)
- elif elt['type'] == 'IND':
+ elif elt.type == 'IND':
return self.str_ind(elt)
- elif elt['type'] == 'SEQ':
+ elif elt.type == 'SEQ':
string = ""
- for x in elt['content']:
+ for x in elt.content:
string += self.format(x)
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index f36c0a1..dfde565 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -103,34 +103,34 @@ class TexiWikiMarkup (WikiMarkup):
def format(self, elt):
- if elt['type'] == 'TEXT':
- if isinstance(elt['content'],list):
- for s in elt['content']:
+ if elt.type == 'TEXT':
+ if isinstance(elt.content,list):
+ for s in elt.content:
self._print(s)
else:
- self._print(elt['content'])
- elif elt['type'] == 'TAG':
+ self._print(elt.content)
+ elif elt.type == 'TAG':
self.str_tag(elt)
- elif elt['type'] == 'PARA':
+ elif elt.type == 'PARA':
self.str_para(elt)
- elif elt['type'] == 'PRE':
+ elif elt.type == 'PRE':
self.str_pre(elt)
- elif elt['type'] == 'IT':
+ elif elt.type == 'IT':
self.str_it(elt)
- elif elt['type'] == 'BOLD':
+ elif elt.type == 'BOLD':
self.str_bold(elt)
- elif elt['type'] == 'LINK':
+ elif elt.type == 'LINK':
self.str_link(elt)
- elif elt['type'] == 'TMPL':
+ elif elt.type == 'TMPL':
self.str_tmpl(elt)
- elif elt['type'] == 'BAR':
+ elif elt.type == 'BAR':
self.str_bar()
- elif elt['type'] == 'HDR':
+ elif elt.type == 'HDR':
self.str_hdr(elt)
- elif elt['type'] == 'REF':
+ elif elt.type == 'REF':
self.str_ref(elt)
- elif elt['type'] == 'ENV':
+ elif elt.type == 'ENV':
self.str_env(elt)
- elif elt['type'] == 'IND':
+ elif elt.type == 'IND':
self.str_ind(elt)
- elif elt['type'] == 'SEQ':
- for x in elt['content']:
+ elif elt.type == 'SEQ':
+ for x in elt.content:
self.format(x)
@@ -140,9 +140,9 @@ class TexiWikiMarkup (WikiMarkup):
def str_tag(self, elt):
- if elt['tag'] in ['code', 'tt']:
+ if elt.tag in ['code', 'tt']:
save = self._begin_print()
self.nested += 1
- self.format(elt['content'])
+ self.format(elt.content)
self.nested -= 1
s = self._end_print(save)
- if elt['isblock']:
+ if elt.isblock:
self._print('@example', nl=True, escape=False)
@@ -152,14 +152,14 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@code{%s}' % s, escape=False)
- elif elt['tag'] == 'div':
- if 'args' in elt and 'id' in elt['args']:
- self._print("@anchor{%s}\n" % elt['args']['id'],
+ elif elt.tag == 'div':
+ if elt.args and 'id' in elt.args:
+ self._print("@anchor{%s}\n" % elt.args['id'],
nl=True, escape=False)
- self.format(elt['content'])
+ self.format(elt.content)
else:
- self._print('<' + elt['tag'])
- if elt['args']:
- self._print(' ' + elt['args'])
+ self._print('<' + elt.tag)
+ if elt.args:
+ self._print(' ' + elt.args)
self._print('>');
- self.format(elt['content']);
- self._print('</' + elt['tag'] + '>')
+ self.format(elt.content);
+ self._print('</' + elt.tag + '>')
@@ -168,3 +168,3 @@ class TexiWikiMarkup (WikiMarkup):
self._print('\n', nl=True)
- for x in elt['content']:
+ for x in elt.content:
self.format(x)
@@ -176,3 +176,3 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@example\n', nl=True, escape=False)
- for x in elt['content']:
+ for x in elt.content:
self.format(x)
@@ -187,3 +187,3 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@i{', escape=False)
- self.concat(elt['content'])
+ self.concat(elt.content)
self._print('}', escape=False)
@@ -192,3 +192,3 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@b{', escape=False)
- self.concat(elt['content'])
+ self.concat(elt.content)
self._print('}', escape=False)
@@ -196,9 +196,9 @@ class TexiWikiMarkup (WikiMarkup):
def str_hdr(self, elt):
- level = elt['level']
+ level = elt.level
if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
self._print("@* ", nl=True, escape=False)
- self.format(elt['content'])
+ self.format(elt.content)
else:
self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False)
- self.format(elt['content'])
+ self.format(elt.content)
self._print(None, nl=True)
@@ -206,3 +206,3 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@node ', nl=True, escape=False)
- self.format(elt['content'])
+ self.format(elt.content)
self._print('\n')
@@ -214,4 +214,4 @@ class TexiWikiMarkup (WikiMarkup):
def str_ind(self, elt):
- self._print("@w{ }" * elt['level'], nl=True, escape=False)
- self.format(elt['content'])
+ self._print("@w{ }" * elt.level, nl=True, escape=False)
+ self.format(elt.content)
self._print(None, nl=True)
@@ -219,7 +219,7 @@ class TexiWikiMarkup (WikiMarkup):
def str_env(self, elt):
- if elt['envtype'] == 'unnumbered':
+ if elt.envtype == 'unnumbered':
self._print('@itemize @bullet\n', nl=True, escape=False)
- for s in elt['content']:
+ for s in elt.content:
self._print('@item ', nl=True, escape=False)
- self.format(s['content'])
+ self.format(s.content)
self._print(None, nl=True)
@@ -227,7 +227,7 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@end itemize\n', nl=True, escape=False)
- elif elt['envtype'] == 'numbered':
+ elif elt.envtype == 'numbered':
self._print('@enumerate\n', nl=True, escape=False)
- for s in elt['content']:
+ for s in elt.content:
self._print('@item ', nl=True, escape=False)
- self.format(s['content'])
+ self.format(s.content)
self._print(None, nl=True)
@@ -235,11 +235,11 @@ class TexiWikiMarkup (WikiMarkup):
self._print('@end enumerate\n', nl=True, escape=False)
- elif elt['envtype'] == 'defn':
+ elif elt.envtype == 'defn':
self._print('@table @asis\n', nl=True, escape=False)
- for s in elt['content']:
- if s['subtype'] == 0:
+ for s in elt.content:
+ if s.subtype == 0:
self._print('@item ', nl=True, escape=False)
- self.format(s['content'])
+ self.format(s.content)
self._print(None, nl=True)
else:
- self.format(s['content'])
+ self.format(s.content)
self._print(None, nl=True)
@@ -250,7 +250,7 @@ class TexiWikiMarkup (WikiMarkup):
save = self._begin_print()
- self.format(elt['content'][0])
+ self.format(elt.content[0])
arg = self._end_print()
- if len(elt['content']) > 1:
+ if len(elt.content) > 1:
s = []
- for x in elt['content'][0:2]:
+ for x in elt.content[0:2]:
self._begin_print()
@@ -281,5 +281,5 @@ class TexiWikiMarkup (WikiMarkup):
def str_ref(self, elt):
- target = elt['ref']
+ target = elt.ref
save = self._begin_print()
- self.format(elt['content'])
+ self.format(elt.content)
text = self._end_print(save)
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index 916391e..ee1748c 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -67,5 +67,5 @@ class TextWikiMarkup (WikiMarkup):
def fmtlink(self, elt, istmpl):
- arg = self.format(elt['content'][0])
- if len(elt['content']) > 1:
- s = [x for x in map(self.format, elt['content'])]
+ arg = self.format(elt.content[0])
+ if len(elt.content) > 1:
+ s = [x for x in map(self.format, elt.content)]
text = s[1]
@@ -144,5 +144,5 @@ class TextWikiMarkup (WikiMarkup):
def str_tag(self, elt):
- if elt['tag'] == 'code':
+ if elt.tag == 'code':
self.nested += 1
- s = self.format(elt['content'])
+ s = self.format(elt.content)
self.nested -= 1
@@ -150,6 +150,6 @@ class TextWikiMarkup (WikiMarkup):
else:
- s = '<' + elt['tag']
- if elt['args']:
- s += ' ' + str(elt['args'])
- s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
+ s = '<' + elt.tag
+ if elt.args:
+ s += ' ' + str(elt.args)
+ s += '>' + self.format(elt.content) + '</' + elt.tag + '>'
return s
@@ -157,6 +157,6 @@ class TextWikiMarkup (WikiMarkup):
def format(self, elt):
- if elt['type'] == 'TEXT':
- if isinstance(elt['content'],list):
+ if elt.type == 'TEXT':
+ if isinstance(elt.content,list):
string = ""
- for s in elt['content']:
+ for s in elt.content:
if string:
@@ -168,18 +168,18 @@ class TextWikiMarkup (WikiMarkup):
else:
- string = elt['content']
- elif elt['type'] == 'PRE':
+ string = elt.content
+ elif elt.type == 'PRE':
string = ""
- for x in elt['content']:
+ for x in elt.content:
string += self.format(x)
string += '\n'
- elif elt['type'] == 'PARA':
+ elif elt.type == 'PARA':
string = "";
- for x in elt['content']:
+ for x in elt.content:
string += self.format(x)
string = self.fmtpara(string) + '\n\n'
- elif elt['type'] == 'TAG':
+ elif elt.type == 'TAG':
string = self.str_tag(elt)
- elif elt['type'] == 'IT':
+ elif elt.type == 'IT':
string = ""
- for x in elt['content']:
+ for x in elt.content:
s = self.format(x)
@@ -188,5 +188,5 @@ class TextWikiMarkup (WikiMarkup):
string = "_" + string.lstrip(" ") + "_"
- elif elt['type'] == 'BOLD':
+ elif elt.type == 'BOLD':
string = ""
- for x in elt['content']:
+ for x in elt.content:
s = self.format(x)
@@ -199,5 +199,5 @@ class TextWikiMarkup (WikiMarkup):
string = string.upper()
- elif elt['type'] == 'LINK':
+ elif elt.type == 'LINK':
string = self.fmtlink(elt, False)
- elif elt['type'] == 'TMPL':
+ elif elt.type == 'TMPL':
s = self.fmtlink(elt, True)
@@ -207,3 +207,3 @@ class TextWikiMarkup (WikiMarkup):
string = s
- elif elt['type'] == 'BAR':
+ elif elt.type == 'BAR':
w = self.width
@@ -212,11 +212,10 @@ class TextWikiMarkup (WikiMarkup):
string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
- elif elt['type'] == 'HDR':
- level = elt['level']
- string = "\n" + ("*" * level) + " " + \
- self.format(elt['content']).lstrip(" ") + "\n\n"
- elif elt['type'] == 'REF':
- string = self.xref(self.format(elt['content']), elt['ref'])
- elif elt['type'] == 'ENV':
- type = elt['envtype']
- lev = elt['level']
+ elif elt.type == 'HDR':
+ string = "\n" + ("*" * elt.level) + " " + \
+ self.format(elt.content).lstrip(" ") + "\n\n"
+ elif elt.type == 'REF':
+ string = self.xref(self.format(elt.content), elt.ref)
+ elif elt.type == 'ENV':
+ type = elt.envtype
+ lev = elt.level
if lev > self.width - 4:
@@ -225,6 +224,6 @@ class TextWikiMarkup (WikiMarkup):
n = 1
- for s in elt['content']:
+ for s in elt.content:
if not string.endswith("\n"):
string += "\n"
- x = self.format(s['content'])
+ x = self.format(s.content)
if type == "unnumbered":
@@ -235,3 +234,3 @@ class TextWikiMarkup (WikiMarkup):
elif type == "defn":
- if s['subtype'] == 0:
+ if s.subtype == 0:
string += self.indent(lev-1, x)
@@ -242,7 +241,7 @@ class TextWikiMarkup (WikiMarkup):
string += "\n"
- elif elt['type'] == 'IND':
- string = (" " * elt['level']) + self.format(elt['content']) + '\n'
- elif elt['type'] == 'SEQ':
+ elif elt.type == 'IND':
+ string = (" " * elt.level) + self.format(elt.content) + '\n'
+ elif elt.type == 'SEQ':
string = ""
- for x in elt['content']:
+ for x in elt.content:
if len(string) > 1 and not string[-1].isspace():
diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py
index 2fad0af..f971347 100644
--- a/WikiTrans/wikimarkup.py
+++ b/WikiTrans/wikimarkup.py
@@ -21,2 +21,3 @@ import re
from types import *
+from wikitoken import *
@@ -25,2 +26,6 @@ __all__ = [ "BaseWikiMarkup", "WikiMarkup",
+class UnexpectedToken(Exception):
+ def __init__(self, value):
+ self.value = value
+
class TagAttributeSyntax(Exception):
@@ -118,107 +123,7 @@ class BaseWikiMarkup(object):
if self.debug_level >= lev:
- print("[DEBUG]", fmt % argv)
-
- def print_dump_prefix(self, level, file):
- file.write("[DUMP]" + ' ' * (2*level + 1))
+ for l in (fmt % argv).split('\n'):
+ print("[DEBUG] %s" % l)
- def dump_nil(self, node, level, file):
- pass
-
- def dump_text(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("CONTENT: \"%s\"\n" % node['content'])
-
- def dump_delim(self, node, level, file):
- file.write("'%s'" % node['content'])
- if 'continuation' in node and node['continuation']:
- file.write(" (cont)")
- file.write("\n")
-
- def dump_tag(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("TAG: %s\n" % node['tag'])
- if 'isblock' in node:
- self.print_dump_prefix(level, file)
- file.write("PLACEMENT: %s\n" % ('BLOCK' if node['isblock'] else 'INLINE'))
- if 'args' in node:
- self.print_dump_prefix(level, file)
- file.write("ARGS: %s\n" % node['args'])
- if 'content' in node:
- self.dump_node(node['content'], level + 1, file)
-
- def dump_seq(self, node, level, file):
- self.dump(node['content'], level + 1, file)
-
- def dump_ref(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("REF: %s\n" % node['ref'])
- self.dump_node(node['content'], level + 1, file)
-
- def dump_hdr(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("LEVEL: %s\n" % node['level'])
- self.dump_node(node['content'], level + 1, file)
-
- def dump_elt(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("SUBTYPE: %s\n" % node['subtype'])
- self.dump_node(node['content'], level + 1, file)
-
- def dump_env(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("ENVTYPE: %s\n" % node['envtype'])
- self.print_dump_prefix(level, file)
- file.write("LEVEL: %s\n" % node['level'])
- self.dump(node['content'], level + 1, file)
-
- def dump_ind(self, node, level, file):
- self.print_dump_prefix(level, file)
- file.write("LEVEL: %s\n" % node['level'])
- self.dump_node(node['content'], level + 1, file)
-
- def dump_link(self, node, level, file):
- self.dump(node['content'], level + 1, file)
-
- dump_type = {
- 'NIL': dump_nil,
- 'NL': dump_nil,
- 'TEXT': dump_text,
- 'DELIM': dump_delim,
- 'OTAG': dump_tag,
- 'CTAG': dump_tag,
- 'TAG': dump_tag,
- 'SEQ': dump_seq,
- 'REF': dump_ref,
- 'HDR': dump_hdr,
- 'ELT': dump_elt,
- 'ENV': dump_env,
- 'IND': dump_ind,
- 'BAR': dump_nil,
- 'PARA': dump_seq,
- 'PRE': dump_text,
- 'BOLD': dump_seq,
- 'IT': dump_seq,
- 'LINK': dump_link,
- }
+ inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ]
- def dump_node(self, node, level, file):
- if type(node) != dict:
- file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node))
- return
-
- self.print_dump_prefix(level, file)
- file.write("NODE " + node['type'] + ":\n")
- if node['type'] in self.dump_type:
- self.dump_type[node['type']](self, node, level, file)
- else:
- self.print_dump_prefix(level, file)
- file.write("(UNHANDLED) ")
- file.write("%s\n" % node)
- self.print_dump_prefix(level, file)
- file.write("END NODE " + node['type'] + "\n")
-
- def dump(self, tree, level=0, file=sys.stdout):
- for node in tree:
- self.dump_node(node, level, file)
-
def tokread(self):
@@ -235,3 +140,3 @@ class BaseWikiMarkup(object):
if not line or line == "":
- yield({ 'type': 'NIL' })
+ yield(WikiNode(type='NIL'))
break
@@ -239,3 +144,3 @@ class BaseWikiMarkup(object):
if line == '\n':
- yield({ 'type': 'NL', 'content': line })
+ yield(WikiNode(type='NL'))
line = None
@@ -248,3 +153,3 @@ class BaseWikiMarkup(object):
if (pos < m.start(0)):
- yield({'type': 'TEXT', 'content': line[pos:m.start(0)]})
+ yield(WikiTextNode(content=line[pos:m.start(0)]))
pos = m.start(0)
@@ -262,4 +167,3 @@ class BaseWikiMarkup(object):
if m and m.group('tag') == 'nowiki':
- yield({ 'type': 'TEXT',
- 'content': line[pos:m.start(0)] })
+ yield(WikiTextNode(content=line[pos:m.start(0)] ))
pos = m.end(0)
@@ -267,4 +171,3 @@ class BaseWikiMarkup(object):
- yield({ 'type': 'TEXT',
- 'content': line[pos:] })
+ yield(WikiTextNode(content=line[pos:]))
@@ -277,16 +180,14 @@ class BaseWikiMarkup(object):
try:
- t = { 'type': 'OTAG',
- 'tag': m.group('tag'),
- 'args': TagAttributes(m.group('args')) }
- yield(t)
+ yield(WikiTagNode(type='OTAG',
+ tag=m.group('tag'),
+ isblock=(line[pos] == '\n'),
+ args=TagAttributes(m.group('args'))))
if m.group('closed'):
- t['type'] = 'CTAG'
- yield(t)
+ yield(WikiTagNode(type='CTAG',
+ tag=m.group('tag')))
except TagAttributeSyntax:
- yield({'type': 'TEXT',
- 'content': m.group(0)})
+ yield(WikiTextNode(content=m.group(0)))
continue
else:
- yield({ 'type': 'TEXT',
- 'content': m.group(0) })
+ yield(WikiTextNode(content=m.group(0)))
continue
@@ -296,4 +197,4 @@ class BaseWikiMarkup(object):
if m.group('tag') in self.tags:
- yield( { 'type': 'CTAG',
- 'tag': m.group('tag') } )
+ yield(WikiTagNode(type='CTAG',
+ tag=m.group('tag')))
pos = m.end(0)
@@ -301,4 +202,3 @@ class BaseWikiMarkup(object):
else:
- yield( { 'type': 'TEXT',
- 'content': line[pos:pos+1] })
+ yield(WikiTextNode(content=line[pos:pos+1]))
pos += 1
@@ -309,10 +209,11 @@ class BaseWikiMarkup(object):
if content[0] in self.envtypes:
- t = { 'type': 'DELIM',
- 'content': content,
- 'continuation': pos < len(line) and line[pos] == ":" }
- if t['continuation']:
- t['content'] += t['content'][0]
+ node = WikiDelimNode(type='DELIM',
+ content=content,
+ isblock=True,
+ continuation=pos < len(line) and line[pos] == ":")
+ if node.continuation:
+ node.content += node.content[0]
pos += 1
- yield(t)
+ yield(node)
@@ -321,5 +222,5 @@ class BaseWikiMarkup(object):
else:
- yield({ 'type': 'DELIM',
- 'content': content.strip(),
- 'continuation': False})
+ yield(WikiDelimNode(type='DELIM',
+ isblock=(content.strip() not in self.inline_delims),
+ content=content.strip()))
continue
@@ -329,9 +230,6 @@ class BaseWikiMarkup(object):
if line[pos:-1] != '':
- yield({ 'type': 'TEXT',
- 'content': line[pos:-1] })
- yield({ 'type': 'NL',
- 'content': '\n' })
+ yield(WikiTextNode(content=line[pos:-1]))
+ yield(WikiNode(type='NL'))
else:
- yield({ 'type': 'TEXT',
- 'content': line[pos:] })
+ yield(WikiTextNode(content=line[pos:]))
line = None
@@ -366,7 +264,7 @@ class BaseWikiMarkup(object):
for i in range(0,len(self.toklist)):
- if self.toklist[i]['type'] == 'DELIM' \
- and (self.toklist[i]['content'] == "''" \
- or self.toklist[i]['content'] == "'''"):
+ if self.toklist[i].type == 'DELIM' \
+ and (self.toklist[i].content == "''" \
+ or self.toklist[i].content == "'''"):
if len(stack) > 0:
- if self.toklist[stack[-1]]['content'] == self.toklist[i]['content']:
+ if self.toklist[stack[-1]].content == self.toklist[i].content:
# Case 1: just pop the matching delimiter off the stack
@@ -379,4 +277,4 @@ class BaseWikiMarkup(object):
elif i < len(self.toklist) \
- and self.toklist[i+1]['type'] == 'DELIM' \
- and self.toklist[stack[-1]]['content'] == self.toklist[i+1]['content']:
+ and self.toklist[i+1].type == 'DELIM' \
+ and self.toklist[stack[-1]].content == self.toklist[i+1].content:
# Case 3: swap current and next tokens
@@ -393,6 +291,19 @@ class BaseWikiMarkup(object):
for i in stack:
- self.toklist[i]['type'] = 'TEXT'
+ self.toklist[i].type = 'TEXT' # FIXME
+
+ mark = []
+
+ def push_mark(self):
+ self.mark.append(self.tokind)
- def peektkn(self, off=0):
- return self.toklist[self.tokind-off]
+ def pop_mark(self):
+ self.tokind = self.mark.pop()
+
+ def clear_mark(self):
+ self.mark.pop()
+
+ def lookahead(self, off=0):
+ tok = self.toklist[self.tokind+off]
+ self.dprint(20, "lookahead(%s): %s", off, tok)
+ return tok
@@ -402,31 +313,176 @@ class BaseWikiMarkup(object):
def getkn(self):
- self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
+ self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL'
if self.tokind == len(self.toklist):
- return { 'type': 'NIL' }
+ return WikiNode(type='NIL')
tok = self.toklist[self.tokind]
self.tokind = self.tokind + 1
+ self.dprint(20, "getkn: %s", tok)
return tok
- def ungetkn(self):
+ def ungetkn(self, tok=None):
self.tokind = self.tokind - 1
- self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
+ self.newline = self.tokind == 0 or self.toklist[self.tokind-1].type == 'NL'
+ if tok:
+ self.toklist[self.tokind] = tok
+ self.dprint(20, "ungetkn: %s", tok)
return self.toklist[self.tokind]
+ def fixuptkn(self, tok):
+ if self.tokind == 0:
+ raise IndexError('wikimarkup.fixuptkn called at start of input')
+ self.toklist[self.tokind-1] = tok
+ return tok
+
+ def dump(self, tree, file=sys.stdout):
+ for node in tree:
+ file.write(str(node))
+ file.write('\n')
+
+ def is_block_end(self, tok):
+ if tok.type == 'NIL':
+ return True
+ elif tok.type == 'NL':
+ if self.lookahead().type == 'NIL':
+ return True
+ elif self.lookahead().type == 'NL':
+ self.getkn()
+ return True
+ elif tok.type in ['DELIM', 'CTAG', 'TAG']:
+ if tok.isblock:
+ self.ungetkn(tok)
+ return True
+ return False
+
+ def parse_para(self, tok):
+ self.dprint(80, "ENTER parse_para: %s", tok)
+
+ acc = { 'seq': [],
+ 'textlist': [] }
+
+ def flush():
+ if acc['textlist']:
+ acc['seq'].append(WikiContentNode(type='TEXT',
+ content=''.join(acc['textlist'])))
+ acc['textlist'] = []
+
+ if isinstance(tok, WikiContentNode) \
+ and isinstance(tok.content,str) \
+ and re.match("^[ \t]", tok.content):
+ type = 'PRE'
+ rx = re.compile("^\S")
+ else:
+ type = 'PARA'
+ rx = re.compile("^[ \t]")
+
+ while not self.is_block_end(tok):
+ if tok.type == 'TEXT':
+ if rx and self.newline and rx.match(tok.content):
+ self.ungetkn()
+ break
+ acc['textlist'].append(tok.content)
+ elif tok.type == 'NL':
+ acc['textlist'].append('\n')
+ elif tok.type == 'OTAG':
+ flu