about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-12 23:11:40 +0300
committer Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-12 23:11:40 +0300
commit 28072898f1bd9a925d73ac187d560198d6345524 (patch)
tree a46d781fb85d9dda61fc8f68e0ba6ec43d60ce55
parent 75672b57a2d63f01d00795fe8d661d1efe7b6e8d (diff)
download wit-28072898f1bd9a925d73ac187d560198d6345524.tar.gz
wit-28072898f1bd9a925d73ac187d560198d6345524.tar.bz2
Improve tag handling and debugging
* wikimarkup.py: Rewrite tag recognition. Implement dump method.
* wikicvt.py: New options -D (--dump), and -t dump
* wiki2html.py (input_tag): Remove method
(str_tag): Change handling of tags
* wiki2texi.py: Likewise.
* wiki2text.py: Likewise.
-rw-r--r-- wiki2html.py 28
-rw-r--r-- wiki2texi.py 37
-rw-r--r-- wiki2text.py 27
-rwxr-xr-x wikicvt.py 26
-rw-r--r-- wikimarkup.py 317
5 files changed, 309 insertions, 126 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 441bc76..66939c4 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -151,67 +151,63 @@ class HtmlWikiMarkup (WikiMarkup):
level = elt['level'] + 1
if level > 4:
level = 4
return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level)
def str_bar(self):
return "<hr/>"
def str_env(self, elt):
type = elt['envtype']
lev = elt['level']
if lev > 4:
lev = 2
string = ""
for s in elt['content']:
n = s['subtype'];
string += "<%s>%s</%s>" % (self.envt[type]["elt"][n],
self.format(s['content']),
self.envt[type]["elt"][n])
return "<%s>%s</%s>" % (self.envt[type]["hdr"],
string,
self.envt[type]["hdr"])
return string
- supported_tags = [ 'nowiki', 'code' ]
- def input_tag(self, tag):
- return tag['tag'] in self.supported_tags
-
def str_tag(self, elt):
if elt['tag'] == 'nowiki':
- return '<pre>' + elt['content'] + '</pre>'
+ return '<pre>' + self.format(elt['content']) + '</pre>'
elif elt['tag'] == 'code':
- kwdict = {
- 'nested': self.nested + 1,
- 'lang': self.lang,
- 'text': elt['content'],
- 'html_base': self.html_base,
- 'image_base': self.image_base,
- 'media_base': self.media_base }
- markup = HtmlWiktionaryMarkup(**kwdict)
- markup.debug_level = self.debug_level
- markup.parse()
- return '<pre><code>' + str(markup) + '</code></pre>' #FIXME
+ self.nested += 1
+ s = self.format(elt['content'])
+ self.nested -= 1
+ return '<pre><code>' + s + '</code></pre>' #FIXME
+ else:
+ s = '<' + elt['tag']
+ if elt['args']:
+ s += ' ' + elt['args']
+ s += '>'
+ s += self.format(elt['content'])
+ return s + '</' + elt['tag'] + '>'
def str_para(self, elt):
string = "";
for x in elt['content']:
string += self.format(x)
return "<p>" + string + "</p>"
def str_pre(self, elt):
string = "";
for x in elt['content']:
string += self.format(x)
if self.nested:
return string
return '<pre>' + string + '</pre>'
def str_ind(self, elt):
return ("&nbsp;" * 2 * elt['level']) + self.format(elt['content'])
def format(self, elt):
if elt['type'] == 'TEXT':
if isinstance(elt['content'],list):
string = ""
for s in elt['content']:
string += s
diff --git a/wiki2texi.py b/wiki2texi.py
index 7cc67bd..0b3eb77 100644
--- a/wiki2texi.py
+++ b/wiki2texi.py
@@ -98,86 +98,85 @@ class TexiWikiMarkup (WikiMarkup):
elif elt['type'] == 'BOLD':
return self.str_bold(elt)
elif elt['type'] == 'LINK':
return self.str_link(elt)
elif elt['type'] == 'TMPL':
return self.str_tmpl(elt)
elif elt['type'] == 'BAR':
return self.str_bar()
elif elt['type'] == 'HDR':
return self.str_hdr(elt)
elif elt['type'] == 'REF':
return self.str_ref(elt)
elif elt['type'] == 'ENV':
return self.str_env(elt)
elif elt['type'] == 'IND':
return self.str_ind(elt)
elif elt['type'] == 'SEQ':
string = ""
for x in elt['content']:
string += self.format(x)
return string
else:
return str(elt)
- supported_tags = [ 'nowiki', 'code' ]
- def input_tag(self, tag):
- return tag['tag'] in self.supported_tags
-
def str_tag(self, elt):
if elt['tag'] == 'nowiki':
- return '@example\n' + elt['content'] + '@end example\n'
+ return '@example\n' + self.format(elt['content']) + '@end example\n'
elif elt['tag'] == 'code':
- kwdict = {
- 'nested': self.nested + 1,
- 'lang': self.lang,
- 'text': elt['content'],
- 'html_base': self.html_base,
- 'image_base': self.image_base,
- 'media_base': self.media_base }
- markup = TexiWikiMarkup(**kwdict)
- markup.debug_level = self.debug_level
- markup.parse()
- s = str(markup)
+ self.nested += 1
+ s = self.format(elt['content'])
+ self.nested -= 1
if not s.endswith("\n"):
- s += "\n";
+ s += "\n"
return '@example\n' + s + '@end example\n'
-
+ elif elt['tag'] == 'tt':
+ self.nested += 1
+ s = self.format(elt['content'])
+ self.nested -= 1
+ return "@code{%s}" % s
+ else:
+ s = '<' + elt['tag']
+ if elt['args']:
+ s += ' ' + elt['args']
+ s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
+ return s
+
def str_para(self, elt):
string = "";
for x in elt['content']:
string += self.format(x)
return "\n" + string + "\n"
def str_pre(self, elt):
string = "";
for x in elt['content']:
string += self.format(x)
if self.nested:
return string
if not string.endswith("\n"):
string += "\n";
- return '@example\n' + string + '@end example\n'
+ return '\n@example\n' + string + '@end example\n'
def concat(self, eltlist):
string = ""
for x in eltlist:
string += self.format(x)
return string
def str_it(self, elt):
return "@i{" + self.concat(elt['content']) + "}"
def str_bold(self, elt):
return "@b{" + self.concat(elt['content']) + "}"
def nodename(self, elt):
return self.format(elt) # FIXME
def str_hdr(self, elt):
level = elt['level']
if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
s ="\n@* %s" % (self.format(elt['content']))
else:
s = self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " " + self.format(elt['content']) + "\n"
if self.sectcomm[self.sectioning_model][0] == '@top':
s += "@node %s\n" % (self.nodename(elt['content']))
diff --git a/wiki2text.py b/wiki2text.py
index 27a7051..d4cab81 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -121,67 +121,62 @@ class TextWikiMarkup (WikiMarkup):
# print "IN: '%s'" % (text)
# print "OUT: '%s'" % (s)
return s
def fmtpara(self, input):
output = ""
linebuf = ""
length = 0
for s in input.split():
wlen = len(s)
if linebuf.endswith("."):
wsc = 2
else:
wsc = 1
if length + wsc + wlen > self.width:
# FIXME: fill out linebuf
output += linebuf + '\n'
wsc = 0
length = 0
linebuf = ""
linebuf += " " * wsc + s
length += wsc + wlen
return output + linebuf
- supported_tags = [ 'nowiki', 'code' ]
- def input_tag(self, tag):
- return tag['tag'] in self.supported_tags
-
def str_tag(self, elt):
if elt['tag'] == 'nowiki':
- return elt['content']
+ return self.format(elt['content'])
elif elt['tag'] == 'code':
- kwdict = {
- 'nested': self.nested + 1,
- 'lang': self.lang,
- 'text': elt['content'],
- 'html_base': self.html_base,
- 'image_base': self.image_base,
- 'media_base': self.media_base }
- markup = TextWiktionaryMarkup(**kwdict)
- markup.debug_level = self.debug_level
- markup.parse()
- return str(markup)
+ self.nested += 1
+ s = self.format(elt['content'])
+ self.nested -= 1
+ return s #FIXME
+ else:
+ s = '<' + elt['tag']
+ if elt['args']:
+ s += ' ' + elt['args']
+ s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
+ return s
def format(self, elt):
if elt['type'] == 'TEXT':
if isinstance(elt['content'],list):
string = ""
for s in elt['content']:
if string:
if string.endswith("."):
string += " "
else:
string += " "
string += s
else:
string = elt['content']
elif elt['type'] == 'PRE':
string = ""
for x in elt['content']:
string += self.format(x)
string += '\n'
elif elt['type'] == 'PARA':
string = "";
for x in elt['content']:
string += self.format(x)
string = self.fmtpara(string) + '\n\n'
diff --git a/wikicvt.py b/wikicvt.py
index e61e28b..c8ca887 100755
--- a/wikicvt.py
+++ b/wikicvt.py
@@ -1,105 +1,121 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008,2015 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import getopt
+import StringIO
from wiki2html import *
from wiki2text import *
from wiki2texi import *
+class DumpWikiMarkup (WikiMarkup):
+ def __str__(self):
+ if self.tree:
+ s = StringIO.StringIO()
+ self.dump(self.tree, 0, s)
+ return s.getvalue()
+ else:
+ return ""
+
def usage(code=0):
print """
usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val]
[--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file
""" % (sys.argv[0])
sys.exit(code)
handlers = {
+ 'dump': {
+ 'default': DumpWikiMarkup
+ },
'html': {
'default': HtmlWikiMarkup,
'wiktionary': HtmlWiktionaryMarkup
},
'text': {
'default': TextWikiMarkup,
'wiktionary': TextWiktionaryMarkup
},
'texi': {
'default': TexiWikiMarkup
}
}
def main():
verbose_flag = 0
itype = 'default'
otype = 'html'
lang = "pl"
kwdict = {}
debug = 0
try:
- opts, args = getopt.getopt(sys.argv[1:], "d:I:hl:o:t:v",
- ["debug=", "help", "lang=", "option=",
- "to", "type", "input-text", "input-type",
+ opts, args = getopt.getopt(sys.argv[1:], "Dd:I:hl:o:t:v",
+ ["dump",
+ "debug=", "help", "lang=", "option=",
+ "to=", "type=", "input-text", "input-type=",
"verbose" ])
except getopt.GetoptError:
usage(1)
for o, a in opts:
if o in ("-h", "--help"):
usage()
elif o in ("-v", "--verbose"):
verbose_flag = verbose_flag + 1
elif o in ("-I", "--input-type"):
itype = a
elif o in ("-t", "--to", "--type"):
otype = a
elif o in ("-l", "--lang"):
lang = a
elif o in ("-o", "--option"):
(kw,sep,val) = a.partition('=')
if val != '':
kwdict[kw] = val
elif o == "--input-text":
input_text = True
elif o in ("-d", "--debug"):
debug = eval(a)
+ elif o in ("-D", "--dump"):
+ otype = 'dump'
if len(args) == 1:
if args[0] == '-':
kwdict['file'] = sys.stdin
else:
kwdict['filename'] = args[0]
else:
usage(1)
kwdict['lang']=lang
- if handlers.has_key(otype):
- if handlers[otype].has_key(itype):
+ if otype in handlers:
+ if itype in handlers[otype]:
markup = handlers[otype][itype](**kwdict)
markup.debug_level = debug
markup.parse()
print str(markup)
exit(0)
else:
print "unsupported input type: %s" % (itype)
else:
print "unsupported output type: %s" % (otype)
exit(1)
if __name__ == '__main__':
main()
diff --git a/wikimarkup.py b/wikimarkup.py
index fde1ec1..9a79d1e 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -1,155 +1,274 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008, 2009, 2015 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import re
from types import *
__all__ = [ "BaseWikiMarkup", "WikiMarkup",
"envtypes" ]
-delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)")
-otag = re.compile("^\s*<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>.+))?\s*(?P<closed>/)?>")
-ctag = re.compile("^\s*</(?P<tag>[a-zA-Z0-9_]+)\s*>")
+delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
+otag = re.compile("(?P<pfx>[^<]*)<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>.+))?\s*(?P<closed>/)?>")
+ctag = re.compile("(?P<pfx>[^<]*)</(?P<tag>[a-zA-Z0-9_]+)\s*>")
close_delim = {
'[': ']',
'[[': ']]',
'{{': '}}'
}
# Environment types:
envtypes = { "*": [ "unnumbered", 0 ],
"#": [ "numbered", 0 ],
";": [ "defn", 0 ],
":": [ "defn", 1 ]
}
class BaseWikiMarkup(object):
toklist = None
tokind = 0
newline = 0
tree = None
+ tags = [ 'code', 'nowiki', 'tt', 'div' ]
+
nested = 0
debug_level = 0
def dprint(self, lev, fmt, *argv):
if self.debug_level >= lev:
print "[DEBUG]", fmt % argv
- def input_tag(self, tag):
+ def print_dump_prefix(self, level, file):
+ file.write("[DUMP]" + ' ' * (2*level + 1))
+
+ def dump_nil(self, node, level, file):
pass
+ def dump_text(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("CONTENT: \"%s\"\n" % node['content'])
+
+ def dump_delim(self, node, level, file):
+ file.write("'%s'" % node['content'])
+ if 'continuation' in node:
+ file.write(" (cont)")
+ file.write("\n")
+
+ def dump_tag(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("TAG: %s\n" % node['tag'])
+ if 'args' in node:
+ self.print_dump_prefix(level, file)
+ file.write("ARGS: %s\n" % node['args'])
+ if 'content' in node:
+ self.dump_node(node['content'], level + 1, file)
+
+ def dump_seq(self, node, level, file):
+ self.dump(node['content'], level + 1, file)
+
+ def dump_ref(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("REF: %s\n" % node['ref'])
+ self.dump_node(node['content'], level + 1, file)
+
+ def dump_hdr(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("LEVEL: %s\n" % node['level'])
+ self.dump_node(node['content'], level + 1, file)
+
+ def dump_elt(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("SUBTYPE: %s\n" % node['subtype'])
+ self.dump_node(node['content'], level + 1, file)
+
+ def dump_env(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("ENVTYPE: %s\n" % node['envtype'])
+ self.print_dump_prefix(level, file)
+ file.write("LEVEL: %s\n" % node['level'])
+ self.dump(node['content'], level + 1, file)
+
+ def dump_ind(self, node, level, file):
+ self.print_dump_prefix(level, file)
+ file.write("LEVEL: %s\n" % node['level'])
+ self.dump_node(node['content'], level + 1, file)
+
+ def dump_link(self, node, level, file):
+ self.dump(node['content'], level + 1, file)
+
+ dump_type = {
+ 'NIL': dump_nil,
+ 'NL': dump_nil,
+ 'TEXT': dump_text,
+ 'DELIM': dump_delim,
+ 'OTAG': dump_tag,
+ 'CTAG': dump_tag,
+ 'TAG': dump_tag,
+ 'SEQ': dump_seq,
+ 'REF': dump_ref,
+ 'HDR': dump_hdr,
+ 'ELT': dump_elt,
+ 'ENV': dump_env,
+ 'IND': dump_ind,
+ 'BAR': dump_nil,
+ 'PARA': dump_seq,
+ 'PRE': dump_text,
+ 'BOLD': dump_seq,
+ 'IT': dump_seq,
+ 'LINK': dump_link,
+ }
+
+ def dump_node(self, node, level, file):
+ if type(node) != dict:
+ file.write("UNHANDLED NODE: %s, %s\n" % (type(node),node))
+ return
+
+ self.print_dump_prefix(level, file)
+ file.write("NODE " + node['type'] + ":\n")
+ if node['type'] in self.dump_type:
+ self.dump_type[node['type']](self, node, level, file)
+ else:
+ self.print_dump_prefix(level, file)
+ file.write("(UNHANDLED) ")
+ file.write("%s\n" % node)
+ self.print_dump_prefix(level, file)
+ file.write("END NODE " + node['type'] + "\n")
+
+ def dump(self, tree, level=0, file=sys.stdout):
+ for node in tree:
+ self.dump_node(node, level, file)
+
def tokread(self):
line = None
pos = 0
while 1:
if (not line or pos == len(line)):
try:
line = self.input()
pos = 0
except StopIteration:
line = u''
if not line or line == "":
yield({ 'type': 'NIL' })
break
if line == '\n':
yield({ 'type': 'NL', 'content': line })
line = None
continue
self.dprint(100, "LINE: %s", line[pos:])
m = delim.search(line, pos)
if m:
if (pos < m.start(0)):
yield({'type': 'TEXT', 'content': line[pos:m.start(0)]})
pos = m.end(0)
- if envtypes.has_key(m.group(0)[0]) and line[pos] == ":":
- # FIXME?
- # FIXME: What's "extra"?
+
+ if m and line[m.start(0)] != '<':
+ if m.group(0)[0] in envtypes and pos < len(line) and line[pos] == ":":
yield({ 'type': 'DELIM',
- 'content': m.group(0) })
+ 'content': m.group(0),
+ 'continuation': True })
pos += 1
else:
yield({ 'type': 'DELIM',
'content': m.group(0) })
else:
- m = otag.match(line)
if m:
- t = { 'type': 'TAG',
+ pos -= 1
+ t = None
+ m = otag.match(line, pos)
+ if m and m.group('tag') in self.tags:
+ rest = line[m.end(0):]
+ line = m.group('pfx')
+ pos = 0
+ t = { 'type': 'OTAG',
'tag': m.group('tag'),
'args': m.group('args') }
-
- if self.input_tag(t):
+ else:
+ m = ctag.match(line, pos)
+ if m and m.group('tag') in self.tags:
+ rest = line[m.end(0):]
+ line = m.group('pfx')
+ pos = 0
+ t = { 'type': 'CTAG',
+ 'tag': m.group('tag') }
+
+ if line:
+ if line[-1] == '\n':
+ if line[pos:-1] != '':
+ yield({ 'type': 'TEXT',
+ 'content': line[pos:-1] })
+ yield({ 'type': 'NL',
+ 'content': '\n' })
+ else:
+ yield({ 'type': 'TEXT',
+ 'content': line[pos:] })
+
+ if t:
+ if t['type'] == 'OTAG' and t['tag'] == 'nowiki':
s = ''
if not m.group('closed'):
while 1:
try:
l = self.input()
m = ctag.match(l)
if m and m.group('tag') == t['tag']:
break
s += l
except StopIteration:
break
- yield({ 'type': 'TAG',
- 'tag': t['tag'],
- 'args': t['args'],
- 'content': s
- })
- line = None
- continue
-
- if line[-1] == '\n':
- if line[pos:-1] != '':
- yield({ 'type': 'TEXT',
- 'content': line[pos:-1] })
- yield({ 'type': 'NL',
- 'content': '\n' })
+ t['type'] = 'TAG'
+ t['content'] = {'type': 'TEXT', 'content': s}
+
+ yield(t)
+ if t['type'] == 'OTAG' and m.group('closed'):
+ t['type'] = 'CTAG'
+ yield(t)
+ line = rest
+ pos = 0
else:
- yield({ 'type': 'TEXT',
- 'content': line[pos:] })
- line = None
+ line = None
def input(self):
return None
def swaptkn(self, i, j):
self.dprint(80, "SWAPPING %s <-> %s", i, j)
x = self.toklist[i]
self.toklist[i] = self.toklist[j]
self.toklist[j] = x
def tokenize(self):
self.toklist = []
for tok in self.tokread():
self.dprint(100, "TOK: %s", tok)
self.toklist.append(tok)
# Determine and fix up the ordering of bold and italic markers
# There are three possible cases:
#
# 1a. '''a b ''c'' d'''
# 1b. ''a b '''c''' d''
#
# 2a. '''''a b'' c d'''
# 2b. '''''a b''' c d''
#
@@ -173,93 +292,94 @@ class BaseWikiMarkup(object):
and self.toklist[i+1]['type'] == 'DELIM' \
and self.toklist[stack[-1]]['content'] == self.toklist[i+1]['content']:
# Case 3: swap current and next tokens
self.swaptkn(i, i+1)
# and pop off the matching one
stack.pop()
else:
# Push the token on stack
stack.append(i)
else:
# Push the token on stack
stack.append(i)
# Redefine all non-matched tokens as TEXT
for i in stack:
self.toklist[i]['type'] = 'TEXT'
def peektkn(self):
return self.toklist[self.tokind]
def setkn(self,val):
self.toklist[self.tokind] = val
def getkn(self):
self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
+ if self.tokind == len(self.toklist):
+ return { 'type': 'NIL' }
tok = self.toklist[self.tokind]
- if tok['type'] != 'NIL':
- self.tokind = self.tokind + 1
+ self.tokind = self.tokind + 1
return tok
def ungetkn(self):
self.tokind = self.tokind - 1
self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
return self.toklist[self.tokind]
def parse_fontmod(self,delim,what):
self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s",
delim, what, self.peektkn())
seq = []
- textlist = []
+ text = ''
while 1:
tok = self.getkn()
if tok['type'] == 'TEXT':
- textlist.append(tok['content'])
+ text += tok['content']
elif tok['type'] == 'DELIM':
if tok['content'] == delim:
break
elif self.is_inline_delim(tok):
- if textlist:
- seq.append({ 'type': 'TEXT', 'content': textlist })
- textlist = []
+ if text:
+ seq.append({ 'type': 'TEXT', 'content': text })
+ text = ''
x = self.parse_inline(tok)
if x:
seq.append(x)
else:
self.dprint(80, "LEAVE parse_fontmod=%s", "None")
return None
else:
self.dprint(80, "LEAVE parse_fontmod=None")
return None
elif tok['type'] == 'NL':
if self.peektkn()['type'] == 'NL':
self.dprint(80, "LEAVE parse_fontmod=None")
return None
seq.append({ 'type': 'TEXT', 'content': '\n' })
else:
self.dprint(80, "LEAVE parse_fontmod=None")
return None
- if textlist:
- seq.append({ 'type': 'TEXT', 'content': textlist })
+ if text:
+ seq.append({ 'type': 'TEXT', 'content': text })
res = { 'type': what, 'content': seq }
self.dprint(80, "LEAVE parse_fontmod=%s", res)
return res
def parse_link(self, type, delim):
self.dprint(80, "ENTER parse_link(%s,%s), tok %s",
type, delim, self.peektkn())
subtree = []
list = []
while 1:
tok = self.getkn()
if tok['type'] == 'DELIM':
if tok['content'] == delim:
if list:
subtree.append({ 'type': 'SEQ', 'content': list })
break
elif tok['content'] == "|":
if len(list) > 1:
subtree.append({ 'type': 'SEQ', 'content': list })
elif list:
subtree.append(list[0])
list = []
else:
x = self.parse_inline(tok)
@@ -322,243 +442,304 @@ class BaseWikiMarkup(object):
tokind = self.tokind
if tok['content'] == "''":
x = self.parse_fontmod(tok['content'], 'IT')
elif tok['content'] == "'''":
x = self.parse_fontmod(tok['content'], 'BOLD')
elif tok['content'] == "[":
x = self.parse_ref()
elif tok['content'] == "[[":
x = self.parse_link('LINK', "]]")
elif tok['content'] == "{{":
x = self.parse_link('TMPL', "}}")
else: # FIXME
self.dprint(80, "LEAVE parse_inline=%s", "None")
x = None
if not x:
self.tokind = tokind
self.dprint(80, "LEAVE parse_inline=%s", x)
return x
def parse_para(self):
self.dprint(80, "ENTER parse_para, tok %s", self.peektkn())
seq = []
textlist = []
tok = self.peektkn()
- if re.match("^\s", tok['content']):
- type = 'PRE'
- rx = re.compile("^\S")
+
+ if self.newline:
+ if re.match("^\s", tok['content']):
+ type = 'PRE'
+ rx = re.compile("^\S")
+ else:
+ type = 'PARA'
+ rx = re.compile("^\s")
else:
- type = 'PARA'
- rx = re.compile("^\s")
+ type = 'SEQ'
+ rx = None
+
while 1:
tok = self.getkn()
if tok['type'] == 'TEXT':
- if self.newline and rx.match(tok['content']):
+ if rx and self.newline and rx.match(tok['content']):
self.ungetkn()
break
textlist.append(tok['content'])
elif tok['type'] == 'NL':
tok = self.getkn()
if tok['type'] == 'NL' or tok['type'] == 'NIL':
break
else:
self.ungetkn()
if self.is_block_delim(tok):
break
textlist.append('\n')
elif tok['type'] == 'NIL':
break
+ elif tok['type'] == 'OTAG' or tok['type'] == 'CTAG' or tok['type'] == 'TAG':
+ self.ungetkn()
+ break
elif tok['type'] == 'DELIM':
if self.is_inline_delim(tok):
if textlist:
- seq.append({ 'type': 'TEXT', 'content': textlist })
+ seq.append({ 'type': 'TEXT',
+ 'content': ''.join(textlist) })
textlist = []
x = self.parse_inline(tok)
if x:
seq.append(x)
else:
self.dprint(80, "ROLLBACK parse_para=%s", tok)
od = tok['content']
textlist.append(od)
if close_delim.has_key(od):
cd = close_delim[od]
lev = 0
for tok in self.toklist[self.tokind:]:
if tok['type'] == 'NIL':
break
elif tok['type'] == 'DELIM':
if tok['content'] == od:
lev += 1
elif tok['content'] == cd:
if lev == 0:
tok['type'] = 'TEXT'
break
else:
seq.append({ 'type': 'TEXT', 'content': tok['content'] })
# self.ungetkn()
break
if textlist:
- seq.append({ 'type': 'TEXT', 'content': textlist })
+ seq.append({ 'type': 'TEXT', 'content': ''.join(textlist) })
self.dprint(80, "LEAVE parse_para=%s", seq)
return { 'type': type, 'content': seq }
def parse_header(self, delim):
self.dprint(80, "ENTER parse_header(%s), tok %s", delim, self.peektkn())
list = []
while 1:
tok = self.getkn()
if tok['type'] == 'NIL':
self.dprint(80, "LEAVE parse_header=%s", "None")
return None
elif tok['type'] == 'TEXT':
list.append(tok)
elif tok['type'] == 'DELIM':
if tok['content'] == delim:
if self.peektkn()['type'] == 'NL':
break
else:
self.dprint(80, "LEAVE parse_header=%s", "None")
return None
else:
x = self.parse_inline(tok)
if x:
list.append(x)
else:
self.dprint(80, "LEAVE parse_header=%s", "None")
return None #FIXME?
else:
self.dprint(80, "LEAVE parse_header=%s", "None")
return None
self.dprint(80, "LEAVE parse_header=(HDR, %s, (SEQ,%s))",len(delim)-1,list)
return { 'type': 'HDR',
'level': len(delim)-1,
'content': { 'type': 'SEQ', 'content': list } }
def parse_line(self):
self.dprint(80, "ENTER parse_line, tok %s", self.peektkn())
list = []
while 1:
tok = self.getkn()
if tok['type'] == 'NL' or tok['type'] == 'NIL':
break
elif tok['type'] == 'TEXT':
list.append(tok)
- elif tok['type'] == 'DELIM' and tok['content'][0] == ":":
- list.append(self.parse_indent(len(tok['content'])))
- break
- else:
- x = self.parse_inline(tok)
- if x:
- list.append(x)
+ elif tok['type'] == 'DELIM':
+ if tok['content'][0] == ":":
+ list.append(self.parse_indent(len(tok['content'])))
+ break
else:
- list.append(tok)
+ x = self.parse