summary | refs | log | tree | commit | diff
path: root/wikitrans/wikimarkup.py
diff options
context:
space:
mode:
Diffstat (limited to 'wikitrans/wikimarkup.py')
-rw-r--r-- wikitrans/wikimarkup.py 35
1 files changed, 30 insertions, 5 deletions
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 19f69e6..1e2429f 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -142,12 +142,19 @@ class WikiMarkupParser(object):
Abstract methods (must be overridden by the subclass):
input() -- returns next physical line from the input material.
Public attributes:
+ Input:
+ debug_level -- debug verbosity level (0 - no debug info, 100 - excessively
+ copious debug messages). Default is 0.
+ strict -- if True, parser will throw exception upon encountering
+ invalid markup tag (mostly for future use)
+
+ Output:
tree -- constructed parse tree (a subclass of WikiNode)
"""
delim = re.compile("^==+[ \t]*|[ \t]*==+[ \t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^/][^>]+))?\s*(?P<closed>/)?>")
@@ -172,25 +179,26 @@ class WikiMarkupParser(object):
newline = 0
tree = None
tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ]
debug_level = 0
+ strict = False
def dprint(self, lev, fmt, *argv):
"""If current debug level is greater than or equal to lev, print *argv
according to format.
"""
if self.debug_level >= lev:
for l in (fmt % argv).split('\n'):
print("[DEBUG] %s" % l)
inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ]
token_class = {}
-
+
def _new_node(self, **kwarg):
return self.token_class[kwarg['type']](self, **kwarg)
def tokread(self):
"""Read next token from the input. Return it as a subclass of WikiNode."""
line = None
@@ -371,13 +379,13 @@ class WikiMarkupParser(object):
stack.append(i)
else:
# Push the token on stack
stack.append(i)
# Redefine all non-matched tokens as TEXT
for i in stack:
- # FIXME
+ # FIXME: How to convert node to TEXT?
self.toklist[i] = self._new_node(type='TEXT',
content=str(self.toklist[i]))
mark = []
def push_mark(self):
@@ -489,13 +497,16 @@ class WikiMarkupParser(object):
flush()
acc['seq'].append(self.parse_tag(tok))
elif tok.type == 'DELIM':
flush()
acc['seq'].append(self.parse_inline_delim(tok))
else:
- raise UnexpectedTokenError(tok)
+ if self.strict:
+ raise UnexpectedTokenError(tok)
+ # FIXME: Another possible variant of handling this case is to
+ # convert tok to TEXT node and append it to acc['seq']
tok = self.getkn()
flush()
if acc['seq']:
tok = self._new_node(type=type, content=acc['seq'])
else:
tok = None
@@ -973,12 +984,19 @@ class WikiMarkup(WikiMarkupParser):
image_base=URL
Base URL for images. Default is
'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf'
media_base=URL
Base URL for media files. Default is
'http://www.mediawiki.org/xml/export-0.3'
+
+ debug_level=INT
+ debug verbosity level (0 - no debug info, 100 - excessively
+ copious debug messages). Default is 0.
+ strict=BOOL
+ Strict parsing mode. Throw exceptions on syntax errors. Default
+ is False.
"""
self.token_class = {
'NIL': WikiNode,
'NL': WikiNode,
'OTAG': WikiTagNode,
'CTAG': WikiTagNode,
@@ -996,28 +1014,35 @@ class WikiMarkup(WikiMarkupParser):
'BOLD': WikiSeqNode,
'ELT': WikiEltNode,
'ENV': WikiEnvNode,
'LINK': WikiSeqNode,
'HDR': WikiHdrNode
}
-
+
for kw in keywords:
if kw == 'file':
self.file = keywords[kw]
elif kw == 'filename':
self.file = open(keywords[kw])
elif kw == 'text':
- self.text = keywords[kw].split("\n")
+ if sys.version_info[0] > 2:
+ self.text = keywords[kw].decode('utf-8').split("\n")
+ else:
+ self.text = keywords[kw].split("\n")
elif kw == 'lang':
self.lang = keywords[kw]
elif kw == 'html_base':
self.html_base = keywords[kw]
elif kw == 'image_base':
self.image_base = keywords[kw]
elif kw == 'media_base':
self.media_base = keywords[kw]
+ elif kw == 'strict':
+ self.strict = keywords[kw]
+ elif kw == 'debug_level':
+ self.debug_level = keywords[kw]
def __del__(self):
if self.file:
self.file.close()
def input(self):

Return to:

Send suggestions and report system problems to the System administrator.