diff options
-rw-r--r-- | README.rst | 6
-rwxr-xr-x | bin/wikitrans | 45
-rw-r--r-- | wikitrans/wikimarkup.py | 35
-rw-r--r-- | wikitrans/wikitoken.py | 2
4 files changed, 62 insertions, 26 deletions
@@ -34,12 +34,18 @@ image_base = *url* Base URL for images. Default is ``http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf`` media_base = *url* Base URL for media files. Default is ``http://www.mediawiki.org/xml/export-0.3`` +debug_level = *int* + Debug verbosity level (0 - no debug info, 100 - excessively + copious debug messages). Default is 0. + +strict = *bool* + Strict parsing mode. Throw exceptions on syntax errors. Default is False. class ``TextWikiMarkup`` ------------------------ Translates material in Wiki markup language to plain text. Usage:: from WikiTrans.wiki2text import TextWikiMarkup diff --git a/bin/wikitrans b/bin/wikitrans index 4a0fc06..09ba0b3 100755 --- a/bin/wikitrans +++ b/bin/wikitrans @@ -1,20 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import print_function from __future__ import unicode_literals import sys @@ -29,19 +29,19 @@ except ImportError: from wikitrans.wiki2html import HtmlWikiMarkup, HtmlWiktionaryMarkup from wikitrans.wiki2text import TextWikiMarkup, TextWiktionaryMarkup from wikitrans.wiki2texi import TexiWikiMarkup from wikitrans.wikimarkup import WikiMarkup from wikitrans.wikidump import DumpWikiMarkup -# Set utf-8 as the default encoding for Python 2.7. 
-# Trying to do so using encode('utf_8')/unicode, which is +# Set utf-8 as the default encoding for Python 2.7. +# Trying to do so using encode('utf_8')/unicode, which is # supposed to be the right way, does not work in Python 2.7 # Simply calling sys.setdefaultencoding is not possible, -# because, for some obscure reason, Python chooses to delete -# this symbol from the namespace after setting its default -# encoding in site.py. That's why reload is needed. +# because, for some obscure reason, Python chooses to delete +# this symbol from the namespace after setting its default +# encoding in site.py. That's why reload is needed. try: reload(sys) sys.setdefaultencoding('utf-8') except: pass @@ -68,14 +68,19 @@ def setkw(option, opt, value, parser): if not parser.values.kwdict: parser.values.kwdict = {} (kw,sep,val) = value.partition('=') if val: parser.values.kwdict[kw] = val +def setdebug(option, opt, value, parser): + if not parser.values.kwdict: + parser.values.kwdict = {} + parser.values.kwdict['debug_level'] = value + def getwiki(url, options): - tmp = tempfile.NamedTemporaryFile() + tmp = tempfile.NamedTemporaryFile() if sys.version_info[0] > 2: import urllib.request with urllib.request.urlopen(url) as u: root = etree.fromstring(u.read()) else: import urllib @@ -92,27 +97,27 @@ def getwiki(url, options): m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url) if m: options.lang = m.group('lang') options.kwdict['html_base'] = m.group('url') + '/wiki/' if m.group('root') == 'wiktionary': options.itype = 'wiktionary' - + options.kwdict['text'] = text.text.encode() - + def main(): usage = '%prog [OPTIONS] ARG' version = '%prog 1.2' description = """Translates MediaWiki documents markup to various other formats. If ARG looks like a URL, the wiki text to be converted will be downloaded from that URL. Otherwise, if --base-url is given, ARG is treated as the name of the page to get from the WikiMedia istallation at that URL. 
-Otherwise, ARG is name of the file to read wiki material from. +Otherwise, ARG is name of the file to read wiki material from. """ epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>" - + parser = OptionParser(usage=usage, version=version, description=description, epilog=epilog) parser.add_option('-v', '--verbose', action="count", dest="verbose", @@ -132,50 +137,50 @@ Otherwise, ARG is name of the file to read wiki material from. parser.add_option('-o', '--option', action='callback', callback=setkw, type='string', dest='kwdict', default={}, help='set keyword option for the parser class constructor') parser.add_option('-d', '--debug', - action='store', type='int', dest='debug', - default=0, + action='callback', callback=setdebug, + type='int', dest='kwdict', help='set debug level (0..100)') parser.add_option('-D', '--dump', action='store_const', const='dump', dest='otype', help='dump parse tree and exit; similar to --type=dump') parser.add_option('-b', '--base-url', action='store', type='string', dest='base_url', help='set base url') - + (options, args) = parser.parse_args() if len(args) == 1: if options.base_url: - getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options) + getwiki(options.base_url + '/wiki/Special:Export/' + args[0], + options) elif args[0] == '-': options.kwdict['file'] = sys.stdin elif re.match('^(http|ftp)s?://',args[0]): getwiki(args[0], options) else: options.kwdict['filename'] = args[0] else: parser.error("bad number of arguments") - + options.kwdict['lang'] = options.lang # FIXME if options.otype == 'dump' and not 'indent' in options.kwdict: options.kwdict['indent'] = 2 if options.otype in handlers: if options.itype in handlers[options.otype]: markup = handlers[options.otype][options.itype](**options.kwdict) - markup.debug_level = options.debug markup.parse() print("%s" % str(markup)) exit(0) else: print("input type %s is not supported for %s output" % (options.itype, options.otype)) else: print("unsupported output type: %s" 
% options.otype) exit(1) if __name__ == '__main__': - main() + main() diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py index 19f69e6..1e2429f 100644 --- a/wikitrans/wikimarkup.py +++ b/wikitrans/wikimarkup.py @@ -142,12 +142,19 @@ class WikiMarkupParser(object): Abstract methods (must be overridden by the subclass): input() -- returns next physical line from the input material. Public attributes: + Input: + debug_level -- debug verbosity level (0 - no debug info, 100 - excessively + copious debug messages). Default is 0. + strict -- if True, parser will throw exception upon encountering + invalid markup tag (mostly for future use) + + Output: tree -- constructed parse tree (a subclass of WikiNode) """ delim = re.compile("^==+[ \t]*|[ \t]*==+[ \t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<") otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^/][^>]+))?\s*(?P<closed>/)?>") @@ -172,25 +179,26 @@ class WikiMarkupParser(object): newline = 0 tree = None tags = [ 'code', 'nowiki', 'tt', 'div', 'ref', 'references' ] debug_level = 0 + strict = False def dprint(self, lev, fmt, *argv): """If current debug level is greater than or equal to lev, print *argv according to format. """ if self.debug_level >= lev: for l in (fmt % argv).split('\n'): print("[DEBUG] %s" % l) inline_delims = [ "''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|" ] token_class = {} - + def _new_node(self, **kwarg): return self.token_class[kwarg['type']](self, **kwarg) def tokread(self): """Read next token from the input. Return it as a subclass of WikiNode.""" line = None @@ -371,13 +379,13 @@ class WikiMarkupParser(object): stack.append(i) else: # Push the token on stack stack.append(i) # Redefine all non-matched tokens as TEXT for i in stack: - # FIXME + # FIXME: How to convert node to TEXT? 
self.toklist[i] = self._new_node(type='TEXT', content=str(self.toklist[i])) mark = [] def push_mark(self): @@ -489,13 +497,16 @@ class WikiMarkupParser(object): flush() acc['seq'].append(self.parse_tag(tok)) elif tok.type == 'DELIM': flush() acc['seq'].append(self.parse_inline_delim(tok)) else: - raise UnexpectedTokenError(tok) + if self.strict: + raise UnexpectedTokenError(tok) + # FIXME: Another possible variant of handling this case is to + # convert tok to TEXT node and append it to acc['seq'] tok = self.getkn() flush() if acc['seq']: tok = self._new_node(type=type, content=acc['seq']) else: tok = None @@ -973,12 +984,19 @@ class WikiMarkup(WikiMarkupParser): image_base=URL Base URL for images. Default is 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' media_base=URL Base URL for media files. Default is 'http://www.mediawiki.org/xml/export-0.3' + + debug_level=INT + debug verbosity level (0 - no debug info, 100 - excessively + copious debug messages). Default is 0. + strict=BOOL + Strict parsing mode. Throw exceptions on syntax errors. Default + is False. 
""" self.token_class = { 'NIL': WikiNode, 'NL': WikiNode, 'OTAG': WikiTagNode, 'CTAG': WikiTagNode, @@ -996,28 +1014,35 @@ class WikiMarkup(WikiMarkupParser): 'BOLD': WikiSeqNode, 'ELT': WikiEltNode, 'ENV': WikiEnvNode, 'LINK': WikiSeqNode, 'HDR': WikiHdrNode } - + for kw in keywords: if kw == 'file': self.file = keywords[kw] elif kw == 'filename': self.file = open(keywords[kw]) elif kw == 'text': - self.text = keywords[kw].split("\n") + if sys.version_info[0] > 2: + self.text = keywords[kw].decode('utf-8').split("\n") + else: + self.text = keywords[kw].split("\n") elif kw == 'lang': self.lang = keywords[kw] elif kw == 'html_base': self.html_base = keywords[kw] elif kw == 'image_base': self.image_base = keywords[kw] elif kw == 'media_base': self.media_base = keywords[kw] + elif kw == 'strict': + self.strict = keywords[kw] + elif kw == 'debug_level': + self.debug_level = keywords[kw] def __del__(self): if self.file: self.file.close() def input(self): diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py index deedea8..1f81092 100644 --- a/wikitrans/wikitoken.py +++ b/wikitrans/wikitoken.py @@ -308,13 +308,13 @@ class WikiEnvNode(WikiContentNode): @jsonencoder def json_encode(self): return { 'envtype': self.envtype, 'level': self.level, - 'content': map(lambda x: x.json_encode(), self.content) + 'content': [x for x in map(lambda x: x.json_encode(), self.content)] } class WikiIndNode(WikiContentNode): """Indented block node. |