summary refs log tree commit diff
path: root/bin/wikitrans
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2018-09-01 22:10:01 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2018-09-01 22:24:02 +0300
commit: 5320bea15e388200e613e6a2bdac3c1449030986 (patch)
tree: 7d8571c63f72cc690cea8323e43be09c18527c45 /bin/wikitrans
parent: 0aae19835045bac0be0f22ecd0e84527cdaee21c (diff)
download: wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.gz
wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.bz2
Bugfixes
* README.rst: Describe new options.
* bin/wikitrans: Change handling of the --debug option.
* wikitrans/wikimarkup.py (WikiMarkupParser): New attribute - strict.
(parse_para): Don't throw UnexpectedTokenError if self.strict is False; ignore the invalid token instead.
(WikiMarkup): Fix Python 3 compatibility.
* wikitrans/wikitoken.py: Fix Python 3 compatibility.
Diffstat (limited to 'bin/wikitrans')
-rwxr-xr-x bin/wikitrans 45
1 files changed, 25 insertions, 20 deletions
diff --git a/bin/wikitrans b/bin/wikitrans
index 4a0fc06..09ba0b3 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -1,20 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from __future__ import unicode_literals
import sys
@@ -29,19 +29,19 @@ except ImportError:
from wikitrans.wiki2html import HtmlWikiMarkup, HtmlWiktionaryMarkup
from wikitrans.wiki2text import TextWikiMarkup, TextWiktionaryMarkup
from wikitrans.wiki2texi import TexiWikiMarkup
from wikitrans.wikimarkup import WikiMarkup
from wikitrans.wikidump import DumpWikiMarkup
-# Set utf-8 as the default encoding for Python 2.7.
-# Trying to do so using encode('utf_8')/unicode, which is
+# Set utf-8 as the default encoding for Python 2.7.
+# Trying to do so using encode('utf_8')/unicode, which is
# supposed to be the right way, does not work in Python 2.7
# Simply calling sys.setdefaultencoding is not possible,
-# because, for some obscure reason, Python chooses to delete
-# this symbol from the namespace after setting its default
-# encoding in site.py. That's why reload is needed.
+# because, for some obscure reason, Python chooses to delete
+# this symbol from the namespace after setting its default
+# encoding in site.py. That's why reload is needed.
try:
reload(sys)
sys.setdefaultencoding('utf-8')
except:
pass
@@ -68,14 +68,19 @@ def setkw(option, opt, value, parser):
if not parser.values.kwdict:
parser.values.kwdict = {}
(kw,sep,val) = value.partition('=')
if val:
parser.values.kwdict[kw] = val
+def setdebug(option, opt, value, parser):
+ if not parser.values.kwdict:
+ parser.values.kwdict = {}
+ parser.values.kwdict['debug_level'] = value
+
def getwiki(url, options):
- tmp = tempfile.NamedTemporaryFile()
+ tmp = tempfile.NamedTemporaryFile()
if sys.version_info[0] > 2:
import urllib.request
with urllib.request.urlopen(url) as u:
root = etree.fromstring(u.read())
else:
import urllib
@@ -92,27 +97,27 @@ def getwiki(url, options):
m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url)
if m:
options.lang = m.group('lang')
options.kwdict['html_base'] = m.group('url') + '/wiki/'
if m.group('root') == 'wiktionary':
options.itype = 'wiktionary'
-
+
options.kwdict['text'] = text.text.encode()
-
+
def main():
usage = '%prog [OPTIONS] ARG'
version = '%prog 1.2'
description = """Translates MediaWiki documents markup to various other formats.
If ARG looks like a URL, the wiki text to be converted will be downloaded
from that URL.
Otherwise, if --base-url is given, ARG is treated as the name of the page to
get from the WikiMedia istallation at that URL.
-Otherwise, ARG is name of the file to read wiki material from.
+Otherwise, ARG is name of the file to read wiki material from.
"""
epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>"
-
+
parser = OptionParser(usage=usage,
version=version,
description=description,
epilog=epilog)
parser.add_option('-v', '--verbose',
action="count", dest="verbose",
@@ -132,50 +137,50 @@ Otherwise, ARG is name of the file to read wiki material from.
parser.add_option('-o', '--option',
action='callback', callback=setkw,
type='string', dest='kwdict',
default={},
help='set keyword option for the parser class constructor')
parser.add_option('-d', '--debug',
- action='store', type='int', dest='debug',
- default=0,
+ action='callback', callback=setdebug,
+ type='int', dest='kwdict',
help='set debug level (0..100)')
parser.add_option('-D', '--dump',
action='store_const', const='dump',
dest='otype',
help='dump parse tree and exit; similar to --type=dump')
parser.add_option('-b', '--base-url',
action='store', type='string', dest='base_url',
help='set base url')
-
+
(options, args) = parser.parse_args()
if len(args) == 1:
if options.base_url:
- getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options)
+ getwiki(options.base_url + '/wiki/Special:Export/' + args[0],
+ options)
elif args[0] == '-':
options.kwdict['file'] = sys.stdin
elif re.match('^(http|ftp)s?://',args[0]):
getwiki(args[0], options)
else:
options.kwdict['filename'] = args[0]
else:
parser.error("bad number of arguments")
-
+
options.kwdict['lang'] = options.lang # FIXME
if options.otype == 'dump' and not 'indent' in options.kwdict:
options.kwdict['indent'] = 2
if options.otype in handlers:
if options.itype in handlers[options.otype]:
markup = handlers[options.otype][options.itype](**options.kwdict)
- markup.debug_level = options.debug
markup.parse()
print("%s" % str(markup))
exit(0)
else:
print("input type %s is not supported for %s output" % (options.itype, options.otype))
else:
print("unsupported output type: %s" % options.otype)
exit(1)
if __name__ == '__main__':
- main()
+ main()

Return to:

Send suggestions and report system problems to the System administrator.