diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-09-01 22:10:01 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-09-01 22:24:02 +0300 |
commit | 5320bea15e388200e613e6a2bdac3c1449030986 (patch) | |
tree | 7d8571c63f72cc690cea8323e43be09c18527c45 /bin/wikitrans | |
parent | 0aae19835045bac0be0f22ecd0e84527cdaee21c (diff) | |
download | wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.gz wikitrans-5320bea15e388200e613e6a2bdac3c1449030986.tar.bz2 |
Bugfixes
* README.rst: Describe new options.
* bin/wikitrans: Change handling of the --debug option.
* wikitrans/wikimarkup.py (WikiMarkupParser): New attribute - strict.
(parse_para): Don't throw UnexpectedTokenError if self.strict is False;
ignore the invalid token instead.
(WikiMarkup): Fix Python 3 compatibility.
* wikitrans/wikitoken.py: Fix Python 3 compatibility.
Diffstat (limited to 'bin/wikitrans')
-rwxr-xr-x | bin/wikitrans | 45 |
1 file changed, 25 insertions, 20 deletions
diff --git a/bin/wikitrans b/bin/wikitrans index 4a0fc06..09ba0b3 100755 --- a/bin/wikitrans +++ b/bin/wikitrans @@ -1,20 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import print_function from __future__ import unicode_literals import sys @@ -29,19 +29,19 @@ except ImportError: from wikitrans.wiki2html import HtmlWikiMarkup, HtmlWiktionaryMarkup from wikitrans.wiki2text import TextWikiMarkup, TextWiktionaryMarkup from wikitrans.wiki2texi import TexiWikiMarkup from wikitrans.wikimarkup import WikiMarkup from wikitrans.wikidump import DumpWikiMarkup -# Set utf-8 as the default encoding for Python 2.7. -# Trying to do so using encode('utf_8')/unicode, which is +# Set utf-8 as the default encoding for Python 2.7. +# Trying to do so using encode('utf_8')/unicode, which is # supposed to be the right way, does not work in Python 2.7 # Simply calling sys.setdefaultencoding is not possible, -# because, for some obscure reason, Python chooses to delete -# this symbol from the namespace after setting its default -# encoding in site.py. That's why reload is needed. +# because, for some obscure reason, Python chooses to delete +# this symbol from the namespace after setting its default +# encoding in site.py. That's why reload is needed. 
try: reload(sys) sys.setdefaultencoding('utf-8') except: pass @@ -68,14 +68,19 @@ def setkw(option, opt, value, parser): if not parser.values.kwdict: parser.values.kwdict = {} (kw,sep,val) = value.partition('=') if val: parser.values.kwdict[kw] = val +def setdebug(option, opt, value, parser): + if not parser.values.kwdict: + parser.values.kwdict = {} + parser.values.kwdict['debug_level'] = value + def getwiki(url, options): - tmp = tempfile.NamedTemporaryFile() + tmp = tempfile.NamedTemporaryFile() if sys.version_info[0] > 2: import urllib.request with urllib.request.urlopen(url) as u: root = etree.fromstring(u.read()) else: import urllib @@ -92,27 +97,27 @@ def getwiki(url, options): m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url) if m: options.lang = m.group('lang') options.kwdict['html_base'] = m.group('url') + '/wiki/' if m.group('root') == 'wiktionary': options.itype = 'wiktionary' - + options.kwdict['text'] = text.text.encode() - + def main(): usage = '%prog [OPTIONS] ARG' version = '%prog 1.2' description = """Translates MediaWiki documents markup to various other formats. If ARG looks like a URL, the wiki text to be converted will be downloaded from that URL. Otherwise, if --base-url is given, ARG is treated as the name of the page to get from the WikiMedia istallation at that URL. -Otherwise, ARG is name of the file to read wiki material from. +Otherwise, ARG is name of the file to read wiki material from. """ epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>" - + parser = OptionParser(usage=usage, version=version, description=description, epilog=epilog) parser.add_option('-v', '--verbose', action="count", dest="verbose", @@ -132,50 +137,50 @@ Otherwise, ARG is name of the file to read wiki material from. 
parser.add_option('-o', '--option', action='callback', callback=setkw, type='string', dest='kwdict', default={}, help='set keyword option for the parser class constructor') parser.add_option('-d', '--debug', - action='store', type='int', dest='debug', - default=0, + action='callback', callback=setdebug, + type='int', dest='kwdict', help='set debug level (0..100)') parser.add_option('-D', '--dump', action='store_const', const='dump', dest='otype', help='dump parse tree and exit; similar to --type=dump') parser.add_option('-b', '--base-url', action='store', type='string', dest='base_url', help='set base url') - + (options, args) = parser.parse_args() if len(args) == 1: if options.base_url: - getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options) + getwiki(options.base_url + '/wiki/Special:Export/' + args[0], + options) elif args[0] == '-': options.kwdict['file'] = sys.stdin elif re.match('^(http|ftp)s?://',args[0]): getwiki(args[0], options) else: options.kwdict['filename'] = args[0] else: parser.error("bad number of arguments") - + options.kwdict['lang'] = options.lang # FIXME if options.otype == 'dump' and not 'indent' in options.kwdict: options.kwdict['indent'] = 2 if options.otype in handlers: if options.itype in handlers[options.otype]: markup = handlers[options.otype][options.itype](**options.kwdict) - markup.debug_level = options.debug markup.parse() print("%s" % str(markup)) exit(0) else: print("input type %s is not supported for %s output" % (options.itype, options.otype)) else: print("unsupported output type: %s" % options.otype) exit(1) if __name__ == '__main__': - main() + main() |