diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-17 17:05:32 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-17 17:05:32 +0300 |
commit | 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (patch) | |
tree | 4c894eef1dc0d998330683d2ecab12dfca99b803 /bin | |
parent | ad4a97d83528b00e76435d3d2674ff05a44bd398 (diff) | |
download | wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.gz wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.bz2 |
Improve bin/wikitrans
* bin/wikitrans (getwiki): Take options as second argument and
modify it directly. Deduce options.lang, options.itype, and
options.kwdict['html_base'] from the URL, when possible.
* README.rst: Update description of bin/wikitrans.
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/wikitrans | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/bin/wikitrans b/bin/wikitrans
index e9ab81f..caaa885 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -70,7 +70,7 @@ def setkw(option, opt, value, parser):
     if val:
         parser.values.kwdict[kw] = val
 
-def getwiki(url):
+def getwiki(url, options):
     tmp = tempfile.NamedTemporaryFile()
     if sys.version_info[0] > 2:
         import urllib.request
@@ -87,7 +87,15 @@ def getwiki(url):
     if text is None:
         print("no page/revision/text element in the downloaded page")
         exit(0)
-    return text.text.encode()
+
+    m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url)
+    if m:
+        options.lang = m.group('lang')
+        options.kwdict['html_base'] = m.group('url') + '/wiki/'
+        if m.group('root') == 'wiktionary':
+            options.itype = 'wiktionary'
+
+    options.kwdict['text'] = text.text.encode()
 
 def main():
     usage = '%prog [OPTIONS] ARG'
@@ -141,19 +149,11 @@ Otherwise, ARG is name of the file to read wiki material from.
     (options, args) = parser.parse_args()
     if len(args) == 1:
         if options.base_url:
-            options.kwdict['text'] = getwiki(options.base_url
-                                             + '/wiki/Special:Export/' + args[0])
-            m = re.match('(?:.+://)(.+?)\.(wik(?:ipedia|tionary))\.org',
-                         options.base_url)
-            if m:
-                options.lang = m.group(1)
-                options.kwdict['html_base'] = options.base_url + '/wiki/'
-                if m.group(2) == 'wiktionary':
-                    options.itype = 'wiktionary'
+            getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options)
         elif args[0] == '-':
             options.kwdict['file'] = sys.stdin
         elif re.match('^(http|ftp)s?://',args[0]):
-            options.kwdict['text'] = getwiki(args[0])
+            getwiki(args[0], options)
         else:
             options.kwdict['filename'] = args[0]
     else: