summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2018-08-17 17:05:32 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2018-08-17 17:05:32 +0300
commit 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (patch)
tree 4c894eef1dc0d998330683d2ecab12dfca99b803
parent ad4a97d83528b00e76435d3d2674ff05a44bd398 (diff)
download wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.gz
download wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.bz2
Improve bin/wikitrans
* bin/wikitrans (getwiki): Take options as the second argument and modify it directly. Deduce options.lang, options.itype, and options.kwdict['html_base'] from the URL, when possible.
* README.rst: Update description of bin/wikitrans.
-rw-r--r-- README.rst 4
-rwxr-xr-x bin/wikitrans 24
2 files changed, 15 insertions, 13 deletions
diff --git a/README.rst b/README.rst
index b6863bc..d189c98 100644
--- a/README.rst
+++ b/README.rst
@@ -167,8 +167,10 @@ Options are:
Set debug level (0..100)
``-D``, ``--dump``
Dump parse tree and exit; same as ``--type=dump``.
``-b URL``, ``--base-url=URL``
Set base url.
-
+Note: when using ``--base-url`` or passing a URL as an argument (2nd and 3rd
+use cases above), if the URL is in the 'wikipedia.org' or 'wiktionary.org'
+domain, the options ``--input-type`` and ``--lang`` are set automatically.
diff --git a/bin/wikitrans b/bin/wikitrans
index e9ab81f..caaa885 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -67,13 +67,13 @@ def setkw(option, opt, value, parser):
if not parser.values.kwdict:
parser.values.kwdict = {}
(kw,sep,val) = value.partition('=')
if val:
parser.values.kwdict[kw] = val
-def getwiki(url):
+def getwiki(url, options):
tmp = tempfile.NamedTemporaryFile()
if sys.version_info[0] > 2:
import urllib.request
with urllib.request.urlopen(url) as u:
root = etree.fromstring(u.read())
else:
@@ -84,13 +84,21 @@ def getwiki(url):
if 'version' in root.attrib:
ns['wiki'] = 'http://www.mediawiki.org/xml/export-%s/' % root.attrib['version']
text = root.find('wiki:page/wiki:revision/wiki:text',ns)
if text is None:
print("no page/revision/text element in the downloaded page")
exit(0)
- return text.text.encode()
+
+ m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url)
+ if m:
+ options.lang = m.group('lang')
+ options.kwdict['html_base'] = m.group('url') + '/wiki/'
+ if m.group('root') == 'wiktionary':
+ options.itype = 'wiktionary'
+
+ options.kwdict['text'] = text.text.encode()
def main():
usage = '%prog [OPTIONS] ARG'
version = '%prog 1.0'
description = """Translates MediaWiki documents markup to various other formats.
If ARG looks like a URL, the wiki text to be converted will be downloaded
@@ -138,25 +146,17 @@ Otherwise, ARG is name of the file to read wiki material from.
help='set base url')
(options, args) = parser.parse_args()
if len(args) == 1:
if options.base_url:
- options.kwdict['text'] = getwiki(options.base_url
- + '/wiki/Special:Export/' + args[0])
- m = re.match('(?:.+://)(.+?)\.(wik(?:ipedia|tionary))\.org',
- options.base_url)
- if m:
- options.lang = m.group(1)
- options.kwdict['html_base'] = options.base_url + '/wiki/'
- if m.group(2) == 'wiktionary':
- options.itype = 'wiktionary'
+ getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options)
elif args[0] == '-':
options.kwdict['file'] = sys.stdin
elif re.match('^(http|ftp)s?://',args[0]):
- options.kwdict['text'] = getwiki(args[0])
+ getwiki(args[0], options)
else:
options.kwdict['filename'] = args[0]
else:
parser.error("bad number of arguments")
options.kwdict['lang'] = options.lang # FIXME

Return to:

Send suggestions and report system problems to the System administrator.