summary | refs | log | tree | commit | diff | about
author Sergey Poznyakoff <gray@gnu.org> 2018-08-17 14:05:32 (GMT)
committer Sergey Poznyakoff <gray@gnu.org> 2018-08-17 14:05:32 (GMT)
commit 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (patch) (side-by-side diff)
tree 4c894eef1dc0d998330683d2ecab12dfca99b803
parent ad4a97d83528b00e76435d3d2674ff05a44bd398 (diff)
download wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.gz
wikitrans-0c03a5a7b40b598b88f22f46b9e9086af6c59877.tar.bz2
Improve bin/wikitrans
* bin/wikitrans (getwiki): Take options as second argument and modify it directly. Deduce options.lang, options.itype, and options.kwdict['html_base'] from the URL, when possible.
* README.rst: Update description of bin/wikitrans
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- README.rst 4
-rwxr-xr-x bin/wikitrans 24
2 files changed, 15 insertions, 13 deletions
diff --git a/README.rst b/README.rst
index b6863bc..d189c98 100644
--- a/README.rst
+++ b/README.rst
@@ -170,5 +170,7 @@ Options are:
``-b URL``, ``--base-url=URL``
Set base url.
-
+Note: when using ``--base-url`` or passing URL as an argument (2nd and 3rd
+use cases above), if the URL is in 'wikipedia.org' or 'wiktionary.org'
+domain, the options ``--input-type``, and ``--lang`` are set automatically.
diff --git a/bin/wikitrans b/bin/wikitrans
index e9ab81f..caaa885 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -70,7 +70,7 @@ def setkw(option, opt, value, parser):
if val:
parser.values.kwdict[kw] = val
-def getwiki(url):
+def getwiki(url, options):
tmp = tempfile.NamedTemporaryFile()
if sys.version_info[0] > 2:
import urllib.request
@@ -87,7 +87,15 @@ def getwiki(url):
if text is None:
print("no page/revision/text element in the downloaded page")
exit(0)
- return text.text.encode()
+
+ m = re.match('(?P<url>(?:.+://)(?P<lang>.+?)\.(?P<root>wik(?:ipedia|tionary))\.org)', url)
+ if m:
+ options.lang = m.group('lang')
+ options.kwdict['html_base'] = m.group('url') + '/wiki/'
+ if m.group('root') == 'wiktionary':
+ options.itype = 'wiktionary'
+
+ options.kwdict['text'] = text.text.encode()
def main():
usage = '%prog [OPTIONS] ARG'
@@ -141,19 +149,11 @@ Otherwise, ARG is name of the file to read wiki material from.
(options, args) = parser.parse_args()
if len(args) == 1:
if options.base_url:
- options.kwdict['text'] = getwiki(options.base_url
- + '/wiki/Special:Export/' + args[0])
- m = re.match('(?:.+://)(.+?)\.(wik(?:ipedia|tionary))\.org',
- options.base_url)
- if m:
- options.lang = m.group(1)
- options.kwdict['html_base'] = options.base_url + '/wiki/'
- if m.group(2) == 'wiktionary':
- options.itype = 'wiktionary'
+ getwiki(options.base_url + '/wiki/Special:Export/' + args[0], options)
elif args[0] == '-':
options.kwdict['file'] = sys.stdin
elif re.match('^(http|ftp)s?://',args[0]):
- options.kwdict['text'] = getwiki(args[0])
+ getwiki(args[0], options)
else:
options.kwdict['filename'] = args[0]
else:

Return to:

Send suggestions and report system problems to the System administrator.