diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-07-31 15:11:30 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-07-31 15:11:30 +0300 |
commit | 74fe0a22139f946835d50120708c9f9bd236d081 (patch) | |
tree | 4b1b805667b45af507c97c6c2fb88afc6406caf9 | |
parent | 4e4519b079fdc723a885741c306a51be245e2111 (diff) | |
download | wikitrans-74fe0a22139f946835d50120708c9f9bd236d081.tar.gz wikitrans-74fe0a22139f946835d50120708c9f9bd236d081.tar.bz2 |
Minor fixes
* WikiTrans/wiki2html.py: Use absolute import names.
* WikiTrans/wiki2texi.py: Likewise.
* WikiTrans/wiki2text.py: Likewise.
* WikiTrans/wikimarkup.py: Likewise.
* bin/wikitrans: Enforce UTF-8 for Python 2
New option --base-url.
Optionally download the material from a URL.
-rw-r--r-- | WikiTrans/wiki2html.py | 6 | ||||
-rw-r--r-- | WikiTrans/wiki2texi.py | 6 | ||||
-rw-r--r-- | WikiTrans/wiki2text.py | 6 | ||||
-rw-r--r-- | WikiTrans/wikimarkup.py | 2 | ||||
-rwxr-xr-x | bin/wikitrans | 51 |
5 files changed, 58 insertions, 13 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 0309ae3..67cb9c3 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -16,9 +16,9 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import print_function -from wikimarkup import * -from wikitoken import * -from wikins import wiki_ns_re, wiki_ns +from WikiTrans.wikimarkup import * +from WikiTrans.wikitoken import * +from WikiTrans.wikins import wiki_ns_re, wiki_ns import re try: from urllib import quote as url_quote diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index 106ad77..616fccb 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -15,9 +15,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from wikimarkup import * -from wikitoken import * -from wikins import wiki_ns_re, wiki_ns +from WikiTrans.wikimarkup import * +from WikiTrans.wikitoken import * +from WikiTrans.wikins import wiki_ns_re, wiki_ns import re import urllib diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index b5bd708..cd4937e 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py @@ -15,9 +15,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
-from wikitoken import * -from wikimarkup import * -from wikins import wiki_ns_re, wiki_ns +from WikiTrans.wikitoken import * +from WikiTrans.wikimarkup import * +from WikiTrans.wikins import wiki_ns_re, wiki_ns import re import urllib diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index ad0f675..e3bc26f 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -19,7 +19,7 @@ from __future__ import print_function import sys import re from types import * -from wikitoken import * +from WikiTrans.wikitoken import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "TagAttributes", "TagAttributeSyntax" ] diff --git a/bin/wikitrans b/bin/wikitrans index 0b2d867..e794dc2 100755 --- a/bin/wikitrans +++ b/bin/wikitrans @@ -16,7 +16,12 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import print_function +from __future__ import unicode_literals import sys +import re +import urllib +import tempfile +import xml.etree.ElementTree as etree from optparse import OptionParser try: from StringIO import StringIO @@ -28,6 +33,19 @@ from WikiTrans.wiki2texi import TexiWikiMarkup from WikiTrans.wikimarkup import WikiMarkup from WikiTrans.wikidump import DumpWikiMarkup +# Set utf-8 as the default encoding. +# Trying to do so using encode('utf_8')/unicode, which is +# supposed to be the right way, does not work in Python 2.7 +# Simply calling sys.setdefaultencoding is not possible, +# because, for some obscure reason, Python chooses to delete +# this symbol from the namespace after setting its default +# encoding in site.py. That's why reload is needed. 
+try: + reload(sys) + sys.setdefaultencoding('utf-8') +except: + pass + handlers = { 'dump': { 'default': DumpWikiMarkup @@ -52,10 +70,30 @@ def setkw(option, opt, value, parser): if val: parser.values.kwdict[kw] = val +def getwiki(url): + tmp = tempfile.NamedTemporaryFile() + u = urllib.URLopener() + u.retrieve(url, tmp.name) + tree = etree.parse(tmp.name) + root = tree.getroot() + ns = { 'wiki':'' } + if 'version' in root.attrib: + ns['wiki'] = 'http://www.mediawiki.org/xml/export-%s/' % root.attrib['version'] + text = root.find('wiki:page/wiki:revision/wiki:text',ns) + if text is None: + print("no page/revision/text element in the downloaded page") + exit(0) + return text.text + def main(): - usage = '%prog [OPTIONS] FILE' + usage = '%prog [OPTIONS] ARG' version = '%prog 1.0' description = """Translates MediaWiki documents markup to various other formats. +If ARG looks like a URL, the wiki text to be converted will be downloaded +from that URL. +Otherwise, if --base-url is given, ARG is treated as the name of the page to +get from the WikiMedia istallation at that URL. +Otherwise, ARG is name of the file to read wiki material from. """ epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>" @@ -91,12 +129,19 @@ def main(): action='store_const', const='dump', dest='otype', help='dump parse tree and exit; similar to --type=dump') + parser.add_option('-b', '--base-url', + action='store', type='string', dest='base_url', + help='set base url') + (options, args) = parser.parse_args() - if len(args) == 1: - if args[0] == '-': + if options.base_url: + options.kwdict['text'] = getwiki(options.base_url + '/wiki/Special:Export/' + args[0]) + elif args[0] == '-': options.kwdict['file'] = sys.stdin + elif re.match('^(http|ftp)s?://',args[0]): + options.kwdict['text'] = getwiki(args[0]) else: options.kwdict['filename'] = args[0] else: |