summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2018-07-31 15:11:30 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2018-07-31 15:11:30 +0300
commit: 74fe0a22139f946835d50120708c9f9bd236d081 (patch)
tree: 4b1b805667b45af507c97c6c2fb88afc6406caf9
parent: 4e4519b079fdc723a885741c306a51be245e2111 (diff)
download: wikitrans-74fe0a22139f946835d50120708c9f9bd236d081.tar.gz
download: wikitrans-74fe0a22139f946835d50120708c9f9bd236d081.tar.bz2
Minor fixes
* WikiTrans/wiki2html.py: Use absolute import names.
* WikiTrans/wiki2texi.py: Likewise.
* WikiTrans/wiki2text.py: Likewise.
* WikiTrans/wikimarkup.py: Likewise.
* bin/wikitrans: Enforce UTF-8 for Python 2.
New option --base-url.
Optionally download the material from http.
-rw-r--r-- WikiTrans/wiki2html.py 6
-rw-r--r-- WikiTrans/wiki2texi.py 6
-rw-r--r-- WikiTrans/wiki2text.py 6
-rw-r--r-- WikiTrans/wikimarkup.py 2
-rwxr-xr-x bin/wikitrans 51
5 files changed, 58 insertions, 13 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 0309ae3..67cb9c3 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -16,9 +16,9 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
-from wikimarkup import *
-from wikitoken import *
-from wikins import wiki_ns_re, wiki_ns
+from WikiTrans.wikimarkup import *
+from WikiTrans.wikitoken import *
+from WikiTrans.wikins import wiki_ns_re, wiki_ns
import re
try:
from urllib import quote as url_quote
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index 106ad77..616fccb 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -15,9 +15,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from wikimarkup import *
-from wikitoken import *
-from wikins import wiki_ns_re, wiki_ns
+from WikiTrans.wikimarkup import *
+from WikiTrans.wikitoken import *
+from WikiTrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index b5bd708..cd4937e 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -15,9 +15,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from wikitoken import *
-from wikimarkup import *
-from wikins import wiki_ns_re, wiki_ns
+from WikiTrans.wikitoken import *
+from WikiTrans.wikimarkup import *
+from WikiTrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib
diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py
index ad0f675..e3bc26f 100644
--- a/WikiTrans/wikimarkup.py
+++ b/WikiTrans/wikimarkup.py
@@ -19,7 +19,7 @@ from __future__ import print_function
import sys
import re
from types import *
-from wikitoken import *
+from WikiTrans.wikitoken import *
__all__ = [ "BaseWikiMarkup", "WikiMarkup",
"TagAttributes", "TagAttributeSyntax" ]
diff --git a/bin/wikitrans b/bin/wikitrans
index 0b2d867..e794dc2 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -16,7 +16,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
+from __future__ import unicode_literals
import sys
+import re
+import urllib
+import tempfile
+import xml.etree.ElementTree as etree
from optparse import OptionParser
try:
from StringIO import StringIO
@@ -28,6 +33,19 @@ from WikiTrans.wiki2texi import TexiWikiMarkup
from WikiTrans.wikimarkup import WikiMarkup
from WikiTrans.wikidump import DumpWikiMarkup
+# Set utf-8 as the default encoding.
+# Trying to do so using encode('utf_8')/unicode, which is
+# supposed to be the right way, does not work in Python 2.7
+# Simply calling sys.setdefaultencoding is not possible,
+# because, for some obscure reason, Python chooses to delete
+# this symbol from the namespace after setting its default
+# encoding in site.py. That's why reload is needed.
+try:
+ reload(sys)
+ sys.setdefaultencoding('utf-8')
+except:
+ pass
+
handlers = {
'dump': {
'default': DumpWikiMarkup
@@ -52,10 +70,30 @@ def setkw(option, opt, value, parser):
if val:
parser.values.kwdict[kw] = val
+def getwiki(url):
+ tmp = tempfile.NamedTemporaryFile()
+ u = urllib.URLopener()
+ u.retrieve(url, tmp.name)
+ tree = etree.parse(tmp.name)
+ root = tree.getroot()
+ ns = { 'wiki':'' }
+ if 'version' in root.attrib:
+ ns['wiki'] = 'http://www.mediawiki.org/xml/export-%s/' % root.attrib['version']
+ text = root.find('wiki:page/wiki:revision/wiki:text',ns)
+ if text is None:
+ print("no page/revision/text element in the downloaded page")
+ exit(0)
+ return text.text
+
def main():
- usage = '%prog [OPTIONS] FILE'
+ usage = '%prog [OPTIONS] ARG'
version = '%prog 1.0'
description = """Translates MediaWiki documents markup to various other formats.
+If ARG looks like a URL, the wiki text to be converted will be downloaded
+from that URL.
+Otherwise, if --base-url is given, ARG is treated as the name of the page to
+get from the WikiMedia istallation at that URL.
+Otherwise, ARG is name of the file to read wiki material from.
"""
epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>"
@@ -91,12 +129,19 @@ def main():
action='store_const', const='dump',
dest='otype',
help='dump parse tree and exit; similar to --type=dump')
+ parser.add_option('-b', '--base-url',
+ action='store', type='string', dest='base_url',
+ help='set base url')
+
(options, args) = parser.parse_args()
-
if len(args) == 1:
- if args[0] == '-':
+ if options.base_url:
+ options.kwdict['text'] = getwiki(options.base_url + '/wiki/Special:Export/' + args[0])
+ elif args[0] == '-':
options.kwdict['file'] = sys.stdin
+ elif re.match('^(http|ftp)s?://',args[0]):
+ options.kwdict['text'] = getwiki(args[0])
else:
options.kwdict['filename'] = args[0]
else:

Return to:

Send suggestions and report system problems to the System administrator.