about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2018-08-26 23:16:31 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2018-08-26 23:16:31 +0300
commit 5d25aa3815a470ae497ff313d15ba5b9270d6d71 (patch)
tree ecae8b41be4b43877fa0f6f3a79162e7eb0451ed
parent 06fcb57554c4bd401b255ec887bd1970156ec3fa (diff)
download dico-5d25aa3815a470ae497ff313d15ba5b9270d6d71.tar.gz
dico-5d25aa3815a470ae497ff313d15ba5b9270d6d71.tar.bz2
Rewrite mediawiki.py to work with both Python versions
* app/python/mediawiki.py: Rewritten to work with Python 2.7 and 3.x
* app/python/mediawiki2.py: Remove
-rw-r--r-- app/python/mediawiki.py 46
-rw-r--r-- app/python/mediawiki2.py 154
2 files changed, 32 insertions, 168 deletions
diff --git a/app/python/mediawiki.py b/app/python/mediawiki.py
index 6c8fb52..b9586c7 100644
--- a/app/python/mediawiki.py
+++ b/app/python/mediawiki.py
@@ -17,14 +17,30 @@
# You should have received a copy of the GNU General Public License
# along with GNU Dico. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import print_function
import sys
import re
import socket
-import urllib.request, urllib.error, urllib.parse
-from html.entities import name2codepoint
from xml.dom import minidom
from wikitrans.wiki2text import TextWiktionaryMarkup
-import imp
+
+if sys.version_info[0] > 2:
+ from urllib.request import urlopen, Request
+ from urllib.error import URLError
+ from urllib.parse import quote as url_quote
+ from html.entities import name2codepoint
+else:
+ from urllib2 import urlopen, Request, quote as url_quote, URLError
+ from htmlentitydefs import name2codepoint
+ # Set utf-8 as the default encoding.
+ # Trying to do so using encode('utf_8')/unicode, which is
+ # supposed to be the right way, does not work.
+ # Simply calling sys.setdefaultencoding is not possible,
+ # because, for some obscure reason, Python chooses to delete
+ # this symbol from the namespace after setting its default
+ # encoding in site.py. That's why reload is needed.
+ reload(sys)
+ sys.setdefaultencoding('utf-8')
try:
import json
@@ -60,12 +76,12 @@ class DicoModule:
def define_word (self, word):
url = 'http://%s%s%s' % (self.wikihost, self.endpoint_define,
- urllib.parse.quote (word))
- req = urllib.request.Request (url)
+ url_quote (word))
+ req = Request (url)
req.add_header ('User-Agent', self.user_agent)
try:
- xml = urllib.request.urlopen (req).read ()
- except urllib.error.URLError:
+ xml = urlopen (req).read ()
+ except URLError:
return False
dom = minidom.parseString (xml)
el = dom.getElementsByTagName ('text')
@@ -73,6 +89,8 @@ class DicoModule:
data = el[0].firstChild.data
if dico.current_markup () != 'wiki':
data = self.__htmlentitydecode (data)
+ if sys.version_info[0] == 2:
+ data = data.encode ('utf-8')
wikiparser = TextWiktionaryMarkup (text=data)
wikiparser.parse ()
data = str (wikiparser)
@@ -82,11 +100,11 @@ class DicoModule:
def match_word (self, strat, key):
url = 'http://%s%s%s' % (self.wikihost, self.endpoint_match,
- urllib.parse.quote (key.word))
- req = urllib.request.Request (url)
+ url_quote (key.word))
+ req = Request (url)
req.add_header ('User-Agent', self.user_agent)
try:
- result = json.load (urllib.request.urlopen (req))
+ result = json.load (urlopen (req))
if result:
if strat.has_selector:
fltres = []
@@ -94,12 +112,12 @@ class DicoModule:
if strat.select (k, key):
fltres.append (k)
if len(fltres) > 0:
- return ['match', sorted(fltres, key=str.lower)]
+ return ['match', sorted(fltres, key=unicode.lower)]
else:
result[1].sort ()
- return ['match', sorted(result[1], key=str.lower)]
+ return ['match', sorted(result[1], key=unicode.lower)]
return False
- except urllib.error.URLError:
+ except URLError:
return False
def output (self, rh, n):
@@ -136,4 +154,4 @@ class DicoModule:
def __htmlentitydecode (self, s):
return re.sub ('&(%s);' % '|'.join (name2codepoint),
- lambda m: chr (name2codepoint[m.group (1)]), s)
+ lambda m: unichr (name2codepoint[m.group (1)]), s)
diff --git a/app/python/mediawiki2.py b/app/python/mediawiki2.py
deleted file mode 100644
index 4d2a227..0000000
--- a/app/python/mediawiki2.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-# Mediawiki module for Python 2
-# This file is part of GNU Dico.
-# Copyright (C) 2008-2010, 2012 Wojciech Polak
-#
-# GNU Dico is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Dico is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Dico. If not, see <http://www.gnu.org/licenses/>.
-
-import sys
-import re
-import socket
-import urllib2
-from htmlentitydefs import name2codepoint
-from xml.dom import minidom
-from wikitrans.wiki2text import TextWiktionaryMarkup
-
-# Set utf-8 as the default encoding.
-# Trying to do so using encode('utf_8')/unicode, which is
-# supposed to be the right way, does not work.
-# Simply calling sys.setdefaultencoding is not possible,
-# because, for some obscure reason, Python chooses to delete
-# this symbol from the namespace after setting its default
-# encoding in site.py. That's why reload is needed.
-
-reload(sys)
-sys.setdefaultencoding('utf-8')
-
-try:
- import json
-except ImportError:
- import simplejson as json
-
-import dico
-
-__version__ = '1.03'
-
-class DicoModule:
- user_agent = 'Mozilla/1.0'
- endpoint_match = '/w/api.php?action=opensearch&format=json&search='
- endpoint_define = '/wiki/Special:Export/'
-
- def __init__ (self, *argv):
- self.wikihost = argv[0]
- socket.setdefaulttimeout (4)
- dico.register_markup ('wiki')
-
- def open (self, dbname):
- self.dbname = dbname
- return True
-
- def close (self):
- return True
-
- def descr (self):
- return self.wikihost
-
- def info (self):
- return False
-
- def define_word (self, word):
- url = 'http://%s%s%s' % (self.wikihost, self.endpoint_define,
- urllib2.quote (word))
- req = urllib2.Request (url)
- req.add_header ('User-Agent', self.user_agent)
- try:
- xml = urllib2.urlopen (req).read ()
- except urllib2.URLError:
- return False
- dom = minidom.parseString (xml)
- el = dom.getElementsByTagName ('text')
- if len (el):
- data = el[0].firstChild.data
- if dico.current_markup () != 'wiki':
- data = self.__htmlentitydecode (data).encode ('utf_8')
- wikiparser = TextWiktionaryMarkup (text=data)
- wikiparser.parse ()
- data = str (wikiparser)
- return ['define', data]
- else:
- return False
-
- def match_word (self, strat, key):
- url = 'http://%s%s%s' % (self.wikihost, self.endpoint_match,
- urllib2.quote (key.word))
- req = urllib2.Request (url)
- req.add_header ('User-Agent', self.user_agent)
- try:
- result = json.load (urllib2.urlopen (req))
- if result:
- if strat.has_selector:
- fltres = []
- for k in result[1]:
- if strat.select (k, key):
- fltres.append (k)
- if fltres.count > 0:
- return ['match', sorted(fltres, key=unicode.lower)]
- else:
- result[1].sort ()
- return ['match', sorted(result[1], key=unicode.lower)]
- return False
- except urllib2.URLError:
- return False
-
- def output (self, rh, n):
- if rh[0] == 'define':
- try:
- print rh[1].encode ('utf_8'),
- except UnicodeDecodeError:
- print rh[1],
- else:
- list = rh[1]
- sys.stdout.softspace = 0
- try:
- print list[n].encode ('utf_8'),
- except UnicodeDecodeError:
- print list[n],
- return True
-
- def result_count (self, rh):
- if rh[0] == 'define':
- return 1
- else:
- return len (rh[1])
-
- def compare_count (self, rh):
- return 1
-
- def result_headers (self, rh, hdr):
- if dico.current_markup () != 'wiki':
- hdr['Content-Type'] = 'text/plain';
- elif '.wikipedia.org' in self.wikihost:
- hdr['Content-Type'] = 'text/x-wiki-wikipedia';
- elif '.wiktionary.org' in self.wikihost:
- hdr['Content-Type'] = 'text/x-wiki-wiktionary';
- else:
- hdr['Content-Type'] = 'text/x-wiki';
- return hdr
-
- def free_result (self, rh):
- pass
-
- def __htmlentitydecode (self, s):
- return re.sub ('&(%s);' % '|'.join (name2codepoint),
- lambda m: unichr (name2codepoint[m.group (1)]), s)

Return to:

Send suggestions and report system problems to the System administrator.