diff options
-rw-r--r-- | test.py | 2 | ||||
-rw-r--r-- | wiki2html.py | 4 | ||||
-rw-r--r-- | wiki2texi.py | 4 | ||||
-rw-r--r-- | wiki2text.py | 20 | ||||
-rw-r--r-- | wikimarkup.py | 2 |
5 files changed, 19 insertions, 13 deletions
@@ -1,42 +1,44 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008,2015 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import print_function +import sys, os +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) import unittest import wiki2html class TestMarkupParserBasic (unittest.TestCase): def test_colon(self): self.assertTrue(self.__test('colon')) pass def test_headings(self): self.assertTrue(self.__test('headings')) pass def test_hz(self): self.assertTrue(self.__test('hz')) pass def test_numlist(self): self.assertTrue(self.__test('numlist')) pass def test_unlist(self): self.assertTrue(self.__test('unlist')) pass diff --git a/wiki2html.py b/wiki2html.py index 05d4642..abf851a 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -1,43 +1,43 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008,2015 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from wikimarkup import * -from wikins import wiki_ns_re, wiki_ns +from wit.wikimarkup import * +from wit.wikins import wiki_ns_re, wiki_ns import re try: from urllib import quote as url_quote except ImportError: from urllib.parse import quote as url_quote class HtmlWikiMarkup (WikiMarkup): """ A (hopefully) general-purpose Wiki->HTML translator class. FIXME: 1. See WikiMarkup for a list 2. [[official position]]s : final 's' gets after closing </a> tag. Should be before. """ def wiki_ns_name(self, str): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None envt = { "unnumbered": { "hdr": "ul", diff --git a/wiki2texi.py b/wiki2texi.py index 6e32c56..4ce32f9 100644 --- a/wiki2texi.py +++ b/wiki2texi.py @@ -1,43 +1,43 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2015 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from wikimarkup import * -from wikins import wiki_ns_re, wiki_ns +from wit.wikimarkup import * +from wit.wikins import wiki_ns_re, wiki_ns import re import urllib class TexiWikiMarkup (WikiMarkup): sectcomm = { 'numbered': [ '@top', '@chapter', '@section', '@subsection', '@subsubsection' ], 'unnumbered': [ '@top', '@unnumbered', '@unnumberedsec', '@unnumberedsubsec', '@unnumberedsubsubsec' ], 'appendix': [ '@top', '@appendix', '@appendixsec', '@appendixsubsec', diff --git a/wiki2text.py b/wiki2text.py index 916391e..5041ea0 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -1,139 +1,143 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008,2015 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from wikimarkup import * -from wikins import wiki_ns_re, wiki_ns +from wit.wikimarkup import * +from wit.wikins import wiki_ns_re, wiki_ns import re -import urllib +try: + from urllib import quote as url_quote +except ImportError: + from urllib.parse import quote as url_quote + class TextWikiMarkup (WikiMarkup): """ A (general-purpose Wiki->Text translator class. """ # Output width width = 78 # Do not show references. references = False # Provide a minimum markup markup = True # Number of current element in the environment num = 0 def __init__(self, *args, **keywords): WikiMarkup.__init__(self, *args, **keywords) if 'width' in keywords: self.width = keywords['width'] if 'refs' in keywords: self.references = keywords['refs'] if 'markup' in keywords: self.markup = keywords['markup'] def xref(self, text, target): if text: return "%s (see %s) " % (text, target) else: return "see " + target def wiki_ns_name(self, str): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None - + def mktgt(self, tgt, lang = None): if not lang: lang = self.lang - return self.html_base % { 'lang' : lang } + urllib.quote(tgt) + return self.html_base % { 'lang' : lang } + url_quote(tgt) def fmtlink(self, elt, istmpl): arg = self.format(elt['content'][0]) if len(elt['content']) > 1: s = [x for x in map(self.format, elt['content'])] text = s[1] else: s = None text = None if s: if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" if len(s) > 1 and s[1] == 'thumb': return "" (qual,sep,tgt) = arg.partition(':') if tgt != '': ns = self.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': if not self.references: return "" text = "[%s: %s]" % (qual, text if text else arg) tgt = self.image_base + '/' + \ - urllib.quote(tgt) + \ - '/250px-' + urllib.quote(tgt) + url_quote(tgt) + \ + '/250px-' + url_quote(tgt) elif ns == 'NS_MEDIA': text = "[%s]" % (qual) else: tgt = self.mktgt(tgt) elif not istmpl and qual in self.langtab: text = self.langtab[qual] + ": " + tgt tgt = self.mktgt(tgt, qual) else: tgt = self.mktgt(tgt) else: tgt = self.mktgt(arg) if self.references: return "%s (see %s) " % (text, tgt) elif not text or text == '': return arg else: return text def indent (self, lev, text): if text.find('\n') == -1: s = (" " * lev) + text else: s = "" - for elt in text.split('\n'): + for elt in text.decode("utf-8").split('\n'): if elt: s += (" " * lev) + elt + '\n' if not text.endswith('\n'): s = s.rstrip('\n') # print "IN: '%s'" % (text) # print "OUT: '%s'" % (s) return s def fmtpara(self, input): output = "" linebuf = "" length = 0 for s in input.split(): wlen = len(s) if linebuf.endswith("."): wsc = 2 else: wsc = 1 if length + wsc + wlen > self.width: # FIXME: fill out linebuf output += linebuf + '\n' wsc = 0 length = 0 linebuf = "" diff --git a/wikimarkup.py b/wikimarkup.py index 2ef6be1..9371d89 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -832,49 +832,49 @@ class WikiMarkup (BaseWikiMarkup): 3. text=<string> Input is taken from <string>, line by line. Usage: obj = WikiMarkup(arg=val) obj.parse ... Do whatever you need with obj.tree ... """ file = None text = None lang = 'en' html_base = 'http://%(lang)s.wiktionary.org/wiki/' image_base = 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' media_base = 'http://www.mediawiki.org/xml/export-0.3' def __init__(self, *args, **keywords): for kw in keywords: if kw == 'file': self.file = keywords[kw] elif kw == 'filename': self.file = open(keywords[kw]) elif kw == 'text': - self.text = keywords[kw].split("\n") + self.text = keywords[kw].decode("utf-8").split("\n") elif kw == 'lang': self.lang = keywords[kw] elif kw == 'html_base': self.html_base = keywords[kw] elif kw == 'image_base': self.image_base = keywords[kw] elif kw == 'media_base': self.media_base = keywords[kw] elif kw == 'nested': self.nested = keywords[kw] def __del__(self): if self.file: self.file.close() def input(self): if self.file: return self.file.readline() elif self.text: return self.text.pop(0) + '\n' else: return None def is_lang_link(self, elt): |