summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org> 2018-08-16 15:45:00 +0300
committer Sergey Poznyakoff <gray@gnu.org> 2018-08-17 13:17:11 +0300
commit 7186dbab7f1c1227e9229866e086bc417e3e4e52 (patch)
tree f29114e9ff7a7b023dd3d611a9bc8808f5cf5bbd
parent d9e26129527ce84f626eb44ff95e4ecfbc5bc92a (diff)
download wikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.gz
wikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.bz2
Fix PEP 8 issues.
-rw-r--r-- tests/test_html.py 8
-rw-r--r-- tests/test_texi.py 6
-rw-r--r-- tests/test_text.py 8
-rw-r--r-- tests/wikitest.py (renamed from tests/WikiTest.py) 6
-rw-r--r-- wikitrans/__init__.py (renamed from WikiTrans/__init__.py) 0
-rw-r--r-- wikitrans/wiki2html.py (renamed from WikiTrans/wiki2html.py) 115
-rw-r--r-- wikitrans/wiki2texi.py (renamed from WikiTrans/wiki2texi.py) 63
-rw-r--r-- wikitrans/wiki2text.py (renamed from WikiTrans/wiki2text.py) 74
-rw-r--r-- wikitrans/wikidump.py (renamed from WikiTrans/wikidump.py) 41
-rw-r--r-- wikitrans/wikimarkup.py (renamed from WikiTrans/wikimarkup.py) 784
-rw-r--r-- wikitrans/wikins.py (renamed from WikiTrans/wikins.py) 0
-rw-r--r-- wikitrans/wikitoken.py (renamed from WikiTrans/wikitoken.py) 154
12 files changed, 802 insertions, 457 deletions
diff --git a/tests/test_html.py b/tests/test_html.py
index 3da57f6..5a15cb8 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1,14 +1,14 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import unittest
-from WikiTrans.wiki2html import HtmlWiktionaryMarkup
-from WikiTest import populateMethods
+from wikitrans.wiki2html import HtmlWikiMarkup
+from wikitest import populate_methods
-class TestWiktionaryMarkup (unittest.TestCase):
+class TestWikiMarkup (unittest.TestCase):
pass
-populateMethods(TestWiktionaryMarkup, HtmlWiktionaryMarkup, '.html')
+populate_methods(TestWikiMarkup, HtmlWikiMarkup, '.html')
if __name__ == '__main__':
unittest.main()
diff --git a/tests/test_texi.py b/tests/test_texi.py
index 75314c9..ddd26c7 100644
--- a/tests/test_texi.py
+++ b/tests/test_texi.py
@@ -1,14 +1,14 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import unittest
-from WikiTrans.wiki2texi import TexiWikiMarkup
-from WikiTest import populateMethods
+from wikitrans.wiki2texi import TexiWikiMarkup
+from wikitest import populate_methods
class TestTexiWikiMarkup (unittest.TestCase):
pass
-populateMethods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi')
+populate_methods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi')
if __name__ == '__main__':
unittest.main()
diff --git a/tests/test_text.py b/tests/test_text.py
index a06f519..b3d0a12 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -1,14 +1,14 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
import unittest
-from WikiTrans.wiki2text import TextWiktionaryMarkup
-from WikiTest import populateMethods
+from wikitrans.wiki2text import TextWikiMarkup
+from wikitest import populate_methods
-class TestTextWiktionaryMarkup (unittest.TestCase):
+class TestTextWikiMarkup (unittest.TestCase):
pass
-populateMethods(TestTextWiktionaryMarkup, TextWiktionaryMarkup, '.text')
+populate_methods(TestTextWikiMarkup, TextWikiMarkup, '.text')
if __name__ == '__main__':
unittest.main()
diff --git a/tests/WikiTest.py b/tests/wikitest.py
index 1429f5e..ff26227 100644
--- a/tests/WikiTest.py
+++ b/tests/wikitest.py
@@ -1,34 +1,34 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function
from glob import glob
import os.path
-def MarkupTest(classname, name_in, name_out):
+def wiki_markup_test(classname, name_in, name_out):
fh = open(name_out)
buf = ''.join(fh.readlines()).strip()
fh.close()
hwm = classname(filename=name_in, lang="en")
hwm.parse()
if str(hwm).strip() == buf:
return True
# fail
print("\n>>>%s<<<" % buf)
print(">>>%s<<<" % str(hwm).strip())
return False
-def populateMethods(cls, wcls, suffix):
+def populate_methods(cls, wcls, suffix):
def settest(self, base, wiki_name, pat_name):
def dyntest(self):
- self.assertTrue(MarkupTest(wcls, wiki_name, pat_name))
+ self.assertTrue(wiki_markup_test(wcls, wiki_name, pat_name))
meth = 'test_' + wcls.__name__ + '_' + base
dyntest.__name__ = meth
setattr(cls, meth, dyntest)
for file in glob('testdata/*.wiki'):
if os.path.isfile(file):
patfile = file[:len(file) - 5] + suffix
base, ext = os.path.splitext(os.path.basename(file))
if os.path.exists(patfile) and os.path.isfile(patfile):
settest(cls, base, file, patfile)
diff --git a/WikiTrans/__init__.py b/wikitrans/__init__.py
index 5832e38..5832e38 100644
--- a/WikiTrans/__init__.py
+++ b/wikitrans/__init__.py
diff --git a/WikiTrans/wiki2html.py b/wikitrans/wiki2html.py
index 6147642..ce65bae 100644
--- a/WikiTrans/wiki2html.py
+++ b/wikitrans/wiki2html.py
@@ -6,28 +6,39 @@
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Wiki markup to HTML translator.
+
+Classes:
+
+HtmlWikiMarkup -- Converts Wiki material to HTML.
+HtmlWiktionaryMarkup -- Reserved for future use. Currently does the same as
+ HtmlWikiMarkup.
+
+"""
+
from __future__ import print_function
-from WikiTrans.wikimarkup import *
-from WikiTrans.wikitoken import *
-from WikiTrans.wikins import wiki_ns_re, wiki_ns
+from wikitrans.wikimarkup import *
+from wikitrans.wikitoken import *
+from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
try:
from urllib import quote as url_quote
except ImportError:
from urllib.parse import quote as url_quote
try:
from html import escape as html_escape
except ImportError:
from cgi import escape as html_escape
__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
@@ -70,34 +81,34 @@ class HtmlLinkNode(HtmlSeqNode):
tgt = self.parser.media_base + '/' + tgt
else:
tgt = self.parser.mktgt(tgt)
elif self.type == 'LINK' and qual in self.parser.langtab:
tgt = self.parser.mktgt(tgt, qual)
if not text or text == '':
text = self.parser.langtab[qual]
else:
tgt = self.parser.mktgt(tgt)
else:
tgt = self.parser.mktgt(arg)
return "<a href=\"%s\">%s</a>" % (tgt,
- text if (text and text != '') \
- else arg)
+ text if (text and text != '') else arg)
class HtmlRefNode(WikiRefNode):
def format(self):
target = self.ref
text = self.content.format()
- return "<a href=\"%s\">%s</a>" % (target,
- text if (text and text != '') \
- else target)
+ return "<a href=\"%s\">%s</a>" % (
+ target,
+ text if (text and text != '') else target
+ )
class HtmlFontNode(HtmlSeqNode):
def format(self):
comm = { 'IT': 'i',
'BOLD': 'b' }
s = '<%s>' % comm[self.type]
for x in self.content:
s += x.format()
s += '</%s>' % comm[self.type]
return s
class HtmlTextNode(HtmlSeqNode):
@@ -143,32 +154,32 @@ class HtmlTagNode(WikiTagNode):
s = self.content.format()
self.parser.nested -= 1
return '<pre><code>' + s + '</code></pre>' #FIXME
elif self.tag == 'ref':
n = self.idx+1
return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n)
elif self.tag == 'references':
s = '<div class="references">\n'
s += '<ol class="references">\n'
n = 0
for ref in self.parser.references:
n += 1
- s += ('<li id="cite_note-%d">' + \
- '<span class="mw-cite-backlink">' + \
- '<b><a href="#cite_ref-%d">^</a></b>' + \
- '</span>' + \
- '<span class="reference-text">' + \
- ref.content.format() + \
- '</span>' + \
- '</li>\n') % (n,n)
+ s += ('<li id="cite_note-%d">'
+ + '<span class="mw-cite-backlink">'
+ + '<b><a href="#cite_ref-%d">^</a></b>'
+ + '</span>'
+ + '<span class="reference-text">'
+ + ref.content.format()
+ + '</span>'
+ + '</li>\n') % (n,n)
s += '</ol>\n</div>\n'
return s
else:
s = '<' + self.tag
if self.args:
s += ' ' + str(self.args)
s += '>'
s += self.content.format()
return s + '</' + self.tag + '>'
class HtmlParaNode(HtmlSeqNode):
def format(self):
@@ -178,35 +189,67 @@ class HtmlPreNode(HtmlSeqNode):
def format(self):
s = super(HtmlPreNode, self).format()
if self.parser.nested:
return s
else:
return '<pre>' + s + '</pre>'
class HtmlIndNode(WikiIndNode):
def format(self):
return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
-class HtmlWikiMarkup (WikiMarkup):
- """
- A (hopefully) general-purpose Wiki->HTML translator class.
- FIXME: 1. See WikiMarkup for a list
- 2. [[official position]]s : final 's' gets after closing </a> tag.
- Should be before.
+class HtmlWikiMarkup(WikiMarkup):
+ """A Wiki markup to HTML translator class.
+
+ Usage:
+
+ x = HtmlWikiMarkup(filename="input.wiki")
+ # Parse the input:
+ x.parse()
+ # Print it as HTML:
+ print(str(x))
+
+ Known bugs:
+ * [[official position]]s
+ Final 's' gets after closing </a> tag. Should be before.
"""
nested = 0
references = []
def __init__(self, *args, **kwargs):
+ """Create a HtmlWikiMarkup object.
+
+ Arguments:
+
+ filename=FILE
+ Read Wiki material from the file named FILE.
+ file=FD
+ Read Wiki material from file object FD.
+ text=STRING
+ Read Wiki material from STRING.
+ lang=CODE
+ Specifies source language. Default is 'en'. This variable can be
+ referred to as '%(lang)s' in the keyword arguments below.
+ html_base=URL
+ Base URL for cross-references. Default is
+ 'http://%(lang)s.wiktionary.org/wiki/'
+ image_base=URL
+ Base URL for images. Default is
+ 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf'
+ media_base=URL
+ Base URL for media files. Default is
+ 'http://www.mediawiki.org/xml/export-0.3'
+ """
+
super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
self.token_class['LINK'] = HtmlLinkNode
self.token_class['TMPL'] = HtmlLinkNode
self.token_class['REF'] = HtmlRefNode
self.token_class['IT'] = HtmlFontNode
self.token_class['BOLD'] = HtmlFontNode
self.token_class['HDR'] = HtmlHdrNode
self.token_class['BAR'] = HtmlBarNode
self.token_class['ENV'] = HtmlEnvNode
self.token_class['TAG'] = HtmlTagNode
self.token_class['PARA'] = HtmlParaNode
self.token_class['PRE'] = HtmlPreNode
@@ -261,39 +304,17 @@ class HtmlWikiMarkup (WikiMarkup):
text += ','
n += 1
text += ' <span class="proto">' + x + '</span>'
text += ' <span class="meaning">(' + s[-2] + ')</span>'
return text
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
-class HtmlWiktionaryMarkup (HtmlWikiMarkup):
- """
- A class for translating Wiktionary articles into HTML.
- This version does not do much, except that it tries to correctly
- format templates. But "tries" does not mean "does". The heuristics
- used here is clearly not enough to cope with it.
-
- 1. FIXME:
- The right solution would be to have a database of templates with their
- semantics and to decide on their rendering depending on that. E.g.
- {{term}} in en.wiktionary means "replace this with the search term".
- This, however, does not work in other wiktionaries. There are
- also more complex templates, e.g.: {{t+|bg|врата|n|p|tr=vrata|sc=Cyrl}}
- I don't know what it means. Couldn't find any documentation either.
- Again, this template does not work in other dictionaries.
+class HtmlWiktionaryMarkup(HtmlWikiMarkup):
+ """A class for translating Wiktionary articles into HTML.
- 2. Capitulation notice:
- Given the:
- 1. vast amount of wiktionaries available,
- 2. abundance of various templates for each wictionary,
- 3. apparent lack of documentation thereof,
- 4. the lack of standardized language-independent templates,
- I dont see any way to cope with the template-rendering task within a
- reasonable amount of time.
-
- Faeci quod potui, faciant meliora potentes.
+ Reserved for future use. Currently does the same as HtmlWikiMarkup.
"""
diff --git a/WikiTrans/wiki2texi.py b/wikitrans/wiki2texi.py
index 7297195..d9e5f52 100644
--- a/WikiTrans/wiki2texi.py
+++ b/wikitrans/wiki2texi.py
@@ -6,27 +6,36 @@
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from WikiTrans.wikimarkup import *
-from WikiTrans.wikitoken import *
-from WikiTrans.wikins import wiki_ns_re, wiki_ns
+"""
+Wiki markup to Texinfo translator.
+
+Classes:
+
+TexiWikiMarkup -- Converts Wiki material to Texinfo.
+
+"""
+
+from wikitrans.wikimarkup import *
+from wikitrans.wikitoken import *
+from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib
class Acc(list):
def prepend(self,x):
self.insert(0,x)
def is_empty(self):
return len(self) == 0
def clear(self):
self = []
@@ -242,25 +251,37 @@ class TexiLinkNode(WikiSeqNode):
class TexiRefNode(WikiRefNode):
def format(self):
parser = self.parser
target = self.ref
save = parser._begin_print()
self.content.format()
text = parser._end_print(save)
if text and text != '':
parser._print("@uref{%s,%s}" % (target, text), escape=False)
else:
parser._print("@uref{%s}" % target, escape=False)
-class TexiWikiMarkup (WikiMarkup):
+class TexiWikiMarkup(WikiMarkup):
+ """Wiki markup to Texinfo translator class.
+
+ Usage:
+
+ x = TexiWikiMarkup(filename="input.wiki")
+ # Parse the input:
+ x.parse()
+ # Print it as Texi:
+ print(str(x))
+
+ """
+
nested = 0
sectcomm = {
'numbered': [
'@top',
'@chapter',
'@section',
'@subsection',
'@subsubsection'
],
'unnumbered': [
'@top',
'@unnumbered',
@@ -279,24 +300,58 @@ class TexiWikiMarkup (WikiMarkup):
'@majorheading'
'@chapheading',
'@heading',
'@subheading',
'@subsubheading'
]
}
sectioning_model = 'numbered'
sectioning_start = 0
def __init__(self, *args, **keywords):
+ """Create a TexiWikiMarkup object.
+
+ Arguments:
+
+ filename=FILE
+ Read Wiki material from the file named FILE.
+ file=FD
+ Read Wiki material from file object FD.
+ text=STRING
+ Read Wiki material from STRING.
+
+ sectioning_model=MODEL
+ Select the Texinfo sectioning model for the output document. Possible
+ values are:
+
+ 'numbered'
+ Top of document is marked with "@top". Headings ("=", "==",
+ "===", etc) produce "@chapter", "@section", "@subsection", etc.
+ 'unnumbered'
+ Unnumbered sectioning: "@top", "@unnumbered", "@unnumberedsec",
+ "@unnumberedsubsec".
+ 'appendix'
+ Sectioning suitable for appendix entries: "@top", "@appendix",
+ "@appendixsec", "@appendixsubsec", etc.
+ 'heading'
+ Use heading directives to reflect sectioning: "@majorheading",
+ "@chapheading", "@heading", "@subheading", etc.
+ sectioning_start=N
+ Shift resulting heading level by N positions. For example, supposing
+ "sectioning_model='numbered'", "== A ==" normally produces
+ "@section A" on output. Now, if given "sectioning_start=1", this
+ directive will produce "@subsection A" instead.
+ """
+
super(TexiWikiMarkup, self).__init__(*args, **keywords)
self.token_class['TEXT'] = TexiTextNode
self.token_class['TAG'] = TexiTagNode
self.token_class['PARA'] = TexiParaNode
self.token_class['PRE'] = TexiPreNode
self.token_class['IT'] = TexiFontNode
self.token_class['BOLD'] = TexiFontNode
self.token_class['HDR'] = TexiHdrNode
self.token_class['BAR'] = TexiBarNode
self.token_class['IND'] = TexiIndNode
self.token_class['ENV'] = TexiEnvNode
diff --git a/WikiTrans/wiki2text.py b/wikitrans/wiki2text.py
index cb3a183..1fbc61b 100644
--- a/WikiTrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -6,27 +6,38 @@
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from WikiTrans.wikitoken import *
-from WikiTrans.wikimarkup import *
-from WikiTrans.wikins import wiki_ns_re, wiki_ns
+"""
+Wiki markup to plain text translator.
+
+Classes:
+
+TextWikiMarkup -- Converts Wiki material to plain text.
+TextWiktionaryMarkup -- Reserved for future use. Currently does the same as
+ TextWikiMarkup.
+
+"""
+
+from wikitrans.wikitoken import *
+from wikitrans.wikimarkup import *
+from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
try:
from urllib import quote as url_quote
except ImportError:
from urllib.parse import quote as url_quote
class TextSeqNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
if len(string) > 1 and not string[-1].isspace():
string += ' '
@@ -98,27 +109,27 @@ class TextLinkNode(WikiSeqNode):
if s[0] == 'disambigR' or s[0] == 'wikiquote':
return ""
if len(s) > 1 and s[1] == 'thumb':
return ""
(qual,sep,tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
if ns == 'NS_IMAGE':
if not self.parser.show_urls:
return ""
text = "[%s: %s]" % (qual, text if text else arg)
- tgt = self.image_base + '/' + \
- url_quote(tgt) + \
- '/250px-' + url_quote(tgt)
+ tgt = "%s/%s/250px-%s" % (self.image_base,
+ url_quote(tgt),
+ url_quote(tgt))
elif ns == 'NS_MEDIA':
text = "[%s]" % (qual)
else:
tgt = self.parser.mktgt(tgt)
elif self.type == 'LINK' and qual in self.parser.langtab:
text = self.parser.langtab[qual] + ": " + tgt
tgt = self.parser.mktgt(tgt, qual)
else:
tgt = self.parser.mktgt(tgt)
else:
tgt = self.parser.mktgt(arg)
if self.parser.show_urls:
@@ -132,26 +143,29 @@ class TextTmplNode(TextLinkNode):
def format(self):
return '[' + super(TextTmplNode, self).format() + ']'
class TextBarNode(WikiNode):
def format(self):
w = self.parser.width
if w < 5:
w = 5
return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
class TextHdrNode(WikiHdrNode):
def format(self):
- return "\n" + ("*" * self.level) + " " + \
- self.content.format().lstrip(" ") + "\n\n"
+ return ("\n"
+ + ("*" * self.level)
+ + " "
+ + self.content.format().lstrip(" ")
+ + "\n\n")
class TextRefNode(WikiRefNode):
def format(self):
text = self.content.format()
if text:
return "%s (see %s) " % (text, self.ref)
else:
return "see " + self.ref
class TextEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
@@ -195,43 +209,70 @@ class TextTagNode(WikiTagNode):
elif self.tag == 'references':
s = '\nReferences:\n'
for ref in self.parser.references:
s += ('[%d]. ' % (ref.idx+1)) + ref.content.format() + '\n'
else:
s = '<' + self.tag
if self.args:
s += ' ' + str(self.args)
s += '>' + self.content.format() + '</' + self.tag + '>'
return s
-class TextWikiMarkup (WikiMarkup):
- """
- A (general-purpose Wiki->Text translator class.
+class TextWikiMarkup(WikiMarkup):
+ """A Wiki markup to plain text translator.
+
+ Usage:
+
+ x = TextWikiMarkup(filename="input.wiki")
+ # Parse the input:
+ x.parse()
+ # Print it as plain text:
+ print(str(x))
+
"""
# Output width
width = 78
# Do not show references.
show_urls = False
# Provide a minimum markup
markup = True
# Number of current element in the environment
num = 0
# Array of footnote references
references = []
def __init__(self, *args, **keywords):
+ """Create a TextWikiMarkup object.
+
+ Arguments:
+
+ filename=FILE
+ Read Wiki material from the file named FILE.
+ file=FD
+ Read Wiki material from file object FD.
+ text=STRING
+ Read Wiki material from STRING.
+
+ width=N
+ Limit output width to N columns. Default is 78.
+ show_urls=BOOL
+ If True, the link URLs are displayed in parentheses next to the
+ link text. By default (False), only the link text is
+ displayed.
+ """
+
super(TextWikiMarkup,self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'show_urls' in keywords:
self.show_urls = keywords['show_urls']
self.token_class['SEQ'] = TextSeqNode
self.token_class['TEXT'] = TextTextNode
self.token_class['PRE'] = TextPreNode
self.token_class['PARA'] = TextParaNode
self.token_class['SEQ'] = TextSeqNode
self.token_class['IT'] = TextItNode
self.token_class['BOLD'] = TextBoldNode
@@ -249,25 +290,25 @@ class TextWikiMarkup (WikiMarkup):
return wiki_ns[self.lang][str]
elif str in wiki_ns_re[self.lang]:
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + url_quote(tgt)
- def indent (self, lev, text):
+ def indent(self, lev, text):
if text.find('\n') == -1:
s = (" " * lev) + text
else:
s = ""
for elt in text.split('\n'):
if elt:
s += (" " * lev) + elt + '\n'
if not text.endswith('\n'):
s = s.rstrip('\n')
return s
def fmtpara(self, input):
@@ -289,18 +330,19 @@ class TextWikiMarkup (WikiMarkup):
length = 0
linebuf = ""
linebuf += " " * wsc + s
length += wsc + wlen
return output + linebuf
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
-class TextWiktionaryMarkup (TextWikiMarkup):
- """
- See documentation for HtmlWiktionaryMarkup
+class TextWiktionaryMarkup(TextWikiMarkup):
+ """A class for translating Wiktionary articles into plain text.
+
+ Reserved for future use. Currently does the same as TextWikiMarkup.
"""
- # FIXME: It is supposed to do something about templates
+
diff --git a/WikiTrans/wikidump.py b/wikitrans/wikidump.py
index 7457dfa..d5f651c 100644
--- a/WikiTrans/wikidump.py
+++ b/wikitrans/wikidump.py
@@ -5,38 +5,73 @@
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Print Wiki parse tree as JSON.
+
+Classes:
+
+DumpWikiMarkup
+
+"""
+
from __future__ import print_function
-from WikiTrans.wikitoken import *
+from wikitrans.wikitoken import *
import json
-from WikiTrans.wikimarkup import WikiMarkup
+from wikitrans.wikimarkup import WikiMarkup
class DumpReferences(object):
idx = 0
def __len__(self):
return self.idx + 1
def append(self, obj):
self.idx += 1
class DumpWikiMarkup(WikiMarkup):
+ """Produce a JSON dump of the Wiki markup parse tree.
+
+ Usage:
+
+ x = DumpWikiMarkup(filename="input.wiki")
+ # Parse the input:
+ x.parse()
+ # Print a JSON dump of the parse tree
+ print(str(x))
+
+ """
+
indent = None
references = DumpReferences()
def __init__(self, **kwarg):
+ """Create a DumpWikiMarkup object.
+
+ Arguments:
+
+ filename=FILE
+ Read Wiki material from the file named FILE.
+ file=FD
+ Read Wiki material from file object FD.
+ text=STRING
+ Read Wiki material from STRING.
+ indent=N
+ Basic indent offset for JSON objects.
+ """
+
n = kwarg.pop('indent', None)
if n != None:
self.indent = int(n)
- WikiMarkup.__init__(self, **kwarg)
+ super(DumpWikiMarkup,self).__init__(self, **kwarg)
def __str__(self):
return json.dumps(self.tree,
cls=WikiNodeEncoder,
indent=self.indent,
separators=(',',': '),
sort_keys=True)
diff --git a/WikiTrans/wikimarkup.py b/wikitrans/wikimarkup.py
index 6cbf5de..77c3b30 100644
--- a/WikiTrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -1,49 +1,74 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
+Wiki markup parser.
+
+This module provides two classes:
+
+WikiMarkupParser:
+ An abstract parser class, which serves as a base class for all markup
+ classes in this package.
+
+WikiMarkup
+ A subclass of the above, providing basic input method.
+
+"""
+
from __future__ import print_function
import sys
import re
from types import *
-from WikiTrans.wikitoken import *
+from wikitrans.wikitoken import *
-__all__ = [ "BaseWikiMarkup", "WikiMarkup",
- "TagAttributes", "TagAttributeSyntax" ]
+__all__ = [ "WikiMarkupParser", "WikiMarkup",
+ "TagAttributes", "TagAttributeSyntaxError" ]
-class UnexpectedToken(Exception):
+class UnexpectedTokenError(Exception):
def __init__(self, value):
self.value = value
-class TagAttributeSyntax(Exception):
+class TagAttributeSyntaxError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class TagAttributes(object):
+ """A dictionary-like collection of tag attributes.
+
+ Example:
+
+ attr = TagAttributes('href="foo" length=2')
+ if 'href' in attr:
+ print(attr['href']) # prints "foo"
+ for a in attr:
+ ...
+ """
+
attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?")
valseg = re.compile("^[^\\\"]+")
tab = {}
printable = None
def __init__(self, string):
if not string:
self.printable = ''
return
self.printable = string
s = string
self.tab = {}
while s != '':
@@ -59,706 +84,778 @@ class TagAttributes(object):
val += m.group(0)
s = s[m.end(0):]
if s[0] == '\\':
val += s[1]
s += 2
elif s[0] == '"':
s = s[1:]
break
else:
val = 1
self.tab[name] = val
else:
- raise TagAttributeSyntax(s)
+ raise TagAttributeSyntaxError(s)
def __len__(self):
return len(self.tab)
def __getitem__(self, key):
return self.tab[key]
def __contains__(self, key):
return key in self.tab
def __iter__(self):
for key in self.tab:
yield(key)
def has_key(self, key):
return self.__contains__(key)
def __setitem__(self, key, value):
self.tab[key] = value
def __delitem__(self, key):
del self.tab[key]
def __str__(self):
return self.printable
def __repr__(self):
return self.printable
-class BaseWikiMarkup(object):
+class WikiMarkupParser(object):
+ """Parser for Wiki markup language.
+
+ Given input in Wiki markup language creates an abstract parse tree for it.
+ This is a base class for actual parsers. The subclasses must provide the
+ input