summary | refs | log | tree | commit | diff | about
Side-by-side diff
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- README.rst 6
-rwxr-xr-x bin/wikitrans 3
-rw-r--r-- wikitrans/wiki2html.py 53
-rw-r--r-- wikitrans/wiki2texi.py 82
-rw-r--r-- wikitrans/wiki2text.py 66
-rw-r--r-- wikitrans/wikidump.py 20
-rw-r--r-- wikitrans/wikimarkup.py 46
-rw-r--r-- wikitrans/wikitoken.py 72
8 files changed, 205 insertions, 143 deletions
diff --git a/README.rst b/README.rst
index d189c98..7c53e3c 100644
--- a/README.rst
+++ b/README.rst
@@ -124,7 +124,7 @@ is ``http://%(lang)s.wikipedia.org/wiki/``.
The ``wikitrans`` utility
=========================
-This command line utility converts the supplied text to a selected
+This command line utility converts the supplied text to selected
output format. The usage syntax is::
wikitrans [OPTIONS] ARG
@@ -160,11 +160,11 @@ Options are:
Set output document type (``html`` (the default), ``texi``,
``text``, or ``dump``).
``-l LANG``, ``--lang=LANG``
- Set input document language
+ Set input document language.
``-o KW=VAL``, ``--option=KW=VAL``
Pass the keyword argument ``KW=VAL`` to the parser class construct.
``-d DEBUG``, ``--debug=DEBUG``
- Set debug level (0..100)
+ Set debug level (0..100).
``-D``, ``--dump``
Dump parse tree and exit; same as ``--type=dump``.
``-b URL``, ``--base-url=URL``
diff --git a/bin/wikitrans b/bin/wikitrans
index caaa885..87de020 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -59,7 +59,8 @@ handlers = {
'wiktionary': TextWiktionaryMarkup
},
'texi': {
- 'default': TexiWikiMarkup
+ 'default': TexiWikiMarkup,
+ 'wiktionary': TextWikiMarkup
}
}
diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py
index fc6b142..0696dce 100644
--- a/wikitrans/wiki2html.py
+++ b/wikitrans/wiki2html.py
@@ -1,17 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -40,16 +40,18 @@ try:
from html import escape as html_escape
except ImportError:
from cgi import escape as html_escape
-
+
__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
+
class HtmlSeqNode(WikiSeqNode):
def format(self):
s = ''
for x in self.content:
s += x.format()
return s
-
+
+
class HtmlLinkNode(HtmlSeqNode):
def format(self):
arg = self.content[0].format()
@@ -70,8 +72,8 @@ class HtmlLinkNode(HtmlSeqNode):
elif s[0] == "proto":
text = self.parser.tmpl_proto(s)
return text
-
- (qual,sep,tgt) = arg.partition(':')
+
+ (qual, sep, tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
@@ -92,6 +94,7 @@ class HtmlLinkNode(HtmlSeqNode):
return "<a href=\"%s\">%s</a>" % (tgt,
text if (text and text != '') else arg)
+
class HtmlRefNode(WikiRefNode):
def format(self):
target = self.ref
@@ -101,6 +104,7 @@ class HtmlRefNode(WikiRefNode):
text if (text and text != '') else target
)
+
class HtmlFontNode(HtmlSeqNode):
def format(self):
comm = { 'IT': 'i',
@@ -111,14 +115,16 @@ class HtmlFontNode(HtmlSeqNode):
s += '</%s>' % comm[self.type]
return s
+
class HtmlTextNode(HtmlSeqNode):
def format(self):
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
s = ''.join(self.content)
else:
s = html_escape(self.content, quote=False)
return s
+
class HtmlHdrNode(WikiHdrNode):
def format(self):
level = self.level
@@ -126,10 +132,12 @@ class HtmlHdrNode(WikiHdrNode):
level = 6
return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
+
class HtmlBarNode(WikiNode):
def format(self):
return "<hr/>\n"
+
class HtmlEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
@@ -146,7 +154,8 @@ class HtmlEnvNode(WikiEnvNode):
string,
self.parser.envt[type]["hdr"])
return string
-
+
+
class HtmlTagNode(WikiTagNode):
def format(self):
if self.tag == 'code':
@@ -156,13 +165,13 @@ class HtmlTagNode(WikiTagNode):
return '<pre><code>' + s + '</code></pre>' #FIXME
elif self.tag == 'ref':
n = self.idx+1
- return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n)
+ return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n, n, n, n)
elif self.tag == 'references':
s = '<div class="references">\n'
s += '<ol class="references">\n'
n = 0
for ref in self.parser.references:
- n += 1
+ n += 1
s += ('<li id="cite_note-%d">'
+ '<span class="mw-cite-backlink">'
+ '<b><a href="#cite_ref-%d">^</a></b>'
@@ -170,7 +179,7 @@ class HtmlTagNode(WikiTagNode):
+ '<span class="reference-text">'
+ ref.content.format()
+ '</span>'
- + '</li>\n') % (n,n)
+ + '</li>\n') % (n, n)
s += '</ol>\n</div>\n'
return s
else:
@@ -180,11 +189,13 @@ class HtmlTagNode(WikiTagNode):
s += '>'
s += self.content.format()
return s + '</' + self.tag + '>'
-
+
+
class HtmlParaNode(HtmlSeqNode):
def format(self):
return "<p>" + super(HtmlParaNode, self).format() + "</p>\n"
+
class HtmlPreNode(HtmlSeqNode):
def format(self):
s = super(HtmlPreNode, self).format()
@@ -193,10 +204,11 @@ class HtmlPreNode(HtmlSeqNode):
else:
return '<pre>' + s + '</pre>'
+
class HtmlIndNode(WikiIndNode):
def format(self):
return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
-
+
class HtmlWikiMarkup(WikiMarkup):
"""A Wiki markup to HTML translator class.
@@ -223,9 +235,9 @@ class HtmlWikiMarkup(WikiMarkup):
[html_base=URL],[image_base=URL],[media_base=URL])
The arguments have the same meaning as in the WikiMarkup constructor.
-
+
"""
-
+
super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
self.token_class['LINK'] = HtmlLinkNode
self.token_class['TMPL'] = HtmlLinkNode
@@ -249,14 +261,14 @@ class HtmlWikiMarkup(WikiMarkup):
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
- return None
-
+ return None
+
envt = { "unnumbered": { "hdr": "ul",
"elt": ["li"] },
"numbered": { "hdr": "ol",
"elt": ["li"] },
"defn": { "hdr": "dl",
- "elt": ["dt","dd"] } }
+ "elt": ["dt","dd"] } }
def mktgt(self, tgt, lang = None):
if not lang:
@@ -291,13 +303,14 @@ class HtmlWikiMarkup(WikiMarkup):
text += ' <span class="proto">' + x + '</span>'
text += ' <span class="meaning">(' + s[-2] + ')</span>'
return text
-
+
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
+
class HtmlWiktionaryMarkup(HtmlWikiMarkup):
"""A class for translating Wiktionary articles into HTML.
diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py
index 55dffe2..936a133 100644
--- a/wikitrans/wiki2texi.py
+++ b/wikitrans/wiki2texi.py
@@ -1,17 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2015-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -30,16 +30,17 @@ from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib
+
class Acc(list):
- def prepend(self,x):
- self.insert(0,x)
-
+ def prepend(self, x):
+ self.insert(0, x)
+
def is_empty(self):
return len(self) == 0
-
+
def clear(self):
self = []
-
+
def tail(self, n = 1):
s = Acc()
i = len(self)
@@ -65,33 +66,35 @@ class Acc(list):
self += elt[0:-n]
break
n -= l
-
+
def trimnl(self):
if self.endswith('\n'):
self.trim(1)
-
+
def trimpara(self):
if self.endswith('\n\n'):
self.trim(2)
-
+
def endswith(self, x):
return self.tail(len(x)) == x
-
+
def in_new_para(self):
return self.is_empty() or self.endswith('\n\n')
-
+
def __str__(self):
return ''.join(self)
+
class TexiTextNode(WikiTextNode):
def format(self):
parser = self.parser
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
for s in self.content:
parser._print(s)
else:
parser._print(self.content)
+
class TexiTagNode(WikiTagNode):
def format(self):
parser = self.parser
@@ -125,9 +128,10 @@ class TexiTagNode(WikiTagNode):
parser._print('>');
self.content.format()
parser._print('</' + self.tag + '>')
-
+
+
class TexiParaNode(WikiSeqNode):
- def format(self):
+ def format(self):
parser = self.parser
if not parser.acc.in_new_para():
parser._print('\n', nl=True)
@@ -135,7 +139,8 @@ class TexiParaNode(WikiSeqNode):
x.format()
if not parser.acc.in_new_para():
parser._print('\n', nl=True)
-
+
+
class TexiPreNode(WikiSeqNode):
def format(self):
parser = self.parser
@@ -146,6 +151,7 @@ class TexiPreNode(WikiSeqNode):
if not parser.nested:
parser._print('@end example\n', nl=True, escape=False)
+
class TexiFontNode(WikiSeqNode):
def format(self):
parser = self.parser
@@ -156,6 +162,7 @@ class TexiFontNode(WikiSeqNode):
x.format()
parser._print('}', escape=False)
+
class TexiHdrNode(WikiHdrNode):
def format(self):
parser = self.parser
@@ -174,10 +181,12 @@ class TexiHdrNode(WikiHdrNode):
parser._print('\n')
parser._print(None, nl=True)
+
class TexiBarNode(WikiNode):
def format(self):
self.parser._print("\n-----\n")
+
class TexiIndNode(WikiIndNode):
def format(self):
parser = self.parser
@@ -185,6 +194,7 @@ class TexiIndNode(WikiIndNode):
self.content.format()
parser._print(None, nl=True)
+
class TexiEnvNode(WikiEnvNode):
def format(self):
parser = self.parser
@@ -216,7 +226,8 @@ class TexiEnvNode(WikiEnvNode):
parser._print(None, nl=True)
parser._print('\n')
parser._print('@end table\n', nl=True, escape=False)
-
+
+
class TexiLinkNode(WikiSeqNode):
def format(self):
parser = self.parser
@@ -242,12 +253,13 @@ class TexiLinkNode(WikiSeqNode):
if len(s) > 1 and s[1] == 'thumb':
return
- (qual,sep,tgt) = arg.partition(':')
+ (qual, sep, tgt) = arg.partition(':')
if text:
parser._print("@ref{%s,%s}" % (qual, text), escape=False)
else:
parser._print("@ref{%s}" % qual, escape=False)
+
class TexiRefNode(WikiRefNode):
def format(self):
parser = self.parser
@@ -259,10 +271,11 @@ class TexiRefNode(WikiRefNode):
parser._print("@uref{%s,%s}" % (target, text), escape=False)
else:
parser._print("@uref{%s}" % target, escape=False)
-
+
+
class TexiWikiMarkup(WikiMarkup):
"""Wiki markup to Texinfo translator class.
-
+
Usage:
x = TexiWikiMarkup(file="input.wiki")
@@ -270,16 +283,16 @@ class TexiWikiMarkup(WikiMarkup):
x.parse()
# Print it as Texi:
print(str(x))
-
+
"""
-
+
nested = 0
sectcomm = {
'numbered': [
'@top',
- '@chapter',
- '@section',
- '@subsection',
+ '@chapter',
+ '@section',
+ '@subsection',
'@subsubsection'
],
'unnumbered': [
@@ -317,7 +330,7 @@ class TexiWikiMarkup(WikiMarkup):
For a discussion of generic arguments, see the constructor of
the WikiMarkup class.
-
+
Additional arguments:
sectioning_model=MODEL
@@ -342,9 +355,9 @@ class TexiWikiMarkup(WikiMarkup):
"@section A" on output. Now, if given "sectioning_start=1", this
directive will produce "@subsection A" instead.
"""
-
+
super(TexiWikiMarkup, self).__init__(*args, **keywords)
-
+
self.token_class['TEXT'] = TexiTextNode
self.token_class['TAG'] = TexiTagNode
self.token_class['PARA'] = TexiParaNode
@@ -357,7 +370,7 @@ class TexiWikiMarkup(WikiMarkup):
self.token_class['ENV'] = TexiEnvNode
self.token_class['LINK'] = TexiLinkNode
self.token_class['REF'] = TexiRefNode
-
+
if "sectioning_model" in keywords:
val = keywords["sectioning_model"]
if val in self.sectcomm:
@@ -394,17 +407,10 @@ class TexiWikiMarkup(WikiMarkup):
s = self.acc
self.acc = val
return str(s)
-
+
def __str__(self):
self._begin_print()
for elt in self.tree:
elt.format()
self.acc.trimpara()
return self._end_print()
-
-
-
-
-
-
-
diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py
index 88e7610..7585bff 100644
--- a/wikitrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -1,17 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -35,6 +35,7 @@ try:
except ImportError:
from urllib.parse import quote as url_quote
+
class TextSeqNode(WikiSeqNode):
def format(self):
string = ""
@@ -43,10 +44,11 @@ class TextSeqNode(WikiSeqNode):
string += ' '
string += x.format()
return string
-
+
+
class TextTextNode(WikiTextNode):
def format(self):
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
string = ""
for s in self.content:
if string:
@@ -59,6 +61,7 @@ class TextTextNode(WikiTextNode):
string = self.content
return string
+
class TextPreNode(WikiSeqNode):
def format(self):
string = ""
@@ -67,6 +70,7 @@ class TextPreNode(WikiSeqNode):
string += '\n'
return string
+
class TextParaNode(WikiSeqNode):
def format(self):
string = ""
@@ -75,6 +79,7 @@ class TextParaNode(WikiSeqNode):
string = self.parser.fmtpara(string) + '\n\n'
return string
+
class TextItNode(WikiSeqNode):
def format(self):
string = ""
@@ -83,7 +88,8 @@ class TextItNode(WikiSeqNode):
if s:
string += " " + s
return "_" + string.lstrip(" ") + "_"
-
+
+
class TextBoldNode(WikiSeqNode):
def format(self):
string = ""
@@ -95,6 +101,7 @@ class TextBoldNode(WikiSeqNode):
string += x.format()
return string.upper()
+
class TextLinkNode(WikiSeqNode):
def format(self):
arg = self.content[0].format()
@@ -110,7 +117,7 @@ class TextLinkNode(WikiSeqNode):
return ""
if len(s) > 1 and s[1] == 'thumb':
return ""
- (qual,sep,tgt) = arg.partition(':')
+ (qual, sep, tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
@@ -138,11 +145,13 @@ class TextLinkNode(WikiSeqNode):
return arg
else:
return text
-
+
+
class TextTmplNode(TextLinkNode):
def format(self):
return '[' + super(TextTmplNode, self).format() + ']'
-
+
+
class TextBarNode(WikiNode):
def format(self):
w = self.parser.width
@@ -150,6 +159,7 @@ class TextBarNode(WikiNode):
w = 5
return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
+
class TextHdrNode(WikiHdrNode):
def format(self):
return ("\n"
@@ -157,7 +167,8 @@ class TextHdrNode(WikiHdrNode):
+ " "
+ self.content.format().lstrip(" ")
+ "\n\n")
-
+
+
class TextRefNode(WikiRefNode):
def format(self):
text = self.content.format()
@@ -166,6 +177,7 @@ class TextRefNode(WikiRefNode):
else:
return "see " + self.ref
+
class TextEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
@@ -188,16 +200,16 @@ class TextEnvNode(WikiEnvNode):
string += self.parser.indent(lev-1, x)
else:
string += self.parser.indent(lev+3, x)
-
if not string.endswith("\n"):
string += "\n"
-
return string
+
class TextIndNode(WikiIndNode):
def format(self):
return (" " * self.level) + self.content.format() + '\n'
+
class TextTagNode(WikiTagNode):
def format(self):
if self.tag == 'code':
@@ -215,8 +227,8 @@ class TextTagNode(WikiTagNode):
if self.args:
s += ' ' + str(self.args)
s += '>' + self.content.format() + '</' + self.tag + '>'
- return s
-
+ return s
+
class TextWikiMarkup(WikiMarkup):
"""A Wiki markup to plain text translator.
@@ -228,7 +240,7 @@ class TextWikiMarkup(WikiMarkup):
x.parse()
# Print it as plain text:
print(str(x))
-
+
"""
# Output width
@@ -243,7 +255,7 @@ class TextWikiMarkup(WikiMarkup):
# Array of footnote references
references = []
-
+
def __init__(self, *args, **keywords):
"""Create a TextWikiMarkup object.
@@ -254,16 +266,16 @@ class TextWikiMarkup(WikiMarkup):
Most arguments have the same meaning as in the WikiMarkup constructor.
Class-specific arguments:
-
+
width=N
- Limit output width to N columns. Default is 78.
+ Limit output width to N columns. Default is 78.
show_urls=False
By default, the link URLs are displayed in parentheses next to the
link text. If this argument is given, only the link text will be
displayed.
"""
-
- super(TextWikiMarkup,self).__init__(*args, **keywords)
+
+ super(TextWikiMarkup, self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'show_urls' in keywords:
@@ -283,7 +295,7 @@ class TextWikiMarkup(WikiMarkup):
self.token_class['ENV'] = TextEnvNode
self.token_class['IND'] = TextIndNode
self.token_class['TAG'] = TextTagNode
-
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
@@ -292,15 +304,15 @@ class TextWikiMarkup(WikiMarkup):
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
-
+
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + url_quote(tgt)
-
+
def indent(self, lev, text):
if text.find('\n') == -1:
- s = (" " * lev) + text
+ s = (" " * lev) + text
else:
s = ""
for elt in text.split('\n'):
@@ -309,7 +321,7 @@ class TextWikiMarkup(WikiMarkup):
if not text.endswith('\n'):
s = s.rstrip('\n')
return s
-
+
def fmtpara(self, input):
output = ""
linebuf = ""
@@ -331,13 +343,14 @@ class TextWikiMarkup(WikiMarkup):
linebuf += " " * wsc + s
length += wsc + wlen
return output + linebuf
-
+
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
+
class TextWiktionaryMarkup(TextWikiMarkup):
"""A class for translating Wiktionary articles into plain text.
@@ -345,4 +358,3 @@ class TextWiktionaryMarkup(TextWikiMarkup):
"""
html_base='http://%(lang)s.wiktionary.org/wiki/'
-
diff --git a/wikitrans/wikidump.py b/wikitrans/wikidump.py
index d5f651c..bc71876 100644
--- a/wikitrans/wikidump.py
+++ b/wikitrans/wikidump.py
@@ -1,16 +1,16 @@
# Wiki "dump" format. -*- coding: utf-8 -*-
# Copyright (C) 2015-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -28,6 +28,7 @@ from wikitrans.wikitoken import *
import json
from wikitrans.wikimarkup import WikiMarkup
+
class DumpReferences(object):
idx = 0
def __len__(self):
@@ -43,13 +44,14 @@ class DumpWikiMarkup(WikiMarkup):
x = DumpWikiMarkup(file="input.wiki")
# Parse the input:
x.parse()
- # Print a JSON dump of the parse tree
+ # Print a JSON dump of the parse tree
print(str(x))
-
+
"""
-
+
indent = None
references = DumpReferences()
+
def __init__(self, **kwarg):
"""Create a DumpWikiMarkup object.
@@ -64,14 +66,14 @@ class DumpWikiMarkup(WikiMarkup):
indent=N
Basic indent offset for JSON objects.
"""
-
n = kwarg.pop('indent', None)
if n != None:
self.indent = int(n)
- super(DumpWikiMarkup,self).__init__(self, **kwarg)
+ super(DumpWikiMarkup, self).__init__(self, **kwarg)
+
def __str__(self):
return json.dumps(self.tree,
cls=WikiNodeEncoder,
indent=self.indent,
- separators=(',',': '),
+ separators=(',', ': '),
sort_keys=True)
diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py
index a3e17d5..d199335 100644
--- a/wikitrans/wikimarkup.py
+++ b/wikitrans/wikimarkup.py
@@ -38,16 +38,20 @@ from wikitrans.wikitoken import *
__all__ = [ "WikiMarkupParser", "WikiMarkup",
"TagAttributes", "TagAttributeSyntaxError" ]
+
class UnexpectedTokenError(Exception):
def __init__(self, value):
self.value = value
+
class TagAttributeSyntaxError(Exception):
def __init__(self, value):
self.value = value
+
def __str__(self):
return repr(self.value)
+
class TagAttributes(object):
"""A dictionary-like collection of tag attributes.
@@ -59,7 +63,7 @@ class TagAttributes(object):
for a in attr:
...
"""
-
+
attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?")
valseg = re.compile("^[^\\\"]+")
tab = {}
@@ -94,26 +98,36 @@ class TagAttributes(object):
self.tab[name] = val
else:
raise TagAttributeSyntaxError(s)
+
def __len__(self):
return len(self.tab)
+
def __getitem__(self, key):
return self.tab[key]
+
def __contains__(self, key):
return key in self.tab
+
def __iter__(self):
for key in self.tab:
yield(key)
+
def has_key(self, key):
return self.__contains__(key)
+
def __setitem__(self, key, value):
self.tab[key] = value
+
def __delitem__(self, key):
del self.tab[key]
+
def __str__(self):
return self.printable
+
def __repr__(self):
return self.printable
+
class WikiMarkupParser(object):
"""Parser for Wiki markup language.
@@ -195,7 +209,7 @@ class WikiMarkupParser(object):
'HDR': WikiHdrNode
}
- def _new_node(self,**kwarg):
+ def _new_node(self, **kwarg):
return self.token_class[kwarg['type']](self, **kwarg)
def tokread(self):
@@ -266,7 +280,7 @@ class WikiMarkupParser(object):
content=m.group(0)))
continue
else:
- yield(self._new_node(type='TEXT',content=m.group(0)))
+ yield(self._new_node(type='TEXT', content=m.group(0)))
continue
else:
m = self.ctag.match(line, pos)
@@ -306,13 +320,12 @@ class WikiMarkupParser(object):
if line:
if line[-1] == '\n':
if line[pos:-1] != '':
- yield(self._new_node(type='TEXT',content=line[pos:-1]))
+ yield(self._new_node(type='TEXT', content=line[pos:-1]))
yield(self._new_node(type='NL'))
else:
- yield(self._new_node(type='TEXT',content=line[pos:]))
+ yield(self._new_node(type='TEXT', content=line[pos:]))
line = None
-
def input(self):
"""Return next physical line from the input.
@@ -349,7 +362,7 @@ class WikiMarkupParser(object):
# 3a. '''a b ''c d'''''
# 3b. ''a b '''c d'''''
stack = []
- for i in range(0,len(self.toklist)):
+ for i in range(0, len(self.toklist)):
if (self.toklist[i].type == 'DELIM'
and (self.toklist[i].content == "''"
or self.toklist[i].content == "'''")):
@@ -400,7 +413,7 @@ class WikiMarkupParser(object):
self.dprint(20, "lookahead(%s): %s", off, tok)
return tok
- def setkn(self,val):
+ def setkn(self, val):
"""Store token val at the current token index."""
self.toklist[self.tokind] = val
@@ -471,7 +484,7 @@ class WikiMarkupParser(object):
acc['textlist'] = []
if (isinstance(tok, WikiContentNode)
- and isinstance(tok.content,str)
+ and isinstance(tok.content, str)
and re.match("^[ \t]", tok.content)):
type = 'PRE'
rx = re.compile("^\S")
@@ -574,7 +587,7 @@ class WikiMarkupParser(object):
self.dprint(80, "LEAVE parse_indent=%s", x)
return x
- def parse_fontmod(self,delim,what):
+ def parse_fontmod(self, delim, what):
"""Parse font modification directive (bold or italics).
Arguments:
@@ -627,7 +640,7 @@ class WikiMarkupParser(object):
return None
seq = []
- (ref,sep,text) = tok.content.partition(' ')
+ (ref, sep, text) = tok.content.partition(' ')
if text:
seq.insert(0, self._new_node(type='TEXT', content=text))
@@ -735,7 +748,7 @@ class WikiMarkupParser(object):
if od in self.close_delim:
cd = self.close_delim[od]
lev = 0
- for i,tok in enumerate(self.toklist[self.tokind+1:]):
+ for i, tok in enumerate(self.toklist[self.tokind+1:]):
if tok.type == 'NIL':
break
elif tok.type == 'DELIM':
@@ -766,7 +779,7 @@ class WikiMarkupParser(object):
if tag.args:
s += ' ' + str(tag.args)
s += '>'
- node = self._new_node(type='TEXT',content=s)
+ node = self._new_node(type='TEXT', content=s)
if tag.content:
self.tree[self.tokind:self.tokind] = tag.content
self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node)
@@ -786,13 +799,13 @@ class WikiMarkupParser(object):
elif tok.type == 'NL':
tok = self._new_node(type = 'TEXT', content = '\n')
list.append(tok)
-
self.clear_mark()
ret = self._new_node(type = 'TAG',
tag = tag.tag,
args = tag.args,
isblock = tag.isblock,
- content = self._new_node(type = 'SEQ', content = list))
+ content = self._new_node(type = 'SEQ',
+ content = list))
self.dprint(80, "LEAVE parse_tag = %s", ret)
return ret
@@ -800,7 +813,7 @@ class WikiMarkupParser(object):
"""Parse a block environment (numbered, unnumbered, or definition list)."""
type = self.envtypes[tok.content[0]][0]
lev = len(tok.content)
- self.dprint(80, "ENTER parse_env(%s,%s)",type,lev)
+ self.dprint(80, "ENTER parse_env(%s,%s)", type, lev)
list = []
while True:
if (tok.type == 'DELIM'
@@ -831,7 +844,6 @@ class WikiMarkupParser(object):
break
tok = self.getkn()
-
ret = self._new_node(type='ENV',
envtype=type,
level=lev,
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py
index 49c6c68..0678a75 100644
--- a/wikitrans/wikitoken.py
+++ b/wikitrans/wikitoken.py
@@ -41,21 +41,26 @@ from __future__ import print_function
import re
import json
+
class WikiNodeEncoder(json.JSONEncoder):
"""Custom JSONEncoder subclass for serializing WikiNode and its subclasses."""
+
def default(self, obj):
- if isinstance(obj,WikiNode):
- return obj.jsonEncode()
+ if isinstance(obj, WikiNode):
+ return obj.json_encode()
return json.JSONEncoder.default(self, obj)
+
def jsonencoder(func):
def _mkencoder(self):
json = func(self)
json['wikinode'] = self.__class__.__name__
json['type'] = self.type
return json
+
return _mkencoder
+
class WikiNode(object):
"""Generic parse tree node.
@@ -71,7 +76,7 @@ class WikiNode(object):
def __init__(self, parser, **kwargs):
self.parser = parser
for key in kwargs:
- if hasattr(self,key):
+ if hasattr(self, key):
self.__dict__[key] = kwargs[key]
else:
raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key))
@@ -80,7 +85,7 @@ class WikiNode(object):
return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
ret = {}
for x in dir(self):
if x == 'parser' or x.startswith('_') or type(x) == 'function':
@@ -96,6 +101,7 @@ class WikiNode(object):
"""
pass
+
class WikiContentNode(WikiNode):
"""Generic content node.
@@ -110,21 +116,22 @@ class WikiContentNode(WikiNode):
pass
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
ret = {}
if self.content:
if self.type == 'TEXT':
ret['content'] = self.content
- elif isinstance(self.content,list):
- ret['content'] = map(lambda x: x.jsonEncode(), self.content)
- elif isinstance(self.content,WikiNode):
- ret['content'] = self.content.jsonEncode()
+ elif isinstance(self.content, list):
+ ret['content'] = map(lambda x: x.json_encode(), self.content)
+ elif isinstance(self.content, WikiNode):
+ ret['content'] = self.content.json_encode()
else:
ret['content'] = self.content
else:
ret['content'] = None
return ret
+
class WikiSeqNode(WikiContentNode):
"""Generic sequence of nodes.
@@ -138,14 +145,14 @@ class WikiSeqNode(WikiContentNode):
x.format()
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
ret = {}
if not self.content:
ret['content'] = None
- elif isinstance(self.content,list):
- ret['content'] = map(lambda x: x.jsonEncode(), self.content)
- elif isinstance(self.content,WikiNode):
- ret['content'] = self.content.jsonEncode()
+ elif isinstance(self.content, list):
+ ret['content'] = map(lambda x: x.json_encode(), self.content)
+ elif isinstance(self.content, WikiNode):
+ ret['content'] = self.content.json_encode()
else:
ret['content'] = self.content
return ret
@@ -165,11 +172,12 @@ class WikiTextNode(WikiContentNode):
type = 'TEXT'
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'content': self.content
}
+
class WikiDelimNode(WikiContentNode):
"""Delimiter node.
@@ -185,6 +193,7 @@ class WikiDelimNode(WikiContentNode):
isblock=False
continuation = False
+
class WikiTagNode(WikiContentNode):
"""A Wiki tag.
@@ -205,20 +214,23 @@ class WikiTagNode(WikiContentNode):
def __init__(self, *args, **keywords):
super(WikiTagNode, self).__init__(*args, **keywords)
- if self.type == 'TAG' and self.tag == 'ref' and hasattr(self.parser,'references'):
+ if (self.type == 'TAG'
+ and self.tag == 'ref'
+ and hasattr(self.parser, 'references')):
self.idx = len(self.parser.references)
self.parser.references.append(self)
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'tag': self.tag,
'isblock': self.isblock,
'args': self.args.tab if self.args else None,
- 'content': self.content.jsonEncode() if self.content else None,
+ 'content': self.content.json_encode() if self.content else None,
'idx': self.idx
}
+
class WikiRefNode(WikiContentNode):
"""Reference node.
@@ -233,12 +245,13 @@ class WikiRefNode(WikiContentNode):
type = 'REF'
ref = None
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'ref': self.ref,
- 'content': self.content.jsonEncode()
+ 'content': self.content.json_encode()
}
+
class WikiHdrNode(WikiContentNode):
"""A wiki markup header class.
@@ -252,12 +265,13 @@ class WikiHdrNode(WikiContentNode):
level = None
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'level': self.level,
- 'content': self.content.jsonEncode()
+ 'content': self.content.json_encode()
}
+
class WikiEltNode(WikiContentNode):
"""Environment element node.
@@ -271,12 +285,13 @@ class WikiEltNode(WikiContentNode):
subtype = None
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'subtype': self.subtype,
- 'content': self.content.jsonEncode()
+ 'content': self.content.json_encode()
}
+
class WikiEnvNode(WikiContentNode):
"""Wiki Environment Node
@@ -291,13 +306,14 @@ class WikiEnvNode(WikiContentNode):
level = None
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'envtype': self.envtype,
'level': self.level,
- 'content': map(lambda x: x.jsonEncode(), self.content)
+ 'content': map(lambda x: x.json_encode(), self.content)
}
+
class WikiIndNode(WikiContentNode):
"""Indented block node.
@@ -311,8 +327,8 @@ class WikiIndNode(WikiContentNode):
level = None
@jsonencoder
- def jsonEncode(self):
+ def json_encode(self):
return {
'level': self.level,
- 'content': self.content.jsonEncode()
+ 'content': self.content.json_encode()
}

Return to:

Send suggestions and report system problems to the System administrator.