summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2018-08-18 17:43:36 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2018-08-18 17:43:36 +0300
commit: 8c0de7a515aa9d6db7b3ce4110a4e29261851abb (patch)
tree: 228c2bb1be5a907b373158c6c94cd8d51f23c419
parent: 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (diff)
download: wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.gz
          wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.bz2
Minor stylistic fixes.
-rw-r--r-- README.rst 6
-rwxr-xr-x bin/wikitrans 3
-rw-r--r-- wikitrans/wiki2html.py 53
-rw-r--r-- wikitrans/wiki2texi.py 82
-rw-r--r-- wikitrans/wiki2text.py 66
-rw-r--r-- wikitrans/wikidump.py 20
-rw-r--r-- wikitrans/wikimarkup.py 46
-rw-r--r-- wikitrans/wikitoken.py 72
8 files changed, 205 insertions, 143 deletions
diff --git a/README.rst b/README.rst
index d189c98..7c53e3c 100644
--- a/README.rst
+++ b/README.rst
@@ -121,13 +121,13 @@ supposed to provide a wiktionary-specific form of
``HtmlWikiMarkup``. Currently both classes are equivalent, except that
the default value for ``html_base`` in ``HtmlWiktionaryMarkup``
is ``http://%(lang)s.wikipedia.org/wiki/``.
The ``wikitrans`` utility
=========================
-This command line utility converts the supplied text to a selected
+This command line utility converts the supplied text to selected
output format. The usage syntax is::
wikitrans [OPTIONS] ARG
If ARG looks like a URL, the wiki text to be converted will be
downloaded from that URL.
@@ -157,17 +157,17 @@ Options are:
``-I ITYPE``, ``--input-type=ITYPE``
Set input document type. *ITYPE* is one of: ``default`` or ``wiktionary``.
``-t OTYPE``, ``--to=OTYPE``, ``--type=OTYPE``
Set output document type (``html`` (the default), ``texi``,
``text``, or ``dump``).
``-l LANG``, ``--lang=LANG``
- Set input document language
+ Set input document language.
``-o KW=VAL``, ``--option=KW=VAL``
Pass the keyword argument ``KW=VAL`` to the parser class construct.
``-d DEBUG``, ``--debug=DEBUG``
- Set debug level (0..100)
+ Set debug level (0..100).
``-D``, ``--dump``
Dump parse tree and exit; same as ``--type=dump``.
``-b URL``, ``--base-url=URL``
Set base url.
Note: when using ``--base-url`` or passing URL as an argument (2nd and 3rd
diff --git a/bin/wikitrans b/bin/wikitrans
index caaa885..87de020 100755
--- a/bin/wikitrans
+++ b/bin/wikitrans
@@ -56,13 +56,14 @@ handlers = {
},
'text': {
'default': TextWikiMarkup,
'wiktionary': TextWiktionaryMarkup
},
'texi': {
- 'default': TexiWikiMarkup
+ 'default': TexiWikiMarkup,
+ 'wiktionary': TextWikiMarkup
}
}
def setkw(option, opt, value, parser):
if not parser.values.kwdict:
parser.values.kwdict = {}
diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py
index fc6b142..0696dce 100644
--- a/wikitrans/wiki2html.py
+++ b/wikitrans/wiki2html.py
@@ -1,20 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Wiki markup to HTML translator.
@@ -37,22 +37,24 @@ except ImportError:
from urllib.parse import quote as url_quote
try:
from html import escape as html_escape
except ImportError:
from cgi import escape as html_escape
-
+
__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
+
class HtmlSeqNode(WikiSeqNode):
def format(self):
s = ''
for x in self.content:
s += x.format()
return s
-
+
+
class HtmlLinkNode(HtmlSeqNode):
def format(self):
arg = self.content[0].format()
text = None
if len(self.content) > 1:
s = [x for x in map(lambda x: x.format(), self.content)]
@@ -67,14 +69,14 @@ class HtmlLinkNode(HtmlSeqNode):
text = s[2]
elif s[0] == "term":
text = self.parser.tmpl_term(s)
elif s[0] == "proto":
text = self.parser.tmpl_proto(s)
return text
-
- (qual,sep,tgt) = arg.partition(':')
+
+ (qual, sep, tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
if ns == 'NS_IMAGE':
return ''
elif ns == 'NS_MEDIA':
@@ -89,50 +91,56 @@ class HtmlLinkNode(HtmlSeqNode):
tgt = self.parser.mktgt(tgt)
else:
tgt = self.parser.mktgt(arg)
return "<a href=\"%s\">%s</a>" % (tgt,
text if (text and text != '') else arg)
+
class HtmlRefNode(WikiRefNode):
def format(self):
target = self.ref
text = self.content.format()
return "<a href=\"%s\">%s</a>" % (
target,
text if (text and text != '') else target
)
+
class HtmlFontNode(HtmlSeqNode):
def format(self):
comm = { 'IT': 'i',
'BOLD': 'b' }
s = '<%s>' % comm[self.type]
for x in self.content:
s += x.format()
s += '</%s>' % comm[self.type]
return s
+
class HtmlTextNode(HtmlSeqNode):
def format(self):
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
s = ''.join(self.content)
else:
s = html_escape(self.content, quote=False)
return s
+
class HtmlHdrNode(WikiHdrNode):
def format(self):
level = self.level
if level > 6:
level = 6
return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
+
class HtmlBarNode(WikiNode):
def format(self):
return "<hr/>\n"
+
class HtmlEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
lev = self.level
if lev > 4:
lev = 2
@@ -143,63 +151,67 @@ class HtmlEnvNode(WikiEnvNode):
s.content.format(),
self.parser.envt[type]["elt"][n])
return "<%s>%s</%s>" % (self.parser.envt[type]["hdr"],
string,
self.parser.envt[type]["hdr"])
return string
-
+
+
class HtmlTagNode(WikiTagNode):
def format(self):
if self.tag == 'code':
self.parser.nested += 1
s = self.content.format()
self.parser.nested -= 1
return '<pre><code>' + s + '</code></pre>' #FIXME
elif self.tag == 'ref':
n = self.idx+1
- return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n)
+ return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n, n, n, n)
elif self.tag == 'references':
s = '<div class="references">\n'
s += '<ol class="references">\n'
n = 0
for ref in self.parser.references:
- n += 1
+ n += 1
s += ('<li id="cite_note-%d">'
+ '<span class="mw-cite-backlink">'
+ '<b><a href="#cite_ref-%d">^</a></b>'
+ '</span>'
+ '<span class="reference-text">'
+ ref.content.format()
+ '</span>'
- + '</li>\n') % (n,n)
+ + '</li>\n') % (n, n)
s += '</ol>\n</div>\n'
return s
else:
s = '<' + self.tag
if self.args:
s += ' ' + str(self.args)
s += '>'
s += self.content.format()
return s + '</' + self.tag + '>'
-
+
+
class HtmlParaNode(HtmlSeqNode):
def format(self):
return "<p>" + super(HtmlParaNode, self).format() + "</p>\n"
+
class HtmlPreNode(HtmlSeqNode):
def format(self):
s = super(HtmlPreNode, self).format()
if self.parser.nested:
return s
else:
return '<pre>' + s + '</pre>'
+
class HtmlIndNode(WikiIndNode):
def format(self):
return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
-
+
class HtmlWikiMarkup(WikiMarkup):
"""A Wiki markup to HTML translator class.
Usage:
@@ -220,15 +232,15 @@ class HtmlWikiMarkup(WikiMarkup):
"""Create a HtmlWikiMarkup object.
HtmlWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE],
[html_base=URL],[image_base=URL],[media_base=URL])
The arguments have the same meaning as in the WikiMarkup constructor.
-
+
"""
-
+
super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
self.token_class['LINK'] = HtmlLinkNode
self.token_class['TMPL'] = HtmlLinkNode
self.token_class['REF'] = HtmlRefNode
self.token_class['IT'] = HtmlFontNode
self.token_class['BOLD'] = HtmlFontNode
@@ -246,20 +258,20 @@ class HtmlWikiMarkup(WikiMarkup):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
elif str in wiki_ns_re[self.lang]:
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
- return None
-
+ return None
+
envt = { "unnumbered": { "hdr": "ul",
"elt": ["li"] },
"numbered": { "hdr": "ol",
"elt": ["li"] },
"defn": { "hdr": "dl",
- "elt": ["dt","dd"] } }
+ "elt": ["dt","dd"] } }
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + url_quote(tgt)
@@ -288,19 +300,20 @@ class HtmlWikiMarkup(WikiMarkup):
if n > 0:
text += ','
n += 1
text += ' <span class="proto">' + x + '</span>'
text += ' <span class="meaning">(' + s[-2] + ')</span>'
return text
-
+
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
+
class HtmlWiktionaryMarkup(HtmlWikiMarkup):
"""A class for translating Wiktionary articles into HTML.
Reserved for future use. Currently does the same as HtmlWikiMarkup.
"""
diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py
index 55dffe2..936a133 100644
--- a/wikitrans/wiki2texi.py
+++ b/wikitrans/wiki2texi.py
@@ -1,20 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2015-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Wiki markup to Texinfo translator.
@@ -27,22 +27,23 @@ TexiWikiMarkup -- Converts Wiki material to Texinfo.
from wikitrans.wikimarkup import *
from wikitrans.wikitoken import *
from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib
+
class Acc(list):
- def prepend(self,x):
- self.insert(0,x)
-
+ def prepend(self, x):
+ self.insert(0, x)
+
def is_empty(self):
return len(self) == 0
-
+
def clear(self):
self = []
-
+
def tail(self, n = 1):
s = Acc()
i = len(self)
while i > 0 and n > 0:
elt = self[i-1]
l = len(elt)
@@ -62,39 +63,41 @@ class Acc(list):
if l == 0:
continue
elif l > n:
self += elt[0:-n]
break
n -= l
-
+
def trimnl(self):
if self.endswith('\n'):
self.trim(1)
-
+
def trimpara(self):
if self.endswith('\n\n'):
self.trim(2)
-
+
def endswith(self, x):
return self.tail(len(x)) == x
-
+
def in_new_para(self):
return self.is_empty() or self.endswith('\n\n')
-
+
def __str__(self):
return ''.join(self)
+
class TexiTextNode(WikiTextNode):
def format(self):
parser = self.parser
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
for s in self.content:
parser._print(s)
else:
parser._print(self.content)
+
class TexiTagNode(WikiTagNode):
def format(self):
parser = self.parser
if self.tag in ['code', 'tt']:
save = parser._begin_print()
parser.nested += 1
@@ -122,43 +125,47 @@ class TexiTagNode(WikiTagNode):
parser._print('<' + self.tag)
if self.args:
parser._print(' ' + self.args)
parser._print('>');
self.content.format()
parser._print('</' + self.tag + '>')
-
+
+
class TexiParaNode(WikiSeqNode):
- def format(self):
+ def format(self):
parser = self.parser
if not parser.acc.in_new_para():
parser._print('\n', nl=True)
for x in self.content:
x.format()
if not parser.acc.in_new_para():
parser._print('\n', nl=True)
-
+
+
class TexiPreNode(WikiSeqNode):
def format(self):
parser = self.parser
if not parser.nested:
parser._print('@example\n', nl=True, escape=False)
for x in self.content:
x.format()
if not parser.nested:
parser._print('@end example\n', nl=True, escape=False)
+
class TexiFontNode(WikiSeqNode):
def format(self):
parser = self.parser
comm = { 'IT': 'i',
'BOLD': 'b' }
parser._print('@%s{' % comm[self.type], escape=False)
for x in self.content:
x.format()
parser._print('}', escape=False)
+
class TexiHdrNode(WikiHdrNode):
def format(self):
parser = self.parser
level = self.level
# FIXME
if level > len(parser.sectcomm[parser.sectioning_model]) - 1 - parser.sectioning_start:
@@ -171,23 +178,26 @@ class TexiHdrNode(WikiHdrNode):
if parser.sectcomm[parser.sectioning_model][0] == '@top':
parser._print('@node ', nl=True, escape=False)
self.content.format()
parser._print('\n')
parser._print(None, nl=True)
+
class TexiBarNode(WikiNode):
def format(self):
self.parser._print("\n-----\n")
+
class TexiIndNode(WikiIndNode):
def format(self):
parser = self.parser
parser._print("@w{ }" * self.level, nl=True, escape=False)
self.content.format()
parser._print(None, nl=True)
+
class TexiEnvNode(WikiEnvNode):
def format(self):
parser = self.parser
if self.envtype == 'unnumbered':
parser._print('@itemize @bullet\n', nl=True, escape=False)
for s in self.content:
@@ -213,13 +223,14 @@ class TexiEnvNode(WikiEnvNode):
parser._print(None, nl=True)
else:
s.content.format()
parser._print(None, nl=True)
parser._print('\n')
parser._print('@end table\n', nl=True, escape=False)
-
+
+
class TexiLinkNode(WikiSeqNode):
def format(self):
parser = self.parser
save = parser._begin_print()
self.content[0].format()
arg = parser._end_print()
@@ -239,50 +250,52 @@ class TexiLinkNode(WikiSeqNode):
if s:
if s[0] == 'disambigR' or s[0] == 'wikiquote':
return
if len(s) > 1 and s[1] == 'thumb':
return
- (qual,sep,tgt) = arg.partition(':')
+ (qual, sep, tgt) = arg.partition(':')
if text:
parser._print("@ref{%s,%s}" % (qual, text), escape=False)
else:
parser._print("@ref{%s}" % qual, escape=False)
+
class TexiRefNode(WikiRefNode):
def format(self):
parser = self.parser
target = self.ref
save = parser._begin_print()
self.content.format()
text = parser._end_print(save)
if text and text != '':
parser._print("@uref{%s,%s}" % (target, text), escape=False)
else:
parser._print("@uref{%s}" % target, escape=False)
-
+
+
class TexiWikiMarkup(WikiMarkup):
"""Wiki markup to Texinfo translator class.
-
+
Usage:
x = TexiWikiMarkup(file="input.wiki")
# Parse the input:
x.parse()
# Print it as Texi:
print(str(x))
-
+
"""
-
+
nested = 0
sectcomm = {
'numbered': [
'@top',
- '@chapter',
- '@section',
- '@subsection',
+ '@chapter',
+ '@section',
+ '@subsection',
'@subsubsection'
],
'unnumbered': [
'@top',
'@unnumbered',
'@unnumberedsec',
@@ -314,13 +327,13 @@ class TexiWikiMarkup(WikiMarkup):
TexiWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE],
[html_base=URL],[image_base=URL],[media_base=URL],
[sectioning_model=MODEL],[sectioning_start=N])
For a discussion of generic arguments, see the constructor of
the WikiMarkup class.
-
+
Additional arguments:
sectioning_model=MODEL
Select the Texinfo sectioning model for the output document. Possible
values are:
@@ -339,28 +352,28 @@ class TexiWikiMarkup(WikiMarkup):
sectioning_start=N
Shift resulting heading level by N positions. For example, supposing
"sectioning_model='numbered'", "== A ==" normally produces
"@section A" on output. Now, if given "sectioning_start=1", this
directive will produce "@subsection A" instead.
"""
-
+
super(TexiWikiMarkup, self).__init__(*args, **keywords)
-
+
self.token_class['TEXT'] = TexiTextNode
self.token_class['TAG'] = TexiTagNode
self.token_class['PARA'] = TexiParaNode
self.token_class['PRE'] = TexiPreNode
self.token_class['IT'] = TexiFontNode
self.token_class['BOLD'] = TexiFontNode
self.token_class['HDR'] = TexiHdrNode
self.token_class['BAR'] = TexiBarNode
self.token_class['IND'] = TexiIndNode
self.token_class['ENV'] = TexiEnvNode
self.token_class['LINK'] = TexiLinkNode
self.token_class['REF'] = TexiRefNode
-
+
if "sectioning_model" in keywords:
val = keywords["sectioning_model"]
if val in self.sectcomm:
self.sectioning_model = val
else:
raise ValueError("Invalid value for sectioning model: %s" % val)
@@ -391,20 +404,13 @@ class TexiWikiMarkup(WikiMarkup):
return s
def _end_print(self, val = None):
s = self.acc
self.acc = val
return str(s)
-
+
def __str__(self):
self._begin_print()
for elt in self.tree:
elt.format()
self.acc.trimpara()
return self._end_print()
-
-
-
-
-
-
-
diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py
index 88e7610..7585bff 100644
--- a/wikitrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -1,20 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Wiki markup to plain text translator.
@@ -32,72 +32,79 @@ from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
try:
from urllib import quote as url_quote
except ImportError:
from urllib.parse import quote as url_quote
+
class TextSeqNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
if len(string) > 1 and not string[-1].isspace():
string += ' '
string += x.format()
return string
-
+
+
class TextTextNode(WikiTextNode):
def format(self):
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
string = ""
for s in self.content:
if string:
if string.endswith("."):
string += " "
else:
string += " "
string += s
else:
string = self.content
return string
+
class TextPreNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
string += x.format()
string += '\n'
return string
+
class TextParaNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
string += x.format()
string = self.parser.fmtpara(string) + '\n\n'
return string
+
class TextItNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
s = x.format()
if s:
string += " " + s
return "_" + string.lstrip(" ") + "_"
-
+
+
class TextBoldNode(WikiSeqNode):
def format(self):
string = ""
for x in self.content:
if string.endswith("."):
string += " "
else:
string += " "
string += x.format()
return string.upper()
+
class TextLinkNode(WikiSeqNode):
def format(self):
arg = self.content[0].format()
if len(self.content) > 1:
s = [x for x in map(lambda x: x.format(), self.content)]
text = s[1]
@@ -107,13 +114,13 @@ class TextLinkNode(WikiSeqNode):
if s:
if s[0] == 'disambigR' or s[0] == 'wikiquote':
return ""
if len(s) > 1 and s[1] == 'thumb':
return ""
- (qual,sep,tgt) = arg.partition(':')
+ (qual, sep, tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
if ns == 'NS_IMAGE':
if not self.parser.show_urls:
return ""
@@ -135,40 +142,45 @@ class TextLinkNode(WikiSeqNode):
if self.parser.show_urls:
return "%s (see %s) " % (text, tgt)
elif not text or text == '':
return arg
else:
return text
-
+
+
class TextTmplNode(TextLinkNode):
def format(self):
return '[' + super(TextTmplNode, self).format() + ']'
-
+
+
class TextBarNode(WikiNode):
def format(self):
w = self.parser.width
if w < 5:
w = 5
return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
+
class TextHdrNode(WikiHdrNode):
def format(self):
return ("\n"
+ ("*" * self.level)
+ " "
+ self.content.format().lstrip(" ")
+ "\n\n")
-
+
+
class TextRefNode(WikiRefNode):
def format(self):
text = self.content.format()
if text:
return "%s (see %s) " % (text, self.ref)
else:
return "see " + self.ref
+
class TextEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
lev = self.level
if lev > self.parser.width - 4:
lev = 1
@@ -185,22 +197,22 @@ class TextEnvNode(WikiEnvNode):
n += 1
elif type == "defn":
if s.subtype == 0:
string += self.parser.indent(lev-1, x)
else:
string += self.parser.indent(lev+3, x)
-
if not string.endswith("\n"):
string += "\n"
-
return string
+
class TextIndNode(WikiIndNode):
def format(self):
return (" " * self.level) + self.content.format() + '\n'
+
class TextTagNode(WikiTagNode):
def format(self):
if self.tag == 'code':
self.parser.nested += 1
s = self.content.format()
self.parser.nested -= 1
@@ -212,26 +224,26 @@ class TextTagNode(WikiTagNode):
s += ('[%d]. ' % (ref.idx+1)) + ref.content.format() + '\n'
else:
s = '<' + self.tag
if self.args:
s += ' ' + str(self.args)
s += '>' + self.content.format() + '</' + self.tag + '>'
- return s
-
+ return s
+
class TextWikiMarkup(WikiMarkup):
"""A Wiki markup to plain text translator.
Usage:
x = TextWikiMarkup(file="input.wiki")
# Parse the input:
x.parse()
# Print it as plain text:
print(str(x))
-
+
"""
# Output width
width = 78
# Do not show references.
show_urls = False
@@ -240,33 +252,33 @@ class TextWikiMarkup(WikiMarkup):
# Number of current element in the environment
num = 0
# Array of footnote references
references = []
-
+
def __init__(self, *args, **keywords):
"""Create a TextWikiMarkup object.
TextWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE],
[html_base=URL],[image_base=URL],[media_base=URL],
[width=N],[show_urls=False])
Most arguments have the same meaning as in the WikiMarkup constructor.
Class-specific arguments:
-
+
width=N
- Limit output width to N columns. Default is 78.
+ Limit output width to N columns. Default is 78.
show_urls=False
By default, the link URLs are displayed in parentheses next to the
link text. If this argument is given, only the link text will be
displayed.
"""
-
- super(TextWikiMarkup,self).__init__(*args, **keywords)
+
+ super(TextWikiMarkup, self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'show_urls' in keywords:
self.show_urls = keywords['show_urls']
self.token_class['SEQ'] = TextSeqNode
self.token_class['TEXT'] = TextTextNode
@@ -280,39 +292,39 @@ class TextWikiMarkup(WikiMarkup):
self.token_class['BAR'] = TextBarNode
self.token_class['HDR'] = TextHdrNode
self.token_class['REF'] = TextRefNode
self.token_class['ENV'] = TextEnvNode
self.token_class['IND'] = TextIndNode
self.token_class['TAG'] = TextTagNode
-
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
elif str in wiki_ns_re[self.lang]:
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
-
+
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + url_quote(tgt)
-
+
def indent(self, lev, text):
if text.find('\n') == -1:
- s = (" " * lev) + text
+ s = (" " * lev) + text
else:
s = ""
for elt in text.split('\n'):
if elt:
s += (" " * lev) + elt + '\n'
if not text.endswith('\n'):
s = s.rstrip('\n')
return s
-
+
def fmtpara(self, input):
output = ""
linebuf = ""
length = 0
for s in input.split():
wlen = len(s)
@@ -328,21 +340,21 @@ class TextWikiMarkup(WikiMarkup):
wsc = 0
length = 0
linebuf = ""
linebuf += " " * wsc + s
length += wsc + wlen
return output + linebuf
-
+
def __str__(self):
str = ""
for elt in self.tree:
str += elt.format()
return str
+
class TextWiktionaryMarkup(TextWikiMarkup):
"""A class for translating Wiktionary articles into plain text.
Reserved for future use. Currently does the same as TextWikiMarkup.
"""
html_base='http://%(lang)s.wiktionary.org/wiki/'
-
diff --git a/wikitrans/wikidump.py b/wikitrans/wikidump.py
index d5f651c..bc71876 100644
--- a/wikitrans/wikidump.py
+++ b/wikitrans/wikidump.py
@@ -1,19 +1,19 @@
# Wiki "dump" format. -*- coding: utf-8 -*-
# Copyright (C) 2015-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Print Wiki parse tree as JSON.
@@ -25,12 +25,13 @@ DumpWikiMarkup
from __future__ import print_function
from wikitrans.wikitoken import *
import json
from wikitrans.wikimarkup import WikiMarkup
+
class DumpReferences(object):
idx = 0
def __len__(self):
return self.idx + 1
def append(self, obj):
self.idx += 1
@@ -40,19 +41,20 @@ class DumpWikiMarkup(WikiMarkup):
Usage:
x = DumpWikiMarkup(file="input.wiki")
# Parse the input:
x.parse()
- # Print a JSON dump of the parse tree
+ # Print a JSON dump of the parse tree
print(str(x))
-
+
"""
-
+
indent = None
references = DumpReferences()
+
def __init__(self, **kwarg):
"""Create a DumpWikiMarkup object.
Arguments:
filename=FILE
@@ -61,17 +63,17 @@ class DumpWikiMarkup(WikiMarkup):
Read Wiki material from file object FD.
text=STRING
Read Wiki material from STRING.
indent=N