summary | refs | log | tree | commit | diff | about
path: root/wikitrans/wiki2text.py
Side-by-side diff
Diffstat (limited to 'wikitrans/wiki2text.py') (more/less context) (ignore whitespace changes)
-rw-r--r-- wikitrans/wiki2text.py 66
1 files changed, 39 insertions, 27 deletions
diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py
index 88e7610..7585bff 100644
--- a/wikitrans/wiki2text.py
+++ b/wikitrans/wiki2text.py
@@ -1,17 +1,17 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
-#
+#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
@@ -35,6 +35,7 @@ try:
except ImportError:
from urllib.parse import quote as url_quote
+
class TextSeqNode(WikiSeqNode):
def format(self):
string = ""
@@ -43,10 +44,11 @@ class TextSeqNode(WikiSeqNode):
string += ' '
string += x.format()
return string
-
+
+
class TextTextNode(WikiTextNode):
def format(self):
- if isinstance(self.content,list):
+ if isinstance(self.content, list):
string = ""
for s in self.content:
if string:
@@ -59,6 +61,7 @@ class TextTextNode(WikiTextNode):
string = self.content
return string
+
class TextPreNode(WikiSeqNode):
def format(self):
string = ""
@@ -67,6 +70,7 @@ class TextPreNode(WikiSeqNode):
string += '\n'
return string
+
class TextParaNode(WikiSeqNode):
def format(self):
string = ""
@@ -75,6 +79,7 @@ class TextParaNode(WikiSeqNode):
string = self.parser.fmtpara(string) + '\n\n'
return string
+
class TextItNode(WikiSeqNode):
def format(self):
string = ""
@@ -83,7 +88,8 @@ class TextItNode(WikiSeqNode):
if s:
string += " " + s
return "_" + string.lstrip(" ") + "_"
-
+
+
class TextBoldNode(WikiSeqNode):
def format(self):
string = ""
@@ -95,6 +101,7 @@ class TextBoldNode(WikiSeqNode):
string += x.format()
return string.upper()
+
class TextLinkNode(WikiSeqNode):
def format(self):
arg = self.content[0].format()
@@ -110,7 +117,7 @@ class TextLinkNode(WikiSeqNode):
return ""
if len(s) > 1 and s[1] == 'thumb':
return ""
- (qual,sep,tgt) = arg.partition(':')
+ (qual, sep, tgt) = arg.partition(':')
if tgt != '':
ns = self.parser.wiki_ns_name(qual)
if ns:
@@ -138,11 +145,13 @@ class TextLinkNode(WikiSeqNode):
return arg
else:
return text
-
+
+
class TextTmplNode(TextLinkNode):
    """Link-style node whose rendered text is enclosed in square brackets."""

    def format(self):
        """Return the parent class's formatted text wrapped in '[' and ']'."""
        # Explicit two-argument super() keeps the original call form.
        inner = super(TextTmplNode, self).format()
        return '[' + inner + ']'
-
+
+
class TextBarNode(WikiNode):
def format(self):
w = self.parser.width
@@ -150,6 +159,7 @@ class TextBarNode(WikiNode):
w = 5
return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
+
class TextHdrNode(WikiHdrNode):
def format(self):
return ("\n"
@@ -157,7 +167,8 @@ class TextHdrNode(WikiHdrNode):
+ " "
+ self.content.format().lstrip(" ")
+ "\n\n")
-
+
+
class TextRefNode(WikiRefNode):
def format(self):
text = self.content.format()
@@ -166,6 +177,7 @@ class TextRefNode(WikiRefNode):
else:
return "see " + self.ref
+
class TextEnvNode(WikiEnvNode):
def format(self):
type = self.envtype
@@ -188,16 +200,16 @@ class TextEnvNode(WikiEnvNode):
string += self.parser.indent(lev-1, x)
else:
string += self.parser.indent(lev+3, x)
-
if not string.endswith("\n"):
string += "\n"
-
return string
+
class TextIndNode(WikiIndNode):
    """Node rendered as its content shifted right by `level` spaces."""

    def format(self):
        """Return `level` spaces, the formatted content, and a newline."""
        padding = " " * self.level
        body = self.content.format()
        return padding + body + '\n'
+
class TextTagNode(WikiTagNode):
def format(self):
if self.tag == 'code':
@@ -215,8 +227,8 @@ class TextTagNode(WikiTagNode):
if self.args:
s += ' ' + str(self.args)
s += '>' + self.content.format() + '</' + self.tag + '>'
- return s
-
+ return s
+
class TextWikiMarkup(WikiMarkup):
"""A Wiki markup to plain text translator.
@@ -228,7 +240,7 @@ class TextWikiMarkup(WikiMarkup):
x.parse()
# Print it as plain text:
print(str(x))
-
+
"""
# Output width
@@ -243,7 +255,7 @@ class TextWikiMarkup(WikiMarkup):
# Array of footnote references
references = []
-
+
def __init__(self, *args, **keywords):
"""Create a TextWikiMarkup object.
@@ -254,16 +266,16 @@ class TextWikiMarkup(WikiMarkup):
Most arguments have the same meaning as in the WikiMarkup constructor.
Class-specific arguments:
-
+
width=N
- Limit output width to N columns. Default is 78.
+ Limit output width to N columns. Default is 78.
show_urls=False
By default, the link URLs are displayed in parentheses next to the
link text. If this argument is given, only the link text will be
displayed.
"""
-
- super(TextWikiMarkup,self).__init__(*args, **keywords)
+
+ super(TextWikiMarkup, self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'show_urls' in keywords:
@@ -283,7 +295,7 @@ class TextWikiMarkup(WikiMarkup):
self.token_class['ENV'] = TextEnvNode
self.token_class['IND'] = TextIndNode
self.token_class['TAG'] = TextTagNode
-
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
@@ -292,15 +304,15 @@ class TextWikiMarkup(WikiMarkup):
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
-
+
def mktgt(self, tgt, lang=None):
    """Build the URL for the target `tgt`.

    The `%(lang)s` placeholder of `self.html_base` is filled with `lang`
    (or with `self.lang` when `lang` is empty or None), and the URL-quoted
    `tgt` is appended to the result.
    """
    chosen_lang = lang or self.lang
    prefix = self.html_base % {'lang': chosen_lang}
    return prefix + url_quote(tgt)
-
+
def indent(self, lev, text):
if text.find('\n') == -1:
- s = (" " * lev) + text
+ s = (" " * lev) + text
else:
s = ""
for elt in text.split('\n'):
@@ -309,7 +321,7 @@ class TextWikiMarkup(WikiMarkup):
if not text.endswith('\n'):
s = s.rstrip('\n')
return s
-
+
def fmtpara(self, input):
output = ""
linebuf = ""
@@ -331,13 +343,14 @@ class TextWikiMarkup(WikiMarkup):
linebuf += " " * wsc + s
length += wsc + wlen
return output + linebuf
-
+
def __str__(self):
    """Return the plain-text rendering of the parsed document.

    Calls format() on every element of `self.tree`, in order, and
    concatenates the results.  An empty tree yields an empty string.
    """
    # Join instead of accumulating into a local named `str`: that name
    # shadowed the builtin, and repeated += copies the growing string.
    return "".join(elt.format() for elt in self.tree)
+
class TextWiktionaryMarkup(TextWikiMarkup):
"""A class for translating Wiktionary articles into plain text.
@@ -345,4 +358,3 @@ class TextWiktionaryMarkup(TextWikiMarkup):
"""
html_base='http://%(lang)s.wiktionary.org/wiki/'
-

Return to:

Send suggestions and report system problems to the System administrator.