summary | refs | log | tree | commit | diff
path: root/WikiTrans/wiki2text.py
diff options
context:
space:
mode:
Diffstat (limited to 'WikiTrans/wiki2text.py')
-rw-r--r-- WikiTrans/wiki2text.py 200
1 files changed, 192 insertions, 8 deletions
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index ee1748c..b5bd708 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -15,11 +15,186 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+from wikitoken import *
from wikimarkup import *
from wikins import wiki_ns_re, wiki_ns
import re
import urllib
+class TextSeqNode(WikiSeqNode):
+    """Sequence node rendered as plain text."""
+    def format(self):
+        """Return the concatenated text of all child nodes.
+
+        A single space is put in front of a child's text when the text
+        gathered so far is longer than one character and does not
+        already end in whitespace.
+        """
+        result = ""
+        for child in self.content:
+            needs_gap = len(result) > 1 and not result[-1].isspace()
+            result += (' ' if needs_gap else '') + child.format()
+        return result
+
+class TextTextNode(WikiTextNode):
+    """Text node rendered as plain text."""
+    def format(self):
+        """Return the node's text.
+
+        When the content is a list, its items are concatenated with a
+        separator between them; otherwise the content is returned as is.
+        """
+        # The body used to read an undefined name `elt`; the data to
+        # format is this node's own content.
+        if isinstance(self.content, list):
+            string = ""
+            for s in self.content:
+                if string:
+                    # NOTE(review): both branches append the same
+                    # separator as shown in this view; they are kept
+                    # apart in case the spacing after a full stop was
+                    # meant to differ -- confirm against the repository.
+                    if string.endswith("."):
+                        string += " "
+                    else:
+                        string += " "
+                string += s
+        else:
+            string = self.content
+        return string
+
+class TextPreNode(WikiSeqNode):
+    """Preformatted-text node rendered as plain text."""
+    def format(self):
+        """Return the children's text concatenated, ending in a newline."""
+        # The loop used to iterate over an undefined name `elt`; the
+        # children live in this node's own content.
+        string = "".join([x.format() for x in self.content])
+        # NOTE(review): indentation is lost in this view, so it is not
+        # certain whether the newline belongs after the loop (as here) or
+        # after every child -- confirm against the repository.
+        string += '\n'
+        return string
+
+class TextParaNode(WikiSeqNode):
+    """Paragraph node rendered as plain text."""
+    def format(self):
+        """Return the paragraph text followed by an empty line.
+
+        The children's text is concatenated and handed to the parser's
+        fmtpara() before the two trailing newlines are appended.
+        """
+        # The loop used to iterate over an undefined name `elt`; the
+        # children live in this node's own content.
+        string = "".join([x.format() for x in self.content])
+        return self.parser.fmtpara(string) + '\n\n'
+
+class TextItNode(WikiSeqNode):
+    """Italic node rendered as plain text."""
+    def format(self):
+        """Return the children's text wrapped in underscores.
+
+        Children that format to an empty value are skipped; the rest are
+        separated by single spaces, and leading spaces are stripped from
+        the result before it is wrapped.
+        """
+        # The loop used to iterate over an undefined name `elt`; the
+        # children live in this node's own content.
+        parts = [x.format() for x in self.content]
+        string = " ".join([s for s in parts if s])
+        return "_" + string.lstrip(" ") + "_"
+
+class TextBoldNode(WikiSeqNode):
+    """Bold node rendered as plain text."""
+    def format(self):
+        """Return the children's text in upper case.
+
+        A separator is put in front of every child's text, including the
+        first one.
+        """
+        string = ""
+        # The loop used to iterate over an undefined name `elt`; the
+        # children live in this node's own content.
+        for x in self.content:
+            # NOTE(review): both branches append the same separator as
+            # shown in this view; they are kept apart in case the spacing
+            # after a full stop was meant to differ -- confirm.
+            if string.endswith("."):
+                string += " "
+            else:
+                string += " "
+            string += x.format()
+        return string.upper()
+
+class TextLinkNode(WikiSeqNode):
+    """Link node rendered as plain text."""
+    def format(self):
+        """Return the text rendering of a link.
+
+        The first child supplies the link argument; when there are more
+        children, the second one is used as the visible text.  An empty
+        string is returned for 'disambigR' and 'wikiquote' links, for
+        links whose second field is 'thumb', and for image links when
+        the parser's references are switched off.  With references on,
+        the result names the target as well; otherwise it is the visible
+        text, or the raw argument when there is no text.
+        """
+        arg = self.content[0].format()
+        if len(self.content) > 1:
+            s = [x.format() for x in self.content]
+            text = s[1]
+        else:
+            s = None
+            text = None
+
+        if s:
+            if s[0] in ('disambigR', 'wikiquote'):
+                return ""
+            if len(s) > 1 and s[1] == 'thumb':
+                return ""
+        (qual, sep, tgt) = arg.partition(':')
+        if tgt != '':
+            ns = self.parser.wiki_ns_name(qual)
+            if ns:
+                if ns == 'NS_IMAGE':
+                    if not self.parser.references:
+                        return ""
+                    text = "[%s: %s]" % (qual, text if text else arg)
+                    # NOTE(review): every other lookup here goes through
+                    # self.parser; confirm image_base really is an
+                    # attribute of the node.
+                    tgt = self.image_base + '/' + \
+                          urllib.quote(tgt) + \
+                          '/250px-' + urllib.quote(tgt)
+                elif ns == 'NS_MEDIA':
+                    text = "[%s]" % (qual)
+                else:
+                    tgt = self.parser.mktgt(tgt)
+            elif self.type == 'LINK' and qual in self.parser.langtab:
+                text = self.parser.langtab[qual] + ": " + tgt
+                tgt = self.parser.mktgt(tgt, qual)
+            else:
+                tgt = self.parser.mktgt(tgt)
+        else:
+            tgt = self.parser.mktgt(arg)
+        if self.parser.references:
+            if text:
+                return "%s (see %s) " % (text, tgt)
+            # A link without visible text must not be rendered as the
+            # literal "None"; fall back to the bare reference, the same
+            # wording TextRefNode uses for an empty text.
+            return "see " + tgt
+        if not text:
+            return arg
+        return text
+
+class TextTmplNode(TextLinkNode):
+    """Template node rendered as plain text."""
+    def format(self):
+        """Return the link-style rendering enclosed in square brackets."""
+        inner = super(TextTmplNode, self).format()
+        return '[' + inner + ']'
+
+class TextBarNode(WikiNode):
+    """Horizontal-bar node rendered as plain text."""
+    def format(self):
+        """Return a centred row of dashes on a line of its own.
+
+        The parser's width is used, but never less than 5; the rule is
+        five characters shorter than that width and is centred in a
+        field one character narrower than the width.
+        """
+        width = max(self.parser.width, 5)
+        rule = "-" * (width - 5)
+        return "\n" + rule.center(width - 1) + "\n"
+
+class TextHdrNode(WikiHdrNode):
+    """Header node rendered as plain text."""
+    def format(self):
+        """Return the header text prefixed by one asterisk per level.
+
+        The line is preceded by a newline and followed by an empty line;
+        leading spaces of the header text are stripped.
+        """
+        # The body used to read an undefined name `elt`; the header text
+        # is this node's own content.
+        title = self.content.format().lstrip(" ")
+        return "\n" + ("*" * self.level) + " " + title + "\n\n"
+
+class TextRefNode(WikiRefNode):
+    """Reference node rendered as plain text."""
+    def format(self):
+        """Return the text with its reference, or the reference alone."""
+        label = self.content.format()
+        if not label:
+            return "see " + self.ref
+        return "%s (see %s) " % (label, self.ref)
+
+class TextEnvNode(WikiEnvNode):
+    """List-environment node rendered as plain text."""
+    def format(self):
+        """Return the text rendering of all items of the environment.
+
+        Items of "unnumbered" and "numbered" environments are indented
+        and passed through the parser's fmtpara(); items of "defn"
+        environments are only indented, by an amount that depends on the
+        item's subtype.  Every item starts on a fresh line.
+        """
+        kind = self.envtype
+        depth = self.level
+        if depth > self.parser.width - 4:
+            depth = 1
+        out = ""
+        counter = 1
+        for item in self.content:
+            if not out.endswith("\n"):
+                out += "\n"
+            body = item.content.format()
+            if kind == "unnumbered":
+                bullet = "- " + body.lstrip(" ")
+                out += self.parser.fmtpara(self.parser.indent(depth, bullet))
+            elif kind == "numbered":
+                entry = "%d. %s" % (counter, body)
+                out += self.parser.fmtpara(self.parser.indent(depth, entry))
+                counter += 1
+            elif kind == "defn":
+                shift = -1 if item.subtype == 0 else 3
+                out += self.parser.indent(depth + shift, body)
+
+        # NOTE(review): indentation is lost in this view; the final
+        # newline check is taken to follow the loop -- confirm.
+        if not out.endswith("\n"):
+            out += "\n"
+
+        return out
+
+class TextIndNode(WikiIndNode):
+    """Indented node rendered as plain text."""
+    def format(self):
+        """Return the content prefixed by level-dependent padding."""
+        padding = " " * self.level
+        return padding + self.content.format() + '\n'
+
+class TextTagNode(WikiTagNode):
+    """Tag node rendered as plain text."""
+    def format(self):
+        """Return the text rendering of a tagged element.
+
+        For a 'code' tag only the content is returned, formatted while
+        the parser's `nested` counter is raised by one.  Any other tag is
+        reproduced literally around its content, with its arguments (if
+        any) inside the opening tag.
+        """
+        # The body used to read an undefined name `elt`; the element to
+        # format is this node's own content.
+        if self.tag == 'code':
+            self.parser.nested += 1
+            try:
+                s = self.content.format()
+            finally:
+                # Restore the counter even when formatting fails.
+                self.parser.nested -= 1
+        else:
+            s = '<' + self.tag
+            if self.args:
+                s += ' ' + str(self.args)
+            s += '>' + self.content.format() + '</' + self.tag + '>'
+        return s
+
+
class TextWikiMarkup (WikiMarkup):
"""
A (general-purpose Wiki->Text translator class.
@@ -36,20 +211,29 @@ class TextWikiMarkup (WikiMarkup):
num = 0
def __init__(self, *args, **keywords):
- WikiMarkup.__init__(self, *args, **keywords)
+ super(TextWikiMarkup,self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'refs' in keywords:
self.references = keywords['refs']
if 'markup' in keywords:
self.markup = keywords['markup']
-
- def xref(self, text, target):
- if text:
- return "%s (see %s) " % (text, target)
- else:
- return "see " + target
-
+ self.token_class['SEQ'] = TextSeqNode
+ self.token_class['TEXT'] = TextTextNode
+ self.token_class['PRE'] = TextPreNode
+ self.token_class['PARA'] = TextParaNode
+ self.token_class['SEQ'] = TextSeqNode
+ self.token_class['IT'] = TextItNode
+ self.token_class['BOLD'] = TextBoldNode
+ self.token_class['LINK'] = TextLinkNode
+ self.token_class['TMPL'] = TextTmplNode
+ self.token_class['BAR'] = TextBarNode
+ self.token_class['HDR'] = TextHdrNode
+ self.token_class['REF'] = TextRefNode
+ self.token_class['ENV'] = TextEnvNode
+ self.token_class['IND'] = TextIndNode
+ self.token_class['TAG'] = TextTagNode
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]

Return to:

Send suggestions and report system problems to the System administrator.