diff options
Diffstat (limited to 'WikiTrans/wiki2text.py')
-rw-r--r-- | WikiTrans/wiki2text.py | 200 |
1 files changed, 192 insertions, 8 deletions
# Post-change ("+" side) text of WikiTrans/wiki2text.py, reconstructed from
#   diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
#   index ee1748c..b5bd708 100644
#   hunks @@ -15,11 +15,186 @@ and @@ -36,20 +211,29 @@
# Only the lines covered by those two hunks are present; gaps between and
# after the hunks are marked with comments.  Lines the diff removed (the
# old xref() helper and the old-style WikiMarkup.__init__ call) are dropped.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from wikitoken import *
from wikimarkup import *
from wikins import wiki_ns_re, wiki_ns
import re
import urllib


class TextSeqNode(WikiSeqNode):
    """Sequence node rendered as plain text."""

    def format(self):
        """Return the formatted children concatenated in order.

        A single space is inserted before a child when the text built so
        far is longer than one character and does not end in whitespace.
        """
        string = ""
        for x in self.content:
            # NOTE(review): "> 1" means a one-character prefix gets no
            # separator; kept as in the original -- confirm it is intended.
            if len(string) > 1 and not string[-1].isspace():
                string += ' '
            string += x.format()
        return string


class TextTextNode(WikiTextNode):
    """Text node rendered as plain text."""

    def format(self):
        """Return the node content as one string.

        A list content is joined piece by piece, with a separator added
        before each piece once the result is non-empty; any other content
        is returned unchanged.
        """
        # The original referred to an undefined name ``elt`` here; the
        # node itself is meant.
        content = self.content
        if isinstance(content, list):
            string = ""
            for s in content:
                if string:
                    # NOTE(review): both branches append one space in the
                    # text this was reconstructed from; the page scrape may
                    # have collapsed a double space after a full stop --
                    # confirm against the repository.
                    if string.endswith("."):
                        string += " "
                    else:
                        string += " "
                string += s
        else:
            string = content
        return string


class TextPreNode(WikiSeqNode):
    """Preformatted block rendered as plain text."""

    def format(self):
        """Return the formatted children followed by a newline."""
        # Fixed: iterate over self.content (the original used the
        # undefined name ``elt``).
        string = ""
        for x in self.content:
            string += x.format()
        # NOTE(review): placed after the loop; the collapsed source does
        # not show whether the newline was appended per child -- confirm.
        string += '\n'
        return string


class TextParaNode(WikiSeqNode):
    """Paragraph rendered as plain text."""

    def format(self):
        """Return the children passed through parser.fmtpara(), then a
        blank line."""
        # Fixed: iterate over self.content (the original used the
        # undefined name ``elt``).
        string = ""
        for x in self.content:
            string += x.format()
        string = self.parser.fmtpara(string) + '\n\n'
        return string


class TextItNode(WikiSeqNode):
    """Italic span rendered as _text_."""

    def format(self):
        """Return the non-empty formatted children, space separated and
        wrapped in underscores."""
        # Fixed: iterate over self.content (the original used the
        # undefined name ``elt``).
        string = ""
        for x in self.content:
            s = x.format()
            if s:
                string += " " + s
        return "_" + string.lstrip(" ") + "_"


class TextBoldNode(WikiSeqNode):
    """Bold span rendered in upper case."""

    def format(self):
        """Return the formatted children, each preceded by a separator,
        converted to upper case."""
        # Fixed: iterate over self.content (the original used the
        # undefined name ``elt``).
        string = ""
        for x in self.content:
            # NOTE(review): both branches append one space in the text this
            # was reconstructed from; a double space after a full stop may
            # have been collapsed by the page scrape -- confirm.
            if string.endswith("."):
                string += " "
            else:
                string += " "
            string += x.format()
        return string.upper()


class TextLinkNode(WikiSeqNode):
    """Wiki link rendered as plain text."""

    def format(self):
        """Return the textual form of the link.

        The first child is the link argument ("qualifier:target" or a
        plain target); when there is more than one child, the second
        supplies the caption.  Returns "" for links whose first part is
        'disambigR' or 'wikiquote', whose second part is 'thumb', and for
        image links when parser.references is off.  With
        parser.references on, the result is "caption (see target) ", or
        "see target" when there is no caption; otherwise it is the caption,
        or the raw argument when there is none.
        """
        arg = self.content[0].format()
        if len(self.content) > 1:
            s = [x.format() for x in self.content]
            text = s[1]
        else:
            s = None
            text = None

        if s:
            if s[0] == 'disambigR' or s[0] == 'wikiquote':
                return ""
            if len(s) > 1 and s[1] == 'thumb':
                return ""

        (qual, sep, tgt) = arg.partition(':')
        if tgt != '':
            ns = self.parser.wiki_ns_name(qual)
            if ns:
                if ns == 'NS_IMAGE':
                    if not self.parser.references:
                        return ""
                    text = "[%s: %s]" % (qual, text if text else arg)
                    # NOTE(review): every other shared setting in this
                    # method is read through self.parser; confirm that
                    # image_base really is available on the node itself.
                    tgt = self.image_base + '/' + \
                          urllib.quote(tgt) + \
                          '/250px-' + urllib.quote(tgt)
                elif ns == 'NS_MEDIA':
                    text = "[%s]" % (qual)
                else:
                    tgt = self.parser.mktgt(tgt)
            elif self.type == 'LINK' and qual in self.parser.langtab:
                text = self.parser.langtab[qual] + ": " + tgt
                tgt = self.parser.mktgt(tgt, qual)
            else:
                tgt = self.parser.mktgt(tgt)
        else:
            tgt = self.parser.mktgt(arg)

        if self.parser.references:
            # Fixed: a link without a caption used to be rendered with the
            # literal text "None"; use the same "see <target>" wording as
            # TextRefNode for that case.
            if text:
                return "%s (see %s) " % (text, tgt)
            return "see %s" % (tgt,)
        elif not text:
            return arg
        else:
            return text


class TextTmplNode(TextLinkNode):
    """Template reference rendered like a link, in square brackets."""

    def format(self):
        """Return the TextLinkNode rendering wrapped in '[' and ']'."""
        return '[' + super(TextTmplNode, self).format() + ']'


class TextBarNode(WikiNode):
    """Horizontal rule rendered as a row of dashes."""

    def format(self):
        """Return a line of (width - 5) dashes centred in (width - 1)
        columns, with a newline before and after.

        The parser width is treated as at least 5.
        """
        w = self.parser.width
        if w < 5:
            w = 5
        return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"


class TextHdrNode(WikiHdrNode):
    """Section header rendered with leading asterisks."""

    def format(self):
        """Return one '*' per header level, a space, and the header text
        with leading spaces stripped, followed by a blank line."""
        # Fixed: use self.content (the original used the undefined name
        # ``elt``).
        return "\n" + ("*" * self.level) + " " + \
               self.content.format().lstrip(" ") + "\n\n"


class TextRefNode(WikiRefNode):
    """External reference rendered as plain text."""

    def format(self):
        """Return "text (see ref) ", or "see ref" when the text is empty."""
        text = self.content.format()
        if text:
            return "%s (see %s) " % (text, self.ref)
        else:
            return "see " + self.ref


class TextEnvNode(WikiEnvNode):
    """List environment (unnumbered, numbered or definition list)."""

    def format(self):
        """Return the list items, one per line.

        'unnumbered' items are prefixed with "- ", 'numbered' items with
        a running "N. " counter; both are indented by the list level and
        passed through parser.fmtpara().  'defn' items are indented by
        level - 1 when their subtype is 0 and by level + 3 otherwise.  A
        level greater than parser.width - 4 is replaced by 1.
        """
        envtype = self.envtype   # local renamed: it shadowed builtin type()
        lev = self.level
        if lev > self.parser.width - 4:
            lev = 1
        string = ""
        n = 1
        for s in self.content:
            if not string.endswith("\n"):
                string += "\n"
            x = s.content.format()
            if envtype == "unnumbered":
                string += self.parser.fmtpara(
                    self.parser.indent(lev, "- " + x.lstrip(" ")))
            elif envtype == "numbered":
                string += self.parser.fmtpara(
                    self.parser.indent(lev, "%d. %s" % (n, x)))
                n += 1
            elif envtype == "defn":
                if s.subtype == 0:
                    string += self.parser.indent(lev-1, x)
                else:
                    string += self.parser.indent(lev+3, x)

            # NOTE(review): kept inside the loop; the collapsed source does
            # not show whether this check ran per item or once after the
            # loop (the two differ only for an empty list) -- confirm.
            if not string.endswith("\n"):
                string += "\n"

        return string


class TextIndNode(WikiIndNode):
    """Indented line."""

    def format(self):
        """Return the content preceded by one space per indent level and
        followed by a newline."""
        return (" " * self.level) + self.content.format() + '\n'


class TextTagNode(WikiTagNode):
    """Inline tag."""

    def format(self):
        """Return the tag rendered as text.

        For a 'code' tag the content is formatted while parser.nested is
        incremented.  Any other tag is re-emitted as
        "<tag args>content</tag>", the arguments being included only when
        present.
        """
        # Fixed: use self.content (the original used the undefined name
        # ``elt``).
        if self.tag == 'code':
            self.parser.nested += 1
            try:
                s = self.content.format()
            finally:
                # Restore the counter even if formatting raises.
                self.parser.nested -= 1
        else:
            s = '<' + self.tag
            if self.args:
                s += ' ' + str(self.args)
            s += '>' + self.content.format() + '</' + self.tag + '>'
        return s


class TextWikiMarkup (WikiMarkup):
    """
    A (general-purpose Wiki->Text translator class.
    """
    # [The rest of the class docstring and about ten further lines lie
    #  between the two diff hunks and are not visible here.]

    num = 0

    def __init__(self, *args, **keywords):
        """Initialise the base class, pick up the optional 'width',
        'refs' and 'markup' keywords, and register the plain-text node
        classes in self.token_class."""
        super(TextWikiMarkup,self).__init__(*args, **keywords)
        if 'width' in keywords:
            self.width = keywords['width']
        if 'refs' in keywords:
            self.references = keywords['refs']
        if 'markup' in keywords:
            self.markup = keywords['markup']
        # One entry per node type (the original assigned 'SEQ' twice).
        self.token_class['SEQ'] = TextSeqNode
        self.token_class['TEXT'] = TextTextNode
        self.token_class['PRE'] = TextPreNode
        self.token_class['PARA'] = TextParaNode
        self.token_class['IT'] = TextItNode
        self.token_class['BOLD'] = TextBoldNode
        self.token_class['LINK'] = TextLinkNode
        self.token_class['TMPL'] = TextTmplNode
        self.token_class['BAR'] = TextBarNode
        self.token_class['HDR'] = TextHdrNode
        self.token_class['REF'] = TextRefNode
        self.token_class['ENV'] = TextEnvNode
        self.token_class['IND'] = TextIndNode
        self.token_class['TAG'] = TextTagNode

    def wiki_ns_name(self, str):
        if str in wiki_ns[self.lang]:
            return wiki_ns[self.lang][str]
        # [The remainder of this method and of the class lies beyond the
        #  end of the visible diff hunk.]