diff options
Diffstat (limited to 'wiki2text.py')
-rw-r--r-- | wiki2text.py | 163 |
1 files changed, 163 insertions, 0 deletions
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from wikimarkup import *
from types import TupleType
import urllib


class TextWikiMarkup(WikiMarkup):
    """
    A general-purpose Wiki->Text translator class.

    Each str_* method renders one kind of token as plain text.  The
    attributes image_base, media_base, html_base, langtab, lang and tree,
    the constants INDENT, ENVNUM and ENVUNNUM and the method fmtok are
    not defined in this class; presumably they come from WikiMarkup.
    """

    # Output width in columns; used by str_bar and as an upper bound for
    # the indentation level in str_item.
    width = 80
    # Number that str_item gives to the next numbered list item.
    num = 0
    # When true, links and templates are rendered together with their
    # target (see xref).
    references = False

    def __init__(self, *args, **keywords):
        """Initialize the translator.

        All arguments are forwarded unchanged to WikiMarkup.__init__.
        Two keywords are additionally interpreted here:

          width -- stored in self.width
          refs  -- stored in self.references
        """
        WikiMarkup.__init__(self, *args, **keywords)
        # The two options are independent of each other, so each one is
        # tested on its own: chaining them with `elif` would silently
        # ignore `refs` whenever `width` is given as well.
        if 'width' in keywords:
            self.width = keywords['width']
        if 'refs' in keywords:
            self.references = keywords['refs']

    def target(self, t):
        """Translate the link target t into a URL.

        t is split at its first ':' into a qualifier and a remainder.
        Return a tuple (url, name).  name is self.langtab[qualifier] when
        the qualifier is a key of self.langtab, and None in every other
        case.
        """
        (qual, sep, tgt) = t.partition(':')
        if tgt != '':
            if qual == "Image":
                return self.image_base + '/' + urllib.quote(tgt), None
            if qual == "Media":
                # NOTE(review): unlike the other branches, the name is
                # not passed through urllib.quote here -- confirm that
                # this is intended.
                return self.media_base + '/' + tgt, None
            if qual in self.langtab:
                url = (self.html_base % {'lang': qual}
                       + '/' + urllib.quote(tgt))
                return url, self.langtab[qual]
        # No ':' in t, nothing after it, or an unrecognized qualifier:
        # the whole of t is taken as a page name in the current language.
        url = self.html_base % {'lang': self.lang} + '/' + urllib.quote(t)
        return url, None

    def xref(self, text, target):
        """Return a textual cross-reference to target.

        A non-empty text is followed by the target in parentheses;
        otherwise only the target is mentioned.
        """
        if text:
            return "%s (see %s) " % (text, target)
        return "see " + target

    def str_link(self, tok, env):
        """Render a link token.

        tok[1] is formatted as the link target and tok[2] as the link
        text.  With self.references set, the result is a cross-reference
        built by xref.  Otherwise a target containing ':' produces an
        empty string, and any other link is rendered as its text or,
        when that is empty, as its target.
        """
        arg = self.fmtok(tok[1], env)
        text = self.fmtok(tok[2], env)
        if self.references:
            (target, r) = self.target(arg)
            return self.xref(text or r, target)
        (qual, sep, tgt) = arg.partition(':')
        if sep != '':
            return ""
        if text:
            return text
        return arg

    def str_tmpl(self, tok, env):
        """Render a template token.

        tok[1] is formatted as the target and tok[2] as the text.  An
        empty text is replaced by the name that target() returns, if
        any.  The result is a cross-reference when self.references is
        set, and the bare text otherwise.
        """
        arg = self.fmtok(tok[1], env)
        (target, r) = self.target(arg)
        text = self.fmtok(tok[2], env)
        if not text and r:
            text = r
        if self.references:
            return self.xref(text, target)
        return text

    def str_ref(self, tok, env):
        """Render a reference token as a cross-reference.

        The formatted tok[2] is used as the text and the formatted
        tok[1] as the target.
        """
        return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env))

    def str_it(self, tok, env):
        """Render italic text by enclosing it in underscores."""
        return "_" + self.fmtok(tok[1], env) + "_"

    def str_bold(self, tok, env):
        """Render bold text by converting it to upper case."""
        return self.fmtok(tok[1], env).upper()

    def str_hdr(self, tok, env):
        """Render a header.

        The header text (tok[2]) is preceded by tok[1] asterisks and a
        space, and surrounded by blank lines.
        """
        level = tok[1]
        return "\n\n" + ("*" * level) + " " + self.fmtok(tok[2], env) + "\n\n"

    def str_bar(self, tok, env):
        """Render a horizontal bar as a centered line of dashes."""
        # A width below 5 would make the dash count negative.
        w = max(self.width, 5)
        return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"

    def str_env(self, tok, env):
        """Render an environment token.

        Item numbering is restarted from 1, and the content (tok[3]) is
        formatted with the environment token itself passed as the
        environment, which is where str_item takes the type and the
        level from.
        """
        self.num = 1
        return "\n" + self.fmtok(tok[3], tok)

    def indent(self, lev, text):
        """Return text with lev spaces prepended to each of its lines.

        Text without newlines is returned with the prefix prepended.
        Text with newlines is split at them and each piece is prefixed
        individually.
        """
        pad = " " * lev
        if '\n' not in text:
            return pad + text
        # NOTE(review): a newline is emitted only after an empty piece,
        # so consecutive non-empty lines end up joined on one line,
        # separated by nothing but the indentation.  This may be
        # unintended; the behaviour is preserved here -- confirm.
        chunks = []
        for elt in text.split('\n'):
            chunks.append(pad + elt)
            if elt == '':
                chunks.append("\n")
        return "".join(chunks)

    def str_item(self, tok, env):
        """Render an item of the environment env.

        env[1] is the environment type and env[2] the indentation level.
        Items of an INDENT environment are merely indented, items of an
        ENVNUM environment are numbered, and items of an ENVUNNUM
        environment are marked with a dash.  None is returned for any
        other environment type.
        """
        t = env[1]
        lev = env[2]
        # An indentation that leaves fewer than four columns for the
        # text is replaced by the minimal one.
        if lev > self.width - 4:
            lev = 1
        if t == self.INDENT:
            return self.indent(lev, self.fmtok(tok[1], env))
        if t == self.ENVNUM:
            n = self.num
            self.num += 1
            return "" + self.indent(lev,
                                    "%d. %s" % (n, self.fmtok(tok[1], env)))
        if t == self.ENVUNNUM:
            return "" + self.indent(lev, "- " + self.fmtok(tok[1], env))
        return None

    def __str__(self):
        """Return the textual rendering of self.tree."""
        return self.fmtok(self.tree, None)


class TextWiktionaryMarkup(TextWikiMarkup):
    """
    See documentation for HtmlWiktionaryMarkup
    """

    # Number of elements of the current sequence that str_seq has
    # already formatted.
    seq_pos = 0

    def str_seq(self, tok, env):
        """Render a sequence token by concatenating its elements.

        self.seq_pos is updated while the elements (tok[1:]) are being
        formatted, which is how str_tmpl recognizes a template that
        starts a sequence.
        """
        pieces = []
        self.seq_pos = 0
        for t in tok[1:]:
            pieces.append(self.fmtok(t, env))
            self.seq_pos += 1
        return "".join(pieces)

    def str_tmpl(self, tok, env):
        """Render a template token.

        A template that is the first element of a sequence is rendered
        as a caption: its formatted tok[1] on a line of its own,
        followed by a colon.  Any other template is rendered as its
        formatted tok[1] alone.
        """
        arg = self.fmtok(tok[1], env)
        if self.seq_pos > 0:
            return arg
        return "\n" + arg + ":\n"