summaryrefslogtreecommitdiff
path: root/wiki2text.py
diff options
context:
space:
mode:
Diffstat (limited to 'wiki2text.py')
-rw-r--r--wiki2text.py171
1 files changed, 113 insertions, 58 deletions
diff --git a/wiki2text.py b/wiki2text.py
index f28c343..c41c4e0 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -23,13 +23,13 @@ import urllib
class TextWikiMarkup (WikiMarkup):
"""
A (general-purpose Wiki->Text translator class.
"""
# Output width
- width = 80
+ width = 78
# Do not show references.
references = False
# Provide a minimum markup
markup = True
# Number of current element in the environment
@@ -55,27 +55,31 @@ class TextWikiMarkup (WikiMarkup):
return wiki_ns[self.lang][str]
elif str in wiki_ns_re[self.lang]:
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
+
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + urllib.quote(tgt)
- def link(self, tok, env, istmpl):
- arg = self.fmtok(tok[1], env)
- text = self.fmtok(tok[2], env)
+ def fmtlink(self, elt, istmpl):
+ arg = self.format(elt[1][0])
+ if len(elt[1]) > 1:
+ text = self.format(elt[1][1])
+ else:
+ text = None
(qual,sep,tgt) = arg.partition(':')
if tgt != '':
ns = self.wiki_ns_name(qual)
if ns:
if ns == 'NS_IMAGE':
if not self.references:
- return None
+ return ""
text = "[%s: %s]" % (qual, text if text else arg)
tgt = self.image_base + '/' + \
urllib.quote(tgt) + \
'/250px-' + urllib.quote(tgt)
elif ns == 'NS_MEDIA':
text = "[%s]" % (qual)
@@ -92,81 +96,132 @@ class TextWikiMarkup (WikiMarkup):
return "%s (see %s) " % (text, tgt)
elif not text or text == '':
return arg
else:
return text
- def str_link(self, tok, env):
- return self.link(tok, env, False)
-
- def str_tmpl(self, tok, env):
- return self.link(tok, env, True)
-
- def str_ref(self, tok, env):
- return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env))
-
- def str_it(self, tok, env):
- if self.markup:
- return "_" + self.fmtok(tok[1], env) + "_"
- return self.fmtok(tok[1], env);
-
- def str_bold(self, tok, env):
- if self.markup:
- return self.fmtok(tok[1], env).upper()
- return self.fmtok(tok[1], env);
-
- def str_hdr(self, tok, env):
- level = tok[1]
- return "\n\n" + ("*" * level) + " " + self.fmtok(tok[2], env) + "\n\n"
-
- def str_bar(self, tok, env):
- w = self.width
- if w < 5:
- w = 5
- return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
-
- def str_env(self, tok, env):
- self.num = 1
- return "\n" + self.fmtok(tok[3], tok)
-
def indent (self, lev, text):
+ print "T \"",text,"\""
w = self.width
self.width = w - lev
if text.find('\n') == -1:
s = (" " * lev) + text
else:
s = ""
for elt in text.split('\n'):
- s += (" " * lev) + elt
- if elt == '':
- s += "\n"
+ s += (" " * lev) + elt + '\n'
self.width = w
return s
- def str_item(self, tok, env):
- t = env[1]
- lev = env[2]
+ def fmtpara(self, input):
+ output = ""
+ linebuf = ""
+ length = 0
+ for s in input.split():
+ wlen = len(s)
+ if linebuf.endswith("."):
+ wsc = 2
+ else:
+ wsc = 1
+ if length + wsc + wlen > self.width:
+ # FIXME: fill out linebuf
+ output += linebuf + '\n'
+ wsc = 0
+ length = 0
+ linebuf = ""
+ linebuf += " " * wsc + s
+ length += wsc + wlen
+ return output + linebuf
+
+ def fmtelt(self, elt, indent=0):
+ if elt[0] == TEXT:
+ if isinstance(elt[1],list):
+ string = ""
+ for s in elt[1]:
+ if string:
+ if string.endswith("."):
+ string += " "
+ else:
+ string += " "
+ string += s.rstrip(" ")
+ else:
+ string = elt[1]
+ elif elt[0] == PARA:
+ string = "";
+ for x in elt[1]:
+ string += self.format(x)
+ string = self.fmtpara(string) + '\n\n'
+ elif elt[0] == IT:
+ string = ""
+ for x in elt[1]:
+ s = self.format(x)
+ if s:
+ string += " " + s.rstrip(" ")
+ string = "_" + string.lstrip(" ") + "_"
+ elif elt[0] == BOLD:
+ string = ""
+ for x in elt[1]:
+ s = self.format(x)
+ if s:
+ if string.endswith("."):
+ string += " "
+ else:
+ string += " "
+ string += s.rstrip(" ")
+ string = string.upper()
+ elif elt[0] == LINK:
+ string = self.fmtlink(elt, False)
+ elif elt[0] == TMPL:
+ string = '\n' + self.fmtlink(elt, True) + '\n'
+ elif elt[0] == BAR:
+ w = self.width
+ if w < 5:
+ w = 5
+ string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
+ elif elt[0] == HDR:
+ level = elt[1]
+ string = "\n" + ("*" * level) + " " + \
+ self.format(elt[2]).lstrip(" ") + "\n\n"
+ elif elt[0] == REF:
+ string = self.xref(self.format(elt[2]), elt[1])
+ elif elt[0] == ENV:
+ type = elt[1]
+ lev = elt[2]
if lev > self.width - 4:
lev = 1
- if t == self.INDENT:
- return self.indent(lev, self.fmtok(tok[1], env))
- elif t == self.ENVNUM:
- n = self.num
- self.num += 1
- return "" + self.indent(lev,
- "%d. %s" % (n, self.fmtok(tok[1], env)))
- elif t == self.ENVUNNUM:
- return "" + self.indent(lev,
- "- " + self.fmtok(tok[1], env))
-
- def str_para(self, tok, env):
- return "\n"
+ string = "\n"
+ n = 1
+ for s in elt[3]:
+ x = self.format(s)
+# print "X",x
+ if type == ENVUNNUM:
+ string += self.indent(lev, "*" + x.lstrip(" ")) + '\n'
+ elif type == ENVNUM:
+ string += self.indent(lev, "%d. %s" % (n, x)) + '\n'
+ n += 1
+ elif elt[0] == IND:
+ string = (" " * elt[1]) + self.format(elt[2]) + '\n'
+ else:
+ string = str(elt)
+ return string
+
+ def format(self, elt, indent=0):
+ string = ""
+ if elt[0] == SEQ:
+ for x in elt[1]:
+ string += " " + self.format(x, indent)
+ else:
+ string += " " + self.fmtelt(elt, indent)
+ return string
def __str__(self):
- return self.fmtok(self.tree, None)
+ str = ""
+ for elt in self.tree:
+ str += self.format(elt)
+ return str
class TextWiktionaryMarkup (TextWikiMarkup):
"""
See documentation for HtmlWiktionaryMarkup
"""

Return to:

Send suggestions and report system problems to the System administrator.