summaryrefslogtreecommitdiffabout
path: root/wiki2text.py
Unidiff
Diffstat (limited to 'wiki2text.py') (more/less context) (ignore whitespace changes)
-rw-r--r--wiki2text.py177
1 files changed, 116 insertions, 61 deletions
diff --git a/wiki2text.py b/wiki2text.py
index f28c343..c41c4e0 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -26,7 +26,7 @@ class TextWikiMarkup (WikiMarkup):
26 """ 26 """
27 27
28 # Output width 28 # Output width
29 width = 80 29 width = 78
30 # Do not show references. 30 # Do not show references.
31 references = False 31 references = False
32 # Provide a minimum markup 32 # Provide a minimum markup
@@ -57,22 +57,26 @@ class TextWikiMarkup (WikiMarkup):
57 for elt in wiki_ns_re[self.lang][str]: 57 for elt in wiki_ns_re[self.lang][str]:
58 if str.beginswith(elt[0]) and str.endswith(elt[1]): 58 if str.beginswith(elt[0]) and str.endswith(elt[1]):
59 return elt[2] 59 return elt[2]
60 return None 60 return None
61
61 def mktgt(self, tgt, lang = None): 62 def mktgt(self, tgt, lang = None):
62 if not lang: 63 if not lang:
63 lang = self.lang 64 lang = self.lang
64 return self.html_base % { 'lang' : lang } + urllib.quote(tgt) 65 return self.html_base % { 'lang' : lang } + urllib.quote(tgt)
65 66
66 def link(self, tok, env, istmpl): 67 def fmtlink(self, elt, istmpl):
67 arg = self.fmtok(tok[1], env) 68 arg = self.format(elt[1][0])
68 text = self.fmtok(tok[2], env) 69 if len(elt[1]) > 1:
70 text = self.format(elt[1][1])
71 else:
72 text = None
69 (qual,sep,tgt) = arg.partition(':') 73 (qual,sep,tgt) = arg.partition(':')
70 if tgt != '': 74 if tgt != '':
71 ns = self.wiki_ns_name(qual) 75 ns = self.wiki_ns_name(qual)
72 if ns: 76 if ns:
73 if ns == 'NS_IMAGE': 77 if ns == 'NS_IMAGE':
74 if not self.references: 78 if not self.references:
75 return None 79 return ""
76 text = "[%s: %s]" % (qual, text if text else arg) 80 text = "[%s: %s]" % (qual, text if text else arg)
77 tgt = self.image_base + '/' + \ 81 tgt = self.image_base + '/' + \
78 urllib.quote(tgt) + \ 82 urllib.quote(tgt) + \
@@ -94,41 +98,9 @@ class TextWikiMarkup (WikiMarkup):
94 return arg 98 return arg
95 else: 99 else:
96 return text 100 return text
97
98 def str_link(self, tok, env):
99 return self.link(tok, env, False)
100
101 def str_tmpl(self, tok, env):
102 return self.link(tok, env, True)
103
104 def str_ref(self, tok, env):
105 return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env))
106
107 def str_it(self, tok, env):
108 if self.markup:
109 return "_" + self.fmtok(tok[1], env) + "_"
110 return self.fmtok(tok[1], env);
111
112 def str_bold(self, tok, env):
113 if self.markup:
114 return self.fmtok(tok[1], env).upper()
115 return self.fmtok(tok[1], env);
116
117 def str_hdr(self, tok, env):
118 level = tok[1]
119 return "\n\n" + ("*" * level) + " " + self.fmtok(tok[2], env) + "\n\n"
120
121 def str_bar(self, tok, env):
122 w = self.width
123 if w < 5:
124 w = 5
125 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
126
127 def str_env(self, tok, env):
128 self.num = 1
129 return "\n" + self.fmtok(tok[3], tok)
130 101
131 def indent (self, lev, text): 102 def indent (self, lev, text):
103 print "T \"",text,"\""
132 w = self.width 104 w = self.width
133 self.width = w - lev 105 self.width = w - lev
134 if text.find('\n') == -1: 106 if text.find('\n') == -1:
@@ -136,34 +108,117 @@ class TextWikiMarkup (WikiMarkup):
136 else: 108 else:
137 s = "" 109 s = ""
138 for elt in text.split('\n'): 110 for elt in text.split('\n'):
139 s += (" " * lev) + elt 111 s += (" " * lev) + elt + '\n'
140 if elt == '':
141 s += "\n"
142 112
143 self.width = w 113 self.width = w
144 return s 114 return s
115
116 def fmtpara(self, input):
117 output = ""
118 linebuf = ""
119 length = 0
120 for s in input.split():
121 wlen = len(s)
122 if linebuf.endswith("."):
123 wsc = 2
124 else:
125 wsc = 1
126 if length + wsc + wlen > self.width:
127 # FIXME: fill out linebuf
128 output += linebuf + '\n'
129 wsc = 0
130 length = 0
131 linebuf = ""
132 linebuf += " " * wsc + s
133 length += wsc + wlen
134 return output + linebuf
145 135
146 def str_item(self, tok, env): 136 def fmtelt(self, elt, indent=0):
147 t = env[1] 137 if elt[0] == TEXT:
148 lev = env[2] 138 if isinstance(elt[1],list):
149 if lev > self.width - 4: 139 string = ""
150 lev = 1 140 for s in elt[1]:
151 if t == self.INDENT: 141 if string:
152 return self.indent(lev, self.fmtok(tok[1], env)) 142 if string.endswith("."):
153 elif t == self.ENVNUM: 143 string += " "
154 n = self.num 144 else:
155 self.num += 1 145 string += " "
156 return "" + self.indent(lev, 146 string += s.rstrip(" ")
157 "%d. %s" % (n, self.fmtok(tok[1], env))) 147 else:
158 elif t == self.ENVUNNUM: 148 string = elt[1]
159 return "" + self.indent(lev, 149 elif elt[0] == PARA:
160 "- " + self.fmtok(tok[1], env)) 150 string = "";
151 for x in elt[1]:
152 string += self.format(x)
153 string = self.fmtpara(string) + '\n\n'
154 elif elt[0] == IT:
155 string = ""
156 for x in elt[1]:
157 s = self.format(x)
158 if s:
159 string += " " + s.rstrip(" ")
160 string = "_" + string.lstrip(" ") + "_"
161 elif elt[0] == BOLD:
162 string = ""
163 for x in elt[1]:
164 s = self.format(x)
165 if s:
166 if string.endswith("."):
167 string += " "
168 else:
169 string += " "
170 string += s.rstrip(" ")
171 string = string.upper()
172 elif elt[0] == LINK:
173 string = self.fmtlink(elt, False)
174 elif elt[0] == TMPL:
175 string = '\n' + self.fmtlink(elt, True) + '\n'
176 elif elt[0] == BAR:
177 w = self.width
178 if w < 5:
179 w = 5
180 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
181 elif elt[0] == HDR:
182 level = elt[1]
183 string = "\n" + ("*" * level) + " " + \
184 self.format(elt[2]).lstrip(" ") + "\n\n"
185 elif elt[0] == REF:
186 string = self.xref(self.format(elt[2]), elt[1])
187 elif elt[0] == ENV:
188 type = elt[1]
189 lev = elt[2]
190 if lev > self.width - 4:
191 lev = 1
192 string = "\n"
193 n = 1
194 for s in elt[3]:
195 x = self.format(s)
196# print "X",x
197 if type == ENVUNNUM:
198 string += self.indent(lev, "*" + x.lstrip(" ")) + '\n'
199 elif type == ENVNUM:
200 string += self.indent(lev, "%d. %s" % (n, x)) + '\n'
201 n += 1
202 elif elt[0] == IND:
203 string = (" " * elt[1]) + self.format(elt[2]) + '\n'
204 else:
205 string = str(elt)
206 return string
207
208 def format(self, elt, indent=0):
209 string = ""
210 if elt[0] == SEQ:
211 for x in elt[1]:
212 string += " " + self.format(x, indent)
213 else:
214 string += " " + self.fmtelt(elt, indent)
215 return string
161 216
162 def str_para(self, tok, env):
163 return "\n"
164
165 def __str__(self): 217 def __str__(self):
166 return self.fmtok(self.tree, None) 218 str = ""
219 for elt in self.tree:
220 str += self.format(elt)
221 return str
167 222
168class TextWiktionaryMarkup (TextWikiMarkup): 223class TextWiktionaryMarkup (TextWikiMarkup):
169 """ 224 """

Return to:

Send suggestions and report system problems to the System administrator.