summary | refs | log | tree | commit | diff | about
path: root/wiki2text.py
author Sergey Poznyakoff <gray@gnu.org> 2015-07-06 05:05:31 (GMT)
committer Sergey Poznyakoff <gray@gnu.org> 2015-07-06 05:05:31 (GMT)
commit f3378aebac7e89000ff097ac51c49b62eb6e9f08 (patch) (unidiff)
tree cdf7a9b58b52cd6e995ddf63ef05526e60a918f1 /wiki2text.py
parent 7ab9949e2c038ee6a7215d91896f2b47a5e7c06d (diff)
download wikitrans-f3378aebac7e89000ff097ac51c49b62eb6e9f08.tar.gz
download wikitrans-f3378aebac7e89000ff097ac51c49b62eb6e9f08.tar.bz2
Redo parse tree as a sequence of dictionaries, instead of arrays.
Diffstat (limited to 'wiki2text.py') (more/less context) (ignore whitespace changes)
-rw-r--r-- wiki2text.py 60
1 files changed, 30 insertions, 30 deletions
diff --git a/wiki2text.py b/wiki2text.py
index 005e551..c94ae51 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -66,9 +66,9 @@ class TextWikiMarkup (WikiMarkup):
66 return self.html_base % { 'lang' : lang } + urllib.quote(tgt) 66 return self.html_base % { 'lang' : lang } + urllib.quote(tgt)
67 67
68 def fmtlink(self, elt, istmpl): 68 def fmtlink(self, elt, istmpl):
69 arg = self.format(elt[1][0]) 69 arg = self.format(elt['content'][0])
70 if len(elt[1]) > 1: 70 if len(elt['content']) > 1:
71 s = map(self.format, elt[1]) 71 s = map(self.format, elt['content'])
72 text = s[1] 72 text = s[1]
73 else: 73 else:
74 s = None 74 s = None
@@ -143,10 +143,10 @@ class TextWikiMarkup (WikiMarkup):
143 return output + linebuf 143 return output + linebuf
144 144
145 def format(self, elt): 145 def format(self, elt):
146 if elt[0] == TEXT: 146 if elt['type'] == 'TEXT':
147 if isinstance(elt[1],list): 147 if isinstance(elt['content'],list):
148 string = "" 148 string = ""
149 for s in elt[1]: 149 for s in elt['content']:
150 if string: 150 if string:
151 if string.endswith("."): 151 if string.endswith("."):
152 string += " " 152 string += " "
@@ -154,22 +154,22 @@ class TextWikiMarkup (WikiMarkup):
154 string += " " 154 string += " "
155 string += s 155 string += s
156 else: 156 else:
157 string = elt[1] 157 string = elt['content']
158 elif elt[0] == PARA: 158 elif elt['type'] == 'PARA':
159 string = ""; 159 string = "";
160 for x in elt[1]: 160 for x in elt['content']:
161 string += self.format(x) 161 string += self.format(x)
162 string = self.fmtpara(string) + '\n\n' 162 string = self.fmtpara(string) + '\n\n'
163 elif elt[0] == IT: 163 elif elt['type'] == 'IT':
164 string = "" 164 string = ""
165 for x in elt[1]: 165 for x in elt['content']:
166 s = self.format(x) 166 s = self.format(x)
167 if s: 167 if s:
168 string += " " + s 168 string += " " + s
169 string = "_" + string.lstrip(" ") + "_" 169 string = "_" + string.lstrip(" ") + "_"
170 elif elt[0] == BOLD: 170 elif elt['type'] == 'BOLD':
171 string = "" 171 string = ""
172 for x in elt[1]: 172 for x in elt['content']:
173 s = self.format(x) 173 s = self.format(x)
174 if s: 174 if s:
175 if string.endswith("."): 175 if string.endswith("."):
@@ -178,36 +178,36 @@ class TextWikiMarkup (WikiMarkup):
178 string += " " 178 string += " "
179 string += s 179 string += s
180 string = string.upper() 180 string = string.upper()
181 elif elt[0] == LINK: 181 elif elt['type'] == 'LINK':
182 string = self.fmtlink(elt, False) 182 string = self.fmtlink(elt, False)
183 elif elt[0] == TMPL: 183 elif elt['type'] == 'TMPL':
184 s = self.fmtlink(elt, True) 184 s = self.fmtlink(elt, True)
185 if s: 185 if s:
186 string = '[' + s + ']' 186 string = '[' + s + ']'
187 else: 187 else:
188 string = s 188 string = s
189 elif elt[0] == BAR: 189 elif elt['type'] == 'BAR':
190 w = self.width 190 w = self.width
191 if w < 5: 191 if w < 5:
192 w = 5 192 w = 5
193 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" 193 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
194 elif elt[0] == HDR: 194 elif elt['type'] == 'HDR':
195 level = elt[1] 195 level = elt['level']
196 string = "\n" + ("*" * level) + " " + \ 196 string = "\n" + ("*" * level) + " " + \
197 self.format(elt[2]).lstrip(" ") + "\n\n" 197 self.format(elt['content']).lstrip(" ") + "\n\n"
198 elif elt[0] == REF: 198 elif elt['type'] == 'REF':
199 string = self.xref(self.format(elt[2]), elt[1]) 199 string = self.xref(self.format(elt['content']), elt['ref'])
200 elif elt[0] == ENV: 200 elif elt['type'] == 'ENV':
201 type = elt[1] 201 type = elt['envtype']
202 lev = elt[2] 202 lev = elt['level']
203 if lev > self.width - 4: 203 if lev > self.width - 4:
204 lev = 1 204 lev = 1
205 string = "" 205 string = ""
206 n = 1 206 n = 1
207 for s in elt[3]: 207 for s in elt['content']:
208 if not string.endswith("\n"): 208 if not string.endswith("\n"):
209 string += "\n" 209 string += "\n"
210 x = self.format(s[2]) 210 x = self.format(s['content'])
211 if type == "unnumbered": 211 if type == "unnumbered":
212 string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) 212 string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" ")))
213 elif type == "numbered": 213 elif type == "numbered":
@@ -221,11 +221,11 @@ class TextWikiMarkup (WikiMarkup):
221 221
222 if not string.endswith("\n"): 222 if not string.endswith("\n"):
223 string += "\n" 223 string += "\n"
224 elif elt[0] == IND: 224 elif elt['type'] == 'IND':
225 string = (" " * elt[1]) + self.format(elt[2]) + '\n' 225 string = (" " * elt['level']) + self.format(elt['content']) + '\n'
226 elif elt[0] == SEQ: 226 elif elt['type'] == 'SEQ':
227 string = "" 227 string = ""
228 for x in elt[1]: 228 for x in elt['content']:
229 if len(string) > 1 and not string[-1].isspace(): 229 if len(string) > 1 and not string[-1].isspace():
230 string += ' ' 230 string += ' '
231 string += self.format(x) 231 string += self.format(x)

Return to:

Send suggestions and report system problems to the System administrator.