summaryrefslogtreecommitdiff
path: root/wiki2text.py
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2009-03-02 20:58:09 +0200
committerSergey Poznyakoff <gray@gnu.org.ua>2009-03-03 00:31:18 +0200
commitae8b8bc81eab08b2ebe9f8c0957c085b5d45fc2b (patch)
tree551e90f993a83674faa367b776538c44704e78a6 /wiki2text.py
parent86ee544f442aa3c4a0516a620890ec64de0770cc (diff)
downloadwikitrans-ae8b8bc81eab08b2ebe9f8c0957c085b5d45fc2b.tar.gz
wikitrans-ae8b8bc81eab08b2ebe9f8c0957c085b5d45fc2b.tar.bz2
Rewrite from scratch. Text conversion almost(TM) works
Diffstat (limited to 'wiki2text.py')
-rw-r--r--wiki2text.py177
1 files changed, 116 insertions, 61 deletions
diff --git a/wiki2text.py b/wiki2text.py
index f28c343..c41c4e0 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -28,3 +28,3 @@ class TextWikiMarkup (WikiMarkup):
28 # Output width 28 # Output width
29 width = 80 29 width = 78
30 # Do not show references. 30 # Do not show references.
@@ -59,3 +59,4 @@ class TextWikiMarkup (WikiMarkup):
59 return elt[2] 59 return elt[2]
60 return None 60 return None
61
61 def mktgt(self, tgt, lang = None): 62 def mktgt(self, tgt, lang = None):
@@ -65,5 +66,8 @@ class TextWikiMarkup (WikiMarkup):
65 66
66 def link(self, tok, env, istmpl): 67 def fmtlink(self, elt, istmpl):
67 arg = self.fmtok(tok[1], env) 68 arg = self.format(elt[1][0])
68 text = self.fmtok(tok[2], env) 69 if len(elt[1]) > 1:
70 text = self.format(elt[1][1])
71 else:
72 text = None
69 (qual,sep,tgt) = arg.partition(':') 73 (qual,sep,tgt) = arg.partition(':')
@@ -74,3 +78,3 @@ class TextWikiMarkup (WikiMarkup):
74 if not self.references: 78 if not self.references:
75 return None 79 return ""
76 text = "[%s: %s]" % (qual, text if text else arg) 80 text = "[%s: %s]" % (qual, text if text else arg)
@@ -96,37 +100,5 @@ class TextWikiMarkup (WikiMarkup):
96 return text 100 return text
97
98 def str_link(self, tok, env):
99 return self.link(tok, env, False)
100
101 def str_tmpl(self, tok, env):
102 return self.link(tok, env, True)
103
104 def str_ref(self, tok, env):
105 return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env))
106
107 def str_it(self, tok, env):
108 if self.markup:
109 return "_" + self.fmtok(tok[1], env) + "_"
110 return self.fmtok(tok[1], env);
111
112 def str_bold(self, tok, env):
113 if self.markup:
114 return self.fmtok(tok[1], env).upper()
115 return self.fmtok(tok[1], env);
116
117 def str_hdr(self, tok, env):
118 level = tok[1]
119 return "\n\n" + ("*" * level) + " " + self.fmtok(tok[2], env) + "\n\n"
120
121 def str_bar(self, tok, env):
122 w = self.width
123 if w < 5:
124 w = 5
125 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
126
127 def str_env(self, tok, env):
128 self.num = 1
129 return "\n" + self.fmtok(tok[3], tok)
130 101
131 def indent (self, lev, text): 102 def indent (self, lev, text):
103 print "T \"",text,"\""
132 w = self.width 104 w = self.width
@@ -138,5 +110,3 @@ class TextWikiMarkup (WikiMarkup):
138 for elt in text.split('\n'): 110 for elt in text.split('\n'):
139 s += (" " * lev) + elt 111 s += (" " * lev) + elt + '\n'
140 if elt == '':
141 s += "\n"
142 112
@@ -144,24 +114,109 @@ class TextWikiMarkup (WikiMarkup):
144 return s 114 return s
115
116 def fmtpara(self, input):
117 output = ""
118 linebuf = ""
119 length = 0
120 for s in input.split():
121 wlen = len(s)
122 if linebuf.endswith("."):
123 wsc = 2
124 else:
125 wsc = 1
126 if length + wsc + wlen > self.width:
127 # FIXME: fill out linebuf
128 output += linebuf + '\n'
129 wsc = 0
130 length = 0
131 linebuf = ""
132 linebuf += " " * wsc + s
133 length += wsc + wlen
134 return output + linebuf
145 135
146 def str_item(self, tok, env): 136 def fmtelt(self, elt, indent=0):
147 t = env[1] 137 if elt[0] == TEXT:
148 lev = env[2] 138 if isinstance(elt[1],list):
149 if lev > self.width - 4: 139 string = ""
150 lev = 1 140 for s in elt[1]:
151 if t == self.INDENT: 141 if string:
152 return self.indent(lev, self.fmtok(tok[1], env)) 142 if string.endswith("."):
153 elif t == self.ENVNUM: 143 string += " "
154 n = self.num 144 else:
155 self.num += 1 145 string += " "
156 return "" + self.indent(lev, 146 string += s.rstrip(" ")
157 "%d. %s" % (n, self.fmtok(tok[1], env))) 147 else:
158 elif t == self.ENVUNNUM: 148 string = elt[1]
159 return "" + self.indent(lev, 149 elif elt[0] == PARA:
160 "- " + self.fmtok(tok[1], env)) 150 string = "";
151 for x in elt[1]:
152 string += self.format(x)
153 string = self.fmtpara(string) + '\n\n'
154 elif elt[0] == IT:
155 string = ""
156 for x in elt[1]:
157 s = self.format(x)
158 if s:
159 string += " " + s.rstrip(" ")
160 string = "_" + string.lstrip(" ") + "_"
161 elif elt[0] == BOLD:
162 string = ""
163 for x in elt[1]:
164 s = self.format(x)
165 if s:
166 if string.endswith("."):
167 string += " "
168 else:
169 string += " "
170 string += s.rstrip(" ")
171 string = string.upper()
172 elif elt[0] == LINK:
173 string = self.fmtlink(elt, False)
174 elif elt[0] == TMPL:
175 string = '\n' + self.fmtlink(elt, True) + '\n'
176 elif elt[0] == BAR:
177 w = self.width
178 if w < 5:
179 w = 5
180 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n"
181 elif elt[0] == HDR:
182 level = elt[1]
183 string = "\n" + ("*" * level) + " " + \
184 self.format(elt[2]).lstrip(" ") + "\n\n"
185 elif elt[0] == REF:
186 string = self.xref(self.format(elt[2]), elt[1])
187 elif elt[0] == ENV:
188 type = elt[1]
189 lev = elt[2]
190 if lev > self.width - 4:
191 lev = 1
192 string = "\n"
193 n = 1
194 for s in elt[3]:
195 x = self.format(s)
196# print "X",x
197 if type == ENVUNNUM:
198 string += self.indent(lev, "*" + x.lstrip(" ")) + '\n'
199 elif type == ENVNUM:
200 string += self.indent(lev, "%d. %s" % (n, x)) + '\n'
201 n += 1
202 elif elt[0] == IND:
203 string = (" " * elt[1]) + self.format(elt[2]) + '\n'
204 else:
205 string = str(elt)
206 return string
207
208 def format(self, elt, indent=0):
209 string = ""
210 if elt[0] == SEQ:
211 for x in elt[1]:
212 string += " " + self.format(x, indent)
213 else:
214 string += " " + self.fmtelt(elt, indent)
215 return string
161 216
162 def str_para(self, tok, env):
163 return "\n"
164
165 def __str__(self): 217 def __str__(self):
166 return self.fmtok(self.tree, None) 218 str = ""
219 for elt in self.tree:
220 str += self.format(elt)
221 return str
167 222

Return to:

Send suggestions and report system problems to the System administrator.