diff options
Diffstat (limited to 'wiki2text.py')
-rw-r--r-- | wiki2text.py | 177 |
1 files changed, 116 insertions, 61 deletions
diff --git a/wiki2text.py b/wiki2text.py index f28c343..c41c4e0 100644 --- a/wiki2text.py +++ b/wiki2text.py | |||
@@ -23,13 +23,13 @@ import urllib | |||
23 | class TextWikiMarkup (WikiMarkup): | 23 | class TextWikiMarkup (WikiMarkup): |
24 | """ | 24 | """ |
25 | A (general-purpose Wiki->Text translator class. | 25 | A (general-purpose Wiki->Text translator class. |
26 | """ | 26 | """ |
27 | 27 | ||
28 | # Output width | 28 | # Output width |
29 | width = 80 | 29 | width = 78 |
30 | # Do not show references. | 30 | # Do not show references. |
31 | references = False | 31 | references = False |
32 | # Provide a minimum markup | 32 | # Provide a minimum markup |
33 | markup = True | 33 | markup = True |
34 | 34 | ||
35 | # Number of current element in the environment | 35 | # Number of current element in the environment |
@@ -54,28 +54,32 @@ class TextWikiMarkup (WikiMarkup): | |||
54 | if str in wiki_ns[self.lang]: | 54 | if str in wiki_ns[self.lang]: |
55 | return wiki_ns[self.lang][str] | 55 | return wiki_ns[self.lang][str] |
56 | elif str in wiki_ns_re[self.lang]: | 56 | elif str in wiki_ns_re[self.lang]: |
57 | for elt in wiki_ns_re[self.lang][str]: | 57 | for elt in wiki_ns_re[self.lang][str]: |
58 | if str.beginswith(elt[0]) and str.endswith(elt[1]): | 58 | if str.beginswith(elt[0]) and str.endswith(elt[1]): |
59 | return elt[2] | 59 | return elt[2] |
60 | return None | 60 | return None |
61 | |||
61 | def mktgt(self, tgt, lang = None): | 62 | def mktgt(self, tgt, lang = None): |
62 | if not lang: | 63 | if not lang: |
63 | lang = self.lang | 64 | lang = self.lang |
64 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) | 65 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) |
65 | 66 | ||
66 | def link(self, tok, env, istmpl): | 67 | def fmtlink(self, elt, istmpl): |
67 | arg = self.fmtok(tok[1], env) | 68 | arg = self.format(elt[1][0]) |
68 | text = self.fmtok(tok[2], env) | 69 | if len(elt[1]) > 1: |
70 | text = self.format(elt[1][1]) | ||
71 | else: | ||
72 | text = None | ||
69 | (qual,sep,tgt) = arg.partition(':') | 73 | (qual,sep,tgt) = arg.partition(':') |
70 | if tgt != '': | 74 | if tgt != '': |
71 | ns = self.wiki_ns_name(qual) | 75 | ns = self.wiki_ns_name(qual) |
72 | if ns: | 76 | if ns: |
73 | if ns == 'NS_IMAGE': | 77 | if ns == 'NS_IMAGE': |
74 | if not self.references: | 78 | if not self.references: |
75 | return None | 79 | return "" |
76 | text = "[%s: %s]" % (qual, text if text else arg) | 80 | text = "[%s: %s]" % (qual, text if text else arg) |
77 | tgt = self.image_base + '/' + \ | 81 | tgt = self.image_base + '/' + \ |
78 | urllib.quote(tgt) + \ | 82 | urllib.quote(tgt) + \ |
79 | '/250px-' + urllib.quote(tgt) | 83 | '/250px-' + urllib.quote(tgt) |
80 | elif ns == 'NS_MEDIA': | 84 | elif ns == 'NS_MEDIA': |
81 | text = "[%s]" % (qual) | 85 | text = "[%s]" % (qual) |
@@ -91,82 +95,133 @@ class TextWikiMarkup (WikiMarkup): | |||
91 | if self.references: | 95 | if self.references: |
92 | return "%s (see %s) " % (text, tgt) | 96 | return "%s (see %s) " % (text, tgt) |
93 | elif not text or text == '': | 97 | elif not text or text == '': |
94 | return arg | 98 | return arg |
95 | else: | 99 | else: |
96 | return text | 100 | return text |
97 | |||
98 | def str_link(self, tok, env): | ||
99 | return self.link(tok, env, False) | ||
100 | |||
101 | def str_tmpl(self, tok, env): | ||
102 | return self.link(tok, env, True) | ||
103 | |||
104 | def str_ref(self, tok, env): | ||
105 | return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env)) | ||
106 | |||
107 | def str_it(self, tok, env): | ||
108 | if self.markup: | ||
109 | return "_" + self.fmtok(tok[1], env) + "_" | ||
110 | return self.fmtok(tok[1], env); | ||
111 | |||
112 | def str_bold(self, tok, env): | ||
113 | if self.markup: | ||
114 | return self.fmtok(tok[1], env).upper() | ||
115 | return self.fmtok(tok[1], env); | ||
116 | |||
117 | def str_hdr(self, tok, env): | ||
118 | level = tok[1] | ||
119 | return "\n\n" + ("*" * level) + " " + self.fmtok(tok[2], env) + "\n\n" | ||
120 | |||
121 | def str_bar(self, tok, env): | ||
122 | w = self.width | ||
123 | if w < 5: | ||
124 | w = 5 | ||
125 | return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" | ||
126 | |||
127 | def str_env(self, tok, env): | ||
128 | self.num = 1 | ||
129 | return "\n" + self.fmtok(tok[3], tok) | ||
130 | 101 | ||
131 | def indent (self, lev, text): | 102 | def indent (self, lev, text): |
103 | print "T \"",text,"\"" | ||
132 | w = self.width | 104 | w = self.width |
133 | self.width = w - lev | 105 | self.width = w - lev |
134 | if text.find('\n') == -1: | 106 | if text.find('\n') == -1: |
135 | s = (" " * lev) + text | 107 | s = (" " * lev) + text |
136 | else: | 108 | else: |
137 | s = "" | 109 | s = "" |
138 | for elt in text.split('\n'): | 110 | for elt in text.split('\n'): |
139 | s += (" " * lev) + elt | 111 | s += (" " * lev) + elt + '\n' |
140 | if elt == '': | ||
141 | s += "\n" | ||
142 | 112 | ||
143 | self.width = w | 113 | self.width = w |
144 | return s | 114 | return s |
115 | |||
116 | def fmtpara(self, input): | ||
117 | output = "" | ||
118 | linebuf = "" | ||
119 | length = 0 | ||
120 | for s in input.split(): | ||
121 | wlen = len(s) | ||
122 | if linebuf.endswith("."): | ||
123 | wsc = 2 | ||
124 | else: | ||
125 | wsc = 1 | ||
126 | if length + wsc + wlen > self.width: | ||
127 | # FIXME: fill out linebuf | ||
128 | output += linebuf + '\n' | ||
129 | wsc = 0 | ||
130 | length = 0 | ||
131 | linebuf = "" | ||
132 | linebuf += " " * wsc + s | ||
133 | length += wsc + wlen | ||
134 | return output + linebuf | ||
145 | 135 | ||
146 | def str_item(self, tok, env): | 136 | def fmtelt(self, elt, indent=0): |
147 | t = env[1] | 137 | if elt[0] == TEXT: |
148 | lev = env[2] | 138 | if isinstance(elt[1],list): |
149 | if lev > self.width - 4: | 139 | string = "" |
150 | lev = 1 | 140 | for s in elt[1]: |
151 | if t == self.INDENT: | 141 | if string: |
152 | return self.indent(lev, self.fmtok(tok[1], env)) | 142 | if string.endswith("."): |
153 | elif t == self.ENVNUM: | 143 | string += " " |
154 | n = self.num | 144 | else: |
155 | self.num += 1 | 145 | string += " " |
156 | return "" + self.indent(lev, | 146 | string += s.rstrip(" ") |
157 | "%d. %s" % (n, self.fmtok(tok[1], env))) | 147 | else: |
158 | elif t == self.ENVUNNUM: | 148 | string = elt[1] |
159 | return "" + self.indent(lev, | 149 | elif elt[0] == PARA: |
160 | "- " + self.fmtok(tok[1], env)) | 150 | string = ""; |
151 | for x in elt[1]: | ||
152 | string += self.format(x) | ||
153 | string = self.fmtpara(string) + '\n\n' | ||
154 | elif elt[0] == IT: | ||
155 | string = "" | ||
156 | for x in elt[1]: | ||
157 | s = self.format(x) | ||
158 | if s: | ||
159 | string += " " + s.rstrip(" ") | ||
160 | string = "_" + string.lstrip(" ") + "_" | ||
161 | elif elt[0] == BOLD: | ||
162 | string = "" | ||
163 | for x in elt[1]: | ||
164 | s = self.format(x) | ||
165 | if s: | ||
166 | if string.endswith("."): | ||
167 | string += " " | ||
168 | else: | ||
169 | string += " " | ||
170 | string += s.rstrip(" ") | ||
171 | string = string.upper() | ||
172 | elif elt[0] == LINK: | ||
173 | string = self.fmtlink(elt, False) | ||
174 | elif elt[0] == TMPL: | ||
175 | string = '\n' + self.fmtlink(elt, True) + '\n' | ||
176 | elif elt[0] == BAR: | ||
177 | w = self.width | ||
178 | if w < 5: | ||
179 | w = 5 | ||
180 | string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" | ||
181 | elif elt[0] == HDR: | ||
182 | level = elt[1] | ||
183 | string = "\n" + ("*" * level) + " " + \ | ||
184 | self.format(elt[2]).lstrip(" ") + "\n\n" | ||
185 | elif elt[0] == REF: | ||
186 | string = self.xref(self.format(elt[2]), elt[1]) | ||
187 | elif elt[0] == ENV: | ||
188 | type = elt[1] | ||
189 | lev = elt[2] | ||
190 | if lev > self.width - 4: | ||
191 | lev = 1 | ||
192 | string = "\n" | ||
193 | n = 1 | ||
194 | for s in elt[3]: | ||
195 | x = self.format(s) | ||
196 | # print "X",x | ||
197 | if type == ENVUNNUM: | ||
198 | string += self.indent(lev, "*" + x.lstrip(" ")) + '\n' | ||
199 | elif type == ENVNUM: | ||
200 | string += self.indent(lev, "%d. %s" % (n, x)) + '\n' | ||
201 | n += 1 | ||
202 | elif elt[0] == IND: | ||
203 | string = (" " * elt[1]) + self.format(elt[2]) + '\n' | ||
204 | else: | ||
205 | string = str(elt) | ||
206 | return string | ||
207 | |||
208 | def format(self, elt, indent=0): | ||
209 | string = "" | ||
210 | if elt[0] == SEQ: | ||
211 |