diff options
Diffstat (limited to 'wikitrans/wiki2text.py')
-rw-r--r-- | wikitrans/wiki2text.py | 66 |
1 files changed, 39 insertions, 27 deletions
diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py
index 88e7610..7585bff 100644
--- a/wikitrans/wiki2text.py
+++ b/wikitrans/wiki2text.py | |||
@@ -1,17 +1,17 @@ | |||
1 | #!/usr/bin/python | 1 | #!/usr/bin/python |
2 | # -*- coding: utf-8 -*- | 2 | # -*- coding: utf-8 -*- |
3 | # Copyright (C) 2008-2018 Sergey Poznyakoff | 3 | # Copyright (C) 2008-2018 Sergey Poznyakoff |
4 | # | 4 | # |
5 | # This program is free software; you can redistribute it and/or modify | 5 | # This program is free software; you can redistribute it and/or modify |
6 | # it under the terms of the GNU General Public License as published by | 6 | # it under the terms of the GNU General Public License as published by |
7 | # the Free Software Foundation; either version 3, or (at your option) | 7 | # the Free Software Foundation; either version 3, or (at your option) |
8 | # any later version. | 8 | # any later version. |
9 | # | 9 | # |
10 | # This program is distributed in the hope that it will be useful, | 10 | # This program is distributed in the hope that it will be useful, |
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | # GNU General Public License for more details. | 13 | # GNU General Public License for more details. |
14 | # | 14 | # |
15 | # You should have received a copy of the GNU General Public License | 15 | # You should have received a copy of the GNU General Public License |
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 | ||
@@ -35,6 +35,7 @@ try: | |||
35 | except ImportError: | 35 | except ImportError: |
36 | from urllib.parse import quote as url_quote | 36 | from urllib.parse import quote as url_quote |
37 | 37 | ||
38 | |||
38 | class TextSeqNode(WikiSeqNode): | 39 | class TextSeqNode(WikiSeqNode): |
39 | def format(self): | 40 | def format(self): |
40 | string = "" | 41 | string = "" |
@@ -43,10 +44,11 @@ class TextSeqNode(WikiSeqNode): | |||
43 | string += ' ' | 44 | string += ' ' |
44 | string += x.format() | 45 | string += x.format() |
45 | return string | 46 | return string |
46 | 47 | ||
48 | |||
47 | class TextTextNode(WikiTextNode): | 49 | class TextTextNode(WikiTextNode): |
48 | def format(self): | 50 | def format(self): |
49 | if isinstance(self.content,list): | 51 | if isinstance(self.content, list): |
50 | string = "" | 52 | string = "" |
51 | for s in self.content: | 53 | for s in self.content: |
52 | if string: | 54 | if string: |
@@ -59,6 +61,7 @@ class TextTextNode(WikiTextNode): | |||
59 | string = self.content | 61 | string = self.content |
60 | return string | 62 | return string |
61 | 63 | ||
64 | |||
62 | class TextPreNode(WikiSeqNode): | 65 | class TextPreNode(WikiSeqNode): |
63 | def format(self): | 66 | def format(self): |
64 | string = "" | 67 | string = "" |
@@ -67,6 +70,7 @@ class TextPreNode(WikiSeqNode): | |||
67 | string += '\n' | 70 | string += '\n' |
68 | return string | 71 | return string |
69 | 72 | ||
73 | |||
70 | class TextParaNode(WikiSeqNode): | 74 | class TextParaNode(WikiSeqNode): |
71 | def format(self): | 75 | def format(self): |
72 | string = "" | 76 | string = "" |
@@ -75,6 +79,7 @@ class TextParaNode(WikiSeqNode): | |||
75 | string = self.parser.fmtpara(string) + '\n\n' | 79 | string = self.parser.fmtpara(string) + '\n\n' |
76 | return string | 80 | return string |
77 | 81 | ||
82 | |||
78 | class TextItNode(WikiSeqNode): | 83 | class TextItNode(WikiSeqNode): |
79 | def format(self): | 84 | def format(self): |
80 | string = "" | 85 | string = "" |
@@ -83,7 +88,8 @@ class TextItNode(WikiSeqNode): | |||
83 | if s: | 88 | if s: |
84 | string += " " + s | 89 | string += " " + s |
85 | return "_" + string.lstrip(" ") + "_" | 90 | return "_" + string.lstrip(" ") + "_" |
86 | 91 | ||
92 | |||
87 | class TextBoldNode(WikiSeqNode): | 93 | class TextBoldNode(WikiSeqNode): |
88 | def format(self): | 94 | def format(self): |
89 | string = "" | 95 | string = "" |
@@ -95,6 +101,7 @@ class TextBoldNode(WikiSeqNode): | |||
95 | string += x.format() | 101 | string += x.format() |
96 | return string.upper() | 102 | return string.upper() |
97 | 103 | ||
104 | |||
98 | class TextLinkNode(WikiSeqNode): | 105 | class TextLinkNode(WikiSeqNode): |
99 | def format(self): | 106 | def format(self): |
100 | arg = self.content[0].format() | 107 | arg = self.content[0].format() |
@@ -110,7 +117,7 @@ class TextLinkNode(WikiSeqNode): | |||
110 | return "" | 117 | return "" |
111 | if len(s) > 1 and s[1] == 'thumb': | 118 | if len(s) > 1 and s[1] == 'thumb': |
112 | return "" | 119 | return "" |
113 | (qual,sep,tgt) = arg.partition(':') | 120 | (qual, sep, tgt) = arg.partition(':') |
114 | if tgt != '': | 121 | if tgt != '': |
115 | ns = self.parser.wiki_ns_name(qual) | 122 | ns = self.parser.wiki_ns_name(qual) |
116 | if ns: | 123 | if ns: |
@@ -138,11 +145,13 @@ class TextLinkNode(WikiSeqNode): | |||
138 | return arg | 145 | return arg |
139 | else: | 146 | else: |
140 | return text | 147 | return text |
141 | 148 | ||
149 | |||
142 | class TextTmplNode(TextLinkNode): | 150 | class TextTmplNode(TextLinkNode): |
143 | def format(self): | 151 | def format(self): |
144 | return '[' + super(TextTmplNode, self).format() + ']' | 152 | return '[' + super(TextTmplNode, self).format() + ']' |
145 | 153 | ||
154 | |||
146 | class TextBarNode(WikiNode): | 155 | class TextBarNode(WikiNode): |
147 | def format(self): | 156 | def format(self): |
148 | w = self.parser.width | 157 | w = self.parser.width |
@@ -150,6 +159,7 @@ class TextBarNode(WikiNode): | |||
150 | w = 5 | 159 | w = 5 |
151 | return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" | 160 | return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" |
152 | 161 | ||
162 | |||
153 | class TextHdrNode(WikiHdrNode): | 163 | class TextHdrNode(WikiHdrNode): |
154 | def format(self): | 164 | def format(self): |
155 | return ("\n" | 165 | return ("\n" |
@@ -157,7 +167,8 @@ class TextHdrNode(WikiHdrNode): | |||
157 | + " " | 167 | + " " |
158 | + self.content.format().lstrip(" ") | 168 | + self.content.format().lstrip(" ") |
159 | + "\n\n") | 169 | + "\n\n") |
160 | 170 | ||
171 | |||
161 | class TextRefNode(WikiRefNode): | 172 | class TextRefNode(WikiRefNode): |
162 | def format(self): | 173 | def format(self): |
163 | text = self.content.format() | 174 | text = self.content.format() |
@@ -166,6 +177,7 @@ class TextRefNode(WikiRefNode): | |||
166 | else: | 177 | else: |
167 | return "see " + self.ref | 178 | return "see " + self.ref |
168 | 179 | ||
180 | |||
169 | class TextEnvNode(WikiEnvNode): | 181 | class TextEnvNode(WikiEnvNode): |
170 | def format(self): | 182 | def format(self): |
171 | type = self.envtype | 183 | type = self.envtype |
@@ -188,16 +200,16 @@ class TextEnvNode(WikiEnvNode): | |||
188 | string += self.parser.indent(lev-1, x) | 200 | string += self.parser.indent(lev-1, x) |
189 | else: | 201 | else: |
190 | string += self.parser.indent(lev+3, x) | 202 | string += self.parser.indent(lev+3, x) |
191 | |||
192 | if not string.endswith("\n"): | 203 | if not string.endswith("\n"): |
193 | string += "\n" | 204 | string += "\n" |
194 | |||
195 | return string | 205 | return string |
196 | 206 | ||
207 | |||
197 | class TextIndNode(WikiIndNode): | 208 | class TextIndNode(WikiIndNode): |
198 | def format(self): | 209 | def format(self): |
199 | return (" " * self.level) + self.content.format() + '\n' | 210 | return (" " * self.level) + self.content.format() + '\n' |
200 | 211 | ||
212 | |||
201 | class TextTagNode(WikiTagNode): | 213 | class TextTagNode(WikiTagNode): |
202 | def format(self): | 214 | def format(self): |
203 | if self.tag == 'code': | 215 | if self.tag == 'code': |
@@ -215,8 +227,8 @@ class TextTagNode(WikiTagNode): | |||
215 | if self.args: | 227 | if self.args: |
216 | s += ' ' + str(self.args) | 228 | s += ' ' + str(self.args) |
217 | s += '>' + self.content.format() + '</' + self.tag + '>' | 229 | s += '>' + self.content.format() + '</' + self.tag + '>' |
218 | return s | 230 | return s |
219 | 231 | ||
220 | 232 | ||
221 | class TextWikiMarkup(WikiMarkup): | 233 | class TextWikiMarkup(WikiMarkup): |
222 | """A Wiki markup to plain text translator. | 234 | """A Wiki markup to plain text translator. |
@@ -228,7 +240,7 @@ class TextWikiMarkup(WikiMarkup): | |||
228 | x.parse() | 240 | x.parse() |
229 | # Print it as plain text: | 241 | # Print it as plain text: |
230 | print(str(x)) | 242 | print(str(x)) |
231 | 243 | ||
232 | """ | 244 | """ |
233 | 245 | ||
234 | # Output width | 246 | # Output width |
@@ -243,7 +255,7 @@ class TextWikiMarkup(WikiMarkup): | |||
243 | 255 | ||
244 | # Array of footnote references | 256 | # Array of footnote references |
245 | references = [] | 257 | references = [] |
246 | 258 | ||
247 | def __init__(self, *args, **keywords): | 259 | def __init__(self, *args, **keywords): |
248 | """Create a TextWikiMarkup object. | 260 | """Create a TextWikiMarkup object. |
249 | 261 | ||
@@ -254,16 +266,16 @@ class TextWikiMarkup(WikiMarkup): | |||
254 | Most arguments have the same meaning as in the WikiMarkup constructor. | 266 | Most arguments have the same meaning as in the WikiMarkup constructor. |
255 | 267 | ||
256 | Class-specific arguments: |