diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
commit | 28072898f1bd9a925d73ac187d560198d6345524 (patch) | |
tree | a46d781fb85d9dda61fc8f68e0ba6ec43d60ce55 | |
parent | 75672b57a2d63f01d00795fe8d661d1efe7b6e8d (diff) | |
download | wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.gz wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.bz2 |
Improve tag handling and debugging
* wikimarkup.py: Rewrite tag recognition.
Implement dump method.
* wikicvt.py: New options -D (--dump) and -t dump.
* wiki2html.py (input_tag): Remove method
(str_tag): Change handling of tags
* wiki2texi.py: Likewise.
* wiki2text.py: Likewise.
-rw-r--r-- | wiki2html.py | 28 | ||||
-rw-r--r-- | wiki2texi.py | 37 | ||||
-rw-r--r-- | wiki2text.py | 27 | ||||
-rwxr-xr-x | wikicvt.py | 26 | ||||
-rw-r--r-- | wikimarkup.py | 317 |
5 files changed, 309 insertions, 126 deletions
diff --git a/wiki2html.py b/wiki2html.py index 441bc76..66939c4 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -169,31 +169,27 @@ class HtmlWikiMarkup (WikiMarkup): | |||
169 | self.envt[type]["elt"][n]) | 169 | self.envt[type]["elt"][n]) |
170 | return "<%s>%s</%s>" % (self.envt[type]["hdr"], | 170 | return "<%s>%s</%s>" % (self.envt[type]["hdr"], |
171 | string, | 171 | string, |
172 | self.envt[type]["hdr"]) | 172 | self.envt[type]["hdr"]) |
173 | return string | 173 | return string |
174 | 174 | ||
175 | supported_tags = [ 'nowiki', 'code' ] | ||
176 | def input_tag(self, tag): | ||
177 | return tag['tag'] in self.supported_tags | ||
178 | |||
179 | def str_tag(self, elt): | 175 | def str_tag(self, elt): |
180 | if elt['tag'] == 'nowiki': | 176 | if elt['tag'] == 'nowiki': |
181 | return '<pre>' + elt['content'] + '</pre>' | 177 | return '<pre>' + self.format(elt['content']) + '</pre>' |
182 | elif elt['tag'] == 'code': | 178 | elif elt['tag'] == 'code': |
183 | kwdict = { | 179 | self.nested += 1 |
184 | 'nested': self.nested + 1, | 180 | s = self.format(elt['content']) |
185 | 'lang': self.lang, | 181 | self.nested -= 1 |
186 | 'text': elt['content'], | 182 | return '<pre><code>' + s + '</code></pre>' #FIXME |
187 | 'html_base': self.html_base, | 183 | else: |
188 | 'image_base': self.image_base, | 184 | s = '<' + elt['tag'] |
189 | 'media_base': self.media_base } | 185 | if elt['args']: |
190 | markup = HtmlWiktionaryMarkup(**kwdict) | 186 | s += ' ' + elt['args'] |
191 | markup.debug_level = self.debug_level | 187 | s += '>' |
192 | markup.parse() | 188 | s += self.format(elt['content']) |
193 | return '<pre><code>' + str(markup) + '</code></pre>' #FIXME | 189 | return s + '</' + elt['tag'] + '>' |
194 | 190 | ||
195 | def str_para(self, elt): | 191 | def str_para(self, elt): |
196 | string = ""; | 192 | string = ""; |
197 | for x in elt['content']: | 193 | for x in elt['content']: |
198 | string += self.format(x) | 194 | string += self.format(x) |
199 | return "<p>" + string + "</p>" | 195 | return "<p>" + string + "</p>" |
diff --git a/wiki2texi.py b/wiki2texi.py index 7cc67bd..0b3eb77 100644 --- a/wiki2texi.py +++ b/wiki2texi.py | |||
@@ -116,35 +116,34 @@ class TexiWikiMarkup (WikiMarkup): | |||
116 | for x in elt['content']: | 116 | for x in elt['content']: |
117 | string += self.format(x) | 117 | string += self.format(x) |
118 | return string | 118 | return string |
119 | else: | 119 | else: |
120 | return str(elt) | 120 | return str(elt) |
121 | 121 | ||
122 | supported_tags = [ 'nowiki', 'code' ] | ||
123 | def input_tag(self, tag): | ||
124 | return tag['tag'] in self.supported_tags | ||
125 | |||
126 | def str_tag(self, elt): | 122 | def str_tag(self, elt): |
127 | if elt['tag'] == 'nowiki': | 123 | if elt['tag'] == 'nowiki': |
128 | return '@example\n' + elt['content'] + '@end example\n' | 124 | return '@example\n' + self.format(elt['content']) + '@end example\n' |
129 | elif elt['tag'] == 'code': | 125 | elif elt['tag'] == 'code': |
130 | kwdict = { | 126 | self.nested += 1 |
131 | 'nested': self.nested + 1, | 127 | s = self.format(elt['content']) |
132 | 'lang': self.lang, | 128 | self.nested -= 1 |
133 | 'text': elt['content'], | ||
134 | 'html_base': self.html_base, | ||
135 | 'image_base': self.image_base, | ||
136 | 'media_base': self.media_base } | ||
137 | markup = TexiWikiMarkup(**kwdict) | ||
138 | markup.debug_level = self.debug_level | ||
139 | markup.parse() | ||
140 | s = str(markup) | ||
141 | if not s.endswith("\n"): | 129 | if not s.endswith("\n"): |
142 | s += "\n"; | 130 | s += "\n" |
143 | return '@example\n' + s + '@end example\n' | 131 | return '@example\n' + s + '@end example\n' |
144 | 132 | elif elt['tag'] == 'tt': | |
133 | self.nested += 1 | ||
134 | s = self.format(elt['content']) | ||
135 | self.nested -= 1 | ||
136 | return "@code{%s}" % s | ||
137 | else: | ||
138 | s = '<' + elt['tag'] | ||
139 | if elt['args']: | ||
140 | s += ' ' + elt['args'] | ||
141 | s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' | ||
142 | return s | ||
143 | |||
145 | def str_para(self, elt): | 144 | def str_para(self, elt): |
146 | string = ""; | 145 | string = ""; |
147 | for x in elt['content']: | 146 | for x in elt['content']: |
148 | string += self.format(x) | 147 | string += self.format(x) |
149 | return "\n" + string + "\n" | 148 | return "\n" + string + "\n" |
150 | 149 | ||
@@ -153,13 +152,13 @@ class TexiWikiMarkup (WikiMarkup): | |||
153 | for x in elt['content']: | 152 | for x in elt['content']: |
154 | string += self.format(x) | 153 | string += self.format(x) |
155 | if self.nested: | 154 | if self.nested: |
156 | return string | 155 | return string |
157 | if not string.endswith("\n"): | 156 | if not string.endswith("\n"): |
158 | string += "\n"; | 157 | string += "\n"; |
159 | return '@example\n' + string + '@end example\n' | 158 | return '\n@example\n' + string + '@end example\n' |
160 | 159 | ||
161 | def concat(self, eltlist): | 160 | def concat(self, eltlist): |
162 | string = "" | 161 | string = "" |
163 | for x in eltlist: | 162 | for x in eltlist: |
164 | string += self.format(x) | 163 | string += self.format(x) |
165 | return string | 164 | return string |
diff --git a/wiki2text.py b/wiki2text.py index 27a7051..d4cab81 100644 --- a/wiki2text.py +++ b/wiki2text.py | |||
@@ -139,31 +139,26 @@ class TextWikiMarkup (WikiMarkup): | |||
139 | length = 0 | 139 | length = 0 |
140 | linebuf = "" | 140 | linebuf = "" |
141 | linebuf += " " * wsc + s | 141 | linebuf += " " * wsc + s |
142 | length += wsc + wlen | 142 | length += wsc + wlen |
143 | return output + linebuf | 143 | return output + linebuf |
144 | 144 | ||
145 | supported_tags = [ 'nowiki', 'code' ] | ||
146 | def input_tag(self, tag): | ||
147 | return tag['tag'] in self.supported_tags | ||
148 | |||
149 | def str_tag(self, elt): | 145 | def str_tag(self, elt): |
150 | if elt['tag'] == 'nowiki': | 146 | if elt['tag'] == 'nowiki': |
151 | return elt['content'] | 147 | return self.format(elt['content']) |
152 | elif elt['tag'] == 'code': | 148 | elif elt['tag'] == 'code': |
153 | kwdict = { | 149 | self.nested += 1 |
154 | 'nested': self.nested + 1, | 150 | s = self.format(elt['content']) |
155 | 'lang': self.lang, | 151 | self.nested -= 1 |
156 | 'text': elt['content'], | 152 | return s #FIXME |
157 | 'html_base': self.html_base, | 153 | else: |
158 | 'image_base': self.image_base, | 154 | s = '<' + elt['tag'] |
159 | 'media_base': self.media_base } | 155 | if elt['args']: |
160 | markup = TextWiktionaryMarkup(**kwdict) | 156 | s += ' ' + elt['args'] |
161 | markup.debug_level = self.debug_level | 157 | s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' |
162 | markup.parse() | 158 | return s |
163 | return str(markup) | ||
164 | 159 | ||
165 | def format(self, elt): | 160 | def format(self, elt): |
166 | if elt['type'] == 'TEXT': | 161 | if elt['type'] == 'TEXT': |
167 | if isinstance(elt['content'],list): | 162 | if isinstance(elt['content'],list): |
168 | string = "" | 163 | string = "" |
169 | for s in elt['content']: | 164 | for s in elt['content']: |
diff --git a/wikicvt.py b/wikicvt.py --- a/wikicvt.py +++ b/wikicvt.py | |||
@@ -14,24 +14,37 @@ | |||
14 | # | 14 | # |
15 | # You should have received a copy of the GNU General Public License | 15 | # You should have received a copy of the GNU General Public License |
16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 16 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
17 | 17 | ||
18 | import sys | 18 | import sys |
19 | import getopt | 19 | import getopt |
20 | import StringIO | ||
20 | from wiki2html import * | 21 | from wiki2html import * |
21 | from wiki2text import * | 22 | from wiki2text import * |
22 | from wiki2texi import * | 23 | from wiki2texi import * |
23 | 24 | ||
25 | class DumpWikiMarkup (WikiMarkup): | ||
26 | def __str__(self): | ||
27 | if self.tree: | ||
28 | s = StringIO.StringIO() | ||
29 | self.dump(self.tree, 0, s) | ||
30 | return s.getvalue() | ||
31 | else: | ||
32 | return "" | ||
33 | |||
24 | def usage(code=0): | 34 | def usage(code=0): |
25 | print """ | 35 | print """ |
26 | usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] | 36 | usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] |
27 | [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file | 37 | [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file |
28 | """ % (sys.argv[0]) | 38 | """ % (sys.argv[0]) |
29 | sys.exit(code) | 39 | sys.exit(code) |
30 | 40 | ||
31 | handlers = { | 41 | handlers = { |
42 | 'dump': { | ||
43 | 'default': DumpWikiMarkup | ||
44 | }, | ||
32 | 'html': { | 45 | 'html': { |