diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-12 23:11:40 +0300 |
commit | 28072898f1bd9a925d73ac187d560198d6345524 (patch) | |
tree | a46d781fb85d9dda61fc8f68e0ba6ec43d60ce55 | |
parent | 75672b57a2d63f01d00795fe8d661d1efe7b6e8d (diff) | |
download | wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.gz wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.bz2 |
Improve tag handling and debugging
* wikimarkup.py: Rewrite tag recognition.
Implement dump method.
* wikicvt.py: New options -D (--dump) and -t dump.
* wiki2html.py (input_tag): Remove method
(str_tag): Change handling of tags
* wiki2texi.py: Likewise.
* wiki2text.py: Likewise.
-rw-r--r-- | wiki2html.py | 28 | ||||
-rw-r--r-- | wiki2texi.py | 37 | ||||
-rw-r--r-- | wiki2text.py | 27 | ||||
-rwxr-xr-x | wikicvt.py | 26 | ||||
-rw-r--r-- | wikimarkup.py | 317 |
5 files changed, 309 insertions, 126 deletions
diff --git a/wiki2html.py b/wiki2html.py index 441bc76..66939c4 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -172,25 +172,21 @@ class HtmlWikiMarkup (WikiMarkup): | |||
172 | self.envt[type]["hdr"]) | 172 | self.envt[type]["hdr"]) |
173 | return string | 173 | return string |
174 | 174 | ||
175 | supported_tags = [ 'nowiki', 'code' ] | ||
176 | def input_tag(self, tag): | ||
177 | return tag['tag'] in self.supported_tags | ||
178 | |||
179 | def str_tag(self, elt): | 175 | def str_tag(self, elt): |
180 | if elt['tag'] == 'nowiki': | 176 | if elt['tag'] == 'nowiki': |
181 | return '<pre>' + elt['content'] + '</pre>' | 177 | return '<pre>' + self.format(elt['content']) + '</pre>' |
182 | elif elt['tag'] == 'code': | 178 | elif elt['tag'] == 'code': |
183 | kwdict = { | 179 | self.nested += 1 |
184 | 'nested': self.nested + 1, | 180 | s = self.format(elt['content']) |
185 | 'lang': self.lang, | 181 | self.nested -= 1 |
186 | 'text': elt['content'], | 182 | return '<pre><code>' + s + '</code></pre>' #FIXME |
187 | 'html_base': self.html_base, | 183 | else: |
188 | 'image_base': self.image_base, | 184 | s = '<' + elt['tag'] |
189 | 'media_base': self.media_base } | 185 | if elt['args']: |
190 | markup = HtmlWiktionaryMarkup(**kwdict) | 186 | s += ' ' + elt['args'] |
191 | markup.debug_level = self.debug_level | 187 | s += '>' |
192 | markup.parse() | 188 | s += self.format(elt['content']) |
193 | return '<pre><code>' + str(markup) + '</code></pre>' #FIXME | 189 | return s + '</' + elt['tag'] + '>' |
194 | 190 | ||
195 | def str_para(self, elt): | 191 | def str_para(self, elt): |
196 | string = ""; | 192 | string = ""; |
diff --git a/wiki2texi.py b/wiki2texi.py index 7cc67bd..0b3eb77 100644 --- a/wiki2texi.py +++ b/wiki2texi.py | |||
@@ -119,29 +119,28 @@ class TexiWikiMarkup (WikiMarkup): | |||
119 | else: | 119 | else: |
120 | return str(elt) | 120 | return str(elt) |
121 | 121 | ||
122 | supported_tags = [ 'nowiki', 'code' ] | ||
123 | def input_tag(self, tag): | ||
124 | return tag['tag'] in self.supported_tags | ||
125 | |||
126 | def str_tag(self, elt): | 122 | def str_tag(self, elt): |
127 | if elt['tag'] == 'nowiki': | 123 | if elt['tag'] == 'nowiki': |
128 | return '@example\n' + elt['content'] + '@end example\n' | 124 | return '@example\n' + self.format(elt['content']) + '@end example\n' |
129 | elif elt['tag'] == 'code': | 125 | elif elt['tag'] == 'code': |
130 | kwdict = { | 126 | self.nested += 1 |
131 | 'nested': self.nested + 1, | 127 | s = self.format(elt['content']) |
132 | 'lang': self.lang, | 128 | self.nested -= 1 |
133 | 'text': elt['content'], | ||
134 | 'html_base': self.html_base, | ||
135 | 'image_base': self.image_base, | ||
136 | 'media_base': self.media_base } | ||
137 | markup = TexiWikiMarkup(**kwdict) | ||
138 | markup.debug_level = self.debug_level | ||
139 | markup.parse() | ||
140 | s = str(markup) | ||
141 | if not s.endswith("\n"): | 129 | if not s.endswith("\n"): |
142 | s += "\n"; | 130 | s += "\n" |
143 | return '@example\n' + s + '@end example\n' | 131 | return '@example\n' + s + '@end example\n' |
144 | 132 | elif elt['tag'] == 'tt': | |
133 | self.nested += 1 | ||
134 | s = self.format(elt['content']) | ||
135 | self.nested -= 1 | ||
136 | return "@code{%s}" % s | ||
137 | else: | ||
138 | s = '<' + elt['tag'] | ||
139 | if elt['args']: | ||
140 | s += ' ' + elt['args'] | ||
141 | s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' | ||
142 | return s | ||
143 | |||
145 | def str_para(self, elt): | 144 | def str_para(self, elt): |
146 | string = ""; | 145 | string = ""; |
147 | for x in elt['content']: | 146 | for x in elt['content']: |
@@ -156,7 +155,7 @@ class TexiWikiMarkup (WikiMarkup): | |||
156 | return string | 155 | return string |
157 | if not string.endswith("\n"): | 156 | if not string.endswith("\n"): |
158 | string += "\n"; | 157 | string += "\n"; |
159 | return '@example\n' + string + '@end example\n' | 158 | return '\n@example\n' + string + '@end example\n' |
160 | 159 | ||
161 | def concat(self, eltlist): | 160 | def concat(self, eltlist): |
162 | string = "" | 161 | string = "" |
diff --git a/wiki2text.py b/wiki2text.py index 27a7051..d4cab81 100644 --- a/wiki2text.py +++ b/wiki2text.py | |||
@@ -142,25 +142,20 @@ class TextWikiMarkup (WikiMarkup): | |||
142 | length += wsc + wlen | 142 | length += wsc + wlen |
143 | return output + linebuf | 143 | return output + linebuf |
144 | 144 | ||
145 | supported_tags = [ 'nowiki', 'code' ] | ||
146 | def input_tag(self, tag): | ||
147 | return tag['tag'] in self.supported_tags | ||
148 | |||
149 | def str_tag(self, elt): | 145 | def str_tag(self, elt): |
150 | if elt['tag'] == 'nowiki': | 146 | if elt['tag'] == 'nowiki': |
151 | return elt['content'] | 147 | return self.format(elt['content']) |
152 | elif elt['tag'] == 'code': | 148 | elif elt['tag'] == 'code': |
153 | kwdict = { | 149 | self.nested += 1 |
154 | 'nested': self.nested + 1, | 150 | s = self.format(elt['content']) |
155 | 'lang': self.lang, | 151 | self.nested -= 1 |
156 | 'text': elt['content'], | 152 | return s #FIXME |
157 | 'html_base': self.html_base, | 153 | else: |
158 | 'image_base': self.image_base, | 154 | s = '<' + elt['tag'] |
159 | 'media_base': self.media_base } | 155 | if elt['args']: |
160 | markup = TextWiktionaryMarkup(**kwdict) | 156 | s += ' ' + elt['args'] |
161 | markup.debug_level = self.debug_level | 157 | s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' |
162 | markup.parse() | 158 | return s |
163 | return str(markup) | ||
164 | 159 | ||
165 | def format(self, elt): | 160 | def format(self, elt): |
166 | if elt['type'] == 'TEXT': | 161 | if elt['type'] == 'TEXT': |
diff --git a/wikicvt.py b/wikicvt.py --- a/wikicvt.py +++ b/wikicvt.py | |||
@@ -17,10 +17,20 @@ | |||
17 | 17 | ||
18 | import sys | 18 | import sys |
19 | import getopt | 19 | import getopt |
20 | import StringIO | ||
20 | from wiki2html import * | 21 | from wiki2html import * |
21 | from wiki2text import * | 22 | from wiki2text import * |
22 | from wiki2texi import * | 23 | from wiki2texi import * |
23 | 24 | ||
25 | class DumpWikiMarkup (WikiMarkup): | ||
26 | def __str__(self): | ||
27 | if self.tree: | ||
28 | s = StringIO.StringIO() | ||
29 | self.dump(self.tree, 0, s) | ||
30 | return s.getvalue() | ||
31 | else: | ||
32 | return "" | ||
33 | |||
24 | def usage(code=0): | 34 | def usage(code=0): |
25 | print """ | 35 | print """ |
26 | usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] | 36 | usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] |
@@ -29,6 +39,9 @@ usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=va | |||
29 | sys.exit(code) | 39 | sys.exit(code) |
30 | 40 | ||
31 | handlers = { | 41 | handlers = { |
42 | 'dump': { | ||
43 | 'default': DumpWikiMarkup | ||
44 | }, | ||
32 | 'html': { | 45 | 'html': { |
33 | 'default': HtmlWikiMarkup, | 46 | 'default': HtmlWikiMarkup, |
34 | 'wiktionary': HtmlWiktionaryMarkup | 47 | 'wiktionary': HtmlWiktionaryMarkup |
@@ -51,9 +64,10 @@ def main(): | |||
51 | debug = 0 | 64 | debug = 0 |
52 | 65 | ||
53 | try: | 66 | try: |
54 | opts, args = getopt.getopt(sys.argv[1:], "d:I:hl:o:t:v", | 67 | opts, args = getopt.getopt(sys.argv[1:], "Dd:I:hl:o:t:v", |
55 | ["debug=", "help", "lang=", "option=", | 68 | ["dump", |
56 | "to", "type", "input-text", "input-type", | 69 | "debug=", "help", "lang=", "option=", |
70 | "to=", "type=", "input-text", "input-type=", | ||
57 | "verbose" ]) | 71 | "verbose" ]) |
58 | except getopt.GetoptError: | 72 | except getopt.GetoptError: |
59 | usage(1) | 73 | usage(1) |
@@ -77,6 +91,8 @@ def main(): | |||
77 | input_text = True | 91 | input_text = True |
78 | elif o in ("-d", "--debug"): | 92 | elif o in ("-d", "--debug"): |
79 | debug = eval(a) | 93 | debug = eval(a) |
94 | elif o in ("-D", "--dump"): | ||
95 | otype = 'dump' | ||
80 | 96 | ||
81 | if len(args) == 1: | 97 | if len(args) == 1: |
82 | if args[0] == '-': | 98 | if args[0] == '-': |
@@ -88,8 +104,8 @@ def main(): | |||
88 | 104 | ||
89 | kwdict['lang']=lang | 105 | kwdict['lang']=lang |
90 | 106 | ||
91 | if handlers.has_key(otype): | 107 | if otype in handlers: |
92 | if handlers[otype].has_key(itype): | 108 | if itype in handlers[otype]: |
93 | markup = handlers[otype][itype](**kwdict) | 109 | markup = handlers[otype][itype](**kwdict) |
94 | markup.debug_level = debug | 110 | markup.debug_level = debug |
95 | markup.parse() | 111 | markup.parse() |
diff --git a/wikimarkup.py b/wikimarkup.py index fde1ec1..9a79d1e 100644 --- a/wikimarkup.py +++ b/wikimarkup.py | |||
@@ -22,9 +22,9 @@ from types import * | |||
22 | __all__ = [ "BaseWikiMarkup", "WikiMarkup", | 22 | __all__ = [ "BaseWikiMarkup", "WikiMarkup", |
23 |