summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-12 23:11:40 +0300
committer Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-12 23:11:40 +0300
commit 28072898f1bd9a925d73ac187d560198d6345524 (patch)
tree a46d781fb85d9dda61fc8f68e0ba6ec43d60ce55
parent 75672b57a2d63f01d00795fe8d661d1efe7b6e8d (diff)
download wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.gz
wikitrans-28072898f1bd9a925d73ac187d560198d6345524.tar.bz2
Improve tag handling and debugging
* wikimarkup.py: Rewrite tag recognition. Implement dump method.
* wikicvt.py: New options -D (--dump), and -t dump.
* wiki2html.py (input_tag): Remove method.
(str_tag): Change handling of tags.
* wiki2texi.py: Likewise.
* wiki2text.py: Likewise.
-rw-r--r-- wiki2html.py 28
-rw-r--r-- wiki2texi.py 37
-rw-r--r-- wiki2text.py 27
-rwxr-xr-x wikicvt.py 26
-rw-r--r-- wikimarkup.py 317
5 files changed, 309 insertions, 126 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 441bc76..66939c4 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -169,31 +169,27 @@ class HtmlWikiMarkup (WikiMarkup):
169 self.envt[type]["elt"][n]) 169 self.envt[type]["elt"][n])
170 return "<%s>%s</%s>" % (self.envt[type]["hdr"], 170 return "<%s>%s</%s>" % (self.envt[type]["hdr"],
171 string, 171 string,
172 self.envt[type]["hdr"]) 172 self.envt[type]["hdr"])
173 return string 173 return string
174 174
175 supported_tags = [ 'nowiki', 'code' ]
176 def input_tag(self, tag):
177 return tag['tag'] in self.supported_tags
178
179 def str_tag(self, elt): 175 def str_tag(self, elt):
180 if elt['tag'] == 'nowiki': 176 if elt['tag'] == 'nowiki':
181 return '<pre>' + elt['content'] + '</pre>' 177 return '<pre>' + self.format(elt['content']) + '</pre>'
182 elif elt['tag'] == 'code': 178 elif elt['tag'] == 'code':
183 kwdict = { 179 self.nested += 1
184 'nested': self.nested + 1, 180 s = self.format(elt['content'])
185 'lang': self.lang, 181 self.nested -= 1
186 'text': elt['content'], 182 return '<pre><code>' + s + '</code></pre>' #FIXME
187 'html_base': self.html_base, 183 else:
188 'image_base': self.image_base, 184 s = '<' + elt['tag']
189 'media_base': self.media_base } 185 if elt['args']:
190 markup = HtmlWiktionaryMarkup(**kwdict) 186 s += ' ' + elt['args']
191 markup.debug_level = self.debug_level 187 s += '>'
192 markup.parse() 188 s += self.format(elt['content'])
193 return '<pre><code>' + str(markup) + '</code></pre>' #FIXME 189 return s + '</' + elt['tag'] + '>'
194 190
195 def str_para(self, elt): 191 def str_para(self, elt):
196 string = ""; 192 string = "";
197 for x in elt['content']: 193 for x in elt['content']:
198 string += self.format(x) 194 string += self.format(x)
199 return "<p>" + string + "</p>" 195 return "<p>" + string + "</p>"
diff --git a/wiki2texi.py b/wiki2texi.py
index 7cc67bd..0b3eb77 100644
--- a/wiki2texi.py
+++ b/wiki2texi.py
@@ -116,35 +116,34 @@ class TexiWikiMarkup (WikiMarkup):
116 for x in elt['content']: 116 for x in elt['content']:
117 string += self.format(x) 117 string += self.format(x)
118 return string 118 return string
119 else: 119 else:
120 return str(elt) 120 return str(elt)
121 121
122 supported_tags = [ 'nowiki', 'code' ]
123 def input_tag(self, tag):
124 return tag['tag'] in self.supported_tags
125
126 def str_tag(self, elt): 122 def str_tag(self, elt):
127 if elt['tag'] == 'nowiki': 123 if elt['tag'] == 'nowiki':
128 return '@example\n' + elt['content'] + '@end example\n' 124 return '@example\n' + self.format(elt['content']) + '@end example\n'
129 elif elt['tag'] == 'code': 125 elif elt['tag'] == 'code':
130 kwdict = { 126 self.nested += 1
131 'nested': self.nested + 1, 127 s = self.format(elt['content'])
132 'lang': self.lang, 128 self.nested -= 1
133 'text': elt['content'],
134 'html_base': self.html_base,
135 'image_base': self.image_base,
136 'media_base': self.media_base }
137 markup = TexiWikiMarkup(**kwdict)
138 markup.debug_level = self.debug_level
139 markup.parse()
140 s = str(markup)
141 if not s.endswith("\n"): 129 if not s.endswith("\n"):
142 s += "\n"; 130 s += "\n"
143 return '@example\n' + s + '@end example\n' 131 return '@example\n' + s + '@end example\n'
144 132 elif elt['tag'] == 'tt':
133 self.nested += 1
134 s = self.format(elt['content'])
135 self.nested -= 1
136 return "@code{%s}" % s
137 else:
138 s = '<' + elt['tag']
139 if elt['args']:
140 s += ' ' + elt['args']
141 s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
142 return s
143
145 def str_para(self, elt): 144 def str_para(self, elt):
146 string = ""; 145 string = "";
147 for x in elt['content']: 146 for x in elt['content']:
148 string += self.format(x) 147 string += self.format(x)
149 return "\n" + string + "\n" 148 return "\n" + string + "\n"
150 149
@@ -153,13 +152,13 @@ class TexiWikiMarkup (WikiMarkup):
153 for x in elt['content']: 152 for x in elt['content']:
154 string += self.format(x) 153 string += self.format(x)
155 if self.nested: 154 if self.nested:
156 return string 155 return string
157 if not string.endswith("\n"): 156 if not string.endswith("\n"):
158 string += "\n"; 157 string += "\n";
159 return '@example\n' + string + '@end example\n' 158 return '\n@example\n' + string + '@end example\n'
160 159
161 def concat(self, eltlist): 160 def concat(self, eltlist):
162 string = "" 161 string = ""
163 for x in eltlist: 162 for x in eltlist:
164 string += self.format(x) 163 string += self.format(x)
165 return string 164 return string
diff --git a/wiki2text.py b/wiki2text.py
index 27a7051..d4cab81 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -139,31 +139,26 @@ class TextWikiMarkup (WikiMarkup):
139 length = 0 139 length = 0
140 linebuf = "" 140 linebuf = ""
141 linebuf += " " * wsc + s 141 linebuf += " " * wsc + s
142 length += wsc + wlen 142 length += wsc + wlen
143 return output + linebuf 143 return output + linebuf
144 144
145 supported_tags = [ 'nowiki', 'code' ]
146 def input_tag(self, tag):
147 return tag['tag'] in self.supported_tags
148
149 def str_tag(self, elt): 145 def str_tag(self, elt):
150 if elt['tag'] == 'nowiki': 146 if elt['tag'] == 'nowiki':
151 return elt['content'] 147 return self.format(elt['content'])
152 elif elt['tag'] == 'code': 148 elif elt['tag'] == 'code':
153 kwdict = { 149 self.nested += 1
154 'nested': self.nested + 1, 150 s = self.format(elt['content'])
155 'lang': self.lang, 151 self.nested -= 1
156 'text': elt['content'], 152 return s #FIXME
157 'html_base': self.html_base, 153 else:
158 'image_base': self.image_base, 154 s = '<' + elt['tag']
159 'media_base': self.media_base } 155 if elt['args']:
160 markup = TextWiktionaryMarkup(**kwdict) 156 s += ' ' + elt['args']
161 markup.debug_level = self.debug_level 157 s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
162 markup.parse() 158 return s
163 return str(markup)
164 159
165 def format(self, elt): 160 def format(self, elt):
166 if elt['type'] == 'TEXT': 161 if elt['type'] == 'TEXT':
167 if isinstance(elt['content'],list): 162 if isinstance(elt['content'],list):
168 string = "" 163 string = ""
169 for s in elt['content']: 164 for s in elt['content']:
diff --git a/wikicvt.py b/wikicvt.py
index e61e28b..c8ca887 100755
--- a/wikicvt.py
+++ b/wikicvt.py
@@ -14,24 +14,37 @@
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18import sys 18import sys
19import getopt 19import getopt
20import StringIO
20from wiki2html import * 21from wiki2html import *
21from wiki2text import * 22from wiki2text import *
22from wiki2texi import * 23from wiki2texi import *
23 24
25class DumpWikiMarkup (WikiMarkup):
26 def __str__(self):
27 if self.tree:
28 s = StringIO.StringIO()
29 self.dump(self.tree, 0, s)
30 return s.getvalue()
31 else:
32 return ""
33
24def usage(code=0): 34def usage(code=0):
25 print """ 35 print """
26usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] 36usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val]
27 [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file 37 [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file
28""" % (sys.argv[0]) 38""" % (sys.argv[0])
29 sys.exit(code) 39 sys.exit(code)
30 40
31handlers = { 41handlers = {
42 'dump': {
43 'default': DumpWikiMarkup
44 },
32 'html': { 45 'html': {