summary | refs | log | tree | commit | diff | about
author: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-14 13:09:36 (GMT)
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-14 13:09:36 (GMT)
commit: 4097896542f2279700794104c7c0728beed38cd0 (patch) (unidiff)
tree: 664012755efc30126a0ccef42245ce3068f06adc
parent: 28072898f1bd9a925d73ac187d560198d6345524 (diff)
download: wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.gz
          wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.bz2
Fix processing of environments (numbered/unnumbered lists, definition lists) and indented strings.
* wikimarkup.py (tokread): Always add 'continuation' key to DELIM entries. Delete whitespace following environment delimiters.
(peektkn): Take an optional offset argument.
(parse0): Handle indentations.
* wiki2html.py (str_ind): Use <dl> to produce indentations.
* wiki2texi.py (str_ind): End text with a newline.
* testdata/colon.html: Update.
* testdata/deflist.html: Update.
* testdata/numlist.html: Update.
* testdata/unlist.html: Update.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- testdata/colon.html 4
-rw-r--r-- testdata/deflist.html 3
-rw-r--r-- testdata/numlist.html 2
-rw-r--r-- testdata/unlist.html 2
-rw-r--r-- wiki2html.py 2
-rw-r--r-- wiki2texi.py 2
-rw-r--r-- wikimarkup.py 33
7 files changed, 29 insertions, 19 deletions
diff --git a/testdata/colon.html b/testdata/colon.html
index b9e35ed..2fb6ed3 100644
--- a/testdata/colon.html
+++ b/testdata/colon.html
@@ -1,4 +1,4 @@
1<dl><dd> A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph. 1<dl><dd>A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph.
2Should only be used on talk pages. 2Should only be used on talk pages.
3For articles, you probably want the blockquote tag.</p><dl><dd> We use 1 colon to indent once.<dl><dd> We use 2 colons to indent twice.<dl><dd> 3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl> 3For articles, you probably want the blockquote tag.</p><dl><dd>We use 1 colon to indent once.</dd></dl><dl><dd><dl><dd>We use 2 colons to indent twice.</dd></dl></dd></dl><dl><dd><dl><dd><dl><dd>3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl>
4 4
diff --git a/testdata/deflist.html b/testdata/deflist.html
index 8986533..774f21f 100644
--- a/testdata/deflist.html
+++ b/testdata/deflist.html
@@ -1,2 +1 @@
1<dl><dt>item 1</dt><dd> definition 1</dd><dt>item 2</dt><dd> definition 2-1</dd><dd> definition 2-2</dd></dl> <dl><dt>item 1</dt><dd>definition 1</dd><dt>item 2</dt><dd>definition 2-1</dd><dd>definition 2-2</dd></dl>
2
diff --git a/testdata/numlist.html b/testdata/numlist.html
index ce9dd2d..0ce8a9a 100644
--- a/testdata/numlist.html
+++ b/testdata/numlist.html
@@ -1,2 +1,2 @@
1<ol><li> <i>Numbered lists</i> are:<ol><li> Very organized</li><li> Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li> New numbering starts with 1.</li></ol> 1<ol><li><i>Numbered lists</i> are:<ol><li>Very organized</li><li>Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li>New numbering starts with 1.</li></ol>
2 2
diff --git a/testdata/unlist.html b/testdata/unlist.html
index 87be30f..ac4d32b 100644
--- a/testdata/unlist.html
+++ b/testdata/unlist.html
@@ -1,2 +1,2 @@
1<ul><li> <i>Unordered lists</i> are easy to do:<ul><li> Start every line with a star.<ul><li> More stars indicate a deeper level.</li></ul></li></ul> Previous item continues.<ul><li> A newline</li></ul></li><li> in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul> 1<ul><li><i>Unordered lists</i> are easy to do:<ul><li>Start every line with a star.<ul><li>More stars indicate a deeper level.</li></ul>Previous item continues.</li><li>A newline</li></ul></li><li>in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul>
2 2
diff --git a/wiki2html.py b/wiki2html.py
index 66939c4..f3ea0e3 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -200,13 +200,13 @@ class HtmlWikiMarkup (WikiMarkup):
200 string += self.format(x) 200 string += self.format(x)
201 if self.nested: 201 if self.nested:
202 return string 202 return string
203 return '<pre>' + string + '</pre>' 203 return '<pre>' + string + '</pre>'
204 204
205 def str_ind(self, elt): 205 def str_ind(self, elt):
206 return ("&nbsp;" * 2 * elt['level']) + self.format(elt['content']) 206 return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level']
207 207
208 def format(self, elt): 208 def format(self, elt):
209 if elt['type'] == 'TEXT': 209 if elt['type'] == 'TEXT':
210 if isinstance(elt['content'],list): 210 if isinstance(elt['content'],list):
211 string = "" 211 string = ""
212 for s in elt['content']: 212 for s in elt['content']:
diff --git a/wiki2texi.py b/wiki2texi.py
index 0b3eb77..e9009ec 100644
--- a/wiki2texi.py
+++ b/wiki2texi.py
@@ -183,13 +183,13 @@ class TexiWikiMarkup (WikiMarkup):
183 return s + "\n" 183 return s + "\n"
184 184
185 def str_bar(self): 185 def str_bar(self):
186 return "\n-----\n" # FIXME 186 return "\n-----\n" # FIXME
187 187
188 def str_ind(self, elt): 188 def str_ind(self, elt):
189 return ("@w{ }" * elt['level']) + self.format(elt['content']) 189 return ("@w{ }" * elt['level']) + self.format(elt['content']) + '\n'
190 190
191 def str_env(self, elt): 191 def str_env(self, elt):
192 if elt['envtype'] == 'unnumbered': 192 if elt['envtype'] == 'unnumbered':
193 string = '\n@itemize @bullet\n' 193 string = '\n@itemize @bullet\n'
194 for s in elt['content']: 194 for s in elt['content']:
195 string += '@item ' + self.format(s['content']) + '\n\n' 195 string += '@item ' + self.format(s['content']) + '\n\n'
diff --git a/wikimarkup.py b/wikimarkup.py
index 9a79d1e..0c6d2f2 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -181,20 +181,29 @@ class BaseWikiMarkup(object):
181 if m: 181 if m:
182 if (pos < m.start(0)): 182 if (pos < m.start(0)):
183 yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) 183 yield({'type': 'TEXT', 'content': line[pos:m.start(0)]})
184 pos = m.end(0) 184 pos = m.end(0)
185 185
186 if m and line[m.start(0)] != '<': 186 if m and line[m.start(0)] != '<':
187 if m.group(0)[0] in envtypes and pos < len(line) and line[pos] == ":": 187 content = m.group(0)
188 yield({ 'type': 'DELIM', 188 if content[0] in envtypes:
189 'content': m.group(0), 189 t = { 'type': 'DELIM',
190 'continuation': True }) 190 'content': content,
191 pos += 1 191 'continuation': pos < len(line) and line[pos] == ":" }
192 if t['continuation']:
193 t['content'] += t['content'][0]
194 pos += 1
195
196 yield(t)
197
198 while pos < len(line) and line[pos] in [' ', '\t']:
199 pos += 1
192 else: 200 else:
193 yield({ 'type': 'DELIM', 201 yield({ 'type': 'DELIM',
194 'content': m.group(0) }) 202 'content': content,
203 'continuation': False})
195 else: 204 else:
196 if m: 205 if m:
197 pos -= 1 206 pos -= 1
198 t = None 207 t = None
199 m = otag.match(line, pos) 208 m = otag.match(line, pos)
200 if m and m.group('tag') in self.tags: 209 if m and m.group('tag') in self.tags:
@@ -302,14 +311,14 @@ class BaseWikiMarkup(object):
302 # Push the token on stack 311 # Push the token on stack
303 stack.append(i) 312 stack.append(i)
304 # Redefine all non-matched tokens as TEXT 313 # Redefine all non-matched tokens as TEXT
305 for i in stack: 314 for i in stack:
306 self.toklist[i]['type'] = 'TEXT' 315 self.toklist[i]['type'] = 'TEXT'
307 316
308 def peektkn(self): 317 def peektkn(self, off=0):
309 return self.toklist[self.tokind] 318 return self.toklist[self.tokind-off]
310 319
311 def setkn(self,val): 320 def setkn(self,val):
312 self.toklist[self.tokind] = val 321 self.toklist[self.tokind] = val
313 322
314 def getkn(self): 323 def getkn(self):
315 self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' 324 self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
@@ -601,13 +610,13 @@ class BaseWikiMarkup(object):
601 break 610 break
602 elif len(tok['content']) > lev: 611 elif len(tok['content']) > lev:
603 self.ungetkn() 612 self.ungetkn()
604 elt = self.parse_env(type, len(tok['content'])) 613 elt = self.parse_env(type, len(tok['content']))
605 else: 614 else:
606 elt = self.parse_line() 615 elt = self.parse_line()
607 if 'continuation' not in tok: 616 if not tok['continuation']:
608 list.append({ 'type': 'ELT', 617 list.append({ 'type': 'ELT',
609 'subtype': envtypes[tok['content'][0]][1], 618 'subtype': envtypes[tok['content'][0]][1],
610 'content': elt }) 619 'content': elt })
611 continue 620 continue
612 621
613 if list: 622 if list:
@@ -678,16 +687,18 @@ class BaseWikiMarkup(object):
678 return { 'type': 'BAR' } 687 return { 'type': 'BAR' }
679 elif tok['content'][0:2] == "==": 688 elif tok['content'][0:2] == "==":
680 return self.parse_header(tok['content']) 689 return self.parse_header(tok['content'])
681 elif tok['content'][0] in envtypes: 690 elif tok['content'][0] in envtypes:
682 type = envtypes[tok['content'][0]][0] 691 type = envtypes[tok['content'][0]][0]
683 lev = len(tok['content']) 692 lev = len(tok['content'])
693 if tok['content'][0] == ':':
694 t = self.peektkn(2)
695 if not (t['type'] == 'DELIM' and t['content'] == ';'):
696 return self.parse_indent(lev)
684 self.ungetkn() 697 self.ungetkn()
685 return self.parse_env(type, lev) 698 return self.parse_env(type, lev)
686 elif tok['content'][0] == ":":
687 return self.parse_indent(len(tok['content']))
688 else: 699 else:
689 self.ungetkn() 700 self.ungetkn()
690 return self.parse_para() 701 return self.parse_para()
691 elif toktype == 'NL': 702 elif toktype == 'NL':
692 return { 'type': 'TEXT', 'content': '\n' } 703 return { 'type': 'TEXT', 'content': '\n' }
693 elif toktype == 'OTAG': 704 elif toktype == 'OTAG':

Return to:

Send suggestions and report system problems to the System administrator.