diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-14 16:09:36 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-14 16:09:36 +0300 |
commit | 4097896542f2279700794104c7c0728beed38cd0 (patch) | |
tree | 664012755efc30126a0ccef42245ce3068f06adc | |
parent | 28072898f1bd9a925d73ac187d560198d6345524 (diff) | |
download | wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.gz wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.bz2 |
Fix processing of environments (numbered/unnumbered lists, definition lists) and indented strings.
* wikimarkup.py (tokread): Always add 'continuation' key to DELIM
entries. Skip spaces and tabs that follow an environment delimiter.
(peektkn): Take an optional offset argument, counted backwards from the current token index.
(parse0): Handle indentations.
* wiki2html.py (str_ind): Use <dl> to produce indentations.
* wiki2texi.py (str_ind): End text with a newline.
* testdata/colon.html: Update.
* testdata/deflist.html: Update.
* testdata/numlist.html: Update.
* testdata/unlist.html: Update.
-rw-r--r-- | testdata/colon.html | 4 | ||||
-rw-r--r-- | testdata/deflist.html | 3 | ||||
-rw-r--r-- | testdata/numlist.html | 2 | ||||
-rw-r--r-- | testdata/unlist.html | 2 | ||||
-rw-r--r-- | wiki2html.py | 2 | ||||
-rw-r--r-- | wiki2texi.py | 2 | ||||
-rw-r--r-- | wikimarkup.py | 33 |
7 files changed, 29 insertions, 19 deletions
diff --git a/testdata/colon.html b/testdata/colon.html index b9e35ed..2fb6ed3 100644 --- a/testdata/colon.html +++ b/testdata/colon.html | |||
@@ -1,4 +1,4 @@ | |||
1 | <dl><dd> A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph. | 1 | <dl><dd>A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph. |
2 | Should only be used on talk pages. | 2 | Should only be used on talk pages. |
3 | For articles, you probably want the blockquote tag.</p><dl><dd> We use 1 colon to indent once.<dl><dd> We use 2 colons to indent twice.<dl><dd> 3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl> | 3 | For articles, you probably want the blockquote tag.</p><dl><dd>We use 1 colon to indent once.</dd></dl><dl><dd><dl><dd>We use 2 colons to indent twice.</dd></dl></dd></dl><dl><dd><dl><dd><dl><dd>3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl> |
4 | 4 | ||
diff --git a/testdata/deflist.html b/testdata/deflist.html index 8986533..774f21f 100644 --- a/testdata/deflist.html +++ b/testdata/deflist.html | |||
@@ -1,2 +1 @@ | |||
1 | <dl><dt>item 1</dt><dd> definition 1</dd><dt>item 2</dt><dd> definition 2-1</dd><dd> definition 2-2</dd></dl> | <dl><dt>item 1</dt><dd>definition 1</dd><dt>item 2</dt><dd>definition 2-1</dd><dd>definition 2-2</dd></dl> | |
2 | |||
diff --git a/testdata/numlist.html b/testdata/numlist.html index ce9dd2d..0ce8a9a 100644 --- a/testdata/numlist.html +++ b/testdata/numlist.html | |||
@@ -1,2 +1,2 @@ | |||
1 | <ol><li> <i>Numbered lists</i> are:<ol><li> Very organized</li><li> Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li> New numbering starts with 1.</li></ol> | 1 | <ol><li><i>Numbered lists</i> are:<ol><li>Very organized</li><li>Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li>New numbering starts with 1.</li></ol> |
2 | 2 | ||
diff --git a/testdata/unlist.html b/testdata/unlist.html index 87be30f..ac4d32b 100644 --- a/testdata/unlist.html +++ b/testdata/unlist.html | |||
@@ -1,2 +1,2 @@ | |||
1 | <ul><li> <i>Unordered lists</i> are easy to do:<ul><li> Start every line with a star.<ul><li> More stars indicate a deeper level.</li></ul></li></ul> Previous item continues.<ul><li> A newline</li></ul></li><li> in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul> | 1 | <ul><li><i>Unordered lists</i> are easy to do:<ul><li>Start every line with a star.<ul><li>More stars indicate a deeper level.</li></ul>Previous item continues.</li><li>A newline</li></ul></li><li>in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul> |
2 | 2 | ||
diff --git a/wiki2html.py b/wiki2html.py index 66939c4..f3ea0e3 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -200,13 +200,13 @@ class HtmlWikiMarkup (WikiMarkup): | |||
200 | string += self.format(x) | 200 | string += self.format(x) |
201 | if self.nested: | 201 | if self.nested: |
202 | return string | 202 | return string |
203 | return '<pre>' + string + '</pre>' | 203 | return '<pre>' + string + '</pre>' |
204 | 204 | ||
205 | def str_ind(self, elt): | 205 | def str_ind(self, elt): |
206 | return (" " * 2 * elt['level']) + self.format(elt['content']) | 206 | return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level'] |
207 | 207 | ||
208 | def format(self, elt): | 208 | def format(self, elt): |
209 | if elt['type'] == 'TEXT': | 209 | if elt['type'] == 'TEXT': |
210 | if isinstance(elt['content'],list): | 210 | if isinstance(elt['content'],list): |
211 | string = "" | 211 | string = "" |
212 | for s in elt['content']: | 212 | for s in elt['content']: |
diff --git a/wiki2texi.py b/wiki2texi.py index 0b3eb77..e9009ec 100644 --- a/wiki2texi.py +++ b/wiki2texi.py | |||
@@ -183,13 +183,13 @@ class TexiWikiMarkup (WikiMarkup): | |||
183 | return s + "\n" | 183 | return s + "\n" |
184 | 184 | ||
185 | def str_bar(self): | 185 | def str_bar(self): |
186 | return "\n-----\n" # FIXME | 186 | return "\n-----\n" # FIXME |
187 | 187 | ||
188 | def str_ind(self, elt): | 188 | def str_ind(self, elt): |
189 | return ("@w{ }" * elt['level']) + self.format(elt['content']) | 189 | return ("@w{ }" * elt['level']) + self.format(elt['content']) + '\n' |
190 | 190 | ||
191 | def str_env(self, elt): | 191 | def str_env(self, elt): |
192 | if elt['envtype'] == 'unnumbered': | 192 | if elt['envtype'] == 'unnumbered': |
193 | string = '\n@itemize @bullet\n' | 193 | string = '\n@itemize @bullet\n' |
194 | for s in elt['content']: | 194 | for s in elt['content']: |
195 | string += '@item ' + self.format(s['content']) + '\n\n' | 195 | string += '@item ' + self.format(s['content']) + '\n\n' |
diff --git a/wikimarkup.py b/wikimarkup.py index 9a79d1e..0c6d2f2 100644 --- a/wikimarkup.py +++ b/wikimarkup.py | |||
@@ -181,20 +181,29 @@ class BaseWikiMarkup(object): | |||
181 | if m: | 181 | if m: |
182 | if (pos < m.start(0)): | 182 | if (pos < m.start(0)): |
183 | yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) | 183 | yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) |
184 | pos = m.end(0) | 184 | pos = m.end(0) |
185 | 185 | ||
186 | if m and line[m.start(0)] != '<': | 186 | if m and line[m.start(0)] != '<': |
187 | if m.group(0)[0] in envtypes and pos < len(line) and line[pos] == ":": | 187 | content = m.group(0) |
188 | yield({ 'type': 'DELIM', | 188 | if content[0] in envtypes: |
189 | 'content': m.group(0), | 189 | t = { 'type': 'DELIM', |
190 | 'continuation': True }) | 190 | 'content': content, |
191 | pos += 1 | 191 | 'continuation': pos < len(line) and line[pos] == ":" } |
192 | if t['continuation']: | ||
193 | t['content'] += t['content'][0] | ||
194 | pos += 1 | ||
195 | |||
196 | yield(t) | ||
197 | |||
198 | while pos < len(line) and line[pos] in [' ', '\t']: | ||
199 | pos += 1 | ||
192 | else: | 200 | else: |
193 | yield({ 'type': 'DELIM', | 201 | yield({ 'type': 'DELIM', |
194 | 'content': m.group(0) }) | 202 | 'content': content, |
203 | 'continuation': False}) | ||
195 | else: | 204 | else: |
196 | if m: | 205 | if m: |
197 | pos -= 1 | 206 | pos -= 1 |
198 | t = None | 207 | t = None |
199 | m = otag.match(line, pos) | 208 | m = otag.match(line, pos) |
200 | if m and m.group('tag') in self.tags: | 209 | if m and m.group('tag') in self.tags: |
@@ -302,14 +311,14 @@ class BaseWikiMarkup(object): | |||
302 | # Push the token on stack | 311 | # Push the token on stack |
303 | stack.append(i) | 312 | stack.append(i) |
304 | # Redefine all non-matched tokens as TEXT | 313 | # Redefine all non-matched tokens as TEXT |
305 | for i in stack: | 314 | for i in stack: |
306 | self.toklist[i]['type'] = 'TEXT' | 315 | self.toklist[i]['type'] = 'TEXT' |
307 | 316 | ||
308 | def peektkn(self): | 317 | def peektkn(self, off=0): |
309 | return self.toklist[self.tokind] | 318 | return self.toklist[self.tokind-off] |
310 | 319 | ||
311 | def setkn(self,val): | 320 | def setkn(self,val): |
312 | self.toklist[self.tokind] = val | 321 | self.toklist[self.tokind] = val |
313 | 322 | ||
314 | def getkn(self): | 323 | def getkn(self): |
315 | self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' | 324 | self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL' |
@@ -601,13 +610,13 @@ class BaseWikiMarkup(object): | |||
601 | break | 610 | break |
602 | elif len(tok['content']) > lev: | 611 | elif len(tok['content']) > lev: |
603 | self.ungetkn() | 612 | self.ungetkn() |
604 | elt = self.parse_env(type, len(tok['content'])) | 613 | elt = self.parse_env(type, len(tok['content'])) |
605 | else: | 614 | else: |
606 | elt = self.parse_line() | 615 | elt = self.parse_line() |
607 | if 'continuation' not in tok: | 616 | if not tok['continuation']: |
608 | list.append({ 'type': 'ELT', | 617 | list.append({ 'type': 'ELT', |
609 | 'subtype': envtypes[tok['content'][0]][1], | 618 | 'subtype': envtypes[tok['content'][0]][1], |
610 | 'content': elt }) | 619 | 'content': elt }) |
611 | continue | 620 | continue |
612 | 621 | ||
613 | if list: | 622 | if list: |
@@ -678,16 +687,18 @@ class BaseWikiMarkup(object): | |||
678 | return { 'type': 'BAR' } | 687 | return { 'type': 'BAR' } |
679 | elif tok['content'][0:2] == "==": | 688 | elif tok['content'][0:2] == "==": |
680 | return self.parse_header(tok['content']) | 689 | return self.parse_header(tok['content']) |
681 | elif tok['content'][0] in envtypes: | 690 | elif tok['content'][0] in envtypes: |
682 | type = envtypes[tok['content'][0]][0] | 691 | type = envtypes[tok['content'][0]][0] |
683 | lev = len(tok['content']) | 692 | lev = len(tok['content']) |
693 | if tok['content'][0] == ':': | ||
694 | t = self.peektkn(2) | ||
695 | if not (t['type'] == 'DELIM' and t['content'] == ';'): | ||
696 | return self.parse_indent(lev) | ||
684 | self.ungetkn() | 697 | self.ungetkn() |
685 | return self.parse_env(type, lev) | 698 | return self.parse_env(type, lev) |
686 | elif tok['content'][0] == ":": | ||
687 | return self.parse_indent(len(tok['content'])) | ||
688 | else: | 699 | else: |
689 | self.ungetkn() | 700 | self.ungetkn() |
690 | return self.parse_para() | 701 | return self.parse_para() |
691 | elif toktype == 'NL': | 702 | elif toktype == 'NL': |
692 | return { 'type': 'TEXT', 'content': '\n' } | 703 | return { 'type': 'TEXT', 'content': '\n' } |
693 | elif toktype == 'OTAG': | 704 | elif toktype == 'OTAG': |