Fix processing of environments (numbered/unnumbered lists, definition lists) and indented strings.

* wikimarkup.py (tokread): Always add 'continuation' key to DELIM entries.
Delete whitespace following environment delimiters.
(peektkn): Take an optional offset argument.
(parse0): Handle indentations.
* wiki2html.py (str_ind): Use <dl> to produce indentations.
* wiki2texi.py (str_ind): End text with a newline.
* testdata/colon.html: Update.
* testdata/deflist.html: Update.
* testdata/numlist.html: Update.
* testdata/unlist.html: Update.
author: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-14 16:09:36 +0300
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-14 16:09:36 +0300
commit: 4097896542f2279700794104c7c0728beed38cd0 (patch)
tree: 664012755efc30126a0ccef42245ce3068f06adc
parent: 28072898f1bd9a925d73ac187d560198d6345524 (diff)
download: wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.gz
wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.bz2
7 files changed, 29 insertions, 19 deletions
diff --git a/testdata/colon.html b/testdata/colon.html
index b9e35ed..2fb6ed3 100644
--- a/testdata/colon.html
+++ b/testdata/colon.html
@@ -1,4 +1,4 @@
-<dl><dd> A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph.
+<dl><dd>A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph.
 Should only be used on talk pages.
-For articles, you probably want the blockquote tag.</p><dl><dd> We use 1 colon to indent once.<dl><dd> We use 2 colons to indent twice.<dl><dd> 3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl>
+For articles, you probably want the blockquote tag.</p><dl><dd>We use 1 colon to indent once.</dd></dl><dl><dd><dl><dd>We use 2 colons to indent twice.</dd></dl></dd></dl><dl><dd><dl><dd><dl><dd>3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl>
 
diff --git a/testdata/deflist.html b/testdata/deflist.html
index 8986533..774f21f 100644
--- a/testdata/deflist.html
+++ b/testdata/deflist.html
@@ -1,2 +1 @@
-<dl><dt>item 1</dt><dd> definition 1</dd><dt>item 2</dt><dd> definition 2-1</dd><dd> definition 2-2</dd></dl>
-
+<dl><dt>item 1</dt><dd>definition 1</dd><dt>item 2</dt><dd>definition 2-1</dd><dd>definition 2-2</dd></dl>
diff --git a/testdata/numlist.html b/testdata/numlist.html
index ce9dd2d..0ce8a9a 100644
--- a/testdata/numlist.html
+++ b/testdata/numlist.html
@@ -1,2 +1,2 @@
-<ol><li> <i>Numbered lists</i> are:<ol><li> Very organized</li><li> Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li> New numbering starts with 1.</li></ol>
+<ol><li><i>Numbered lists</i> are:<ol><li>Very organized</li><li>Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li>New numbering starts with 1.</li></ol>
 
diff --git a/testdata/unlist.html b/testdata/unlist.html
index 87be30f..ac4d32b 100644
--- a/testdata/unlist.html
+++ b/testdata/unlist.html
@@ -1,2 +1,2 @@
-<ul><li> <i>Unordered lists</i> are easy to do:<ul><li> Start every line with a star.<ul><li> More stars indicate a deeper level.</li></ul></li></ul> Previous item continues.<ul><li> A newline</li></ul></li><li> in a list  </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul>
+<ul><li><i>Unordered lists</i> are easy to do:<ul><li>Start every line with a star.<ul><li>More stars indicate a deeper level.</li></ul>Previous item continues.</li><li>A newline</li></ul></li><li>in a list  </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul>
 
diff --git a/wiki2html.py b/wiki2html.py
index 66939c4..f3ea0e3 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -194,25 +194,25 @@ class HtmlWikiMarkup (WikiMarkup):
             string += self.format(x)
         return "<p>" + string + "</p>"
 
     def str_pre(self, elt):
         string = "";
         for x in elt['content']:
             string += self.format(x)
         if self.nested:
             return string            
         return '<pre>' + string + '</pre>'
     
     def str_ind(self, elt):
-        return ("&nbsp;" * 2 * elt['level']) + self.format(elt['content'])
+        return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level']
     
     def format(self, elt):
         if elt['type'] == 'TEXT':
             if isinstance(elt['content'],list):
                 string = ""
                 for s in elt['content']:
                     string += s
             else:
                 string = elt['content']
             return string
         elif elt['type'] == 'TAG':
             return self.str_tag(elt)
diff --git a/wiki2texi.py b/wiki2texi.py
index 0b3eb77..e9009ec 100644
--- a/wiki2texi.py
+++ b/wiki2texi.py
@@ -177,25 +177,25 @@ class TexiWikiMarkup (WikiMarkup):
         if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
             s ="\n@* %s" % (self.format(elt['content']))
         else:
             s = self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " " + self.format(elt['content']) + "\n"
             if self.sectcomm[self.sectioning_model][0] == '@top':
                 s += "@node %s\n" % (self.nodename(elt['content']))
         return s + "\n"
         
     def str_bar(self):
         return "\n-----\n" # FIXME
 
     def str_ind(self, elt):
-        return ("@w{ }" * elt['level']) + self.format(elt['content'])
+        return ("@w{ }" * elt['level']) + self.format(elt['content']) + '\n'
 
     def str_env(self, elt):
         if elt['envtype'] == 'unnumbered':
             string = '\n@itemize @bullet\n'
             for s in elt['content']:
                 string += '@item ' + self.format(s['content']) + '\n\n'
             string += '@end itemize\n'
         elif elt['envtype'] == 'numbered':
             string = '\n@enumerate\n'
             for s in elt['content']:
                 string += '@item ' + self.format(s['content']) + '\n\n'
             string += '@end enumerate\n'
diff --git a/wikimarkup.py b/wikimarkup.py
index 9a79d1e..0c6d2f2 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -175,32 +175,41 @@ class BaseWikiMarkup(object):
                 line = None
                 continue
 
             self.dprint(100, "LINE: %s", line[pos:])
             m = delim.search(line, pos)
             
             if m:
                 if (pos < m.start(0)):
                     yield({'type': 'TEXT', 'content': line[pos:m.start(0)]})
                 pos = m.end(0)
 
             if m and line[m.start(0)] != '<':
-                if m.group(0)[0] in envtypes and pos < len(line) and line[pos] == ":":
-                    yield({ 'type': 'DELIM',
-                            'content': m.group(0),
-                            'continuation': True })
-                    pos += 1
+                content = m.group(0)
+                if content[0] in envtypes:
+                    t = { 'type': 'DELIM',
+                           'content': content,
+                           'continuation': pos < len(line) and line[pos] == ":" }
+                    if t['continuation']:
+                        t['content'] += t['content'][0]
+                        pos += 1
+                        
+                    yield(t)
+
+                    while pos < len(line) and line[pos] in [' ', '\t']:
+                        pos += 1 
                 else:
                     yield({ 'type': 'DELIM',
-                            'content': m.group(0) })
+                            'content': content,
+                            'continuation': False})
             else:
                 if m:
                     pos -= 1
                 t = None
                 m = otag.match(line, pos)
                 if m and m.group('tag') in self.tags:
                     rest = line[m.end(0):]
                     line = m.group('pfx')
                     pos = 0
                     t = { 'type': 'OTAG',
                           'tag': m.group('tag'),
                           'args': m.group('args') }
@@ -296,26 +305,26 @@ class BaseWikiMarkup(object):
                         #         and pop off the matching one 
                         stack.pop()
                     else:
                         # Push the token on stack
                         stack.append(i)
                 else:
                     # Push the token on stack
                     stack.append(i)
         # Redefine all non-matched tokens as TEXT
         for i in stack:
             self.toklist[i]['type'] = 'TEXT'
 
-    def peektkn(self):
-        return self.toklist[self.tokind]
+    def peektkn(self, off=0):
+        return self.toklist[self.tokind-off]
 
     def setkn(self,val):
         self.toklist[self.tokind] = val
     
     def getkn(self):
         self.newline = self.tokind == 0 or self.toklist[self.tokind-1]['type'] == 'NL'
         if self.tokind == len(self.toklist):
             return { 'type': 'NIL' }
         tok = self.toklist[self.tokind]
         self.tokind = self.tokind + 1
         return tok
     
@@ -595,25 +604,25 @@ class BaseWikiMarkup(object):
             tok = self.getkn()
             if tok['type'] == 'DELIM' \
                and tok['content'][0] in envtypes \
                and type == envtypes[tok['content'][0]][0]:
                 if len(tok['content']) < lev:
                     self.ungetkn()
                     break
                 elif len(tok['content']) > lev:
                     self.ungetkn()
                     elt = self.parse_env(type, len(tok['content']))
                 else:
                     elt = self.parse_line()
-                    if 'continuation' not in tok:
+                    if  not tok['continuation']:
                         list.append({ 'type': 'ELT',
                                       'subtype': envtypes[tok['content'][0]][1],
                                       'content': elt })
                         continue
 
                 if list:
                     if list[-1]['content']['type'] != 'SEQ':
                         x = list[-1]['content']['content']
                         # FIXME:
                         list[-1]['content'] = { 'type': 'SEQ', 'content': [x] }
                     list[-1]['content']['content'].append(elt)
             else:
@@ -672,28 +681,30 @@ class BaseWikiMarkup(object):
             return None
         elif toktype == 'TEXT':
             self.ungetkn()
             return self.parse_para()
         elif toktype == 'DELIM':
             if tok['content'] == "----":
                 return { 'type': 'BAR' }
             elif tok['content'][0:2] == "==":
                 return self.parse_header(tok['content'])
             elif tok['content'][0] in envtypes:
                 type = envtypes[tok['content'][0]][0]
                 lev = len(tok['content'])
+                if tok['content'][0] == ':':
+                    t = self.peektkn(2)
+                    if not (t['type'] == 'DELIM' and t['content'] == ';'):
+                        return self.parse_indent(lev)
                 self.ungetkn()
                 return self.parse_env(type, lev)
-            elif tok['content'][0] == ":":
-                return self.parse_indent(len(tok['content']))
             else:
                 self.ungetkn()
                 return self.parse_para()
         elif toktype == 'NL':
             return { 'type': 'TEXT', 'content': '\n' }
         elif toktype == 'OTAG':
             return self.parse_til(tok)
         else:
             return tok
 
     def parse(self):
         if not self.toklist:
author	Sergey Poznyakoff <gray@gnu.org.ua>	2015-07-14 16:09:36 +0300
committer	Sergey Poznyakoff <gray@gnu.org.ua>	2015-07-14 16:09:36 +0300
commit	4097896542f2279700794104c7c0728beed38cd0 (patch)
tree	664012755efc30126a0ccef42245ce3068f06adc
parent	28072898f1bd9a925d73ac187d560198d6345524 (diff)
download	wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.gz wikitrans-4097896542f2279700794104c7c0728beed38cd0.tar.bz2