summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org> 2015-07-05 14:13:45 +0300
committer: Sergey Poznyakoff <gray@gnu.org> 2015-07-05 14:18:07 +0300
commit: 729f5a4e4ad71ff62d56cae2336738d65dc574f9 (patch)
tree: 2e66ad09adf70c299d64eba158f68c8ef43e0268
parent: a8d15328a95bc16c3d9f4ca06c0c69767899f678 (diff)
download: wikitrans-729f5a4e4ad71ff62d56cae2336738d65dc574f9.tar.gz
download: wikitrans-729f5a4e4ad71ff62d56cae2336738d65dc574f9.tar.bz2
Fix most testcases.
* wikimarkup.py (envtypes): Redo as a dictionary.
(ELT): New item type.
(BaseWikiMarkup): Fix list parsing.
(parse): Comment out changes introduced by 9c42879. Must be rewritten.
* wiki2html.py (HtmlWikiMarkup): Change handling of lists.
* wiki2text.py: Likewise.
* testdata/deflist.wiki: New testcase.
* testdata/deflist.html: Likewise.
* testdata/colon.html: Update.
* testdata/headings.html: Update.
* testdata/hz.html: Update.
* testdata/numlist.html: Update.
* testdata/unlist.html: Update.
-rw-r--r-- test.py 4
-rw-r--r-- testdata/colon.html 9
-rw-r--r-- testdata/deflist.html 2
-rw-r--r-- testdata/deflist.wiki 5
-rw-r--r-- testdata/headings.html 20
-rw-r--r-- testdata/hz.html 10
-rw-r--r-- testdata/numlist.html 7
-rw-r--r-- testdata/unlist.html 10
-rw-r--r-- wiki2html.py 22
-rw-r--r-- wiki2text.py 12
-rw-r--r-- wikimarkup.py 50
11 files changed, 77 insertions, 74 deletions
diff --git a/test.py b/test.py
index 22e5393..65787e0 100644
--- a/test.py
+++ b/test.py
@@ -37,12 +37,16 @@ class TestMarkupParserBasic (unittest.TestCase):
pass
def test_unlist(self):
self.assert_(self.__test('unlist'))
pass
+ def test_deflist(self):
+ self.assert_(self.__test('unlist'))
+ pass
+
def test_door(self):
self.assert_(self.__test('door'))
pass
def test_drzwi(self):
self.assert_(self.__test('drzwi'))
diff --git a/testdata/colon.html b/testdata/colon.html
index 9721b93..b9e35ed 100644
--- a/testdata/colon.html
+++ b/testdata/colon.html
@@ -1,9 +1,4 @@
-<dl><dd> A colon (:) indents a line or paragraph.
-</dd></dl>A newline starts a new paragraph.
+<dl><dd> A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph.
Should only be used on talk pages.
-For articles, you probably want the blockquote tag.
-<dl><dd> We use 1 colon to indent once.
-</dd><dl><dd> We use 2 colons to indent twice.
-</dd><dl><dd> 3 colons to indent 3 times, and so on.
-</dd></dl></dl></dl>
+For articles, you probably want the blockquote tag.</p><dl><dd> We use 1 colon to indent once.<dl><dd> We use 2 colons to indent twice.<dl><dd> 3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl>
diff --git a/testdata/deflist.html b/testdata/deflist.html
new file mode 100644
index 0000000..8986533
--- /dev/null
+++ b/testdata/deflist.html
@@ -0,0 +1,2 @@
+<dl><dt>item 1</dt><dd> definition 1</dd><dt>item 2</dt><dd> definition 2-1</dd><dd> definition 2-2</dd></dl>
+
diff --git a/testdata/deflist.wiki b/testdata/deflist.wiki
new file mode 100644
index 0000000..19cc7c6
--- /dev/null
+++ b/testdata/deflist.wiki
@@ -0,0 +1,5 @@
+;item 1
+: definition 1
+;item 2
+: definition 2-1
+: definition 2-2
diff --git a/testdata/headings.html b/testdata/headings.html
index 8b247a3..0ec26ac 100644
--- a/testdata/headings.html
+++ b/testdata/headings.html
@@ -1,15 +1,11 @@
-<h1> Section headings </h1>
-<i>Headings</i> organize your writing into
-sections. The Wiki software can automatically
-generate a <a href="http://pl.wiktionary.org/table%20of%20contents">table of contents</a> from them.
-
-<h2> Subsection </h2>Using more "equals" (=) signs creates a subsection.
+<h2> Section headings </h2>
-<h3> A smaller subsection </h3>
-Don't skip levels,
-like from two to four equals signs.
+<p><i>Headings</i> organize your writing into
+sections. The Wiki software can automatically
+generate a <a href="http://pl.wiktionary.org/wiki/table%20of%20contents">table of contents</a> from them.</p><h3> Subsection </h3>
+<p>Using more "equals" (=) signs creates a subsection.</p><h4> A smaller subsection </h4>
-Start with 2 equals signs not 1
+<p>Don't skip levels,
+like from two to four equals signs.</p><p>Start with 2 equals signs not 1
because 1 creates H1 tags
-which should be reserved for page title.
-
+which should be reserved for page title.</p>
diff --git a/testdata/hz.html b/testdata/hz.html
index 569d1fe..507a730 100644
--- a/testdata/hz.html
+++ b/testdata/hz.html
@@ -1,6 +1,4 @@
-You can make horizontal dividing lines (----)
-to separate text.
-<hr/>But you should usually use sections instead,
-so that they go in the table of contents.
-
-
+<p>You can make horizontal dividing lines (----)
+to separate text.</p><hr/>
+<p>But you should usually use sections instead,
+so that they go in the table of contents.</p>
diff --git a/testdata/numlist.html b/testdata/numlist.html
index c65d0a2..ce9dd2d 100644
--- a/testdata/numlist.html
+++ b/testdata/numlist.html
@@ -1,7 +1,2 @@
-<ol><li> <i>Numbered lists</i> are:
-</li><ol><li> Very organized
-</li><li> Easy to follow
-</li></ol></ol>A newline marks the end of the list.
-<ol><li> New numbering starts with 1.
-</li></ol>
+<ol><li> <i>Numbered lists</i> are:<ol><li> Very organized</li><li> Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li> New numbering starts with 1.</li></ol>
diff --git a/testdata/unlist.html b/testdata/unlist.html
index 8d3eab7..87be30f 100644
--- a/testdata/unlist.html
+++ b/testdata/unlist.html
@@ -1,10 +1,2 @@
-<ul><li> <i>Unordered lists</i> are easy to do:
-</li><ul><li> Start every line with a star.
-</li><ul><li> More stars indicate a deeper level.
-</li></ul><li> Previous item continues.
-</li><li> A newline
-</li></ul><li> in a list
-</li></ul>marks the end of the list.
-<ul><li>Of course you can start again.
-</li></ul>
+<ul><li> <i>Unordered lists</i> are easy to do:<ul><li> Start every line with a star.<ul><li> More stars indicate a deeper level.</li></ul></li></ul> Previous item continues.<ul><li> A newline</li></ul></li><li> in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul>
diff --git a/wiki2html.py b/wiki2html.py
index 81ada65..77cb97a 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -35,15 +35,18 @@ class HtmlWikiMarkup (WikiMarkup):
elif str in wiki_ns_re[self.lang]:
for elt in wiki_ns_re[self.lang][str]:
if str.beginswith(elt[0]) and str.endswith(elt[1]):
return elt[2]
return None
-
- envhdr = [ "ul", "ol", "dl" ]
- envel = [ "li", "li", "dd" ]
+ envt = { "unnumbered": { "hdr": "ul",
+ "elt": ["li"] },
+ "numbered": { "hdr": "ol",
+ "elt": ["li"] },
+ "defn": { "hdr": "dl",
+ "elt": ["dt","dd"] } }
def mktgt(self, tgt, lang = None):
if not lang:
lang = self.lang
return self.html_base % { 'lang' : lang } + urllib.quote(tgt)
@@ -157,19 +160,20 @@ class HtmlWikiMarkup (WikiMarkup):
type = elt[1]
lev = elt[2]
if lev > 4:
lev = 2
string = ""
for s in elt[3]:
- x = self.format(s)
- string += "<%s>%s</%s>" % (self.envel[type],
- self.format(s),
- self.envel[type])
- return "<%s>%s</%s>" % (self.envhdr[type],
+ n = s[1];
+ string += "<%s>%s</%s>" % (self.envt[type]["elt"][n],
+ self.format(s[2]),
+ self.envt[type]["elt"][n])
+ return "<%s>%s</%s>" % (self.envt[type]["hdr"],
string,
- self.envhdr[type])
+ self.envt[type]["hdr"])
+ return string
def str_para(self, elt):
string = "";
for x in elt[1]:
string += self.format(x)
return "<p>" + string + "</p>"
diff --git a/wiki2text.py b/wiki2text.py
index 0f8dd5f..005e551 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -204,18 +204,24 @@ class TextWikiMarkup (WikiMarkup):
lev = 1
string = ""
n = 1
for s in elt[3]:
if not string.endswith("\n"):
string += "\n"
- x = self.format(s)
- if type == ENVUNNUM:
+ x = self.format(s[2])
+ if type == "unnumbered":
string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" ")))
- elif type == ENVNUM:
+ elif type == "numbered":
string += self.fmtpara(self.indent(lev, "%d. %s" % (n, x)))
n += 1
+ elif type == "defn":
+ if s[1] == 0:
+ string += self.indent(lev-1, x)
+ else:
+ string += self.indent(lev+3, x)
+
if not string.endswith("\n"):
string += "\n"
elif elt[0] == IND:
string = (" " * elt[1]) + self.format(elt[2]) + '\n'
elif elt[0] == SEQ:
string = ""
diff --git a/wikimarkup.py b/wikimarkup.py
index fa60c80..2b6348e 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -22,13 +22,13 @@ from types import *
__all__ = [ "BaseWikiMarkup", "WikiMarkup",
"NIL", "TEXT", "DELIM", "NL", "PARA",
"IT", "BOLD", "LINK", "TMPL",
"BAR", "HDR", "REF", "ENV", "IND", "SEQ",
"ENVUNNUM", "ENVNUM", "envtypes" ]
-delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^:+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)")
+delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)")
NIL = 0
TEXT = 1
DELIM = 2
NL = 3
@@ -40,19 +40,24 @@ TMPL = 8
BAR = 9
HDR = 10
REF = 11
ENV = 12
IND = 13
SEQ = 14
+ELT = 15
# Environment types:
# Unnumbered list
ENVUNNUM = 0
# Numbered list
ENVNUM = 1
-envtypes = [ "*", "#" ]
+envtypes = { "*": [ "unnumbered", 0 ],
+ "#": [ "numbered", 0 ],
+ ";": [ "defn", 0 ],
+ ":": [ "defn", 1 ]
+ }
class BaseWikiMarkup:
toklist = None
tokind = 0
tree = None
@@ -90,13 +95,14 @@ class BaseWikiMarkup:
if m:
if (pos < m.start(0)):
self.dprint(100, "YIELD: TEXT %s", line[pos:m.start(0)])
yield(TEXT, line[pos:m.start(0)])
pos = m.end(0)
- if m.group(0)[0] in envtypes and line[pos] == ":":
+ if envtypes.has_key(m.group(0)[0]) and line[pos] == ":":
+ # FIXME?
self.dprint(100, "YIELD: DELIM %s, True", m.group(0))
yield(DELIM, m.group(0), True)
pos += 1
else:
self.dprint(100, "YIELD: DELIM %s", m.group(0))
yield(DELIM, m.group(0))
@@ -378,29 +384,29 @@ class BaseWikiMarkup:
def parse_env(self, type, lev):
self.dprint(80, "ENTER parse_env(%s,%s), tok %s",type,lev,self.peektkn())
list = []
while 1:
tok = self.getkn()
- if tok[0] == DELIM and tok[1][0] in envtypes and type == envtypes.index(tok[1][0]):
+ if tok[0] == DELIM and envtypes.has_key(tok[1][0]) and type == envtypes[tok[1][0]][0]:
if len(tok[1]) < lev:
self.ungetkn()
break
elif len(tok[1]) > lev:
self.ungetkn()
elt = self.parse_env(type, len(tok[1]))
else:
elt = self.parse_line()
if len(tok) == 2:
- list.append(elt)
+ list.append((ELT, envtypes[tok[1][0]][1], elt))
continue
- if list[-1][0] != SEQ:
- x = list[-1]
- list[-1] = (SEQ, [x])
- list[-1][1].append(elt)
+ if list[-1][2][0] != SEQ:
+ x = list[-1][2][1]
+ list[-1][2] = (SEQ, [x])
+ list[-1][2][1].append(elt)
else:
self.ungetkn()
break
self.dprint(80, "LEAVE parse_env=(ENV, %s, %s, %s)", type, lev, list)
return (ENV, type, lev, list)
@@ -420,14 +426,14 @@ class BaseWikiMarkup:
return self.parse_para()
elif toktype == DELIM:
if tok[1] == "----":
return (BAR,)
elif tok[1][0:2] == "==":
return self.parse_header(tok[1])
- elif tok[1][0] in envtypes:
- type = envtypes.index(tok[1][0])
+ elif envtypes.has_key(tok[1][0]):
+ type = envtypes[tok[1][0]][0]
lev = len(tok[1])
self.ungetkn()
return self.parse_env(type, lev)
elif tok[1][0] == ":":
return self.parse_indent(len(tok[1]))
else:
@@ -539,23 +545,23 @@ class WikiMarkup (BaseWikiMarkup):
if not (self.is_lang_link(x) or self.is_empty_text(x)):
return False
return True
def parse(self):
BaseWikiMarkup.parse(self)
- # Remove everything before the first header
- for i in range(0, len(self.tree)):
- if self.tree[i][0] == HDR:
- self.tree = self.tree[i:]
- break
- # Remove trailing links
- for i in range(len(self.tree)-1, 0, -1):
- if self.tree[i][0] == PARA \
- and not self.is_empty_para(self.tree[i][1]):
- self.tree = self.tree[0:i+1]
- break
+ # # Remove everything before the first header
+ # for i in range(0, len(self.tree)):
+ # if self.tree[i][0] == HDR:
+ # self.tree = self.tree[i:]
+ # break
+ # # Remove trailing links
+ # for i in range(len(self.tree)-1, 0, -1):
+ # if self.tree[i][0] == PARA \
+ # and not self.is_empty_para(self.tree[i][1]):
+ # self.tree = self.tree[0:i+1]
+ # break
# ISO 639
langtab = {
"aa": "Afar", # Afar
"ab": "Аҧсуа", # Abkhazian

Return to:

Send suggestions and report system problems to the System administrator.