diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2015-07-05 14:13:45 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2015-07-05 14:18:07 +0300 |
commit | 729f5a4e4ad71ff62d56cae2336738d65dc574f9 (patch) | |
tree | 2e66ad09adf70c299d64eba158f68c8ef43e0268 | |
parent | a8d15328a95bc16c3d9f4ca06c0c69767899f678 (diff) | |
download | wikitrans-729f5a4e4ad71ff62d56cae2336738d65dc574f9.tar.gz wikitrans-729f5a4e4ad71ff62d56cae2336738d65dc574f9.tar.bz2 |
Fix most testcases.
* wikimarkup.py (envtypes): Redo as a dictionary.
(ELT): New item type.
(BaseWikiMarkup): Fix list parsing.
(parse): Comment out changes introduced by 9c42879.
Must be rewritten.
* wiki2html.py (HtmlWikiMarkup): Change handling of lists.
* wiki2text.py: Likewise.
* testdata/deflist.wiki: New testcase.
* testdata/deflist.html: Likewise.
* testdata/colon.html: Update.
* testdata/headings.html: Update.
* testdata/hz.html: Update.
* testdata/numlist.html: Update.
* testdata/unlist.html: Update.
-rw-r--r-- | test.py | 4 | ||||
-rw-r--r-- | testdata/colon.html | 9 | ||||
-rw-r--r-- | testdata/deflist.html | 2 | ||||
-rw-r--r-- | testdata/deflist.wiki | 5 | ||||
-rw-r--r-- | testdata/headings.html | 20 | ||||
-rw-r--r-- | testdata/hz.html | 10 | ||||
-rw-r--r-- | testdata/numlist.html | 7 | ||||
-rw-r--r-- | testdata/unlist.html | 10 | ||||
-rw-r--r-- | wiki2html.py | 22 | ||||
-rw-r--r-- | wiki2text.py | 12 | ||||
-rw-r--r-- | wikimarkup.py | 50 |
11 files changed, 77 insertions, 74 deletions
@@ -37,12 +37,16 @@ class TestMarkupParserBasic (unittest.TestCase): pass def test_unlist(self): self.assert_(self.__test('unlist')) pass + def test_deflist(self): + self.assert_(self.__test('unlist')) + pass + def test_door(self): self.assert_(self.__test('door')) pass def test_drzwi(self): self.assert_(self.__test('drzwi')) diff --git a/testdata/colon.html b/testdata/colon.html index 9721b93..b9e35ed 100644 --- a/testdata/colon.html +++ b/testdata/colon.html @@ -1,9 +1,4 @@ -<dl><dd> A colon (:) indents a line or paragraph. -</dd></dl>A newline starts a new paragraph. +<dl><dd> A colon (:) indents a line or paragraph.</dd></dl><p>A newline starts a new paragraph. Should only be used on talk pages. -For articles, you probably want the blockquote tag. -<dl><dd> We use 1 colon to indent once. -</dd><dl><dd> We use 2 colons to indent twice. -</dd><dl><dd> 3 colons to indent 3 times, and so on. -</dd></dl></dl></dl> +For articles, you probably want the blockquote tag.</p><dl><dd> We use 1 colon to indent once.<dl><dd> We use 2 colons to indent twice.<dl><dd> 3 colons to indent 3 times, and so on.</dd></dl></dd></dl></dd></dl> diff --git a/testdata/deflist.html b/testdata/deflist.html new file mode 100644 index 0000000..8986533 --- /dev/null +++ b/testdata/deflist.html @@ -0,0 +1,2 @@ +<dl><dt>item 1</dt><dd> definition 1</dd><dt>item 2</dt><dd> definition 2-1</dd><dd> definition 2-2</dd></dl> + diff --git a/testdata/deflist.wiki b/testdata/deflist.wiki new file mode 100644 index 0000000..19cc7c6 --- /dev/null +++ b/testdata/deflist.wiki @@ -0,0 +1,5 @@ +;item 1 +: definition 1 +;item 2 +: definition 2-1 +: definition 2-2 diff --git a/testdata/headings.html b/testdata/headings.html index 8b247a3..0ec26ac 100644 --- a/testdata/headings.html +++ b/testdata/headings.html @@ -1,15 +1,11 @@ -<h1> Section headings </h1> -<i>Headings</i> organize your writing into -sections. 
The Wiki software can automatically -generate a <a href="http://pl.wiktionary.org/table%20of%20contents">table of contents</a> from them. - -<h2> Subsection </h2>Using more "equals" (=) signs creates a subsection. +<h2> Section headings </h2> -<h3> A smaller subsection </h3> -Don't skip levels, -like from two to four equals signs. +<p><i>Headings</i> organize your writing into +sections. The Wiki software can automatically +generate a <a href="http://pl.wiktionary.org/wiki/table%20of%20contents">table of contents</a> from them.</p><h3> Subsection </h3> +<p>Using more "equals" (=) signs creates a subsection.</p><h4> A smaller subsection </h4> -Start with 2 equals signs not 1 +<p>Don't skip levels, +like from two to four equals signs.</p><p>Start with 2 equals signs not 1 because 1 creates H1 tags -which should be reserved for page title. - +which should be reserved for page title.</p> diff --git a/testdata/hz.html b/testdata/hz.html index 569d1fe..507a730 100644 --- a/testdata/hz.html +++ b/testdata/hz.html @@ -1,6 +1,4 @@ -You can make horizontal dividing lines (----) -to separate text. -<hr/>But you should usually use sections instead, -so that they go in the table of contents. - - +<p>You can make horizontal dividing lines (----) +to separate text.</p><hr/> +<p>But you should usually use sections instead, +so that they go in the table of contents.</p> diff --git a/testdata/numlist.html b/testdata/numlist.html index c65d0a2..ce9dd2d 100644 --- a/testdata/numlist.html +++ b/testdata/numlist.html @@ -1,7 +1,2 @@ -<ol><li> <i>Numbered lists</i> are: -</li><ol><li> Very organized -</li><li> Easy to follow -</li></ol></ol>A newline marks the end of the list. -<ol><li> New numbering starts with 1. 
-</li></ol> +<ol><li> <i>Numbered lists</i> are:<ol><li> Very organized</li><li> Easy to follow</li></ol></li></ol><p>A newline marks the end of the list.</p><ol><li> New numbering starts with 1.</li></ol> diff --git a/testdata/unlist.html b/testdata/unlist.html index 8d3eab7..87be30f 100644 --- a/testdata/unlist.html +++ b/testdata/unlist.html @@ -1,10 +1,2 @@ -<ul><li> <i>Unordered lists</i> are easy to do: -</li><ul><li> Start every line with a star. -</li><ul><li> More stars indicate a deeper level. -</li></ul><li> Previous item continues. -</li><li> A newline -</li></ul><li> in a list -</li></ul>marks the end of the list. -<ul><li>Of course you can start again. -</li></ul> +<ul><li> <i>Unordered lists</i> are easy to do:<ul><li> Start every line with a star.<ul><li> More stars indicate a deeper level.</li></ul></li></ul> Previous item continues.<ul><li> A newline</li></ul></li><li> in a list </li></ul><p>marks the end of the list.</p><ul><li>Of course you can start again.</li></ul> diff --git a/wiki2html.py b/wiki2html.py index 81ada65..77cb97a 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -35,15 +35,18 @@ class HtmlWikiMarkup (WikiMarkup): elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None - - envhdr = [ "ul", "ol", "dl" ] - envel = [ "li", "li", "dd" ] + envt = { "unnumbered": { "hdr": "ul", + "elt": ["li"] }, + "numbered": { "hdr": "ol", + "elt": ["li"] }, + "defn": { "hdr": "dl", + "elt": ["dt","dd"] } } def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + urllib.quote(tgt) @@ -157,19 +160,20 @@ class HtmlWikiMarkup (WikiMarkup): type = elt[1] lev = elt[2] if lev > 4: lev = 2 string = "" for s in elt[3]: - x = self.format(s) - string += "<%s>%s</%s>" % (self.envel[type], - self.format(s), - self.envel[type]) - return "<%s>%s</%s>" % (self.envhdr[type], + n = s[1]; + string += "<%s>%s</%s>" % 
(self.envt[type]["elt"][n], + self.format(s[2]), + self.envt[type]["elt"][n]) + return "<%s>%s</%s>" % (self.envt[type]["hdr"], string, - self.envhdr[type]) + self.envt[type]["hdr"]) + return string def str_para(self, elt): string = ""; for x in elt[1]: string += self.format(x) return "<p>" + string + "</p>" diff --git a/wiki2text.py b/wiki2text.py index 0f8dd5f..005e551 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -204,18 +204,24 @@ class TextWikiMarkup (WikiMarkup): lev = 1 string = "" n = 1 for s in elt[3]: if not string.endswith("\n"): string += "\n" - x = self.format(s) - if type == ENVUNNUM: + x = self.format(s[2]) + if type == "unnumbered": string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) - elif type == ENVNUM: + elif type == "numbered": string += self.fmtpara(self.indent(lev, "%d. %s" % (n, x))) n += 1 + elif type == "defn": + if s[1] == 0: + string += self.indent(lev-1, x) + else: + string += self.indent(lev+3, x) + if not string.endswith("\n"): string += "\n" elif elt[0] == IND: string = (" " * elt[1]) + self.format(elt[2]) + '\n' elif elt[0] == SEQ: string = "" diff --git a/wikimarkup.py b/wikimarkup.py index fa60c80..2b6348e 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -22,13 +22,13 @@ from types import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "NIL", "TEXT", "DELIM", "NL", "PARA", "IT", "BOLD", "LINK", "TMPL", "BAR", "HDR", "REF", "ENV", "IND", "SEQ", "ENVUNNUM", "ENVNUM", "envtypes" ] -delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^:+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)") +delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)") NIL = 0 TEXT = 1 DELIM = 2 NL = 3 @@ -40,19 +40,24 @@ TMPL = 8 BAR = 9 HDR = 10 REF = 11 ENV = 12 IND = 13 SEQ = 14 +ELT = 15 # Environment types: # Unnumbered list ENVUNNUM = 0 # Numbered list ENVNUM = 1 -envtypes = [ "*", "#" ] +envtypes = { "*": [ "unnumbered", 0 ], + "#": [ "numbered", 0 ], + 
";": [ "defn", 0 ], + ":": [ "defn", 1 ] + } class BaseWikiMarkup: toklist = None tokind = 0 tree = None @@ -90,13 +95,14 @@ class BaseWikiMarkup: if m: if (pos < m.start(0)): self.dprint(100, "YIELD: TEXT %s", line[pos:m.start(0)]) yield(TEXT, line[pos:m.start(0)]) pos = m.end(0) - if m.group(0)[0] in envtypes and line[pos] == ":": + if envtypes.has_key(m.group(0)[0]) and line[pos] == ":": + # FIXME? self.dprint(100, "YIELD: DELIM %s, True", m.group(0)) yield(DELIM, m.group(0), True) pos += 1 else: self.dprint(100, "YIELD: DELIM %s", m.group(0)) yield(DELIM, m.group(0)) @@ -378,29 +384,29 @@ class BaseWikiMarkup: def parse_env(self, type, lev): self.dprint(80, "ENTER parse_env(%s,%s), tok %s",type,lev,self.peektkn()) list = [] while 1: tok = self.getkn() - if tok[0] == DELIM and tok[1][0] in envtypes and type == envtypes.index(tok[1][0]): + if tok[0] == DELIM and envtypes.has_key(tok[1][0]) and type == envtypes[tok[1][0]][0]: if len(tok[1]) < lev: self.ungetkn() break elif len(tok[1]) > lev: self.ungetkn() elt = self.parse_env(type, len(tok[1])) else: elt = self.parse_line() if len(tok) == 2: - list.append(elt) + list.append((ELT, envtypes[tok[1][0]][1], elt)) continue - if list[-1][0] != SEQ: - x = list[-1] - list[-1] = (SEQ, [x]) - list[-1][1].append(elt) + if list[-1][2][0] != SEQ: + x = list[-1][2][1] + list[-1][2] = (SEQ, [x]) + list[-1][2][1].append(elt) else: self.ungetkn() break self.dprint(80, "LEAVE parse_env=(ENV, %s, %s, %s)", type, lev, list) return (ENV, type, lev, list) @@ -420,14 +426,14 @@ class BaseWikiMarkup: return self.parse_para() elif toktype == DELIM: if tok[1] == "----": return (BAR,) elif tok[1][0:2] == "==": return self.parse_header(tok[1]) - elif tok[1][0] in envtypes: - type = envtypes.index(tok[1][0]) + elif envtypes.has_key(tok[1][0]): + type = envtypes[tok[1][0]][0] lev = len(tok[1]) self.ungetkn() return self.parse_env(type, lev) elif tok[1][0] == ":": return self.parse_indent(len(tok[1])) else: @@ -539,23 +545,23 @@ class 
WikiMarkup (BaseWikiMarkup): if not (self.is_lang_link(x) or self.is_empty_text(x)): return False return True def parse(self): BaseWikiMarkup.parse(self) - # Remove everything before the first header - for i in range(0, len(self.tree)): - if self.tree[i][0] == HDR: - self.tree = self.tree[i:] - break - # Remove trailing links - for i in range(len(self.tree)-1, 0, -1): - if self.tree[i][0] == PARA \ - and not self.is_empty_para(self.tree[i][1]): - self.tree = self.tree[0:i+1] - break + # # Remove everything before the first header + # for i in range(0, len(self.tree)): + # if self.tree[i][0] == HDR: + # self.tree = self.tree[i:] + # break + # # Remove trailing links + # for i in range(len(self.tree)-1, 0, -1): + # if self.tree[i][0] == PARA \ + # and not self.is_empty_para(self.tree[i][1]): + # self.tree = self.tree[0:i+1] + # break # ISO 639 langtab = { "aa": "Afar", # Afar "ab": "Аҧсуа", # Abkhazian |