From a8cd24f0f5cbefccdefd2a4a5166b89c6c8f7a54 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Thu, 5 Mar 2009 20:25:52 +0200 Subject: Avoid losing newlines while parsing the input stream. Provide some rudimentary parsing for wiktionary templates --- wiki2html.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ wikicvt.py | 0 wikimarkup.py | 6 +++++- 3 files changed, 52 insertions(+), 7 deletions(-) mode change 100644 => 100755 wikicvt.py diff --git a/wiki2html.py b/wiki2html.py index 5a8fdcc..faab18b 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -18,6 +18,7 @@ from wikimarkup import * from types import TupleType from wikins import wiki_ns_re, wiki_ns +import re import urllib class HtmlWikiMarkup (WikiMarkup): @@ -45,13 +46,53 @@ class HtmlWikiMarkup (WikiMarkup): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + urllib.quote(tgt) + + def tmpl_term(self, s): + if len(s) == 2: + return s[1] + text = None + trans = None + for x in s[1:]: + m = re.match('(\w+)=', x) + if m: + if m.group(1) == "tr": + trans = x[m.end(1)+1:] + elif not text: + text = x + if text: + if trans: + text += ' [' + trans + ']' + return text + + def tmpl_proto(self, s): + text = 'Proto-' + s[1] + '' + if len(s) >= 4: + n = 0 + for x in s[2:-2]: + if n > 0: + text += ',' + n += 1 + text += ' ' + x + '' + text += ' (' + s[-2] + ')' + return text + def fmtlink(self, elt, istmpl): arg = self.format(elt[1][0]) + text = None if len(elt[1]) > 1: - text = self.format(elt[1][1]) - else: - text = None + text = '' + self.format(elt[1][1]) + '' + if istmpl: + s = map(self.format, elt[1]) + if re.match("t[+-]$", s[0]): + if len(s) > 2: + text = s[2] + elif s[0] == "term": + text = self.tmpl_term(s) + elif s[0] == "proto": + text = self.tmpl_proto(s) + return text + (qual,sep,tgt) = arg.partition(':') if tgt != '': ns = self.wiki_ns_name(qual) @@ -89,10 +130,10 @@ class HtmlWikiMarkup (WikiMarkup): else arg) def str_link(self, elt): - return self.fmtlink(elt, False) + " " + return self.fmtlink(elt, False) def str_tmpl(self, elt): - return self.fmtlink(elt, True) + " " + return self.fmtlink(elt, True) def str_ref(self, elt): target = elt[1] @@ -104,7 +145,7 @@ class HtmlWikiMarkup (WikiMarkup): def concat(self, eltlist): string = "" for x in eltlist: - string += " " + self.format(x) + string += self.format(x) return string def str_it(self, elt): diff --git a/wikicvt.py b/wikicvt.py old mode 100644 new mode 100755 diff --git a/wikimarkup.py b/wikimarkup.py index 716bc4a..a340628 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -183,6 +183,7 @@ class BaseWikiMarkup: if self.peektkn()[0] == NL: self.dprint(80, "LEAVE parse_fontmod=None") return None + seq.append((TEXT, '\n')) else: self.dprint(80, "LEAVE parse_fontmod=None") return None @@ -243,6 +244,7 @@ class BaseWikiMarkup: elif tok[0] == TEXT: list.append(tok) elif tok[0] == NL: + list.append((TEXT, '\n')) continue else: self.dprint(80, "LEAVE parse_ref=%s", "None") @@ -299,6 +301,7 @@ class BaseWikiMarkup: self.ungetkn() if self.is_block_delim(tok): break + textlist.append('\n') elif tok[0] == NIL: break elif tok[0] == DELIM: @@ -431,7 +434,8 @@ class BaseWikiMarkup: self.ungetkn() return self.parse_para() elif toktype == NL: - return self.parse0() + return (TEXT, '\n') +# return self.parse0() def parse(self): if not self.toklist: -- cgit v1.2.1