diff options
Diffstat (limited to 'wiki2html.py')
-rw-r--r-- | wiki2html.py | 73 |
1 files changed, 63 insertions, 10 deletions
diff --git a/wiki2html.py b/wiki2html.py index 907e3b1..7fa97b7 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -26,12 +26,44 @@ class HtmlWikiMarkup (WikiMarkup): 2. [[official position]]s : final 's' gets after closing </a> tag. Should be before. """ + + # FIXME: Awful kludge + image_kw = [ 'Image', + 'Grafika', + 'Bild', + 'Εικόνα', + 'Dosiero', + 'Slika', + 'Resim' + ] + + ST_INIT = 0 + ST_PARA = 1 + ST_OPEN = 2 + + state = [] + + def opara(self): + if self.state[-1] == self.ST_PARA: + self.state[-1] = self.ST_OPEN + return "<p>" + else: + return "" + def cpara(self): + state = self.state.pop(); + self.state.append(self.ST_INIT) + if state == self.ST_OPEN: + return "</p>" + else: + return "" + + def target(self, t): (qual,sep,tgt) = t.partition(':') r = None if tgt != '': - if qual in ('Image', 'Grafika'): + if qual in self.image_kw: t = self.image_base + urllib.quote(tgt) + '/250px-' + urllib.quote(tgt) elif qual == "Media": t = self.media_base + '/' + tgt @@ -96,11 +128,30 @@ class HtmlWikiMarkup (WikiMarkup): self.fmtok(tok[1], env), self.envel[env[1]]) - def str_seq(self, tok, env): + def str_para(self, tok, env): + s = self.cpara() + self.state.append(self.ST_PARA) + return s + + def fmtok(self, tok, env): + if type(tok) != TupleType: + return "" + if tok[0] in [ self.ENV, self.HDR ]: + s = self.cpara() + elif tok[0] == self.BAR: + s = self.str_para(tok, env) + elif tok[0] in [ self.NIL, self.SEQ ]: s = "" - for t in tok[1:]: - s += self.fmtok(t, env) - return s + else: + s = self.opara() + s1 = WikiMarkup.fmtok(self, tok, env) + if s1: + s += s1 + return s + + def __str__(self): + self.state = [ self.ST_PARA ] + return WikiMarkup.__str__(self) + self.cpara() @@ -109,7 +160,7 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup): A class for translating Wiktionary articles into HTML. This version does not do much, except that it tries to correctly format templates. But "tries" does not mean "does". The heuristics - used here is clearly not enogh to cope with it. + used here is clearly not enough to cope with it. 1. FIXME: The right solution would be to have a database of templates with their @@ -134,12 +185,14 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup): seq_pos = 0 def str_seq(self, tok, env): - s = "" + str = "" self.seq_pos=0 for t in tok[1:]: - s += self.fmtok(t, env) - self.seq_pos += 1 - return s + s = self.fmtok(t, env) + if s: + str += s + self.seq_pos += 1 + return str def str_tmpl(self, tok, env): arg = self.fmtok(tok[1], env) |