From e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff Date: Sat, 29 Nov 2008 15:16:12 +0200 Subject: Improve text output generation * wiki2html.py (mktgt): Add 3rd argument. All callers updated. * wiki2text.py (wiki_ns_name,mktgt): New functions (str_link,str_tmpl): Rewrite * wikicvt.py: New argument --input-text (not used yet) --- wiki2html.py | 8 ++++-- wiki2text.py | 92 ++++++++++++++++++++++++++++++++++-------------------------- wikicvt.py | 4 ++- 3 files changed, 60 insertions(+), 44 deletions(-) diff --git a/wiki2html.py b/wiki2html.py index b87923b..58195be 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -63,8 +63,10 @@ class HtmlWikiMarkup (WikiMarkup): envhdr = [ "ul", "ol", "dl" ] envel = [ "li", "li", "dd" ] - def mktgt(self, tgt): - return self.html_base % { 'lang' : self.lang } + urllib.quote(tgt) + def mktgt(self, tgt, lang = None): + if not lang: + lang = self.lang + return self.html_base % { 'lang' : lang } + urllib.quote(tgt) def link(self, tok, env, istmpl): arg = self.fmtok(tok[1], env) @@ -84,7 +86,7 @@ class HtmlWikiMarkup (WikiMarkup): else: tgt = self.mktgt(tgt) elif not istmpl and qual in self.langtab: - tgt = self.html_base % { 'lang' : qual } + urllib.quote(tgt) + tgt = self.mktgt(tgt, qual) if not text or text == '': text = self.langtab[qual] else: diff --git a/wiki2text.py b/wiki2text.py index 51a7853..564809f 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -17,6 +17,7 @@ from wikimarkup import * from types import TupleType +from wikins import wiki_ns_re, wiki_ns import urllib class TextWikiMarkup (WikiMarkup): @@ -42,23 +43,6 @@ class TextWikiMarkup (WikiMarkup): self.references = keywords['refs'] if 'markup' in keywords: self.markup = keywords['markup'] - - def target(self, t): - (qual,sep,tgt) = t.partition(':') - r = None - if tgt != '': - if qual == "Image": - t = self.image_base + '/' + urllib.quote(tgt) - elif qual == "Media": - t = self.media_base + '/' + tgt - elif qual in self.langtab: - t = self.html_base % { 'lang' : qual } + '/' + urllib.quote(tgt) - r = self.langtab[qual] - else: - t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t) - else: - t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t) - return t, r def xref(self, text, target): if text: @@ -66,30 +50,55 @@ class TextWikiMarkup (WikiMarkup): else: return "see " + target - def str_link(self, tok, env): + def wiki_ns_name(self, str): + if str in wiki_ns[self.lang]: + return wiki_ns[self.lang][str] + elif str in wiki_ns_re[self.lang]: + for elt in wiki_ns_re[self.lang][str]: + if str.beginswith(elt[0]) and str.endswith(elt[1]): + return elt[2] + return None + def mktgt(self, tgt, lang = None): + if not lang: + lang = self.lang + return self.html_base % { 'lang' : lang } + urllib.quote(tgt) + + def link(self, tok, env, istmpl): arg = self.fmtok(tok[1], env) text = self.fmtok(tok[2], env) - if self.references: - (target, r) = self.target(arg) - return self.xref(text if text else r, target) + (qual,sep,tgt) = arg.partition(':') + if tgt != '': + ns = self.wiki_ns_name(qual) + if ns: + if ns == 'NS_IMAGE': + text = "[%s: %s]" % (qual, text if text else arg) + tgt = self.image_base + '/' + \ + urllib.quote(tgt) + \ + '/250px-' + urllib.quote(tgt) + elif ns == 'NS_MEDIA': + text = "[%s]" % (qual) + else: + tgt = self.mktgt(tgt) + elif not istmpl and qual in self.langtab: + text = self.langtab[qual] + ": " + tgt + tgt = self.mktgt(tgt, qual) + else: + tgt = self.mktgt(tgt) else: - (qual,sep,tgt) = arg.partition(':') - if sep != '': - return "" - elif text: - return text + tgt = self.mktgt(arg) + if self.references: + return "%s (see %s) " % (text, tgt) + elif not text or text == '': return arg + else: + return text + + def str_link(self, tok, env): + return self.link(tok, env, False) def str_tmpl(self, tok, env): - arg = self.fmtok(tok[1], env) - (target, r) = self.target(arg) - text = self.fmtok(tok[2], env) - if not text and r: - text = r - if self.references: - return self.xref(text, target) - return text - + return self.link(tok, env, True) + def str_ref(self, tok, env): return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env)) @@ -165,14 +174,17 @@ class TextWiktionaryMarkup (TextWikiMarkup): s = "" self.seq_pos=0 for t in tok[1:]: - s += self.fmtok(t, env) + x = self.fmtok(t, env) + if x: + s += x self.seq_pos += 1 return s def str_tmpl(self, tok, env): arg = self.fmtok(tok[1], env) - if self.seq_pos > 0: - return arg - else: - return "\n" + arg + ":\n" + if arg and arg != '': + if self.seq_pos > 0: + return arg + else: + return "\n" + arg + ":\n" diff --git a/wikicvt.py b/wikicvt.py index d45a61f..758bcb1 100644 --- a/wikicvt.py +++ b/wikicvt.py @@ -35,7 +35,7 @@ def main(): try: opts, args = getopt.getopt(sys.argv[1:], "hl:o:tv", ["help", "lang=", "option=", - "text", "verbose" ]) + "text", "input-text", "verbose" ]) except getopt.GetoptError: usage(1) @@ -52,6 +52,8 @@ def main(): (kw,sep,val) = a.partition('=') if val != '': kwdict[kw] = eval(val) + if o == "--input-text": + input_text = True if len(args) == 1: if args[0] == '-': -- cgit v1.2.1