diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2008-11-29 15:16:12 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2008-11-29 15:16:12 +0200 |
commit | e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b (patch) | |
tree | 441aa7476fd40685be4d8287bfa90d8a169a4b6c | |
parent | 88befee77fdaacc2e0dc92beffd1c6a68f86a8b7 (diff) | |
download | wikitrans-e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b.tar.gz wikitrans-e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b.tar.bz2 |
Improve text output generation
* wiki2html.py (mktgt): Add 3rd argument. All callers updated.
* wiki2text.py (wiki_ns_name,mktgt): New functions.
(str_link,str_tmpl): Rewrite.
* wikicvt.py: New option --input-text (not used yet).
-rw-r--r-- | wiki2html.py | 8 | ||||
-rw-r--r-- | wiki2text.py | 92 | ||||
-rw-r--r-- | wikicvt.py | 4 |
3 files changed, 60 insertions, 44 deletions
diff --git a/wiki2html.py b/wiki2html.py index b87923b..58195be 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -63,8 +63,10 @@ class HtmlWikiMarkup (WikiMarkup): | |||
63 | envhdr = [ "ul", "ol", "dl" ] | 63 | envhdr = [ "ul", "ol", "dl" ] |
64 | envel = [ "li", "li", "dd" ] | 64 | envel = [ "li", "li", "dd" ] |
65 | 65 | ||
66 | def mktgt(self, tgt): | 66 | def mktgt(self, tgt, lang = None): |
67 | return self.html_base % { 'lang' : self.lang } + urllib.quote(tgt) | 67 | if not lang: |
68 | lang = self.lang | ||
69 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) | ||
68 | 70 | ||
69 | def link(self, tok, env, istmpl): | 71 | def link(self, tok, env, istmpl): |
70 | arg = self.fmtok(tok[1], env) | 72 | arg = self.fmtok(tok[1], env) |
@@ -84,7 +86,7 @@ class HtmlWikiMarkup (WikiMarkup): | |||
84 | else: | 86 | else: |
85 | tgt = self.mktgt(tgt) | 87 | tgt = self.mktgt(tgt) |
86 | elif not istmpl and qual in self.langtab: | 88 | elif not istmpl and qual in self.langtab: |
87 | tgt = self.html_base % { 'lang' : qual } + urllib.quote(tgt) | 89 | tgt = self.mktgt(tgt, qual) |
88 | if not text or text == '': | 90 | if not text or text == '': |
89 | text = self.langtab[qual] | 91 | text = self.langtab[qual] |
90 | else: | 92 | else: |
diff --git a/wiki2text.py b/wiki2text.py index 51a7853..564809f 100644 --- a/wiki2text.py +++ b/wiki2text.py | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | from wikimarkup import * | 18 | from wikimarkup import * |
19 | from types import TupleType | 19 | from types import TupleType |
20 | from wikins import wiki_ns_re, wiki_ns | ||
20 | import urllib | 21 | import urllib |
21 | 22 | ||
22 | class TextWikiMarkup (WikiMarkup): | 23 | class TextWikiMarkup (WikiMarkup): |
@@ -42,23 +43,6 @@ class TextWikiMarkup (WikiMarkup): | |||
42 | self.references = keywords['refs'] | 43 | self.references = keywords['refs'] |
43 | if 'markup' in keywords: | 44 | if 'markup' in keywords: |
44 | self.markup = keywords['markup'] | 45 | self.markup = keywords['markup'] |
45 | |||
46 | def target(self, t): | ||
47 | (qual,sep,tgt) = t.partition(':') | ||
48 | r = None | ||
49 | if tgt != '': | ||
50 | if qual == "Image": | ||
51 | t = self.image_base + '/' + urllib.quote(tgt) | ||
52 | elif qual == "Media": | ||
53 | t = self.media_base + '/' + tgt | ||
54 | elif qual in self.langtab: | ||
55 | t = self.html_base % { 'lang' : qual } + '/' + urllib.quote(tgt) | ||
56 | r = self.langtab[qual] | ||
57 | else: | ||
58 | t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t) | ||
59 | else: | ||
60 | t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t) | ||
61 | return t, r | ||
62 | 46 | ||
63 | def xref(self, text, target): | 47 | def xref(self, text, target): |
64 | if text: | 48 | if text: |
@@ -66,30 +50,55 @@ class TextWikiMarkup (WikiMarkup): | |||
66 | else: | 50 | else: |
67 | return "see " + target | 51 | return "see " + target |
68 | 52 | ||
69 | def str_link(self, tok, env): | 53 | def wiki_ns_name(self, str): |
54 | if str in wiki_ns[self.lang]: | ||
55 | return wiki_ns[self.lang][str] | ||
56 | elif str in wiki_ns_re[self.lang]: | ||
57 | for elt in wiki_ns_re[self.lang][str]: | ||
58 | if str.beginswith(elt[0]) and str.endswith(elt[1]): | ||
59 | return elt[2] | ||
60 | return None | ||
61 | def mktgt(self, tgt, lang = None): | ||
62 | if not lang: | ||
63 | lang = self.lang | ||
64 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) | ||
65 | |||
66 | def link(self, tok, env, istmpl): | ||
70 | arg = self.fmtok(tok[1], env) | 67 | arg = self.fmtok(tok[1], env) |
71 | text = self.fmtok(tok[2], env) | 68 | text = self.fmtok(tok[2], env) |
72 | if self.references: | 69 | (qual,sep,tgt) = arg.partition(':') |
73 | (target, r) = self.target(arg) | 70 | if tgt != '': |
74 | return self.xref(text if text else r, target) | 71 | ns = self.wiki_ns_name(qual) |
72 | if ns: | ||
73 | if ns == 'NS_IMAGE': | ||
74 | text = "[%s: %s]" % (qual, text if text else arg) | ||
75 | tgt = self.image_base + '/' + \ | ||
76 | urllib.quote(tgt) + \ | ||
77 | '/250px-' + urllib.quote(tgt) | ||
78 | elif ns == 'NS_MEDIA': | ||
79 | text = "[%s]" % (qual) | ||
80 | else: | ||
81 | tgt = self.mktgt(tgt) | ||
82 | elif not istmpl and qual in self.langtab: | ||
83 | text = self.langtab[qual] + ": " + tgt | ||
84 | tgt = self.mktgt(tgt, qual) | ||
85 | else: | ||
86 | tgt = self.mktgt(tgt) | ||
75 | else: | 87 | else: |
76 | (qual,sep,tgt) = arg.partition(':') | 88 | tgt = self.mktgt(arg) |
77 | if sep != '': | 89 | if self.references: |
78 | return "" | 90 | return "%s (see %s) " % (text, tgt) |
79 | elif text: | 91 | elif not text or text == '': |
80 | return text | ||
81 | return arg | 92 | return arg |
93 | else: | ||
94 | return text | ||
95 | |||
96 | def str_link(self, tok, env): | ||
97 | return self.link(tok, env, False) | ||
82 | 98 | ||
83 | def str_tmpl(self, tok, env): | 99 | def str_tmpl(self, tok, env): |
84 | arg = self.fmtok(tok[1], env) | 100 | return self.link(tok, env, True) |
85 | (target, r) = self.target(arg) | 101 | |
86 | text = self.fmtok(tok[2], env) | ||
87 | if not text and r: | ||
88 | text = r | ||
89 | if self.references: | ||
90 | return self.xref(text, target) | ||
91 | return text | ||
92 | |||
93 | def str_ref(self, tok, env): | 102 | def str_ref(self, tok, env): |
94 | return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env)) | 103 | return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env)) |
95 | 104 | ||
@@ -165,14 +174,17 @@ class TextWiktionaryMarkup (TextWikiMarkup): | |||
165 | s = "" | 174 | s = "" |
166 | self.seq_pos=0 | 175 | self.seq_pos=0 |
167 | for t in tok[1:]: | 176 | for t in tok[1:]: |
168 | s += self.fmtok(t, env) | 177 | x = self.fmtok(t, env) |
178 | if x: | ||
179 | s += x | ||
169 | self.seq_pos += 1 | 180 | self.seq_pos += 1 |
170 | return s | 181 | return s |
171 | 182 | ||
172 | def str_tmpl(self, tok, env): | 183 | def str_tmpl(self, tok, env): |
173 | arg = self.fmtok(tok[1], env) | 184 | arg = self.fmtok(tok[1], env) |
174 | if self.seq_pos > 0: | 185 | if arg and arg != '': |
175 | return arg | 186 | if self.seq_pos > 0: |
176 | else: | 187 | return arg |
177 | return "\n" + arg + ":\n" | 188 | else: |
189 | return "\n" + arg + ":\n" | ||
178 | 190 | ||
diff --git a/wikicvt.py b/wikicvt.py --- a/wikicvt.py +++ b/wikicvt.py | |||
@@ -35,7 +35,7 @@ def main(): | |||
35 | try: | 35 | try: |
36 | opts, args = getopt.getopt(sys.argv[1:], "hl:o:tv", | 36 | opts, args = getopt.getopt(sys.argv[1:], "hl:o:tv", |
37 | ["help", "lang=", "option=", | 37 | ["help", "lang=", "option=", |
38 | "text", "verbose" ]) | 38 | "text", "input-text", "verbose" ]) |
39 | except getopt.GetoptError: | 39 | except getopt.GetoptError: |
40 | usage(1) | 40 | usage(1) |
41 | 41 | ||
@@ -52,6 +52,8 @@ def main(): | |||
52 | (kw,sep,val) = a.partition('=') | 52 | (kw,sep,val) = a.partition('=') |
53 | if val != '': | 53 | if val != '': |
54 | kwdict[kw] = eval(val) | 54 | kwdict[kw] = eval(val) |
55 | if o == "--input-text": | ||
56 | input_text = True | ||
55 | 57 | ||
56 | if len(args) == 1: | 58 | if len(args) == 1: |
57 | if args[0] == '-': | 59 | if args[0] == '-': |