summary | refs | log | tree | commit | diff
path: root/wiki2text.py
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-29 15:16:12 +0200
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-29 15:16:12 +0200
commit: e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b (patch)
tree: 441aa7476fd40685be4d8287bfa90d8a169a4b6c /wiki2text.py
parent: 88befee77fdaacc2e0dc92beffd1c6a68f86a8b7 (diff)
download: wikitrans-e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b.tar.gz
          wikitrans-e3710f6c32aa9a9e6b737c4ebc64af0df2ea872b.tar.bz2
Improve text output generation
* wiki2html.py (mktgt): Add 3rd argument. All callers updated.
* wiki2text.py (wiki_ns_name, mktgt): New functions.
(str_link, str_tmpl): Rewrite.
* wikicvt.py: New argument --input-text (not used yet).
Diffstat (limited to 'wiki2text.py')
-rw-r--r-- wiki2text.py 92
1 files changed, 52 insertions, 40 deletions
diff --git a/wiki2text.py b/wiki2text.py
index 51a7853..564809f 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -14,12 +14,13 @@
14# 14#
15# You should have received a copy of the GNU General Public License 15# You should have received a copy of the GNU General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>. 16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17 17
18from wikimarkup import * 18from wikimarkup import *
19from types import TupleType 19from types import TupleType
20from wikins import wiki_ns_re, wiki_ns
20import urllib 21import urllib
21 22
22class TextWikiMarkup (WikiMarkup): 23class TextWikiMarkup (WikiMarkup):
23 """ 24 """
24 A (general-purpose Wiki->Text translator class. 25 A (general-purpose Wiki->Text translator class.
25 """ 26 """
@@ -39,60 +40,68 @@ class TextWikiMarkup (WikiMarkup):
39 if 'width' in keywords: 40 if 'width' in keywords:
40 self.width = keywords['width'] 41 self.width = keywords['width']
41 if 'refs' in keywords: 42 if 'refs' in keywords:
42 self.references = keywords['refs'] 43 self.references = keywords['refs']
43 if 'markup' in keywords: 44 if 'markup' in keywords:
44 self.markup = keywords['markup'] 45 self.markup = keywords['markup']
45
46 def target(self, t):
47 (qual,sep,tgt) = t.partition(':')
48 r = None
49 if tgt != '':
50 if qual == "Image":
51 t = self.image_base + '/' + urllib.quote(tgt)
52 elif qual == "Media":
53 t = self.media_base + '/' + tgt
54 elif qual in self.langtab:
55 t = self.html_base % { 'lang' : qual } + '/' + urllib.quote(tgt)
56 r = self.langtab[qual]
57 else:
58 t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t)
59 else:
60 t = self.html_base % { 'lang' : self.lang } + '/' + urllib.quote(t)
61 return t, r
62 46
63 def xref(self, text, target): 47 def xref(self, text, target):
64 if text: 48 if text:
65 return "%s (see %s) " % (text, target) 49 return "%s (see %s) " % (text, target)
66 else: 50 else:
67 return "see " + target 51 return "see " + target
68 52
69 def str_link(self, tok, env): 53 def wiki_ns_name(self, str):
54 if str in wiki_ns[self.lang]:
55 return wiki_ns[self.lang][str]
56 elif str in wiki_ns_re[self.lang]:
57 for elt in wiki_ns_re[self.lang][str]:
58 if str.beginswith(elt[0]) and str.endswith(elt[1]):
59 return elt[2]
60 return None
61 def mktgt(self, tgt, lang = None):
62 if not lang:
63 lang = self.lang
64 return self.html_base % { 'lang' : lang } + urllib.quote(tgt)
65
66 def link(self, tok, env, istmpl):
70 arg = self.fmtok(tok[1], env) 67 arg = self.fmtok(tok[1], env)
71 text = self.fmtok(tok[2], env) 68 text = self.fmtok(tok[2], env)
72 if self.references: 69 (qual,sep,tgt) = arg.partition(':')
73 (target, r) = self.target(arg) 70 if tgt != '':
74 return self.xref(text if text else r, target) 71 ns = self.wiki_ns_name(qual)
72 if ns:
73 if ns == 'NS_IMAGE':
74 text = "[%s: %s]" % (qual, text if text else arg)
75 tgt = self.image_base + '/' + \
76 urllib.quote(tgt) + \
77 '/250px-' + urllib.quote(tgt)
78 elif ns == 'NS_MEDIA':
79 text = "[%s]" % (qual)
80 else:
81 tgt = self.mktgt(tgt)
82 elif not istmpl and qual in self.langtab:
83 text = self.langtab[qual] + ": " + tgt
84 tgt = self.mktgt(tgt, qual)
85 else:
86 tgt = self.mktgt(tgt)
75 else: 87 else:
76 (qual,sep,tgt) = arg.partition(':') 88 tgt = self.mktgt(arg)
77 if sep != '': 89 if self.references:
78 return "" 90 return "%s (see %s) " % (text, tgt)
79 elif text: 91 elif not text or text == '':
80 return text
81 return arg 92 return arg
93 else:
94 return text
95
96 def str_link(self, tok, env):
97 return self.link(tok, env, False)
82 98
83 def str_tmpl(self, tok, env): 99 def str_tmpl(self, tok, env):
84 arg = self.fmtok(tok[1], env) 100 return self.link(tok, env, True)
85 (target, r) = self.target(arg) 101
86 text = self.fmtok(tok[2], env)
87 if not text and r:
88 text = r
89 if self.references:
90 return self.xref(text, target)
91 return text
92
93 def str_ref(self, tok, env): 102 def str_ref(self, tok, env):
94 return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env)) 103 return self.xref(self.fmtok(tok[2], env), self.fmtok(tok[1], env))
95 104
96 def str_it(self, tok, env): 105 def str_it(self, tok, env):
97 if self.markup: 106 if self.markup:
98 return "_" + self.fmtok(tok[1], env) + "_" 107 return "_" + self.fmtok(tok[1], env) + "_"
@@ -162,17 +171,20 @@ class TextWiktionaryMarkup (TextWikiMarkup):
162 seq_pos = 0 171 seq_pos = 0
163 172
164 def str_seq(self, tok, env): 173 def str_seq(self, tok, env):
165 s = "" 174 s = ""
166 self.seq_pos=0 175 self.seq_pos=0
167 for t in tok[1:]: 176 for t in tok[1:]:
168 s += self.fmtok(t, env) 177 x = self.fmtok(t, env)
178 if x:
179 s += x
169 self.seq_pos += 1 180 self.seq_pos += 1
170 return s 181 return s
171 182
172 def str_tmpl(self, tok, env): 183 def str_tmpl(self, tok, env):
173 arg = self.fmtok(tok[1], env) 184 arg = self.fmtok(tok[1], env)
174 if self.seq_pos > 0: 185 if arg and arg != '':
175 return arg 186 if self.seq_pos > 0:
176 else: 187 return arg
177 return "\n" + arg + ":\n" 188 else:
189 return "\n" + arg + ":\n"
178 190

Return to:

Send suggestions and report system problems to the System administrator.