diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-07 23:18:22 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-07 23:18:22 +0200 |
commit | 9c42879d35f0cfa36d06d1bdfe07c6284bcd71ef (patch) | |
tree | b1d6b37de99ee24c174a3b18a903ccdb164f5457 | |
parent | 6b699b060de765112fa03435b4afbd258262d1fb (diff) | |
download | wit-9c42879d35f0cfa36d06d1bdfe07c6284bcd71ef.tar.gz wit-9c42879d35f0cfa36d06d1bdfe07c6284bcd71ef.tar.bz2 |
Remove disambiguations and graphics. Remove everything before the first header and any trailing language links
-rw-r--r-- | wiki2html.py | 24 | ||||
-rw-r--r-- | wikimarkup.py | 36 |
2 files changed, 42 insertions, 18 deletions
diff --git a/wiki2html.py b/wiki2html.py index 63e1ec3..62c6da0 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -81,9 +81,13 @@ class HtmlWikiMarkup (WikiMarkup): arg = self.format(elt[1][0]) text = None if len(elt[1]) > 1: - text = '<span class="template">' + self.format(elt[1][1]) + '</span>' + s = map(self.format, elt[1]) + if s[0] == 'disambigR': + return "" + elif len(s) > 1 and s[1] == 'thumb': + return "" + text = '<span class="template">' + s[1] + '</span>' if istmpl: - s = map(self.format, elt[1]) if re.match("t[+-]$", s[0]): if len(s) > 2: text = s[2] @@ -98,21 +102,7 @@ class HtmlWikiMarkup (WikiMarkup): ns = self.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': - # FIXME - type = self.format(elt[1][2]) - width = self.format(elt[1][3]) - caption = self.format(elt[1][4]) - intwidth = int (width[:-2]) - return '<div class="thumb tright"><div class="thumbinner" style="width:%dpx;"><a href="%s" class="image" title="%s"><img alt="" src="%s" class="thumbimage" border="0"></a><div class="thumbcaption"><div class="magnify"><a href="%s" class="internal"><img src="/static/magnify-clip.png" alt="" width="15" height="11"></a></div>%s</div></div>' % \ - ((intwidth + 2), - self.html_base % {'lang': self.lang} + arg, - '', - self.image_base + '/' + \ - urllib.quote (tgt) + \ - '/' + width + '-' + urllib.quote (tgt), - self.html_base % {'lang': self.lang} + arg, - caption - ) + return '' elif ns == 'NS_MEDIA': tgt = self.media_base + '/' + tgt else: diff --git a/wikimarkup.py b/wikimarkup.py index a340628..160760a 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -510,7 +510,41 @@ class WikiMarkup (BaseWikiMarkup): else: return None - + def is_lang_link(self, elt): + if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1 \ + and elt[1][0][0] == TEXT: + m = re.match('(.+):', elt[1][0][1]) + if m and m.group(1) in self.langtab: + return True + return False + + def is_empty_text(self, elt): + if elt[0] == TEXT: + if isinstance(elt[1],list): + for s in elt[1]: + 
if re.search('\w', s): + return False + elif re.search('\w', s): + return False + else: + return True + return False + + def parse(self): + BaseWikiMarkup.parse(self) + # Remove everything before the first header + for i in range(0, len(self.tree)): + if self.tree[i][0] == HDR: + self.tree = self.tree[i:] + break + # Remove trailing links + for i in range(len(self.tree)-1, 0, -1): + if not (self.is_lang_link(self.tree[i]) \ + or self.is_empty_text(self.tree[i])): + self.tree = self.tree[0:i] + break + + # ISO 639 langtab = { "aa": "Afar", # Afar |