diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-08 00:30:59 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-08 00:30:59 +0200 |
commit | a8d15328a95bc16c3d9f4ca06c0c69767899f678 (patch) | |
tree | 9ec0df4e03725853c07cc5016979b74ef9707485 | |
parent | ec326ad225c6cda0051f1c7b3751639f4823d4ac (diff) | |
download | wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.gz wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.bz2 |
Another bunch of dirty kludges
-rw-r--r-- | wiki2html.py | 2 | ||||
-rw-r--r-- | wiki2text.py | 2 | ||||
-rw-r--r-- | wikimarkup.py | 30 |
3 files changed, 22 insertions, 12 deletions
diff --git a/wiki2html.py b/wiki2html.py index 62c6da0..81ada65 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -79,13 +79,13 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): arg = self.format(elt[1][0]) text = None if len(elt[1]) > 1: s = map(self.format, elt[1]) - if s[0] == 'disambigR': + if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" elif len(s) > 1 and s[1] == 'thumb': return "" text = '<span class="template">' + s[1] + '</span>' if istmpl: if re.match("t[+-]$", s[0]): diff --git a/wiki2text.py b/wiki2text.py index 5fcd718..0f8dd5f 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -72,13 +72,13 @@ class TextWikiMarkup (WikiMarkup): text = s[1] else: s = None text = None if s: - if s[0] == 'disambigR': + if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" if len(s) > 1 and s[1] == 'thumb': return "" (qual,sep,tgt) = arg.partition(':') if tgt != '': ns = self.wiki_ns_name(qual) diff --git a/wikimarkup.py b/wikimarkup.py index 4eb4ed0..fa60c80 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -508,43 +508,53 @@ class WikiMarkup (BaseWikiMarkup): elif self.text: return self.text.pop(0) + '\n' else: return None def is_lang_link(self, elt): - if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1 \ - and elt[1][0][0] == TEXT: - m = re.match('(.+):', elt[1][0][1]) - if m and m.group(1) in self.langtab: - return True + if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1: + if elt[1][0][0] == TEXT: + m = re.match('([\w-]+):', elt[1][0][1]) + if m: # and m.group(1) in self.langtab: + return True + elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\ + elt[1][0][1][0][0] == TEXT: + m = re.match('([\w-]+):',elt[1][0][1][0][1]) + if m: # and m.group(1) in self.langtab: + return True return False def is_empty_text(self, elt): if elt[0] == TEXT: if isinstance(elt[1],list): for s in elt[1]: if re.search('\w', s): return False elif re.search('\w', elt[1]): return False - else: - return True + return True return 
False + + def is_empty_para(self, seq): + for x in seq: + if not (self.is_lang_link(x) or self.is_empty_text(x)): + return False + return True def parse(self): BaseWikiMarkup.parse(self) # Remove everything before the first header for i in range(0, len(self.tree)): if self.tree[i][0] == HDR: self.tree = self.tree[i:] break # Remove trailing links for i in range(len(self.tree)-1, 0, -1): - if not (self.is_lang_link(self.tree[i]) \ - or self.is_empty_text(self.tree[i])): - self.tree = self.tree[0:i] + if self.tree[i][0] == PARA \ + and not self.is_empty_para(self.tree[i][1]): + self.tree = self.tree[0:i+1] break # ISO 639 langtab = { "aa": "Afar", # Afar |