Another bunch of dirty kludges

author: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-08 00:30:59 +0200
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-08 00:30:59 +0200
commit: a8d15328a95bc16c3d9f4ca06c0c69767899f678 (patch)
tree: 9ec0df4e03725853c07cc5016979b74ef9707485
parent: ec326ad225c6cda0051f1c7b3751639f4823d4ac (diff)
download: wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.gz
wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.bz2
3 files changed, 22 insertions, 12 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 62c6da0..81ada65 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -79,13 +79,13 @@ class HtmlWikiMarkup (WikiMarkup):
     
     def fmtlink(self, elt, istmpl):
         arg = self.format(elt[1][0])
         text = None
         if len(elt[1]) > 1:
             s = map(self.format, elt[1])
-            if s[0] == 'disambigR':
+            if s[0] == 'disambigR' or s[0] == 'wikiquote':
                 return ""
             elif len(s) > 1 and s[1] == 'thumb':
                 return ""
             text = '<span class="template">' + s[1] + '</span>'
             if istmpl:
                 if re.match("t[+-]$", s[0]):
diff --git a/wiki2text.py b/wiki2text.py
index 5fcd718..0f8dd5f 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -72,13 +72,13 @@ class TextWikiMarkup (WikiMarkup):
             text = s[1]
         else:
             s = None
             text = None
 
         if s:
-            if s[0] == 'disambigR':
+            if s[0] == 'disambigR' or s[0] == 'wikiquote':
                 return ""
             if len(s) > 1 and s[1] == 'thumb':
                 return ""
         (qual,sep,tgt) = arg.partition(':')
         if tgt != '':
             ns = self.wiki_ns_name(qual)
diff --git a/wikimarkup.py b/wikimarkup.py
index 4eb4ed0..fa60c80 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -508,43 +508,53 @@ class WikiMarkup (BaseWikiMarkup):
         elif self.text:
             return self.text.pop(0) + '\n'
         else:
             return None
 
     def is_lang_link(self, elt):
-        if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1 \
-                and elt[1][0][0] == TEXT:
-            m = re.match('(.+):', elt[1][0][1])
-            if m and m.group(1) in self.langtab:
-                return True
+        if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1:
+            if elt[1][0][0] == TEXT:
+                m = re.match('([\w-]+):', elt[1][0][1])
+                if m: # and m.group(1) in self.langtab:
+                    return True
+            elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\
+                    elt[1][0][1][0][0] == TEXT:
+                m = re.match('([\w-]+):',elt[1][0][1][0][1])
+                if m: # and m.group(1) in self.langtab:
+                    return True
         return False
     
     def is_empty_text(self, elt):
         if elt[0] == TEXT:
             if isinstance(elt[1],list):
                 for s in elt[1]:
                     if re.search('\w', s):
                         return False
             elif re.search('\w', elt[1]):
                 return False
-            else:
-                return True
+            return True
         return False
+
+    def is_empty_para(self, seq):
+        for x in seq:             
+            if not (self.is_lang_link(x) or self.is_empty_text(x)):
+                return False
+        return True
     
     def parse(self):
         BaseWikiMarkup.parse(self)
         # Remove everything before the first header
         for i in range(0, len(self.tree)):
             if self.tree[i][0] == HDR:
                 self.tree = self.tree[i:]
                 break
         # Remove trailing links
         for i in range(len(self.tree)-1, 0, -1):
-            if not (self.is_lang_link(self.tree[i]) \
-                        or self.is_empty_text(self.tree[i])):
-                self.tree = self.tree[0:i]
+            if self.tree[i][0] == PARA \
+                    and not self.is_empty_para(self.tree[i][1]):
+                self.tree = self.tree[0:i+1]
                 break
                     
         
     # ISO 639 
     langtab = {
         "aa": "Afar",            # Afar
author	Sergey Poznyakoff <gray@gnu.org.ua>	2009-03-08 00:30:59 +0200
committer	Sergey Poznyakoff <gray@gnu.org.ua>	2009-03-08 00:30:59 +0200
commit	a8d15328a95bc16c3d9f4ca06c0c69767899f678 (patch)
tree	9ec0df4e03725853c07cc5016979b74ef9707485
parent	ec326ad225c6cda0051f1c7b3751639f4823d4ac (diff)
download	wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.gz wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.bz2