summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-08 00:30:59 +0200
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-08 00:30:59 +0200
commit: a8d15328a95bc16c3d9f4ca06c0c69767899f678 (patch)
tree: 9ec0df4e03725853c07cc5016979b74ef9707485
parent: ec326ad225c6cda0051f1c7b3751639f4823d4ac (diff)
download: wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.gz
          wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.bz2
Another bunch of dirty kludges
-rw-r--r--  wiki2html.py   2
-rw-r--r--  wiki2text.py   2
-rw-r--r--  wikimarkup.py  30
3 files changed, 22 insertions, 12 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 62c6da0..81ada65 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -79,13 +79,13 @@ class HtmlWikiMarkup (WikiMarkup):
79 79
80 def fmtlink(self, elt, istmpl): 80 def fmtlink(self, elt, istmpl):
81 arg = self.format(elt[1][0]) 81 arg = self.format(elt[1][0])
82 text = None 82 text = None
83 if len(elt[1]) > 1: 83 if len(elt[1]) > 1:
84 s = map(self.format, elt[1]) 84 s = map(self.format, elt[1])
85 if s[0] == 'disambigR': 85 if s[0] == 'disambigR' or s[0] == 'wikiquote':
86 return "" 86 return ""
87 elif len(s) > 1 and s[1] == 'thumb': 87 elif len(s) > 1 and s[1] == 'thumb':
88 return "" 88 return ""
89 text = '<span class="template">' + s[1] + '</span>' 89 text = '<span class="template">' + s[1] + '</span>'
90 if istmpl: 90 if istmpl:
91 if re.match("t[+-]$", s[0]): 91 if re.match("t[+-]$", s[0]):
diff --git a/wiki2text.py b/wiki2text.py
index 5fcd718..0f8dd5f 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -72,13 +72,13 @@ class TextWikiMarkup (WikiMarkup):
72 text = s[1] 72 text = s[1]
73 else: 73 else:
74 s = None 74 s = None
75 text = None 75 text = None
76 76
77 if s: 77 if s:
78 if s[0] == 'disambigR': 78 if s[0] == 'disambigR' or s[0] == 'wikiquote':
79 return "" 79 return ""
80 if len(s) > 1 and s[1] == 'thumb': 80 if len(s) > 1 and s[1] == 'thumb':
81 return "" 81 return ""
82 (qual,sep,tgt) = arg.partition(':') 82 (qual,sep,tgt) = arg.partition(':')
83 if tgt != '': 83 if tgt != '':
84 ns = self.wiki_ns_name(qual) 84 ns = self.wiki_ns_name(qual)
diff --git a/wikimarkup.py b/wikimarkup.py
index 4eb4ed0..fa60c80 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -508,43 +508,53 @@ class WikiMarkup (BaseWikiMarkup):
508 elif self.text: 508 elif self.text:
509 return self.text.pop(0) + '\n' 509 return self.text.pop(0) + '\n'
510 else: 510 else:
511 return None 511 return None
512 512
513 def is_lang_link(self, elt): 513 def is_lang_link(self, elt):
514 if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1 \ 514 if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1:
515 and elt[1][0][0] == TEXT: 515 if elt[1][0][0] == TEXT:
516 m = re.match('(.+):', elt[1][0][1]) 516 m = re.match('([\w-]+):', elt[1][0][1])
517 if m and m.group(1) in self.langtab: 517 if m: # and m.group(1) in self.langtab:
518 return True 518 return True
519 elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\
520 elt[1][0][1][0][0] == TEXT:
521 m = re.match('([\w-]+):',elt[1][0][1][0][1])
522 if m: # and m.group(1) in self.langtab:
523 return True
519 return False 524 return False
520 525
521 def is_empty_text(self, elt): 526 def is_empty_text(self, elt):
522 if elt[0] == TEXT: 527 if elt[0] == TEXT:
523 if isinstance(elt[1],list): 528 if isinstance(elt[1],list):
524 for s in elt[1]: 529 for s in elt[1]:
525 if re.search('\w', s): 530 if re.search('\w', s):
526 return False 531 return False
527 elif re.search('\w', elt[1]): 532 elif re.search('\w', elt[1]):
528 return False 533 return False
529 else: 534 return True
530 return True
531 return False 535 return False
536
537 def is_empty_para(self, seq):
538 for x in seq:
539 if not (self.is_lang_link(x) or self.is_empty_text(x)):
540 return False
541 return True
532 542
533 def parse(self): 543 def parse(self):
534 BaseWikiMarkup.parse(self) 544 BaseWikiMarkup.parse(self)
535 # Remove everything before the first header 545 # Remove everything before the first header
536 for i in range(0, len(self.tree)): 546 for i in range(0, len(self.tree)):
537 if self.tree[i][0] == HDR: 547 if self.tree[i][0] == HDR:
538 self.tree = self.tree[i:] 548 self.tree = self.tree[i:]
539 break 549 break
540 # Remove trailing links 550 # Remove trailing links
541 for i in range(len(self.tree)-1, 0, -1): 551 for i in range(len(self.tree)-1, 0, -1):
542 if not (self.is_lang_link(self.tree[i]) \ 552 if self.tree[i][0] == PARA \
543 or self.is_empty_text(self.tree[i])): 553 and not self.is_empty_para(self.tree[i][1]):
544 self.tree = self.tree[0:i] 554 self.tree = self.tree[0:i+1]
545 break 555 break
546 556
547 557
548 # ISO 639 558 # ISO 639
549 langtab = { 559 langtab = {
550 "aa": "Afar", # Afar 560 "aa": "Afar", # Afar

Return to:

Send suggestions and report system problems to the System administrator.