diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-08 00:30:59 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-08 00:30:59 +0200 |
commit | a8d15328a95bc16c3d9f4ca06c0c69767899f678 (patch) | |
tree | 9ec0df4e03725853c07cc5016979b74ef9707485 | |
parent | ec326ad225c6cda0051f1c7b3751639f4823d4ac (diff) | |
download | wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.gz wikitrans-a8d15328a95bc16c3d9f4ca06c0c69767899f678.tar.bz2 |
Another bunch of dirty kludges
-rw-r--r-- | wiki2html.py | 2 | ||||
-rw-r--r-- | wiki2text.py | 2 | ||||
-rw-r--r-- | wikimarkup.py | 30 |
3 files changed, 22 insertions, 12 deletions
diff --git a/wiki2html.py b/wiki2html.py index 62c6da0..81ada65 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -79,13 +79,13 @@ class HtmlWikiMarkup (WikiMarkup): | |||
79 | 79 | ||
80 | def fmtlink(self, elt, istmpl): | 80 | def fmtlink(self, elt, istmpl): |
81 | arg = self.format(elt[1][0]) | 81 | arg = self.format(elt[1][0]) |
82 | text = None | 82 | text = None |
83 | if len(elt[1]) > 1: | 83 | if len(elt[1]) > 1: |
84 | s = map(self.format, elt[1]) | 84 | s = map(self.format, elt[1]) |
85 | if s[0] == 'disambigR': | 85 | if s[0] == 'disambigR' or s[0] == 'wikiquote': |
86 | return "" | 86 | return "" |
87 | elif len(s) > 1 and s[1] == 'thumb': | 87 | elif len(s) > 1 and s[1] == 'thumb': |
88 | return "" | 88 | return "" |
89 | text = '<span class="template">' + s[1] + '</span>' | 89 | text = '<span class="template">' + s[1] + '</span>' |
90 | if istmpl: | 90 | if istmpl: |
91 | if re.match("t[+-]$", s[0]): | 91 | if re.match("t[+-]$", s[0]): |
diff --git a/wiki2text.py b/wiki2text.py index 5fcd718..0f8dd5f 100644 --- a/wiki2text.py +++ b/wiki2text.py | |||
@@ -72,13 +72,13 @@ class TextWikiMarkup (WikiMarkup): | |||
72 | text = s[1] | 72 | text = s[1] |
73 | else: | 73 | else: |
74 | s = None | 74 | s = None |
75 | text = None | 75 | text = None |
76 | 76 | ||
77 | if s: | 77 | if s: |
78 | if s[0] == 'disambigR': | 78 | if s[0] == 'disambigR' or s[0] == 'wikiquote': |
79 | return "" | 79 | return "" |
80 | if len(s) > 1 and s[1] == 'thumb': | 80 | if len(s) > 1 and s[1] == 'thumb': |
81 | return "" | 81 | return "" |
82 | (qual,sep,tgt) = arg.partition(':') | 82 | (qual,sep,tgt) = arg.partition(':') |
83 | if tgt != '': | 83 | if tgt != '': |
84 | ns = self.wiki_ns_name(qual) | 84 | ns = self.wiki_ns_name(qual) |
diff --git a/wikimarkup.py b/wikimarkup.py index 4eb4ed0..fa60c80 100644 --- a/wikimarkup.py +++ b/wikimarkup.py | |||
@@ -508,43 +508,53 @@ class WikiMarkup (BaseWikiMarkup): | |||
508 | elif self.text: | 508 | elif self.text: |
509 | return self.text.pop(0) + '\n' | 509 | return self.text.pop(0) + '\n' |
510 | else: | 510 | else: |
511 | return None | 511 | return None |
512 | 512 | ||
513 | def is_lang_link(self, elt): | 513 | def is_lang_link(self, elt): |
514 | if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1 \ | 514 | if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1: |
515 | and elt[1][0][0] == TEXT: | 515 | if elt[1][0][0] == TEXT: |
516 | m = re.match('(.+):', elt[1][0][1]) | 516 | m = re.match('([\w-]+):', elt[1][0][1]) |
517 | if m and m.group(1) in self.langtab: | 517 | if m: # and m.group(1) in self.langtab: |
518 | return True | 518 | return True |
519 | elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\ | ||
520 | elt[1][0][1][0][0] == TEXT: | ||
521 | m = re.match('([\w-]+):',elt[1][0][1][0][1]) | ||
522 | if m: # and m.group(1) in self.langtab: | ||
523 | return True | ||
519 | return False | 524 | return False |
520 | 525 | ||
521 | def is_empty_text(self, elt): | 526 | def is_empty_text(self, elt): |
522 | if elt[0] == TEXT: | 527 | if elt[0] == TEXT: |
523 | if isinstance(elt[1],list): | 528 | if isinstance(elt[1],list): |
524 | for s in elt[1]: | 529 | for s in elt[1]: |
525 | if re.search('\w', s): | 530 | if re.search('\w', s): |
526 | return False | 531 | return False |
527 | elif re.search('\w', elt[1]): | 532 | elif re.search('\w', elt[1]): |
528 | return False | 533 | return False |
529 | else: | 534 | return True |
530 | return True | ||
531 | return False | 535 | return False |
536 | |||
537 | def is_empty_para(self, seq): | ||
538 | for x in seq: | ||
539 | if not (self.is_lang_link(x) or self.is_empty_text(x)): | ||
540 | return False | ||
541 | return True | ||
532 | 542 | ||
533 | def parse(self): | 543 | def parse(self): |
534 | BaseWikiMarkup.parse(self) | 544 | BaseWikiMarkup.parse(self) |
535 | # Remove everything before the first header | 545 | # Remove everything before the first header |
536 | for i in range(0, len(self.tree)): | 546 | for i in range(0, len(self.tree)): |
537 | if self.tree[i][0] == HDR: | 547 | if self.tree[i][0] == HDR: |
538 | self.tree = self.tree[i:] | 548 | self.tree = self.tree[i:] |
539 | break | 549 | break |
540 | # Remove trailing links | 550 | # Remove trailing links |
541 | for i in range(len(self.tree)-1, 0, -1): | 551 | for i in range(len(self.tree)-1, 0, -1): |
542 | if not (self.is_lang_link(self.tree[i]) \ | 552 | if self.tree[i][0] == PARA \ |
543 | or self.is_empty_text(self.tree[i])): | 553 | and not self.is_empty_para(self.tree[i][1]): |
544 | self.tree = self.tree[0:i] | 554 | self.tree = self.tree[0:i+1] |
545 | break | 555 | break |
546 | 556 | ||
547 | 557 | ||
548 | # ISO 639 | 558 | # ISO 639 |
549 | langtab = { | 559 | langtab = { |
550 | "aa": "Afar", # Afar | 560 | "aa": "Afar", # Afar |