diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-04 01:05:59 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-04 01:05:59 +0200 |
commit | 7153f67c6ea71d684d412efd032c309cd273ed19 (patch) | |
tree | 79da1d71cb0deaf38a2f018731058d640e1f720e | |
parent | d09b3d4fe0c738d77a084c52693b9f88162a5bdc (diff) | |
download | wikitrans-7153f67c6ea71d684d412efd032c309cd273ed19.tar.gz wikitrans-7153f67c6ea71d684d412efd032c309cd273ed19.tar.bz2 |
Fix parsing of nested bold/it markups
-rw-r--r-- | wikimarkup.py | 108 |
1 file changed, 31 insertions, 77 deletions
diff --git a/wikimarkup.py b/wikimarkup.py index abd0764..716bc4a 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -119,6 +119,24 @@ class BaseWikiMarkup: self.toklist = [] for tok in self.tokread(): self.toklist.append(tok) + # Determine and fix up the ordering of bold and italic markers + # This helps correctly parse inputs like: + # '''''Door''' files kan ik niet op tijd komen.'' + stack = [] + for i in range(0,len(self.toklist)): + if self.toklist[i][0] == DELIM \ + and (self.toklist[i][1] == "''" \ + or self.toklist[i][1] == "'''"): + if len(stack) > 0 \ + and self.toklist[stack[-1]][1] == self.toklist[i][1]: + stack.pop() + elif len(stack) > 1: + x = self.toklist[stack[-2]] + self.toklist[stack[-2]] = self.toklist[stack[-1]] + self.toklist[stack[-1]] = x + stack.pop() + else: + stack.append(i) def peektkn(self): return self.toklist[self.tokind] @@ -136,8 +154,9 @@ class BaseWikiMarkup: self.tokind = self.tokind - 1 return self.toklist[self.tokind] - def parse_bold(self, nested = False): - self.dprint(80, "ENTER parse_bold(%s), tok %s", nested, self.peektkn()) + def parse_fontmod(self,delim,what): + self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s", + delim, what, self.peektkn()) seq = [] textlist = [] while 1: @@ -145,74 +164,8 @@ class BaseWikiMarkup: if tok[0] == TEXT: textlist.append(tok[1]) elif tok[0] == DELIM: - if tok[1] == "'''": - break - elif tok[1] == "''" and not nested: - if textlist: - seq.append((TEXT, textlist)) - textlist = [] - x = self.parse_it(True) - if not x: - self.dprint(80, "LEAVE parse_bold=None") - return None - seq.append(x) - elif self.is_inline_delim(tok): - if textlist: - seq.append((TEXT, textlist)) - textlist = [] - x = self.parse_inline(tok) - if x: - seq.append(x) - else: - self.dprint(80, "LEAVE parse_bold=%s", "None") - return None - else: - self.dprint(80, "LEAVE parse_bold=None") - return None - elif tok[0] == NL: - if self.peektkn()[0] == NL: - self.dprint(80, "LEAVE parse_bold=None") - return None - else: - 
self.dprint(80, "LEAVE parse_bold=None") - return None - if textlist: - seq.append((TEXT, textlist)) - self.dprint(80, "LEAVE parse_bold=(BOLD, %s", seq) - return (BOLD, seq) - - def parse_it(self, nested = False): - self.dprint(80, "ENTER parse_it(%s), tok %s", nested, self.peektkn()) - seq = [] - textlist = [] - while 1: - tok = self.getkn() - if tok[0] == TEXT: - textlist.append(tok[1]) - elif tok[0] == DELIM: - if tok[1] == "''": + if tok[1] == delim: break - elif tok[1] == "'''": - if nested: - # The tokenizer always puts longest match before the - # shortest one, so "'''" goes before "''". Swap - # them if the need is: - ntok = self.peektkn() - if ntok[0] == DELIM and ntok[1] == "''": - self.setkn((DELIM, "'''")) - break - else: - self.dprint(80, "LEAVE parse_it=%s", "None") - return None - else: - if textlist: - seq.append((TEXT, textlist)) - textlist = [] - x = self.parse_bold(True) - if not x: - self.dprint(80, "LEAVE parse_it=%s", "None") - return None - seq.append(x) elif self.is_inline_delim(tok): if textlist: seq.append((TEXT, textlist)) @@ -221,22 +174,23 @@ class BaseWikiMarkup: if x: seq.append(x) else: - self.dprint(80, "LEAVE parse_it=%s", "None") + self.dprint(80, "LEAVE parse_fontmod=%s", "None") return None else: - self.dprint(80, "LEAVE parse_it=%s", "None") + self.dprint(80, "LEAVE parse_fontmod=None") return None elif tok[0] == NL: if self.peektkn()[0] == NL: - self.dprint(80, "LEAVE parse_it=%s", "None") + self.dprint(80, "LEAVE parse_fontmod=None") return None else: - self.dprint(80, "LEAVE parse_it=%s", "None") + self.dprint(80, "LEAVE parse_fontmod=None") return None if textlist: seq.append((TEXT, textlist)) - self.dprint(80, "LEAVE parse_it=(IT,%s)", seq) - return (IT, seq) + res = (what, seq) + self.dprint(80, "LEAVE parse_fontmod=%s", res) + return res def parse_link(self, type, delim): self.dprint(80, "ENTER parse_link(%s,%s), tok %s", @@ -312,9 +266,9 @@ class BaseWikiMarkup: self.dprint(80, "ENTER parse_inline(%s), tok %s", tok, 
self.peektkn()) tokind = self.tokind if tok[1] == "''": - x = self.parse_it() + x = self.parse_fontmod(tok[1], IT) elif tok[1] == "'''": - x = self.parse_bold() + x = self.parse_fontmod(tok[1], BOLD) elif tok[1] == "[": x = self.parse_ref() elif tok[1] == "[[": |