summary | refs | log | tree | commit | diff | about
author: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-03 23:05:59 (GMT)
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2009-03-03 23:05:59 (GMT)
commit: 7153f67c6ea71d684d412efd032c309cd273ed19 (patch) (side-by-side diff)
tree: 79da1d71cb0deaf38a2f018731058d640e1f720e
parent: d09b3d4fe0c738d77a084c52693b9f88162a5bdc (diff)
download: wikitrans-7153f67c6ea71d684d412efd032c309cd273ed19.tar.gz
          wikitrans-7153f67c6ea71d684d412efd032c309cd273ed19.tar.bz2
Fix parsing of nested bold/it markups
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--  wikimarkup.py  108
1 file changed, 31 insertions, 77 deletions
diff --git a/wikimarkup.py b/wikimarkup.py
index abd0764..716bc4a 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -119,6 +119,24 @@ class BaseWikiMarkup:
self.toklist = []
for tok in self.tokread():
self.toklist.append(tok)
+ # Determine and fix up the ordering of bold and italic markers
+ # This helps correctly parse inputs like:
+ # '''''Door''' files kan ik niet op tijd komen.''
+ stack = []
+ for i in range(0,len(self.toklist)):
+ if self.toklist[i][0] == DELIM \
+ and (self.toklist[i][1] == "''" \
+ or self.toklist[i][1] == "'''"):
+ if len(stack) > 0 \
+ and self.toklist[stack[-1]][1] == self.toklist[i][1]:
+ stack.pop()
+ elif len(stack) > 1:
+ x = self.toklist[stack[-2]]
+ self.toklist[stack[-2]] = self.toklist[stack[-1]]
+ self.toklist[stack[-1]] = x
+ stack.pop()
+ else:
+ stack.append(i)
def peektkn(self):
return self.toklist[self.tokind]
@@ -136,8 +154,9 @@ class BaseWikiMarkup:
self.tokind = self.tokind - 1
return self.toklist[self.tokind]
- def parse_bold(self, nested = False):
- self.dprint(80, "ENTER parse_bold(%s), tok %s", nested, self.peektkn())
+ def parse_fontmod(self,delim,what):
+ self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s",
+ delim, what, self.peektkn())
seq = []
textlist = []
while 1:
@@ -145,74 +164,8 @@ class BaseWikiMarkup:
if tok[0] == TEXT:
textlist.append(tok[1])
elif tok[0] == DELIM:
- if tok[1] == "'''":
- break
- elif tok[1] == "''" and not nested:
- if textlist:
- seq.append((TEXT, textlist))
- textlist = []
- x = self.parse_it(True)
- if not x:
- self.dprint(80, "LEAVE parse_bold=None")
- return None
- seq.append(x)
- elif self.is_inline_delim(tok):
- if textlist:
- seq.append((TEXT, textlist))
- textlist = []
- x = self.parse_inline(tok)
- if x:
- seq.append(x)
- else:
- self.dprint(80, "LEAVE parse_bold=%s", "None")
- return None
- else:
- self.dprint(80, "LEAVE parse_bold=None")
- return None
- elif tok[0] == NL:
- if self.peektkn()[0] == NL:
- self.dprint(80, "LEAVE parse_bold=None")
- return None
- else:
- self.dprint(80, "LEAVE parse_bold=None")
- return None
- if textlist:
- seq.append((TEXT, textlist))
- self.dprint(80, "LEAVE parse_bold=(BOLD, %s", seq)
- return (BOLD, seq)
-
- def parse_it(self, nested = False):
- self.dprint(80, "ENTER parse_it(%s), tok %s", nested, self.peektkn())
- seq = []
- textlist = []
- while 1:
- tok = self.getkn()
- if tok[0] == TEXT:
- textlist.append(tok[1])
- elif tok[0] == DELIM:
- if tok[1] == "''":
+ if tok[1] == delim:
break
- elif tok[1] == "'''":
- if nested:
- # The tokenizer always puts longest match before the
- # shortest one, so "'''" goes before "''". Swap
- # them if the need is:
- ntok = self.peektkn()
- if ntok[0] == DELIM and ntok[1] == "''":
- self.setkn((DELIM, "'''"))
- break
- else:
- self.dprint(80, "LEAVE parse_it=%s", "None")
- return None
- else:
- if textlist:
- seq.append((TEXT, textlist))
- textlist = []
- x = self.parse_bold(True)
- if not x:
- self.dprint(80, "LEAVE parse_it=%s", "None")
- return None
- seq.append(x)
elif self.is_inline_delim(tok):
if textlist:
seq.append((TEXT, textlist))
@@ -221,22 +174,23 @@ class BaseWikiMarkup:
if x:
seq.append(x)
else:
- self.dprint(80, "LEAVE parse_it=%s", "None")
+ self.dprint(80, "LEAVE parse_fontmod=%s", "None")
return None
else:
- self.dprint(80, "LEAVE parse_it=%s", "None")
+ self.dprint(80, "LEAVE parse_fontmod=None")
return None
elif tok[0] == NL:
if self.peektkn()[0] == NL:
- self.dprint(80, "LEAVE parse_it=%s", "None")
+ self.dprint(80, "LEAVE parse_fontmod=None")
return None
else:
- self.dprint(80, "LEAVE parse_it=%s", "None")
+ self.dprint(80, "LEAVE parse_fontmod=None")
return None
if textlist:
seq.append((TEXT, textlist))
- self.dprint(80, "LEAVE parse_it=(IT,%s)", seq)
- return (IT, seq)
+ res = (what, seq)
+ self.dprint(80, "LEAVE parse_fontmod=%s", res)
+ return res
def parse_link(self, type, delim):
self.dprint(80, "ENTER parse_link(%s,%s), tok %s",
@@ -312,9 +266,9 @@ class BaseWikiMarkup:
self.dprint(80, "ENTER parse_inline(%s), tok %s", tok, self.peektkn())
tokind = self.tokind
if tok[1] == "''":
- x = self.parse_it()
+ x = self.parse_fontmod(tok[1], IT)
elif tok[1] == "'''":
- x = self.parse_bold()
+ x = self.parse_fontmod(tok[1], BOLD)
elif tok[1] == "[":
x = self.parse_ref()
elif tok[1] == "[[":

Return to:

Send suggestions and report system problems to the System administrator.