diff options
-rw-r--r-- | wiki2html.py | 68 | ||||
-rw-r--r-- | wiki2text.py | 60 | ||||
-rw-r--r-- | wikimarkup.py | 257 |
3 files changed, 192 insertions, 193 deletions
diff --git a/wiki2html.py b/wiki2html.py index 77cb97a..eee592d 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -40,9 +40,9 @@ class HtmlWikiMarkup (WikiMarkup): envt = { "unnumbered": { "hdr": "ul", "elt": ["li"] }, - "numbered": { "hdr": "ol", + "numbered": { "hdr": "ol", "elt": ["li"] }, - "defn": { "hdr": "dl", + "defn": { "hdr": "dl", "elt": ["dt","dd"] } } def mktgt(self, tgt, lang = None): @@ -81,10 +81,10 @@ class HtmlWikiMarkup (WikiMarkup): def fmtlink(self, elt, istmpl): - arg = self.format(elt[1][0]) + arg = self.format(elt['content'][0]) text = None - if len(elt[1]) > 1: - s = map(self.format, elt[1]) + if len(elt['content']) > 1: + s = map(self.format, elt['content']) if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" elif len(s) > 1 and s[1] == 'thumb': @@ -129,8 +129,8 @@ class HtmlWikiMarkup (WikiMarkup): return self.fmtlink(elt, True) def str_ref(self, elt): - target = elt[1] - text = self.format(elt[2]) + target = elt['ref'] + text = self.format(elt['content']) return "<a href=\"%s\">%s</a>" % (target, text if (text and text != '') \ else target) @@ -142,30 +142,30 @@ class HtmlWikiMarkup (WikiMarkup): return string def str_it(self, elt): - return "<i>" + self.concat(elt[1]) + "</i>" + return "<i>" + self.concat(elt['content']) + "</i>" def str_bold(self, elt): - return "<b>" + self.concat(elt[1]) + "</b>" + return "<b>" + self.concat(elt['content']) + "</b>" def str_hdr(self, elt): - level = elt[1] + 1 + level = elt['level'] + 1 if level > 4: level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt[2]), level) + return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) def str_bar(self): return "<hr/>" def str_env(self, elt): - type = elt[1] - lev = elt[2] + type = elt['envtype'] + lev = elt['level'] if lev > 4: lev = 2 string = "" - for s in elt[3]: - n = s[1]; + for s in elt['content']: + n = s['subtype']; string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s[2]), + self.format(s['content']), self.envt[type]["elt"][n]) return "<%s>%s</%s>" % (self.envt[type]["hdr"], string, @@ -174,45 +174,45 @@ class HtmlWikiMarkup (WikiMarkup): def str_para(self, elt): string = ""; - for x in elt[1]: + for x in elt['content']: string += self.format(x) return "<p>" + string + "</p>" def str_ind(self, elt): - return (" " * 2 * elt[1]) + self.format(elt[2]) + return (" " * 2 * elt['level']) + self.format(elt['content']) def format(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): string = "" - for s in elt[1]: + for s in elt['content']: string += s else: - string = elt[1] + string = elt['content'] return string - elif elt[0] == PARA: + elif elt['type'] == 'PARA': return self.str_para(elt) - elif elt[0] == IT: + elif elt['type'] == 'IT': return self.str_it(elt) - elif elt[0] == BOLD: + elif elt['type'] == 'BOLD': return self.str_bold(elt) - elif elt[0] == LINK: + elif elt['type'] == 'LINK': return self.str_link(elt) - elif elt[0] == TMPL: + elif elt['type'] == 'TMPL': return self.str_tmpl(elt) - elif elt[0] == BAR: + elif elt['type'] == 'BAR': return self.str_bar() - elif elt[0] == HDR: + elif elt['type'] == 'HDR': return self.str_hdr(elt) - elif elt[0] == REF: + elif elt['type'] == 'REF': return self.str_ref(elt) - elif elt[0] == ENV: + elif elt['type'] == 'ENV': return self.str_env(elt) - elif elt[0] == IND: + elif elt['type'] == 'IND': return self.str_ind(elt) - elif elt[0] == SEQ: + elif elt['type'] == 'SEQ': string = "" - for x in elt[1]: + for x in elt['content']: string += self.format(x) return string else: diff --git a/wiki2text.py b/wiki2text.py index 005e551..c94ae51 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -66,9 +66,9 @@ class TextWikiMarkup (WikiMarkup): return self.html_base % { 'lang' : lang } + urllib.quote(tgt) def fmtlink(self, elt, istmpl): - arg = self.format(elt[1][0]) - if len(elt[1]) > 1: - s = map(self.format, elt[1]) + arg = self.format(elt['content'][0]) + if len(elt['content']) > 1: + s = map(self.format, elt['content']) text = s[1] else: s = None @@ -143,10 +143,10 @@ class TextWikiMarkup (WikiMarkup): return output + linebuf def format(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): string = "" - for s in elt[1]: + for s in elt['content']: if string: if string.endswith("."): string += " " @@ -154,22 +154,22 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s else: - string = elt[1] - elif elt[0] == PARA: + string = elt['content'] + elif elt['type'] == 'PARA': string = ""; - for x in elt[1]: + for x in elt['content']: string += self.format(x) string = self.fmtpara(string) + '\n\n' - elif elt[0] == IT: + elif elt['type'] == 'IT': string = "" - for x in elt[1]: + for x in elt['content']: s = self.format(x) if s: string += " " + s string = "_" + string.lstrip(" ") + "_" - elif elt[0] == BOLD: + elif elt['type'] == 'BOLD': string = "" - for x in elt[1]: + for x in elt['content']: s = self.format(x) if s: if string.endswith("."): @@ -178,36 +178,36 @@ class TextWikiMarkup (WikiMarkup): string += " " string += s string = string.upper() - elif elt[0] == LINK: + elif elt['type'] == 'LINK': string = self.fmtlink(elt, False) - elif elt[0] == TMPL: + elif elt['type'] == 'TMPL': s = self.fmtlink(elt, True) if s: string = '[' + s + ']' else: string = s - elif elt[0] == BAR: + elif elt['type'] == 'BAR': w = self.width if w < 5: w = 5 string = "\n" + ("-" * (w - 5)).center(w - 1) + "\n" - elif elt[0] == HDR: - level = elt[1] + elif elt['type'] == 'HDR': + level = elt['level'] string = "\n" + ("*" * level) + " " + \ - self.format(elt[2]).lstrip(" ") + "\n\n" - elif elt[0] == REF: - string = self.xref(self.format(elt[2]), elt[1]) - elif elt[0] == ENV: - type = elt[1] - lev = elt[2] + self.format(elt['content']).lstrip(" ") + "\n\n" + elif elt['type'] == 'REF': + string = self.xref(self.format(elt['content']), elt['ref']) + elif elt['type'] == 'ENV': + type = elt['envtype'] + lev = elt['level'] if lev > self.width - 4: lev = 1 string = "" n = 1 - for s in elt[3]: + for s in elt['content']: if not string.endswith("\n"): string += "\n" - x = self.format(s[2]) + x = self.format(s['content']) if type == "unnumbered": string += self.fmtpara(self.indent(lev, "- " + x.lstrip(" "))) elif type == "numbered": @@ -221,11 +221,11 @@ class TextWikiMarkup (WikiMarkup): if not string.endswith("\n"): string += "\n" - elif elt[0] == IND: - string = (" " * elt[1]) + self.format(elt[2]) + '\n' - elif elt[0] == SEQ: + elif elt['type'] == 'IND': + string = (" " * elt['level']) + self.format(elt['content']) + '\n' + elif elt['type'] == 'SEQ': string = "" - for x in elt[1]: + for x in elt['content']: if len(string) > 1 and not string[-1].isspace(): string += ' ' string += self.format(x) diff --git a/wikimarkup.py b/wikimarkup.py index 060b7eb..09c48eb 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -20,31 +20,10 @@ import re from types import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", - "NIL", "TEXT", "DELIM", "NL", "PARA", - "IT", "BOLD", "LINK", "TMPL", - "BAR", "HDR", "REF", "ENV", "IND", "SEQ", "envtypes" ] delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)") -NIL = 0 -TEXT = 1 -DELIM = 2 -NL = 3 - -PARA = 4 -IT = 5 -BOLD = 6 -LINK = 7 -TMPL = 8 -BAR = 9 -HDR = 10 -REF = 11 -ENV = 12 -IND = 13 -SEQ = 14 -ELT = 15 - # Environment types: envtypes = { "*": [ "unnumbered", 0 ], "#": [ "numbered", 0 ], @@ -77,12 +56,12 @@ class BaseWikiMarkup: if not line or line == "": self.dprint(100, "YIELD: NIL") - yield(NIL,) + yield({ 'type': 'NIL' }) break if line == '\n': self.dprint(100, "YIELD: NL") - yield(NL,line) + yield({ 'type': 'NL', 'content': line }) line = None continue @@ -92,26 +71,33 @@ class BaseWikiMarkup: if m: if (pos < m.start(0)): self.dprint(100, "YIELD: TEXT %s", line[pos:m.start(0)]) - yield(TEXT, line[pos:m.start(0)]) + yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) pos = m.end(0) if envtypes.has_key(m.group(0)[0]) and line[pos] == ":": # FIXME? self.dprint(100, "YIELD: DELIM %s, True", m.group(0)) - yield(DELIM, m.group(0), True) + # FIXME: What's "extra"? + yield({ 'type': 'DELIM', + 'content': m.group(0), + 'extra': True }) pos += 1 else: self.dprint(100, "YIELD: DELIM %s", m.group(0)) - yield(DELIM, m.group(0)) + yield({ 'type': 'DELIM', + 'content': m.group(0) }) else: if line[-1] == '\n': self.dprint(100, "YIELD: TEXT %s", line[pos:-1]) if line[pos:-1] != '': - yield(TEXT, line[pos:-1]) + yield({ 'type': 'TEXT', + 'content': line[pos:-1] }) self.dprint(100, "YIELD: NL") - yield(NL,'\n') + yield({ 'type': 'NL', + 'content': '\n' }) else: self.dprint(100, "YIELD: TEXT %s", line[pos:]) - yield(TEXT, line[pos:]) + yield({ 'type': 'TEXT', + 'content': line[pos:] }) line = None def input(self): @@ -126,11 +112,11 @@ class BaseWikiMarkup: # '''''Door''' files kan ik niet op tijd komen.'' stack = [] for i in range(0,len(self.toklist)): - if self.toklist[i][0] == DELIM \ - and (self.toklist[i][1] == "''" \ - or self.toklist[i][1] == "'''"): + if self.toklist[i]['type'] == 'DELIM' \ + and (self.toklist[i]['content'] == "''" \ + or self.toklist[i]['content'] == "'''"): if len(stack) > 0 \ - and self.toklist[stack[-1]][1] == self.toklist[i][1]: + and self.toklist[stack[-1]]['content'] == self.toklist[i]['content']: stack.pop() elif len(stack) > 1: x = self.toklist[stack[-2]] @@ -148,7 +134,7 @@ class BaseWikiMarkup: def getkn(self): tok = self.toklist[self.tokind] - if tok[0] != NIL: + if tok['type'] != 'NIL': self.tokind = self.tokind + 1 return tok @@ -163,14 +149,14 @@ class BaseWikiMarkup: textlist = [] while 1: tok = self.getkn() - if tok[0] == TEXT: - textlist.append(tok[1]) - elif tok[0] == DELIM: - if tok[1] == delim: + if tok['type'] == 'TEXT': + textlist.append(tok['content']) + elif tok['type'] == 'DELIM': + if tok['content'] == delim: break elif self.is_inline_delim(tok): if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) textlist = [] x = self.parse_inline(tok) if x: @@ -181,17 +167,17 @@ class BaseWikiMarkup: else: self.dprint(80, "LEAVE parse_fontmod=None") return None - elif tok[0] == NL: - if self.peektkn()[0] == NL: + elif tok['type'] == 'NL': + if self.peektkn()['type'] == 'NL': self.dprint(80, "LEAVE parse_fontmod=None") return None - seq.append((TEXT, '\n')) + seq.append({ 'type': 'TEXT', 'content': '\n' }) else: self.dprint(80, "LEAVE parse_fontmod=None") return None if textlist: - seq.append((TEXT, textlist)) - res = (what, seq) + seq.append({ 'type': 'TEXT', 'content': textlist }) + res = { 'type': what, 'content': seq } self.dprint(80, "LEAVE parse_fontmod=%s", res) return res @@ -202,14 +188,14 @@ class BaseWikiMarkup: list = [] while 1: tok = self.getkn() - if tok[0] == DELIM: - if tok[1] == delim: + if tok['type'] == 'DELIM': + if tok['content'] == delim: if list: - subtree.append((SEQ,list)) + subtree.append({ 'type': 'SEQ', 'content': list }) break - elif tok[1] == "|": + elif tok['content'] == "|": if len(list) > 1: - subtree.append((SEQ,list)) + subtree.append({ 'type': 'SEQ', 'content': list }) elif list: subtree.append(list[0]) list = [] @@ -220,21 +206,21 @@ class BaseWikiMarkup: else: self.dprint(80, "LEAVE parse_link=%s", "None") return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) else: self.dprint(80, "LEAVE parse_link=%s", "None") return None self.dprint(80, "LEAVE parse_link=(%s,%s)", type, subtree) - return (type, subtree) + return { 'type': type, 'content': subtree } def parse_ref(self): self.dprint(80, "ENTER parse_ref, tok %s", self.peektkn()) list = [] while 1: tok = self.getkn() - if tok[0] == DELIM: - if tok[1] == "]": + if tok['type'] == 'DELIM': + if tok['content'] == "]": break else: x = self.parse_inline(tok) @@ -243,42 +229,45 @@ class BaseWikiMarkup: else: self.dprint(80, "LEAVE parse_ref=%s", "None") return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == NL: - list.append((TEXT, '\n')) + elif tok['type'] == 'NL': + list.append({ 'type': 'TEXT', 'content': '\n' }) continue else: self.dprint(80, "LEAVE parse_ref=%s", "None") return None - if len(list) == 0 or list[0][0] != TEXT: + if len(list) == 0 or list[0]['type'] != 'TEXT': self.dprint(80, "LEAVE parse_ref=%s", "None") return None - (ref,sep,text) = list[0][1].partition(' ') - ret = (REF, ref, (SEQ, [(TEXT, text)] + list[1:])) + (ref,sep,text) = list[0]['content'].partition(' ') + ret = { 'type': 'REF', + 'ref': ref, + 'content': { 'type': 'SEQ', + 'content': [{ 'type': 'TEXT', 'content': text }] + list[1:] } } self.dprint(80, "LEAVE parse_ref= %s", ret) return ret inline_delims = [ "''", "'''", "[", "[[", "{{" ] def is_inline_delim(self, tok): - return tok[0] == DELIM and tok[1] in self.inline_delims + return tok['type'] == 'DELIM' and tok['content'] in self.inline_delims def is_block_delim(self, tok): - return tok[0] == DELIM and tok[1] not in self.inline_delims + return tok['type'] == 'DELIM' and tok['content'] not in self.inline_delims def parse_inline(self, tok): self.dprint(80, "ENTER parse_inline(%s), tok %s", tok, self.peektkn()) tokind = self.tokind - if tok[1] == "''": - x = self.parse_fontmod(tok[1], IT) - elif tok[1] == "'''": - x = self.parse_fontmod(tok[1], BOLD) - elif tok[1] == "[": + if tok['content'] == "''": + x = self.parse_fontmod(tok['content'], 'IT') + elif tok['content'] == "'''": + x = self.parse_fontmod(tok['content'], 'BOLD') + elif tok['content'] == "[": x = self.parse_ref() - elif tok[1] == "[[": - x = self.parse_link(LINK, "]]") - elif tok[1] == "{{": - x = self.parse_link(TMPL, "}}") + elif tok['content'] == "[[": + x = self.parse_link('LINK', "]]") + elif tok['content'] == "{{": + x = self.parse_link('TMPL', "}}") else: # FIXME self.dprint(80, "LEAVE parse_inline=%s", "None") x = None @@ -293,23 +282,23 @@ class BaseWikiMarkup: textlist = [] while 1: tok = self.getkn() - if tok[0] == TEXT: - textlist.append(tok[1]) - elif tok[0] == NL: + if tok['type'] == 'TEXT': + textlist.append(tok['content']) + elif tok['type'] == 'NL': tok = self.getkn() - if tok[0] == NL or tok[0] == NIL: + if tok['type'] == 'NL' or tok['type'] == 'NIL': break else: self.ungetkn() if self.is_block_delim(tok): break textlist.append('\n') - elif tok[0] == NIL: + elif tok['type'] == 'NIL': break - elif tok[0] == DELIM: + elif tok['type'] == 'DELIM': if self.is_inline_delim(tok): if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) textlist = [] x = self.parse_inline(tok) if x: @@ -318,27 +307,27 @@ class BaseWikiMarkup: seq.append(tok) break else: - seq.append((TEXT,tok[1])) + seq.append({ 'type': 'TEXT', 'content': tok['content'] }) # self.ungetkn() break if textlist: - seq.append((TEXT, textlist)) + seq.append({ 'type': 'TEXT', 'content': textlist }) self.dprint(80, "LEAVE parse_para=%s", seq) - return (PARA, seq) + return { 'type': 'PARA', 'content': seq } def parse_header(self, delim): self.dprint(80, "ENTER parse_header(%s), tok %s", delim, self.peektkn()) list = [] while 1: tok = self.getkn() - if tok[0] == NIL: + if tok['type'] == 'NIL': self.dprint(80, "LEAVE parse_header=%s", "None") return None - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == DELIM: - if tok[1] == delim: - if self.peektkn()[0] == NL: + elif tok['type'] == 'DELIM': + if tok['content'] == delim: + if self.peektkn()['type'] == 'NL': break else: self.dprint(80, "LEAVE parse_header=%s", "None") @@ -354,7 +343,9 @@ class BaseWikiMarkup: self.dprint(80, "LEAVE parse_header=%s", "None") return None self.dprint(80, "LEAVE parse_header=(HDR, %s, (SEQ,%s))",len(delim)-1,list) - return (HDR,len(delim)-1,(SEQ,list)) + return { 'type': 'HDR', + 'level': len(delim)-1, + 'content': { 'type': 'SEQ', 'content': list } } def parse_line(self): @@ -362,12 +353,12 @@ class BaseWikiMarkup: list = [] while 1: tok = self.getkn() - if tok[0] == NL or tok[0] == NIL: + if tok['type'] == 'NL' or tok['type'] == 'NIL': break - elif tok[0] == TEXT: + elif tok['type'] == 'TEXT': list.append(tok) - elif tok[0] == DELIM and tok[1][0] == ":": - list.append(self.parse_indent(len(tok[1]))) + elif tok['type'] == 'DELIM' and tok['content'][0] == ":": + list.append(self.parse_indent(len(tok['content']))) break else: x = self.parse_inline(tok) @@ -376,67 +367,72 @@ class BaseWikiMarkup: else: list.append(tok) self.dprint(80, "LEAVE parse_line=(SEQ, %s)", list) - return (SEQ, list) + return { 'type': 'SEQ', 'content': list } def parse_env(self, type, lev): self.dprint(80, "ENTER parse_env(%s,%s), tok %s",type,lev,self.peektkn()) list = [] while 1: tok = self.getkn() - if tok[0] == DELIM and envtypes.has_key(tok[1][0]) and type == envtypes[tok[1][0]][0]: - if len(tok[1]) < lev: + if tok['type'] == 'DELIM' \ + and envtypes.has_key(tok['content'][0]) \ + and type == envtypes[tok['content'][0]][0]: + if len(tok['content']) < lev: self.ungetkn() break - elif len(tok[1]) > lev: + elif len(tok['content']) > lev: self.ungetkn() - elt = self.parse_env(type, len(tok[1])) + elt = self.parse_env(type, len(tok['content'])) else: elt = self.parse_line() - if len(tok) == 2: - list.append((ELT, envtypes[tok[1][0]][1], elt)) + if len(tok.keys()) == 2: + list.append({ 'type': 'ELT', + 'subtype': envtypes[tok['content'][0]][1], + 'content': elt }) continue - - if list[-1][2][0] != SEQ: - x = list[-1][2][1] - list[-1][2] = (SEQ, [x]) - list[-1][2][1].append(elt) + + if list[-1]['content']['type'] != 'SEQ': + x = list[-1]['content']['content'] + # FIXME: + list[-1]['content'] = { 'type': 'SEQ', 'content': [x] } + list[-1]['content']['content'].append(elt) else: self.ungetkn() break self.dprint(80, "LEAVE parse_env=(ENV, %s, %s, %s)", type, lev, list) - return (ENV, type, lev, list) + return { 'type': 'ENV', 'envtype': type, 'level': lev, 'content': list } def parse_indent(self, lev): self.dprint(80, "ENTER parse_indent(%s), tok %s", lev, self.peektkn()) - x = (IND, lev, self.parse_line()) + x = { 'type': 'IND', 'level': lev, 'content': self.parse_line() } self.dprint(80, "LEAVE parse_indent=%s", x) return x def parse0(self): tok = self.getkn() - toktype = tok[0] - if toktype == NIL: + toktype = tok['type'] + if toktype == 'NIL': return None - elif toktype == TEXT: + elif toktype == 'TEXT': self.ungetkn() return self.parse_para() - elif toktype == DELIM: - if tok[1] == "----": - return (BAR,) - elif tok[1][0:2] == "==": - return self.parse_header(tok[1]) - elif envtypes.has_key(tok[1][0]): - type = envtypes[tok[1][0]][0] - lev = len(tok[1]) + elif toktype == 'DELIM': + if tok['content'] == "----": + return { 'type': 'BAR' } + elif tok['content'][0:2] == "==": + return self.parse_header(tok['content']) + elif envtypes.has_key(tok['content'][0]): + type = envtypes[tok['content'][0]][0] + lev = len(tok['content']) self.ungetkn() return self.parse_env(type, lev) - elif tok[1][0] == ":": - return self.parse_indent(len(tok[1])) + elif tok['content'][0] == ":": + return self.parse_indent(len(tok['content'])) else: self.ungetkn() return self.parse_para() - elif toktype == NL: - return (TEXT, '\n') + elif toktype == 'NL': + return { 'type': 'TEXT', 'content': '\n' } # return self.parse0() def parse(self): @@ -513,25 +509,28 @@ class WikiMarkup (BaseWikiMarkup): return None def is_lang_link(self, elt): - if elt[0] == LINK and isinstance(elt[1],list) and len(elt[1]) == 1: - if elt[1][0][0] == TEXT: - m = re.match('([\w-]+):', elt[1][0][1]) + if elt['type'] == 'LINK' \ + and isinstance(elt['content'], list) \ + and len(elt['content']) == 1: + if elt['content'][0]['type'] == TEXT: + m = re.match('([\w-]+):', elt['content'][0]['content']) if m: # and m.group(1) in self.langtab: return True - elif elt[1][0][0] == SEQ and len(elt[1][0][1]) == 1 and\ - elt[1][0][1][0][0] == TEXT: - m = re.match('([\w-]+):',elt[1][0][1][0][1]) + elif elt['content'][0]['type'] == 'SEQ' \ + and len(elt['content'][0]['content']) == 1 and\ + elt['content'][0]['content'][0]['type'] == TEXT: + m = re.match('([\w-]+):',elt['content'][0]['content'][0]['content']) if m: # and m.group(1) in self.langtab: return True return False def is_empty_text(self, elt): - if elt[0] == TEXT: - if isinstance(elt[1],list): - for s in elt[1]: + if elt['type'] == 'TEXT': + if isinstance(elt['content'],list): + for s in elt['content']: if re.search('\w', s): return False - elif re.search('\w', elt[1]): + elif re.search('\w', elt['content']): return False return True return False |