diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-18 17:43:36 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-18 17:43:36 +0300 |
commit | 8c0de7a515aa9d6db7b3ce4110a4e29261851abb (patch) | |
tree | 228c2bb1be5a907b373158c6c94cd8d51f23c419 /wikitrans | |
parent | 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (diff) | |
download | wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.gz wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.bz2 |
Minor stylistic fixes.
Diffstat (limited to 'wikitrans')
-rw-r--r-- | wikitrans/wiki2html.py | 53 | ||||
-rw-r--r-- | wikitrans/wiki2texi.py | 82 | ||||
-rw-r--r-- | wikitrans/wiki2text.py | 66 | ||||
-rw-r--r-- | wikitrans/wikidump.py | 20 | ||||
-rw-r--r-- | wikitrans/wikimarkup.py | 46 | ||||
-rw-r--r-- | wikitrans/wikitoken.py | 72 |
6 files changed, 200 insertions, 139 deletions
diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py index fc6b142..0696dce 100644 --- a/wikitrans/wiki2html.py +++ b/wikitrans/wiki2html.py @@ -1,17 +1,17 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. @@ -40,16 +40,18 @@ try: from html import escape as html_escape except ImportError: from cgi import escape as html_escape - + __all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] + class HtmlSeqNode(WikiSeqNode): def format(self): s = '' for x in self.content: s += x.format() return s - + + class HtmlLinkNode(HtmlSeqNode): def format(self): arg = self.content[0].format() @@ -70,8 +72,8 @@ class HtmlLinkNode(HtmlSeqNode): elif s[0] == "proto": text = self.parser.tmpl_proto(s) return text - - (qual,sep,tgt) = arg.partition(':') + + (qual, sep, tgt) = arg.partition(':') if tgt != '': ns = self.parser.wiki_ns_name(qual) if ns: @@ -92,6 +94,7 @@ class HtmlLinkNode(HtmlSeqNode): return "<a href=\"%s\">%s</a>" % (tgt, text if (text and text != '') else arg) + class HtmlRefNode(WikiRefNode): def format(self): target = self.ref @@ -101,6 +104,7 @@ class HtmlRefNode(WikiRefNode): text if (text and text != '') else target ) + class HtmlFontNode(HtmlSeqNode): def format(self): comm = { 'IT': 'i', @@ -111,14 +115,16 @@ class HtmlFontNode(HtmlSeqNode): s += '</%s>' % comm[self.type] return s + class HtmlTextNode(HtmlSeqNode): def format(self): - if isinstance(self.content,list): + if isinstance(self.content, list): s = ''.join(self.content) else: s = html_escape(self.content, quote=False) return s + class HtmlHdrNode(WikiHdrNode): def format(self): level = self.level @@ -126,10 +132,12 @@ class HtmlHdrNode(WikiHdrNode): level = 6 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) + class HtmlBarNode(WikiNode): def format(self): return "<hr/>\n" + class HtmlEnvNode(WikiEnvNode): def format(self): type = self.envtype @@ -146,7 +154,8 @@ class HtmlEnvNode(WikiEnvNode): string, self.parser.envt[type]["hdr"]) return string - + + class HtmlTagNode(WikiTagNode): def format(self): if self.tag == 'code': @@ -156,13 +165,13 @@ class HtmlTagNode(WikiTagNode): return '<pre><code>' + s + '</code></pre>' #FIXME elif self.tag == 'ref': n = self.idx+1 - return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n) + return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n, n, n, n) elif self.tag == 'references': s = '<div class="references">\n' s += '<ol class="references">\n' n = 0 for ref in self.parser.references: - n += 1 + n += 1 s += ('<li id="cite_note-%d">' + '<span class="mw-cite-backlink">' + '<b><a href="#cite_ref-%d">^</a></b>' @@ -170,7 +179,7 @@ class HtmlTagNode(WikiTagNode): + '<span class="reference-text">' + ref.content.format() + '</span>' - + '</li>\n') % (n,n) + + '</li>\n') % (n, n) s += '</ol>\n</div>\n' return s else: @@ -180,11 +189,13 @@ class HtmlTagNode(WikiTagNode): s += '>' s += self.content.format() return s + '</' + self.tag + '>' - + + class HtmlParaNode(HtmlSeqNode): def format(self): return "<p>" + super(HtmlParaNode, self).format() + "</p>\n" + class HtmlPreNode(HtmlSeqNode): def format(self): s = super(HtmlPreNode, self).format() @@ -193,10 +204,11 @@ class HtmlPreNode(HtmlSeqNode): else: return '<pre>' + s + '</pre>' + class HtmlIndNode(WikiIndNode): def format(self): return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level - + class HtmlWikiMarkup(WikiMarkup): """A Wiki markup to HTML translator class. @@ -223,9 +235,9 @@ class HtmlWikiMarkup(WikiMarkup): [html_base=URL],[image_base=URL],[media_base=URL]) The arguments have the same meaning as in the WikiMarkup constructor. - + """ - + super(HtmlWikiMarkup, self).__init__(*args, **kwargs) self.token_class['LINK'] = HtmlLinkNode self.token_class['TMPL'] = HtmlLinkNode @@ -249,14 +261,14 @@ class HtmlWikiMarkup(WikiMarkup): for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] - return None - + return None + envt = { "unnumbered": { "hdr": "ul", "elt": ["li"] }, "numbered": { "hdr": "ol", "elt": ["li"] }, "defn": { "hdr": "dl", - "elt": ["dt","dd"] } } + "elt": ["dt","dd"] } } def mktgt(self, tgt, lang = None): if not lang: @@ -291,13 +303,14 @@ class HtmlWikiMarkup(WikiMarkup): text += ' <span class="proto">' + x + '</span>' text += ' <span class="meaning">(' + s[-2] + ')</span>' return text - + def __str__(self): str = "" for elt in self.tree: str += elt.format() return str + class HtmlWiktionaryMarkup(HtmlWikiMarkup): """A class for translating Wiktionary articles into HTML. diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py index 55dffe2..936a133 100644 --- a/wikitrans/wiki2texi.py +++ b/wikitrans/wiki2texi.py @@ -1,17 +1,17 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2015-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. @@ -30,16 +30,17 @@ from wikitrans.wikins import wiki_ns_re, wiki_ns import re import urllib + class Acc(list): - def prepend(self,x): - self.insert(0,x) - + def prepend(self, x): + self.insert(0, x) + def is_empty(self): return len(self) == 0 - + def clear(self): self = [] - + def tail(self, n = 1): s = Acc() i = len(self) @@ -65,33 +66,35 @@ class Acc(list): self += elt[0:-n] break n -= l - + def trimnl(self): if self.endswith('\n'): self.trim(1) - + def trimpara(self): if self.endswith('\n\n'): self.trim(2) - + def endswith(self, x): return self.tail(len(x)) == x - + def in_new_para(self): return self.is_empty() or self.endswith('\n\n') - + def __str__(self): return ''.join(self) + class TexiTextNode(WikiTextNode): def format(self): parser = self.parser - if isinstance(self.content,list): + if isinstance(self.content, list): for s in self.content: parser._print(s) else: parser._print(self.content) + class TexiTagNode(WikiTagNode): def format(self): parser = self.parser @@ -125,9 +128,10 @@ class TexiTagNode(WikiTagNode): parser._print('>'); self.content.format() parser._print('</' + self.tag + '>') - + + class TexiParaNode(WikiSeqNode): - def format(self): + def format(self): parser = self.parser if not parser.acc.in_new_para(): parser._print('\n', nl=True) @@ -135,7 +139,8 @@ class TexiParaNode(WikiSeqNode): x.format() if not parser.acc.in_new_para(): parser._print('\n', nl=True) - + + class TexiPreNode(WikiSeqNode): def format(self): parser = self.parser @@ -146,6 +151,7 @@ class TexiPreNode(WikiSeqNode): if not parser.nested: parser._print('@end example\n', nl=True, escape=False) + class TexiFontNode(WikiSeqNode): def format(self): parser = self.parser @@ -156,6 +162,7 @@ class TexiFontNode(WikiSeqNode): x.format() parser._print('}', escape=False) + class TexiHdrNode(WikiHdrNode): def format(self): parser = self.parser @@ -174,10 +181,12 @@ class TexiHdrNode(WikiHdrNode): parser._print('\n') parser._print(None, nl=True) + class TexiBarNode(WikiNode): def format(self): self.parser._print("\n-----\n") + class TexiIndNode(WikiIndNode): def format(self): parser = self.parser @@ -185,6 +194,7 @@ class TexiIndNode(WikiIndNode): self.content.format() parser._print(None, nl=True) + class TexiEnvNode(WikiEnvNode): def format(self): parser = self.parser @@ -216,7 +226,8 @@ class TexiEnvNode(WikiEnvNode): parser._print(None, nl=True) parser._print('\n') parser._print('@end table\n', nl=True, escape=False) - + + class TexiLinkNode(WikiSeqNode): def format(self): parser = self.parser @@ -242,12 +253,13 @@ class TexiLinkNode(WikiSeqNode): if len(s) > 1 and s[1] == 'thumb': return - (qual,sep,tgt) = arg.partition(':') + (qual, sep, tgt) = arg.partition(':') if text: parser._print("@ref{%s,%s}" % (qual, text), escape=False) else: parser._print("@ref{%s}" % qual, escape=False) + class TexiRefNode(WikiRefNode): def format(self): parser = self.parser @@ -259,10 +271,11 @@ class TexiRefNode(WikiRefNode): parser._print("@uref{%s,%s}" % (target, text), escape=False) else: parser._print("@uref{%s}" % target, escape=False) - + + class TexiWikiMarkup(WikiMarkup): """Wiki markup to Texinfo translator class. - + Usage: x = TexiWikiMarkup(file="input.wiki") @@ -270,16 +283,16 @@ class TexiWikiMarkup(WikiMarkup): x.parse() # Print it as Texi: print(str(x)) - + """ - + nested = 0 sectcomm = { 'numbered': [ '@top', - '@chapter', - '@section', - '@subsection', + '@chapter', + '@section', + '@subsection', '@subsubsection' ], 'unnumbered': [ @@ -317,7 +330,7 @@ class TexiWikiMarkup(WikiMarkup): For a discussion of generic arguments, see the constructor of the WikiMarkup class. - + Additional arguments: sectioning_model=MODEL @@ -342,9 +355,9 @@ class TexiWikiMarkup(WikiMarkup): "@section A" on output. Now, if given "sectioning_start=1", this directive will produce "@subsection A" instead. """ - + super(TexiWikiMarkup, self).__init__(*args, **keywords) - + self.token_class['TEXT'] = TexiTextNode self.token_class['TAG'] = TexiTagNode self.token_class['PARA'] = TexiParaNode @@ -357,7 +370,7 @@ class TexiWikiMarkup(WikiMarkup): self.token_class['ENV'] = TexiEnvNode self.token_class['LINK'] = TexiLinkNode self.token_class['REF'] = TexiRefNode - + if "sectioning_model" in keywords: val = keywords["sectioning_model"] if val in self.sectcomm: @@ -394,17 +407,10 @@ class TexiWikiMarkup(WikiMarkup): s = self.acc self.acc = val return str(s) - + def __str__(self): self._begin_print() for elt in self.tree: elt.format() self.acc.trimpara() return self._end_print() - - - - - - - diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py index 88e7610..7585bff 100644 --- a/wikitrans/wiki2text.py +++ b/wikitrans/wiki2text.py @@ -1,17 +1,17 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. @@ -35,6 +35,7 @@ try: except ImportError: from urllib.parse import quote as url_quote + class TextSeqNode(WikiSeqNode): def format(self): string = "" @@ -43,10 +44,11 @@ class TextSeqNode(WikiSeqNode): string += ' ' string += x.format() return string - + + class TextTextNode(WikiTextNode): def format(self): - if isinstance(self.content,list): + if isinstance(self.content, list): string = "" for s in self.content: if string: @@ -59,6 +61,7 @@ class TextTextNode(WikiTextNode): string = self.content return string + class TextPreNode(WikiSeqNode): def format(self): string = "" @@ -67,6 +70,7 @@ class TextPreNode(WikiSeqNode): string += '\n' return string + class TextParaNode(WikiSeqNode): def format(self): string = "" @@ -75,6 +79,7 @@ class TextParaNode(WikiSeqNode): string = self.parser.fmtpara(string) + '\n\n' return string + class TextItNode(WikiSeqNode): def format(self): string = "" @@ -83,7 +88,8 @@ class TextItNode(WikiSeqNode): if s: string += " " + s return "_" + string.lstrip(" ") + "_" - + + class TextBoldNode(WikiSeqNode): def format(self): string = "" @@ -95,6 +101,7 @@ class TextBoldNode(WikiSeqNode): string += x.format() return string.upper() + class TextLinkNode(WikiSeqNode): def format(self): arg = self.content[0].format() @@ -110,7 +117,7 @@ class TextLinkNode(WikiSeqNode): return "" if len(s) > 1 and s[1] == 'thumb': return "" - (qual,sep,tgt) = arg.partition(':') + (qual, sep, tgt) = arg.partition(':') if tgt != '': ns = self.parser.wiki_ns_name(qual) if ns: @@ -138,11 +145,13 @@ class TextLinkNode(WikiSeqNode): return arg else: return text - + + class TextTmplNode(TextLinkNode): def format(self): return '[' + super(TextTmplNode, self).format() + ']' - + + class TextBarNode(WikiNode): def format(self): w = self.parser.width @@ -150,6 +159,7 @@ class TextBarNode(WikiNode): w = 5 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" + class TextHdrNode(WikiHdrNode): def format(self): return ("\n" @@ -157,7 +167,8 @@ class TextHdrNode(WikiHdrNode): + " " + self.content.format().lstrip(" ") + "\n\n") - + + class TextRefNode(WikiRefNode): def format(self): text = self.content.format() @@ -166,6 +177,7 @@ class TextRefNode(WikiRefNode): else: return "see " + self.ref + class TextEnvNode(WikiEnvNode): def format(self): type = self.envtype @@ -188,16 +200,16 @@ class TextEnvNode(WikiEnvNode): string += self.parser.indent(lev-1, x) else: string += self.parser.indent(lev+3, x) - if not string.endswith("\n"): string += "\n" - return string + class TextIndNode(WikiIndNode): def format(self): return (" " * self.level) + self.content.format() + '\n' + class TextTagNode(WikiTagNode): def format(self): if self.tag == 'code': @@ -215,8 +227,8 @@ class TextTagNode(WikiTagNode): if self.args: s += ' ' + str(self.args) s += '>' + self.content.format() + '</' + self.tag + '>' - return s - + return s + class TextWikiMarkup(WikiMarkup): """A Wiki markup to plain text translator. @@ -228,7 +240,7 @@ class TextWikiMarkup(WikiMarkup): x.parse() # Print it as plain text: print(str(x)) - + """ # Output width @@ -243,7 +255,7 @@ class TextWikiMarkup(WikiMarkup): # Array of footnote references references = [] - + def __init__(self, *args, **keywords): """Create a TextWikiMarkup object. @@ -254,16 +266,16 @@ class TextWikiMarkup(WikiMarkup): Most arguments have the same meaning as in the WikiMarkup constructor. Class-specific arguments: - + width=N - Limit output width to N columns. Default is 78. + Limit output width to N columns. Default is 78. show_urls=False By default, the link URLs are displayed in parentheses next to the link text. If this argument is given, only the link text will be displayed. """ - - super(TextWikiMarkup,self).__init__(*args, **keywords) + + super(TextWikiMarkup, self).__init__(*args, **keywords) if 'width' in keywords: self.width = keywords['width'] if 'show_urls' in keywords: @@ -283,7 +295,7 @@ class TextWikiMarkup(WikiMarkup): self.token_class['ENV'] = TextEnvNode self.token_class['IND'] = TextIndNode self.token_class['TAG'] = TextTagNode - + def wiki_ns_name(self, str): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] @@ -292,15 +304,15 @@ class TextWikiMarkup(WikiMarkup): if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None - + def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + url_quote(tgt) - + def indent(self, lev, text): if text.find('\n') == -1: - s = (" " * lev) + text + s = (" " * lev) + text else: s = "" for elt in text.split('\n'): @@ -309,7 +321,7 @@ class TextWikiMarkup(WikiMarkup): if not text.endswith('\n'): s = s.rstrip('\n') return s - + def fmtpara(self, input): output = "" linebuf = "" @@ -331,13 +343,14 @@ class TextWikiMarkup(WikiMarkup): linebuf += " " * wsc + s length += wsc + wlen return output + linebuf - + def __str__(self): str = "" for elt in self.tree: str += elt.format() return str + class TextWiktionaryMarkup(TextWikiMarkup): """A class for translating Wiktionary articles into plain text. @@ -345,4 +358,3 @@ class TextWiktionaryMarkup(TextWikiMarkup): """ html_base='http://%(lang)s.wiktionary.org/wiki/' - diff --git a/wikitrans/wikidump.py b/wikitrans/wikidump.py index d5f651c..bc71876 100644 --- a/wikitrans/wikidump.py +++ b/wikitrans/wikidump.py @@ -1,16 +1,16 @@ # Wiki "dump" format. -*- coding: utf-8 -*- # Copyright (C) 2015-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. @@ -28,6 +28,7 @@ from wikitrans.wikitoken import * import json from wikitrans.wikimarkup import WikiMarkup + class DumpReferences(object): idx = 0 def __len__(self): @@ -43,13 +44,14 @@ class DumpWikiMarkup(WikiMarkup): x = DumpWikiMarkup(file="input.wiki") # Parse the input: x.parse() - # Print a JSON dump of the parse tree + # Print a JSON dump of the parse tree print(str(x)) - + """ - + indent = None references = DumpReferences() + def __init__(self, **kwarg): """Create a DumpWikiMarkup object. @@ -64,14 +66,14 @@ class DumpWikiMarkup(WikiMarkup): indent=N Basic indent offset for JSON objects. """ - n = kwarg.pop('indent', None) if n != None: self.indent = int(n) - super(DumpWikiMarkup,self).__init__(self, **kwarg) + super(DumpWikiMarkup, self).__init__(self, **kwarg) + def __str__(self): return json.dumps(self.tree, cls=WikiNodeEncoder, indent=self.indent, - separators=(',',': '), + separators=(',', ': '), sort_keys=True) diff --git a/wikitrans/wikimarkup.py b/wikitrans/wikimarkup.py index a3e17d5..d199335 100644 --- a/wikitrans/wikimarkup.py +++ b/wikitrans/wikimarkup.py @@ -38,16 +38,20 @@ from wikitrans.wikitoken import * __all__ = [ "WikiMarkupParser", "WikiMarkup", "TagAttributes", "TagAttributeSyntaxError" ] + class UnexpectedTokenError(Exception): def __init__(self, value): self.value = value + class TagAttributeSyntaxError(Exception): def __init__(self, value): self.value = value + def __str__(self): return repr(self.value) + class TagAttributes(object): """A dictionary-like collection of tag attributes. @@ -59,7 +63,7 @@ class TagAttributes(object): for a in attr: ... """ - + attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?") valseg = re.compile("^[^\\\"]+") tab = {} @@ -94,26 +98,36 @@ class TagAttributes(object): self.tab[name] = val else: raise TagAttributeSyntaxError(s) + def __len__(self): return len(self.tab) + def __getitem__(self, key): return self.tab[key] + def __contains__(self, key): return key in self.tab + def __iter__(self): for key in self.tab: yield(key) + def has_key(self, key): return self.__contains__(key) + def __setitem__(self, key, value): self.tab[key] = value + def __delitem__(self, key): del self.tab[key] + def __str__(self): return self.printable + def __repr__(self): return self.printable + class WikiMarkupParser(object): """Parser for Wiki markup language. @@ -195,7 +209,7 @@ class WikiMarkupParser(object): 'HDR': WikiHdrNode } - def _new_node(self,**kwarg): + def _new_node(self, **kwarg): return self.token_class[kwarg['type']](self, **kwarg) def tokread(self): @@ -266,7 +280,7 @@ class WikiMarkupParser(object): content=m.group(0))) continue else: - yield(self._new_node(type='TEXT',content=m.group(0))) + yield(self._new_node(type='TEXT', content=m.group(0))) continue else: m = self.ctag.match(line, pos) @@ -306,13 +320,12 @@ class WikiMarkupParser(object): if line: if line[-1] == '\n': if line[pos:-1] != '': - yield(self._new_node(type='TEXT',content=line[pos:-1])) + yield(self._new_node(type='TEXT', content=line[pos:-1])) yield(self._new_node(type='NL')) else: - yield(self._new_node(type='TEXT',content=line[pos:])) + yield(self._new_node(type='TEXT', content=line[pos:])) line = None - def input(self): """Return next physical line from the input. @@ -349,7 +362,7 @@ class WikiMarkupParser(object): # 3a. '''a b ''c d''''' # 3b. ''a b '''c d''''' stack = [] - for i in range(0,len(self.toklist)): + for i in range(0, len(self.toklist)): if (self.toklist[i].type == 'DELIM' and (self.toklist[i].content == "''" or self.toklist[i].content == "'''")): @@ -400,7 +413,7 @@ class WikiMarkupParser(object): self.dprint(20, "lookahead(%s): %s", off, tok) return tok - def setkn(self,val): + def setkn(self, val): """Store token val at the current token index.""" self.toklist[self.tokind] = val @@ -471,7 +484,7 @@ class WikiMarkupParser(object): acc['textlist'] = [] if (isinstance(tok, WikiContentNode) - and isinstance(tok.content,str) + and isinstance(tok.content, str) and re.match("^[ \t]", tok.content)): type = 'PRE' rx = re.compile("^\S") @@ -574,7 +587,7 @@ class WikiMarkupParser(object): self.dprint(80, "LEAVE parse_indent=%s", x) return x - def parse_fontmod(self,delim,what): + def parse_fontmod(self, delim, what): """Parse font modification directive (bold or italics). Arguments: @@ -627,7 +640,7 @@ class WikiMarkupParser(object): return None seq = [] - (ref,sep,text) = tok.content.partition(' ') + (ref, sep, text) = tok.content.partition(' ') if text: seq.insert(0, self._new_node(type='TEXT', content=text)) @@ -735,7 +748,7 @@ class WikiMarkupParser(object): if od in self.close_delim: cd = self.close_delim[od] lev = 0 - for i,tok in enumerate(self.toklist[self.tokind+1:]): + for i, tok in enumerate(self.toklist[self.tokind+1:]): if tok.type == 'NIL': break elif tok.type == 'DELIM': @@ -766,7 +779,7 @@ class WikiMarkupParser(object): if tag.args: s += ' ' + str(tag.args) s += '>' - node = self._new_node(type='TEXT',content=s) + node = self._new_node(type='TEXT', content=s) if tag.content: self.tree[self.tokind:self.tokind] = tag.content self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node) @@ -786,13 +799,13 @@ class WikiMarkupParser(object): elif tok.type == 'NL': tok = self._new_node(type = 'TEXT', content = '\n') list.append(tok) - self.clear_mark() ret = self._new_node(type = 'TAG', tag = tag.tag, args = tag.args, isblock = tag.isblock, - content = self._new_node(type = 'SEQ', content = list)) + content = self._new_node(type = 'SEQ', + content = list)) self.dprint(80, "LEAVE parse_tag = %s", ret) return ret @@ -800,7 +813,7 @@ class WikiMarkupParser(object): """Parse a block environment (numbered, unnumbered, or definition list).""" type = self.envtypes[tok.content[0]][0] lev = len(tok.content) - self.dprint(80, "ENTER parse_env(%s,%s)",type,lev) + self.dprint(80, "ENTER parse_env(%s,%s)", type, lev) list = [] while True: if (tok.type == 'DELIM' @@ -831,7 +844,6 @@ class WikiMarkupParser(object): break tok = self.getkn() - ret = self._new_node(type='ENV', envtype=type, level=lev, diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py index 49c6c68..0678a75 100644 --- a/wikitrans/wikitoken.py +++ b/wikitrans/wikitoken.py @@ -41,21 +41,26 @@ from __future__ import print_function import re import json + class WikiNodeEncoder(json.JSONEncoder): """Custom JSONEncoder subclass for serializing WikiNode and its subclasses.""" + def default(self, obj): - if isinstance(obj,WikiNode): - return obj.jsonEncode() + if isinstance(obj, WikiNode): + return obj.json_encode() return json.JSONEncoder.default(self, obj) + def jsonencoder(func): def _mkencoder(self): json = func(self) json['wikinode'] = self.__class__.__name__ json['type'] = self.type return json + return _mkencoder + class WikiNode(object): """Generic parse tree node. @@ -71,7 +76,7 @@ class WikiNode(object): def __init__(self, parser, **kwargs): self.parser = parser for key in kwargs: - if hasattr(self,key): + if hasattr(self, key): self.__dict__[key] = kwargs[key] else: raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key)) @@ -80,7 +85,7 @@ class WikiNode(object): return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True) @jsonencoder - def jsonEncode(self): + def json_encode(self): ret = {} for x in dir(self): if x == 'parser' or x.startswith('_') or type(x) == 'function': @@ -96,6 +101,7 @@ class WikiNode(object): """ pass + class WikiContentNode(WikiNode): """Generic content node. @@ -110,21 +116,22 @@ class WikiContentNode(WikiNode): < |