diff options
Diffstat (limited to 'WikiTrans/wiki2html.py')
-rw-r--r-- | WikiTrans/wiki2html.py | 322 |
1 files changed, 156 insertions, 166 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 0d92e0a..0309ae3 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py @@ -15,7 +15,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +from __future__ import print_function from wikimarkup import * +from wikitoken import * from wikins import wiki_ns_re, wiki_ns import re try: @@ -24,7 +26,143 @@ except ImportError: from urllib.parse import quote as url_quote __all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] + +class HtmlSeqNode(WikiSeqNode): + def format(self): + s = '' + for x in self.content: + s += x.format() + return s + +class HtmlLinkNode(HtmlSeqNode): + def format(self): + arg = self.content[0].format() + text = None + if len(self.content) > 1: + s = [x for x in map(lambda x: x.format(), self.content)] + if s[0] == 'disambigR' or s[0] == 'wikiquote': + return "" + elif len(s) > 1 and s[1] == 'thumb': + return "" + text = '<span class="template">' + s[1] + '</span>' + if self.type == 'TMPL': + if re.match("t[+-]$", s[0]): + if len(s) > 2: + text = s[2] + elif s[0] == "term": + text = self.parser.tmpl_term(s) + elif s[0] == "proto": + text = self.parser.tmpl_proto(s) + return text + + (qual,sep,tgt) = arg.partition(':') + if tgt != '': + ns = self.parser.wiki_ns_name(qual) + if ns: + if ns == 'NS_IMAGE': + return '' + elif ns == 'NS_MEDIA': + tgt = self.parser.media_base + '/' + tgt + else: + tgt = self.parser.mktgt(tgt) + elif self.type == 'LINK' and qual in self.parser.langtab: + tgt = self.parser.mktgt(tgt, qual) + if not text or text == '': + text = self.parser.langtab[qual] + else: + tgt = self.parser.mktgt(tgt) + else: + tgt = self.parser.mktgt(arg) + return "<a href=\"%s\">%s</a>" % (tgt, + text if (text and text != '') \ + else arg) + +class HtmlRefNode(WikiRefNode): + def format(self): + target = self.ref + text = self.content.format() + return "<a href=\"%s\">%s</a>" % (target, + text if (text and text != '') \ + else target) + +class HtmlFontNode(HtmlSeqNode): + def format(self): + comm = { 'IT': 'i', + 'BOLD': 'b' } + s = '<%s>' % comm[self.type] + for x in self.content: + s += x.format() + s += '</%s>' % comm[self.type] + return s + +class HtmlTextNode(HtmlSeqNode): + def format(self): + if isinstance(self.content,list): + s = ''.join(self.content) + else: + s = self.content + return s +class HtmlHdrNode(WikiHdrNode): + def format(self): + level = self.level + if level > 4: + level = 4 + return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) + +class HtmlBarNode(WikiNode): + def format(self): + return "<hr/>\n" + +class HtmlEnvNode(WikiEnvNode): + def format(self): + type = self.envtype + lev = self.level + if lev > 4: + lev = 2 + string = "" + for s in self.content: + n = s.subtype; + string += "<%s>%s</%s>" % (self.parser.envt[type]["elt"][n], + s.content.format(), + self.parser.envt[type]["elt"][n]) + return "<%s>%s</%s>" % (self.parser.envt[type]["hdr"], + string, + self.parser.envt[type]["hdr"]) + return string + +class HtmlTagNode(WikiTagNode): + def format(self): + if self.tag == 'code': + self.parser.nested += 1 + s = self.content.format() + self.parser.nested -= 1 + return '<pre><code>' + s + '</code></pre>' #FIXME + else: + s = '<' + self.tag + if self.args: + s += ' ' + str(self.args) + s += '>' + s += self.content.format() + return s + '</' + self.tag + '>' + +class HtmlParaNode(HtmlSeqNode): + def format(self): + return "<p>" + super(HtmlParaNode, self).format() + "</p>\n" + +class HtmlPreNode(HtmlSeqNode): + def format(self): + s = super(HtmlPreNode, self).format() + if self.parser.nested: + return s + else: + return '<pre>' + s + '</pre>' + +class HtmlIndNode(WikiIndNode): + def format(self): + return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level + + class HtmlWikiMarkup (WikiMarkup): """ A (hopefully) general-purpose Wiki->HTML translator class. @@ -35,6 +173,23 @@ class HtmlWikiMarkup (WikiMarkup): nested = 0 + def __init__(self, *args, **kwargs): + super(HtmlWikiMarkup, self).__init__(*args, **kwargs) + self.token_class['LINK'] = HtmlLinkNode + self.token_class['TMPL'] = HtmlLinkNode + self.token_class['REF'] = HtmlRefNode + self.token_class['IT'] = HtmlFontNode + self.token_class['BOLD'] = HtmlFontNode + self.token_class['HDR'] = HtmlHdrNode + self.token_class['BAR'] = HtmlBarNode + self.token_class['ENV'] = HtmlEnvNode + self.token_class['TAG'] = HtmlTagNode + self.token_class['PARA'] = HtmlParaNode + self.token_class['PRE'] = HtmlPreNode + self.token_class['IND'] = HtmlIndNode + self.token_class['TEXT'] = HtmlTextNode + self.token_class['SEQ'] = HtmlSeqNode + def wiki_ns_name(self, str): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] @@ -85,175 +240,10 @@ class HtmlWikiMarkup (WikiMarkup): text += ' <span class="meaning">(' + s[-2] + ')</span>' return text - - def fmtlink(self, elt, istmpl): - arg = self.format(elt.content[0]) - text = None - if len(elt.content) > 1: - s = [x for x in map(self.format, elt.content)] - if s[0] == 'disambigR' or s[0] == 'wikiquote': - return "" - elif len(s) > 1 and s[1] == 'thumb': - return "" - text = '<span class="template">' + s[1] + '</span>' - if istmpl: - if re.match("t[+-]$", s[0]): - if len(s) > 2: - text = s[2] - elif s[0] == "term": - text = self.tmpl_term(s) - elif s[0] == "proto": - text = self.tmpl_proto(s) - return text - - (qual,sep,tgt) = arg.partition(':') - if tgt != '': - ns = self.wiki_ns_name(qual) - if ns: - if ns == 'NS_IMAGE': - return '' - elif ns == 'NS_MEDIA': - tgt = self.media_base + '/' + tgt - else: - tgt = self.mktgt(tgt) - elif not istmpl and qual in self.langtab: - tgt = self.mktgt(tgt, qual) - if not text or text == '': - text = self.langtab[qual] - else: - tgt = self.mktgt(tgt) - else: - tgt = self.mktgt(arg) - return "<a href=\"%s\">%s</a>" % (tgt, - text if (text and text != '') \ - else arg) - - def str_link(self, elt): - return self.fmtlink(elt, False) - - def str_tmpl(self, elt): - return self.fmtlink(elt, True) - - def str_ref(self, elt): - target = elt.ref - text = self.format(elt.content) - return "<a href=\"%s\">%s</a>" % (target, - text if (text and text != '') \ - else target) - - def concat(self, eltlist): - string = "" - for x in eltlist: - string += self.format(x) - return string - - def str_it(self, elt): - return "<i>" + self.concat(elt.content) + "</i>" - - def str_bold(self, elt): - return "<b>" + self.concat(elt.content) + "</b>" - - def str_hdr(self, elt): - level = elt.level - if level > 4: - level = 4 - return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level) - - def str_bar(self): - return "<hr/>\n" - - def str_env(self, elt): - type = elt.envtype - lev = elt.level - if lev > 4: - lev = 2 - string = "" - for s in elt.content: - n = s.subtype; - string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s.content), - self.envt[type]["elt"][n]) - return "<%s>%s</%s>" % (self.envt[type]["hdr"], - string, - self.envt[type]["hdr"]) - return string - - def str_tag(self, elt): - if elt.tag == 'code': - self.nested += 1 - s = self.format(elt.content) - self.nested -= 1 - return '<pre><code>' + s + '</code></pre>' #FIXME - else: - s = '<' + elt.tag - if elt.args: - s += ' ' + str(elt.args) - s += '>' - s += self.format(elt.content) - return s + '</' + elt.tag + '>' - - def str_para(self, elt): - string = ""; - for x in elt.content: - string += self.format(x) - return "<p>" + string + "</p>\n" - - def str_pre(self, elt): - string = ""; - for x in elt.content: - string += self.format(x) - if self.nested: - return string - return '<pre>' + string + '</pre>' - - def str_ind(self, elt): - return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level - - def format(self, elt): - if elt.type == 'TEXT': - if isinstance(elt.content,list): - string = "" - for s in elt.content: - string += s - else: - string = elt.content - return string - elif elt.type == 'TAG': - return self.str_tag(elt) - elif elt.type == 'PARA': - return self.str_para(elt) - elif elt.type == 'PRE': - return self.str_pre(elt) - elif elt.type == 'IT': - return self.str_it(elt) - elif elt.type == 'BOLD': - return self.str_bold(elt) - elif elt.type == 'LINK': - return self.str_link(elt) - elif elt.type == 'TMPL': - return self.str_tmpl(elt) - elif elt.type == 'BAR': - return self.str_bar() - elif elt.type == 'HDR': - return self.str_hdr(elt) - elif elt.type == 'REF': - return self.str_ref(elt) - elif elt.type == 'ENV': - return self.str_env(elt) - elif elt.type == 'IND': - return self.str_ind(elt) - elif elt.type == 'SEQ': - string = "" - for x in elt.content: - string += self.format(x) - return string - else: - return str(elt) - def __str__(self): str = "" for elt in self.tree: - str += self.format(elt) + str += elt.format() return str class HtmlWiktionaryMarkup (HtmlWikiMarkup): |