summary | refs | log | tree | commit | diff
path: root/WikiTrans/wiki2html.py
diff options
context:
space:
mode:
Diffstat (limited to 'WikiTrans/wiki2html.py')
-rw-r--r-- WikiTrans/wiki2html.py 322
1 files changed, 156 insertions, 166 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 0d92e0a..0309ae3 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -15,7 +15,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import print_function
from wikimarkup import *
+from wikitoken import *
from wikins import wiki_ns_re, wiki_ns
import re
try:
@@ -24,7 +26,143 @@ except ImportError:
from urllib.parse import quote as url_quote
__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
+
+class HtmlSeqNode(WikiSeqNode):
+ def format(self):
+ s = ''
+ for x in self.content:
+ s += x.format()
+ return s
+
+class HtmlLinkNode(HtmlSeqNode):
+ def format(self):
+ arg = self.content[0].format()
+ text = None
+ if len(self.content) > 1:
+ s = [x for x in map(lambda x: x.format(), self.content)]
+ if s[0] == 'disambigR' or s[0] == 'wikiquote':
+ return ""
+ elif len(s) > 1 and s[1] == 'thumb':
+ return ""
+ text = '<span class="template">' + s[1] + '</span>'
+ if self.type == 'TMPL':
+ if re.match("t[+-]$", s[0]):
+ if len(s) > 2:
+ text = s[2]
+ elif s[0] == "term":
+ text = self.parser.tmpl_term(s)
+ elif s[0] == "proto":
+ text = self.parser.tmpl_proto(s)
+ return text
+
+ (qual,sep,tgt) = arg.partition(':')
+ if tgt != '':
+ ns = self.parser.wiki_ns_name(qual)
+ if ns:
+ if ns == 'NS_IMAGE':
+ return ''
+ elif ns == 'NS_MEDIA':
+ tgt = self.parser.media_base + '/' + tgt
+ else:
+ tgt = self.parser.mktgt(tgt)
+ elif self.type == 'LINK' and qual in self.parser.langtab:
+ tgt = self.parser.mktgt(tgt, qual)
+ if not text or text == '':
+ text = self.parser.langtab[qual]
+ else:
+ tgt = self.parser.mktgt(tgt)
+ else:
+ tgt = self.parser.mktgt(arg)
+ return "<a href=\"%s\">%s</a>" % (tgt,
+ text if (text and text != '') \
+ else arg)
+
+class HtmlRefNode(WikiRefNode):
+ def format(self):
+ target = self.ref
+ text = self.content.format()
+ return "<a href=\"%s\">%s</a>" % (target,
+ text if (text and text != '') \
+ else target)
+
+class HtmlFontNode(HtmlSeqNode):
+ def format(self):
+ comm = { 'IT': 'i',
+ 'BOLD': 'b' }
+ s = '<%s>' % comm[self.type]
+ for x in self.content:
+ s += x.format()
+ s += '</%s>' % comm[self.type]
+ return s
+
+class HtmlTextNode(HtmlSeqNode):
+ def format(self):
+ if isinstance(self.content,list):
+ s = ''.join(self.content)
+ else:
+ s = self.content
+ return s
+class HtmlHdrNode(WikiHdrNode):
+ def format(self):
+ level = self.level
+ if level > 4:
+ level = 4
+ return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
+
+class HtmlBarNode(WikiNode):
+ def format(self):
+ return "<hr/>\n"
+
+class HtmlEnvNode(WikiEnvNode):
+ def format(self):
+ type = self.envtype
+ lev = self.level
+ if lev > 4:
+ lev = 2
+ string = ""
+ for s in self.content:
+ n = s.subtype;
+ string += "<%s>%s</%s>" % (self.parser.envt[type]["elt"][n],
+ s.content.format(),
+ self.parser.envt[type]["elt"][n])
+ return "<%s>%s</%s>" % (self.parser.envt[type]["hdr"],
+ string,
+ self.parser.envt[type]["hdr"])
+ return string
+
+class HtmlTagNode(WikiTagNode):
+ def format(self):
+ if self.tag == 'code':
+ self.parser.nested += 1
+ s = self.content.format()
+ self.parser.nested -= 1
+ return '<pre><code>' + s + '</code></pre>' #FIXME
+ else:
+ s = '<' + self.tag
+ if self.args:
+ s += ' ' + str(self.args)
+ s += '>'
+ s += self.content.format()
+ return s + '</' + self.tag + '>'
+
+class HtmlParaNode(HtmlSeqNode):
+ def format(self):
+ return "<p>" + super(HtmlParaNode, self).format() + "</p>\n"
+
+class HtmlPreNode(HtmlSeqNode):
+ def format(self):
+ s = super(HtmlPreNode, self).format()
+ if self.parser.nested:
+ return s
+ else:
+ return '<pre>' + s + '</pre>'
+
+class HtmlIndNode(WikiIndNode):
+ def format(self):
+ return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level
+
+
class HtmlWikiMarkup (WikiMarkup):
"""
A (hopefully) general-purpose Wiki->HTML translator class.
@@ -35,6 +173,23 @@ class HtmlWikiMarkup (WikiMarkup):
nested = 0
+ def __init__(self, *args, **kwargs):
+ super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
+ self.token_class['LINK'] = HtmlLinkNode
+ self.token_class['TMPL'] = HtmlLinkNode
+ self.token_class['REF'] = HtmlRefNode
+ self.token_class['IT'] = HtmlFontNode
+ self.token_class['BOLD'] = HtmlFontNode
+ self.token_class['HDR'] = HtmlHdrNode
+ self.token_class['BAR'] = HtmlBarNode
+ self.token_class['ENV'] = HtmlEnvNode
+ self.token_class['TAG'] = HtmlTagNode
+ self.token_class['PARA'] = HtmlParaNode
+ self.token_class['PRE'] = HtmlPreNode
+ self.token_class['IND'] = HtmlIndNode
+ self.token_class['TEXT'] = HtmlTextNode
+ self.token_class['SEQ'] = HtmlSeqNode
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
@@ -85,175 +240,10 @@ class HtmlWikiMarkup (WikiMarkup):
text += ' <span class="meaning">(' + s[-2] + ')</span>'
return text
-
- def fmtlink(self, elt, istmpl):
- arg = self.format(elt.content[0])
- text = None
- if len(elt.content) > 1:
- s = [x for x in map(self.format, elt.content)]
- if s[0] == 'disambigR' or s[0] == 'wikiquote':
- return ""
- elif len(s) > 1 and s[1] == 'thumb':
- return ""
- text = '<span class="template">' + s[1] + '</span>'
- if istmpl:
- if re.match("t[+-]$", s[0]):
- if len(s) > 2:
- text = s[2]
- elif s[0] == "term":
- text = self.tmpl_term(s)
- elif s[0] == "proto":
- text = self.tmpl_proto(s)
- return text
-
- (qual,sep,tgt) = arg.partition(':')
- if tgt != '':
- ns = self.wiki_ns_name(qual)
- if ns:
- if ns == 'NS_IMAGE':
- return ''
- elif ns == 'NS_MEDIA':
- tgt = self.media_base + '/' + tgt
- else:
- tgt = self.mktgt(tgt)
- elif not istmpl and qual in self.langtab:
- tgt = self.mktgt(tgt, qual)
- if not text or text == '':
- text = self.langtab[qual]
- else:
- tgt = self.mktgt(tgt)
- else:
- tgt = self.mktgt(arg)
- return "<a href=\"%s\">%s</a>" % (tgt,
- text if (text and text != '') \
- else arg)
-
- def str_link(self, elt):
- return self.fmtlink(elt, False)
-
- def str_tmpl(self, elt):
- return self.fmtlink(elt, True)
-
- def str_ref(self, elt):
- target = elt.ref
- text = self.format(elt.content)
- return "<a href=\"%s\">%s</a>" % (target,
- text if (text and text != '') \
- else target)
-
- def concat(self, eltlist):
- string = ""
- for x in eltlist:
- string += self.format(x)
- return string
-
- def str_it(self, elt):
- return "<i>" + self.concat(elt.content) + "</i>"
-
- def str_bold(self, elt):
- return "<b>" + self.concat(elt.content) + "</b>"
-
- def str_hdr(self, elt):
- level = elt.level
- if level > 4:
- level = 4
- return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level)
-
- def str_bar(self):
- return "<hr/>\n"
-
- def str_env(self, elt):
- type = elt.envtype
- lev = elt.level
- if lev > 4:
- lev = 2
- string = ""
- for s in elt.content:
- n = s.subtype;
- string += "<%s>%s</%s>" % (self.envt[type]["elt"][n],
- self.format(s.content),
- self.envt[type]["elt"][n])
- return "<%s>%s</%s>" % (self.envt[type]["hdr"],
- string,
- self.envt[type]["hdr"])
- return string
-
- def str_tag(self, elt):
- if elt.tag == 'code':
- self.nested += 1
- s = self.format(elt.content)
- self.nested -= 1
- return '<pre><code>' + s + '</code></pre>' #FIXME
- else:
- s = '<' + elt.tag
- if elt.args:
- s += ' ' + str(elt.args)
- s += '>'
- s += self.format(elt.content)
- return s + '</' + elt.tag + '>'
-
- def str_para(self, elt):
- string = "";
- for x in elt.content:
- string += self.format(x)
- return "<p>" + string + "</p>\n"
-
- def str_pre(self, elt):
- string = "";
- for x in elt.content:
- string += self.format(x)
- if self.nested:
- return string
- return '<pre>' + string + '</pre>'
-
- def str_ind(self, elt):
- return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level
-
- def format(self, elt):
- if elt.type == 'TEXT':
- if isinstance(elt.content,list):
- string = ""
- for s in elt.content:
- string += s
- else:
- string = elt.content
- return string
- elif elt.type == 'TAG':
- return self.str_tag(elt)
- elif elt.type == 'PARA':
- return self.str_para(elt)
- elif elt.type == 'PRE':
- return self.str_pre(elt)
- elif elt.type == 'IT':
- return self.str_it(elt)
- elif elt.type == 'BOLD':
- return self.str_bold(elt)
- elif elt.type == 'LINK':
- return self.str_link(elt)
- elif elt.type == 'TMPL':
- return self.str_tmpl(elt)
- elif elt.type == 'BAR':
- return self.str_bar()
- elif elt.type == 'HDR':
- return self.str_hdr(elt)
- elif elt.type == 'REF':
- return self.str_ref(elt)
- elif elt.type == 'ENV':
- return self.str_env(elt)
- elif elt.type == 'IND':
- return self.str_ind(elt)
- elif elt.type == 'SEQ':
- string = ""
- for x in elt.content:
- string += self.format(x)
- return string
- else:
- return str(elt)
-
def __str__(self):
str = ""
for elt in self.tree:
- str += self.format(elt)
+ str += elt.format()
return str
class HtmlWiktionaryMarkup (HtmlWikiMarkup):

Return to:

Send suggestions and report system problems to the System administrator.