summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--WikiTrans/wiki2html.py322
-rw-r--r--WikiTrans/wiki2texi.py375
-rw-r--r--WikiTrans/wiki2text.py200
-rw-r--r--WikiTrans/wikimarkup.py7
-rw-r--r--WikiTrans/wikitoken.py13
5 files changed, 548 insertions, 369 deletions
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 0d92e0a..0309ae3 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -15,7 +15,9 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import print_function
from wikimarkup import *
+from wikitoken import *
from wikins import wiki_ns_re, wiki_ns
import re
try:
@@ -24,7 +26,143 @@ except ImportError:
from urllib.parse import quote as url_quote
__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ]
+
+class HtmlSeqNode(WikiSeqNode):
+    """Sequence node rendered as the concatenation of its children's HTML."""
+    def format(self):
+        # Each child renders itself; the pieces are joined in document order.
+        return ''.join(child.format() for child in self.content)
+
+class HtmlLinkNode(HtmlSeqNode):
+    """Render LINK and TMPL nodes as an HTML anchor or as template text.
+
+    The first child is the link argument; a second child, when present,
+    supplies the display text.
+    """
+    def format(self):
+        parser = self.parser
+        arg = self.content[0].format()
+        text = None
+        if len(self.content) > 1:
+            parts = [item.format() for item in self.content]
+            # These two kinds of links produce no output at all.
+            if parts[0] in ('disambigR', 'wikiquote'):
+                return ""
+            if parts[1] == 'thumb':
+                return ""
+            text = '<span class="template">' + parts[1] + '</span>'
+            if self.type == 'TMPL':
+                head = parts[0]
+                if re.match("t[+-]$", head):
+                    if len(parts) > 2:
+                        text = parts[2]
+                elif head == "term":
+                    text = parser.tmpl_term(parts)
+                elif head == "proto":
+                    text = parser.tmpl_proto(parts)
+                # Templates with arguments yield text only, never an anchor.
+                return text
+
+        qual, _, tgt = arg.partition(':')
+        if tgt == '':
+            # No "qualifier:" prefix: the whole argument is the target.
+            tgt = parser.mktgt(arg)
+        else:
+            ns = parser.wiki_ns_name(qual)
+            if ns:
+                if ns == 'NS_IMAGE':
+                    return ''
+                if ns == 'NS_MEDIA':
+                    tgt = parser.media_base + '/' + tgt
+                else:
+                    tgt = parser.mktgt(tgt)
+            elif self.type == 'LINK' and qual in parser.langtab:
+                # Interlanguage link: default its text to the language name.
+                tgt = parser.mktgt(tgt, qual)
+                if not text:
+                    text = parser.langtab[qual]
+            else:
+                tgt = parser.mktgt(tgt)
+        return "<a href=\"%s\">%s</a>" % (tgt, text if text else arg)
+
+class HtmlRefNode(WikiRefNode):
+    """External reference rendered as an anchor pointing at ``self.ref``."""
+    def format(self):
+        href = self.ref
+        label = self.content.format()
+        # An empty label falls back to showing the target itself.
+        if not label:
+            label = href
+        return "<a href=\"%s\">%s</a>" % (href, label)
+
+class HtmlFontNode(HtmlSeqNode):
+    """Italic ('IT') or bold ('BOLD') run wrapped in the matching HTML tag."""
+    def format(self):
+        # The tag is resolved first, so an unknown type fails before any
+        # child is rendered.
+        tag = {'IT': 'i', 'BOLD': 'b'}[self.type]
+        inner = ''.join(child.format() for child in self.content)
+        return '<%s>%s</%s>' % (tag, inner, tag)
+
+class HtmlTextNode(HtmlSeqNode):
+    """Leaf text node whose content is a string or a list of fragments."""
+    def format(self):
+        data = self.content
+        # A list of fragments is joined without a separator.
+        return ''.join(data) if isinstance(data, list) else data
+class HtmlHdrNode(WikiHdrNode):
+    """Section heading; levels deeper than 4 are rendered as <h4>."""
+    def format(self):
+        depth = min(self.level, 4)
+        title = self.content.format()
+        return "<h%s>%s</h%s>\n\n" % (depth, title, depth)
+
+class HtmlBarNode(WikiNode):
+    """Horizontal rule."""
+    def format(self):
+        rule = "<hr/>\n"
+        return rule
+
+class HtmlEnvNode(WikiEnvNode):
+    """List-like environment rendered as an HTML list.
+
+    The wrapper tag and the per-item tags come from the parser's ``envt``
+    table: ``envt[envtype]["hdr"]`` names the enclosing element and
+    ``envt[envtype]["elt"][subtype]`` the element used for each item.
+
+    Returns the complete markup as a string.
+    """
+    def format(self):
+        spec = self.parser.envt[self.envtype]
+        items = []
+        for item in self.content:
+            tag = spec["elt"][item.subtype]
+            items.append("<%s>%s</%s>" % (tag, item.content.format(), tag))
+        hdr = spec["hdr"]
+        # The previous version also clamped an unused `lev` value and had a
+        # second, unreachable return; both are dropped.
+        return "<%s>%s</%s>" % (hdr, ''.join(items), hdr)
+
+class HtmlTagNode(WikiTagNode):
+    """HTML tag node; 'code' is emitted as a <pre><code> block."""
+    def format(self):
+        if self.tag != 'code':
+            opening = '<' + self.tag
+            if self.args:
+                opening += ' ' + str(self.args)
+            opening += '>'
+            return opening + self.content.format() + '</' + self.tag + '>'
+        # parser.nested is raised while the children render so that nodes
+        # consulting it can tell they are inside a code block.
+        self.parser.nested += 1
+        body = self.content.format()
+        self.parser.nested -= 1
+        return '<pre><code>' + body + '</code></pre>' #FIXME
+
+class HtmlParaNode(HtmlSeqNode):
+    """Paragraph: the concatenated children wrapped in <p>...</p>."""
+    def format(self):
+        inner = super(HtmlParaNode, self).format()
+        return "<p>" + inner + "</p>\n"
+
+class HtmlPreNode(HtmlSeqNode):
+    """Preformatted text; the <pre> wrapper is omitted while parser.nested is set."""
+    def format(self):
+        text = super(HtmlPreNode, self).format()
+        # parser.nested is consulted after the children have been rendered.
+        return text if self.parser.nested else '<pre>' + text + '</pre>'
+
+class HtmlIndNode(WikiIndNode):
+    """Indented text: one <dl><dd> wrapper per indentation level."""
+    def format(self):
+        depth = self.level
+        opening = "<dl><dd>" * depth
+        closing = "</dd></dl>" * depth
+        return opening + self.content.format() + closing
+
+
class HtmlWikiMarkup (WikiMarkup):
"""
A (hopefully) general-purpose Wiki->HTML translator class.
@@ -35,6 +173,23 @@ class HtmlWikiMarkup (WikiMarkup):
nested = 0
+ def __init__(self, *args, **kwargs):
+     """Initialize the base translator, then register the HTML node classes."""
+     super(HtmlWikiMarkup, self).__init__(*args, **kwargs)
+     # Each token type is mapped to the node class that renders it as HTML.
+     for kind, node_class in (
+             ('LINK', HtmlLinkNode),
+             ('TMPL', HtmlLinkNode),
+             ('REF', HtmlRefNode),
+             ('IT', HtmlFontNode),
+             ('BOLD', HtmlFontNode),
+             ('HDR', HtmlHdrNode),
+             ('BAR', HtmlBarNode),
+             ('ENV', HtmlEnvNode),
+             ('TAG', HtmlTagNode),
+             ('PARA', HtmlParaNode),
+             ('PRE', HtmlPreNode),
+             ('IND', HtmlIndNode),
+             ('TEXT', HtmlTextNode),
+             ('SEQ', HtmlSeqNode)):
+         self.token_class[kind] = node_class
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
@@ -85,175 +240,10 @@ class HtmlWikiMarkup (WikiMarkup):
text += ' <span class="meaning">(' + s[-2] + ')</span>'
return text
-
- def fmtlink(self, elt, istmpl):
- arg = self.format(elt.content[0])
- text = None
- if len(elt.content) > 1:
- s = [x for x in map(self.format, elt.content)]
- if s[0] == 'disambigR' or s[0] == 'wikiquote':
- return ""
- elif len(s) > 1 and s[1] == 'thumb':
- return ""
- text = '<span class="template">' + s[1] + '</span>'
- if istmpl:
- if re.match("t[+-]$", s[0]):
- if len(s) > 2:
- text = s[2]
- elif s[0] == "term":
- text = self.tmpl_term(s)
- elif s[0] == "proto":
- text = self.tmpl_proto(s)
- return text
-
- (qual,sep,tgt) = arg.partition(':')
- if tgt != '':
- ns = self.wiki_ns_name(qual)
- if ns:
- if ns == 'NS_IMAGE':
- return ''
- elif ns == 'NS_MEDIA':
- tgt = self.media_base + '/' + tgt
- else:
- tgt = self.mktgt(tgt)
- elif not istmpl and qual in self.langtab:
- tgt = self.mktgt(tgt, qual)
- if not text or text == '':
- text = self.langtab[qual]
- else:
- tgt = self.mktgt(tgt)
- else:
- tgt = self.mktgt(arg)
- return "<a href=\"%s\">%s</a>" % (tgt,
- text if (text and text != '') \
- else arg)
-
- def str_link(self, elt):
- return self.fmtlink(elt, False)
-
- def str_tmpl(self, elt):
- return self.fmtlink(elt, True)
-
- def str_ref(self, elt):
- target = elt.ref
- text = self.format(elt.content)
- return "<a href=\"%s\">%s</a>" % (target,
- text if (text and text != '') \
- else target)
-
- def concat(self, eltlist):
- string = ""
- for x in eltlist:
- string += self.format(x)
- return string
-
- def str_it(self, elt):
- return "<i>" + self.concat(elt.content) + "</i>"
-
- def str_bold(self, elt):
- return "<b>" + self.concat(elt.content) + "</b>"
-
- def str_hdr(self, elt):
- level = elt.level
- if level > 4:
- level = 4
- return "<h%s>%s</h%s>\n\n" % (level, self.format(elt.content), level)
-
- def str_bar(self):
- return "<hr/>\n"
-
- def str_env(self, elt):
- type = elt.envtype
- lev = elt.level
- if lev > 4:
- lev = 2
- string = ""
- for s in elt.content:
- n = s.subtype;
- string += "<%s>%s</%s>" % (self.envt[type]["elt"][n],
- self.format(s.content),
- self.envt[type]["elt"][n])
- return "<%s>%s</%s>" % (self.envt[type]["hdr"],
- string,
- self.envt[type]["hdr"])
- return string
-
- def str_tag(self, elt):
- if elt.tag == 'code':
- self.nested += 1
- s = self.format(elt.content)
- self.nested -= 1
- return '<pre><code>' + s + '</code></pre>' #FIXME
- else:
- s = '<' + elt.tag
- if elt.args:
- s += ' ' + str(elt.args)
- s += '>'
- s += self.format(elt.content)
- return s + '</' + elt.tag + '>'
-
- def str_para(self, elt):
- string = "";
- for x in elt.content:
- string += self.format(x)
- return "<p>" + string + "</p>\n"
-
- def str_pre(self, elt):
- string = "";
- for x in elt.content:
- string += self.format(x)
- if self.nested:
- return string
- return '<pre>' + string + '</pre>'
-
- def str_ind(self, elt):
- return ("<dl><dd>" * elt.level) + self.format(elt.content) + "</dd></dl>" * elt.level
-
- def format(self, elt):
- if elt.type == 'TEXT':
- if isinstance(elt.content,list):
- string = ""
- for s in elt.content:
- string += s
- else:
- string = elt.content
- return string
- elif elt.type == 'TAG':
- return self.str_tag(elt)
- elif elt.type == 'PARA':
- return self.str_para(elt)
- elif elt.type == 'PRE':
- return self.str_pre(elt)
- elif elt.type == 'IT':
- return self.str_it(elt)
- elif elt.type == 'BOLD':
- return self.str_bold(elt)
- elif elt.type == 'LINK':
- return self.str_link(elt)
- elif elt.type == 'TMPL':
- return self.str_tmpl(elt)
- elif elt.type == 'BAR':
- return self.str_bar()
- elif elt.type == 'HDR':
- return self.str_hdr(elt)
- elif elt.type == 'REF':
- return self.str_ref(elt)
- elif elt.type == 'ENV':
- return self.str_env(elt)
- elif elt.type == 'IND':
- return self.str_ind(elt)
- elif elt.type == 'SEQ':
- string = ""
- for x in elt.content:
- string += self.format(x)
- return string
- else:
- return str(elt)
-
def __str__(self):
str = ""
for elt in self.tree:
- str += self.format(elt)
+ str += elt.format()
return str
class HtmlWiktionaryMarkup (HtmlWikiMarkup):
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index f55fc06..106ad77 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -16,10 +16,182 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from wikimarkup import *
+from wikitoken import *
from wikins import wiki_ns_re, wiki_ns
import re
import urllib
+class TexiTextNode(WikiTextNode):
+    """Leaf text node; passes its content to the parser's ``_print``."""
+    def format(self):
+        emit = self.parser._print
+        data = self.content
+        # A single string is handled like a one-element list of fragments.
+        pieces = data if isinstance(data, list) else [data]
+        for piece in pieces:
+            emit(piece)
+
+class TexiTagNode(WikiTagNode):
+ # Emits a tag node through parser._print:
+ #  - 'code' and 'tt' capture their content and wrap it in @example
+ #    (when self.isblock is set) or @code{...};
+ #  - 'div' emits an @anchor when its args carry an 'id';
+ #  - any other tag is printed as <tag args>...</tag>.
+ # Returns nothing; all output goes through the parser.
+ def format(self):
+ parser = self.parser
+ if self.tag in ['code', 'tt']:
+ # Content is rendered into a separate capture so it can be wrapped.
+ save = parser._begin_print()
+ parser.nested += 1
+ self.content.format()
+ parser.nested -= 1
+ s = parser._end_print(save)
+ if self.isblock:
+ parser._print('@example', nl=True, escape=False)
+ parser._print(s, escape=False)
+ parser._print('@end example\n', nl=True, escape=False)
+ else:
+ parser._print('@code{%s}' % s, escape=False)
+ elif self.tag == 'div':
+ if self.args and 'id' in self.args:
+ parser._print("@anchor{%s}\n" % self.args['id'],
+ nl=True, escape=False)
+ self.content.format()
+ else:
+ parser._print('<' + self.tag)
+ if self.args:
+ # args is indexed like a mapping in the 'div' branch above, so it is
+ # converted explicitly before being concatenated with a string.
+ parser._print(' ' + str(self.args))
+ parser._print('>');
+ self.content.format()
+ parser._print('</' + self.tag + '>')
+
+class TexiParaNode(WikiSeqNode):
+ # Emits the paragraph's children, requesting a line break before and
+ # after them unless the accumulated output already ends in a blank line.
+ def format(self):
+ parser = self.parser
+ # As in the str_para method this replaces, nothing is added while
+ # parser.acc is still empty (or unset).
+ if parser.acc and not parser.acc.endswith('\n\n'):
+ parser._print('\n', nl=True)
+ for x in self.content:
+ x.format()
+ if parser.acc and not parser.acc.endswith('\n\n'):
+ parser._print('\n', nl=True)
+
+class TexiPreNode(WikiSeqNode):
+    """Preformatted text; wrapped in @example unless parser.nested is set."""
+    def format(self):
+        out = self.parser
+        emit = out._print
+        if not out.nested:
+            emit('@example\n', nl=True, escape=False)
+        for child in self.content:
+            child.format()
+        # parser.nested is re-read here, after the children have run.
+        if not out.nested:
+            emit('@end example\n', nl=True, escape=False)
+
+class TexiFontNode(WikiSeqNode):
+    """Italic ('IT') or bold ('BOLD') run emitted as @i{...} or @b{...}."""
+    def format(self):
+        emit = self.parser._print
+        command = {'IT': 'i', 'BOLD': 'b'}[self.type]
+        emit('@%s{' % command, escape=False)
+        for child in self.content:
+            child.format()
+        emit('}', escape=False)
+
+class TexiHdrNode(WikiHdrNode):
+ # Emits a heading through parser._print, choosing the command from
+ # parser.sectcomm[parser.sectioning_model] by heading level.
+ # NOTE(review): indentation was lost in this rendering, so the nesting of
+ # the statements after the else branch should be confirmed against the
+ # real file.
+ def format(self):
+ parser = self.parser
+ level = self.level
+ # FIXME
+ # A level past the end of the command list (after accounting for
+ # sectioning_start) is emitted with "@* " instead of a sectioning command.
+ if level > len(parser.sectcomm[parser.sectioning_model]) - 1 - parser.sectioning_start:
+ parser._print("@* ", nl=True, escape=False)
+ self.content.format()
+ else:
+ # The command is looked up at index (level - sectioning_start).
+ parser._print(parser.sectcomm[parser.sectioning_model][level - parser.sectioning_start] + " ", nl=True, escape=False)
+ self.content.format()
+ parser._print(None, nl=True)
+ # Models whose first command is '@top' also get an '@node ' line that
+ # repeats the heading text.
+ if parser.sectcomm[parser.sectioning_model][0] == '@top':
+ parser._print('@node ', nl=True, escape=False)
+ self.content.format()
+ parser._print('\n')
+ parser._print(None, nl=True)
+
+class TexiBarNode(WikiNode):
+    """Horizontal bar, emitted as a line of five dashes."""
+    def format(self):
+        rule = "\n-----\n"
+        self.parser._print(rule)
+
+class TexiIndNode(WikiIndNode):
+    """Indented text: one "@w{ }" per indentation level, then the content."""
+    def format(self):
+        out = self.parser
+        prefix = "@w{ }" * self.level
+        out._print(prefix, nl=True, escape=False)
+        self.content.format()
+        out._print(None, nl=True)
+
+class TexiEnvNode(WikiEnvNode):
+ # Emits a list environment through parser._print, chosen by self.envtype:
+ #   'unnumbered' -> @itemize @bullet ... @end itemize
+ #   'numbered'   -> @enumerate ... @end enumerate
+ #   'defn'       -> @table @asis ... @end table
+ # Any other envtype produces no output.
+ # NOTE(review): indentation was lost in this rendering; whether the bare
+ # '\n' prints belong inside the item loops should be confirmed against
+ # the real file.
+ def format(self):
+ parser = self.parser
+ if self.envtype == 'unnumbered':
+ parser._print('@itemize @bullet\n', nl=True, escape=False)
+ for s in self.content:
+ # Each item: '@item ' followed by the item's formatted content.
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end itemize\n', nl=True, escape=False)
+ elif self.envtype == 'numbered':
+ parser._print('@enumerate\n', nl=True, escape=False)
+ for s in self.content:
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end enumerate\n', nl=True, escape=False)
+ elif self.envtype == 'defn':
+ parser._print('@table @asis\n', nl=True, escape=False)
+ for s in self.content:
+ # Only subtype 0 entries are introduced with '@item '; other entries
+ # are emitted as plain content.
+ if s.subtype == 0:
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ else:
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end table\n', nl=True, escape=False)
+
+class TexiLinkNode(WikiSeqNode):
+    """Internal link emitted as @ref{qualifier[,text]}.
+
+    The children are rendered into temporary captures to obtain the link
+    argument and the optional display text as strings.
+    """
+    def format(self):
+        parser = self.parser
+        saved = parser._begin_print()
+        self.content[0].format()
+        arg = parser._end_print()
+
+        captured = None
+        text = None
+        if len(self.content) > 1:
+            captured = []
+            for node in self.content[0:2]:
+                parser._begin_print()
+                node.format()
+                captured.append(parser._end_print())
+            text = captured[1]
+
+        # Hand the state saved by the first _begin_print back to the parser.
+        parser._end_print(saved)
+
+        if captured:
+            # These kinds of links produce no output.
+            if captured[0] in ('disambigR', 'wikiquote'):
+                return
+            if len(captured) > 1 and captured[1] == 'thumb':
+                return
+
+        # Only the part before the first ':' is used as the @ref target.
+        qual = arg.partition(':')[0]
+        if text:
+            parser._print("@ref{%s,%s}" % (qual, text), escape=False)
+        else:
+            parser._print("@ref{%s}" % qual, escape=False)
+
+class TexiRefNode(WikiRefNode):
+    """External reference emitted as @uref{target[,text]}."""
+    def format(self):
+        out = self.parser
+        url = self.ref
+        # Render the label into a capture so it can be embedded in @uref.
+        saved = out._begin_print()
+        self.content.format()
+        label = out._end_print(saved)
+        if label:
+            markup = "@uref{%s,%s}" % (url, label)
+        else:
+            markup = "@uref{%s}" % url
+        out._print(markup, escape=False)
+
class TexiWikiMarkup (WikiMarkup):
nested = 0
sectcomm = {
@@ -58,6 +230,20 @@ class TexiWikiMarkup (WikiMarkup):
def __init__(self, *args, **keywords):
super(TexiWikiMarkup, self).__init__(*args, **keywords)
+
+ self.token_class['TEXT'] = TexiTextNode
+ self.token_class['TAG'] = TexiTagNode
+ self.token_class['PARA'] = TexiParaNode
+ self.token_class['PRE'] = TexiPreNode
+ self.token_class['IT'] = TexiFontNode
+ self.token_class['BOLD'] = TexiFontNode
+ self.token_class['HDR'] = TexiHdrNode
+ self.token_class['BAR'] = TexiBarNode
+ self.token_class['IND'] = TexiIndNode
+ self.token_class['ENV'] = TexiEnvNode
+ self.token_class['LINK'] = TexiLinkNode
+ self.token_class['REF'] = TexiRefNode
+
if "sectioning-model" in keywords:
val = keywords["sectioning-model"]
if val in self.sectcomm:
@@ -98,196 +284,9 @@ class TexiWikiMarkup (WikiMarkup):
def __str__(self):
self._begin_print()
for elt in self.tree:
- self.format(elt)
+ elt.format()
return self._end_print()
- def format(self, elt):
- if elt.type == 'TEXT':
- if isinstance(elt.content,list):
- for s in elt.content:
- self._print(s)
- else:
- self._print(elt.content)
- elif elt.type == 'TAG':
- self.str_tag(elt)
- elif elt.type == 'PARA':
- self.str_para(elt)
- elif elt.type == 'PRE':
- self.str_pre(elt)
- elif elt.type == 'IT':
- self.str_it(elt)
- elif elt.type == 'BOLD':
- self.str_bold(elt)
- elif elt.type == 'LINK':
- self.str_link(elt)
- elif elt.type == 'TMPL':
- self.str_tmpl(elt)
- elif elt.type == 'BAR':
- self.str_bar()
- elif elt.type == 'HDR':
- self.str_hdr(elt)
- elif elt.type == 'REF':
- self.str_ref(elt)
- elif elt.type == 'ENV':
- self.str_env(elt)
- elif elt.type == 'IND':
- self.str_ind(elt)
- elif elt.type == 'SEQ':
- for x in elt.content:
- self.format(x)
- else:
- self._print(str(elt))
-
- def str_tag(self, elt):
- if elt.tag in ['code', 'tt']:
- save = self._begin_print()
- self.nested += 1
- self.format(elt.content)
- self.nested -= 1
- s = self._end_print(save)
- if elt.isblock:
- self._print('@example', nl=True, escape=False)
- self._print(s, escape=False)
- self._print('@end example\n', nl=True, escape=False)
- else:
- self._print('@code{%s}' % s, escape=False)
- elif elt.tag == 'div':
- if elt.args and 'id' in elt.args:
- self._print("@anchor{%s}\n" % elt.args['id'],
- nl=True, escape=False)
- self.format(elt.content)
- else:
- self._print('<' + elt.tag)
- if elt.args:
- self._print(' ' + elt.args)
- self._print('>');
- self.format(elt.content);
- self._print('</' + elt.tag + '>')
-
- def str_para(self, elt):
- if self.acc and not self.acc.endswith('\n\n'):
- self._print('\n', nl=True)
- for x in elt.content:
- self.format(x)
- if self.acc and not self.acc.endswith('\n\n'):
- self._print('\n', nl=True)
-
- def str_pre(self, elt):
- if not self.nested:
- self._print('@example\n', nl=True, escape=False)
- for x in elt.content:
- self.format(x)
- if not self.nested:
- self._print('@end example\n', nl=True, escape=False)
-
- def concat(self, eltlist):
- for x in eltlist:
- self.format(x)
-
- def str_it(self, elt):
- self._print('@i{', escape=False)
- self.concat(elt.content)
- self._print('}', escape=False)
-
- def str_bold(self, elt):
- self._print('@b{', escape=False)
- self.concat(elt.content)
- self._print('}', escape=False)
-
- def str_hdr(self, elt):
- level = elt.level
- if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
- self._print("@* ", nl=True, escape=False)
- self.format(elt.content)
- else:
- self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False)
- self.format(elt.content)
- self._print(None, nl=True)
- if self.sectcomm[self.sectioning_model][0] == '@top':
- self._print('@node ', nl=True, escape=False)
- self.format(elt.content)
- self._print('\n')
- self._print(None, nl=True)
-
- def str_bar(self):
- self._print("\n-----\n")
-
- def str_ind(self, elt):
- self._print("@w{ }" * elt.level, nl=True, escape=False)
- self.format(elt.content)
- self._print(None, nl=True)
-
- def str_env(self, elt):
- if elt.envtype == 'unnumbered':
- self._print('@itemize @bullet\n', nl=True, escape=False)
- for s in elt.content:
- self._print('@item ', nl=True, escape=False)
- self.format(s.content)
- self._print(None, nl=True)
- self._print('\n')
- self._print('@end itemize\n', nl=True, escape=False)
- elif elt.envtype == 'numbered':
- self._print('@enumerate\n', nl=True, escape=False)
- for s in elt.content:
- self._print('@item ', nl=True, escape=False)
- self.format(s.content)
- self._print(None, nl=True)
- self._print('\n')
- self._print('@end enumerate\n', nl=True, escape=False)
- elif elt.envtype == 'defn':
- self._print('@table @asis\n', nl=True, escape=False)
- for s in elt.content:
- if s.subtype == 0:
- self._print('@item ', nl=True, escape=False)
- self.format(s.content)
- self._print(None, nl=True)
- else:
- self.format(s.content)
- self._print(None, nl=True)
- self._print('\n')
- self._print('@end table\n', nl=True, escape=False)
-
- def str_link(self, elt):
- save = self._begin_print()
- self.format(elt.content[0])
- arg = self._end_print()
- if len(elt.content) > 1:
- s = []
- for x in elt.content[0:2]:
- self._begin_print()
- self.format(x)
- s.append(self._end_print())
- text = s[1]
- else:
- s = None
- text = None
-
- self._end_print(save)
-
- if s:
- if s[0] == 'disambigR' or s[0] == 'wikiquote':
- return
- if len(s) > 1 and s[1] == 'thumb':
- return
-
- (qual,sep,tgt) = arg.partition(':')
- if text:
- self._print("@ref{%s,%s}" % (qual, text), escape=False)
- else:
- self._print("@ref{%s}" % qual, escape=False)
-
- def str_tmpl(self, elt):
- self._print("FIXME: str_tmpl not implemented\n")
-
- def str_ref(self, elt):
- target = elt.ref
- save = self._begin_print()
- self.format(elt.content)
- text = self._end_print(save)
- if text and text != '':
- self._print("@uref{%s,%s}" % (target, text), escape=False)
- else:
- self._print("@uref{%s}" % target, escape=False)
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index ee1748c..b5bd708 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -15,11 +15,186 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+from wikitoken import *
from wikimarkup import *
from wikins import wiki_ns_re, wiki_ns
import re
import urllib
+class TextSeqNode(WikiSeqNode):
+    """Sequence node rendered as its children's text, separated by spaces."""
+    def format(self):
+        out = ""
+        for child in self.content:
+            # A space is inserted only once more than one character has
+            # accumulated and the last one is not whitespace.
+            needs_gap = len(out) > 1 and not out[-1].isspace()
+            if needs_gap:
+                out += ' '
+            out += child.format()
+        return out
+
+class TextTextNode(WikiTextNode):
+ # Renders a text token as plain text: a list of fragments is joined with
+ # a separator between fragments, a plain string is returned unchanged.
+ def format(self):
+ # The node formats its own content; `elt` is not defined in this scope.
+ if isinstance(self.content,list):
+ string = ""
+ for s in self.content:
+ if string:
+ if string.endswith("."):
+ string += " "
+ else:
+ string += " "
+ string += s
+ else:
+ string = self.content
+ return string
+
+class TextPreNode(WikiSeqNode):
+ # Renders preformatted text by concatenating the children's output and
+ # appending '\n'.
+ def format(self):
+ string = ""
+ # The node iterates its own content; `elt` is not defined in this scope.
+ for x in self.content:
+ string += x.format()
+ string += '\n'
+ return string
+
+class TextParaNode(WikiSeqNode):
+ # Renders a paragraph: the children's text is concatenated, passed through
+ # parser.fmtpara and terminated with two newlines.
+ def format(self):
+ string = ""
+ # The node iterates its own content; `elt` is not defined in this scope.
+ for x in self.content:
+ string += x.format()
+ string = self.parser.fmtpara(string) + '\n\n'
+ return string
+
+class TextItNode(WikiSeqNode):
+ # Renders italic text as _text_: non-empty child outputs are joined with
+ # single spaces and wrapped in underscores.
+ def format(self):
+ string = ""
+ # The node iterates its own content; `elt` is not defined in this scope.
+ for x in self.content:
+ s = x.format()
+ if s:
+ string += " " + s
+ return "_" + string.lstrip(" ") + "_"
+
+class TextBoldNode(WikiSeqNode):
+ # Renders bold text as upper case; a separator is added before each
+ # child's output.
+ def format(self):
+ string = ""
+ # The node iterates its own content; `elt` is not defined in this scope.
+ for x in self.content:
+ if string.endswith("."):
+ string += " "
+ else:
+ string += " "
+ string += x.format()
+ return string.upper()
+
+class TextLinkNode(WikiSeqNode):
+ # Renders a link as plain text. The first child is the link argument, an
+ # optional second child is the display text. Returns "" for links that
+ # are suppressed; otherwise the text, the argument, or "text (see target) "
+ # when parser.references is set.
+ def format(self):
+ arg = self.content[0].format()
+ if len(self.content) > 1:
+ s = [x for x in map(lambda x: x.format(), self.content)]
+ text = s[1]
+ else:
+ s = None
+ text = None
+
+ if s:
+ if s[0] == 'disambigR' or s[0] == 'wikiquote':
+ return ""
+ if len(s) > 1 and s[1] == 'thumb':
+ return ""
+ (qual,sep,tgt) = arg.partition(':')
+ if tgt != '':
+ ns = self.parser.wiki_ns_name(qual)
+ if ns:
+ if ns == 'NS_IMAGE':
+ if not self.parser.references:
+ return ""
+ text = "[%s: %s]" % (qual, text if text else arg)
+ # image_base is read from the translator, like every other setting
+ # used here (presumably defined on TextWikiMarkup; confirm).
+ tgt = self.parser.image_base + '/' + \
+ urllib.quote(tgt) + \
+ '/250px-' + urllib.quote(tgt)
+ elif ns == 'NS_MEDIA':
+ text = "[%s]" % (qual)
+ else:
+ tgt = self.parser.mktgt(tgt)
+ elif self.type == 'LINK' and qual in self.parser.langtab:
+ text = self.parser.langtab[qual] + ": " + tgt
+ tgt = self.parser.mktgt(tgt, qual)
+ else:
+ tgt = self.parser.mktgt(tgt)
+ else:
+ tgt = self.parser.mktgt(arg)
+ if self.parser.references:
+ return "%s (see %s) " % (text, tgt)
+ elif not text or text == '':
+ return arg
+ else:
+ return text
+
+class TextTmplNode(TextLinkNode):
+    """Template node: the link rendering enclosed in square brackets."""
+    def format(self):
+        rendered = super(TextTmplNode, self).format()
+        return '[' + rendered + ']'
+
+class TextBarNode(WikiNode):
+    """Horizontal bar: a centered run of dashes sized from parser.width."""
+    def format(self):
+        # Widths below 5 are treated as 5.
+        width = max(self.parser.width, 5)
+        dashes = "-" * (width - 5)
+        return "\n" + dashes.center(width - 1) + "\n"
+
+class TextHdrNode(WikiHdrNode):
+ # Renders a heading as one '*' per level, a space, the heading text with
+ # leading spaces stripped, and a trailing blank line.
+ def format(self):
+ # The node formats its own content; `elt` is not defined in this scope.
+ return "\n" + ("*" * self.level) + " " + \
+ self.content.format().lstrip(" ") + "\n\n"
+
+class TextRefNode(WikiRefNode):
+    """External reference rendered as "text (see target) " or "see target"."""
+    def format(self):
+        label = self.content.format()
+        if not label:
+            return "see " + self.ref
+        return "%s (see %s) " % (label, self.ref)
+
+class TextEnvNode(WikiEnvNode):
+ # Renders a list environment as indented plain text and returns it as a
+ # string. Item layout depends on self.envtype ("unnumbered", "numbered"
+ # or "defn").
+ # NOTE(review): indentation was lost in this rendering; the nesting of
+ # `n += 1` and of the final endswith check should be confirmed against
+ # the real file.
+ def format(self):
+ type = self.envtype
+ lev = self.level
+ # Levels larger than (width - 4) fall back to level 1.
+ if lev > self.parser.width - 4:
+ lev = 1
+ string = ""
+ # Counter for "numbered" items; the first item is numbered 1.
+ n = 1
+ for s in self.content:
+ if not string.endswith("\n"):
+ string += "\n"
+ x = s.content.format()
+ if type == "unnumbered":
+ string += self.parser.fmtpara(self.parser.indent(lev, "- " + x.lstrip(" ")))
+ elif type == "numbered":
+ string += self.parser.fmtpara(self.parser.indent(lev, "%d. %s" % (n, x)))
+ n += 1
+ elif type == "defn":
+ # Subtype 0 entries are indented by lev-1, all others by lev+3.
+ if s.subtype == 0:
+ string += self.parser.indent(lev-1, x)
+ else:
+ string += self.parser.indent(lev+3, x)
+
+ if not string.endswith("\n"):
+ string += "\n"
+
+ return string
+
+class TextIndNode(WikiIndNode):
+    """Indented text: one space per level, the content, then a newline."""
+    def format(self):
+        pad = " " * self.level
+        return pad + self.content.format() + '\n'
+
+class TextTagNode(WikiTagNode):
+ # Renders a tag node as text: 'code' yields just its content (rendered
+ # with parser.nested raised), any other tag is reproduced literally as
+ # <tag args>content</tag>.
+ def format(self):
+ if self.tag == 'code':
+ self.parser.nested += 1
+ # The node formats its own content; `elt` is not defined in this scope.
+ s = self.content.format()
+ self.parser.nested -= 1
+ else:
+ s = '<' + self.tag
+ if self.args:
+ s += ' ' + str(self.args)
+ s += '>' + self.content.format() + '</' + self.tag + '>'
+ return s
+
+
class TextWikiMarkup (WikiMarkup):
"""
A (general-purpose Wiki->Text translator class.
@@ -36,20 +211,29 @@ class TextWikiMarkup (WikiMarkup):
num = 0
def __init__(self, *args, **keywords):
- WikiMarkup.__init__(self, *args, **keywords)
+ super(TextWikiMarkup,self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
if 'refs' in keywords:
self.references = keywords['refs']
if 'markup' in keywords:
self.markup = keywords['markup']
-
- def xref(self, text, target):
- if text:
- return "%s (see %s) " % (text, target)
- else:
- return "see " + target
-
+ # Register the plain-text node class for each token type (each type is
+ # registered once).
+ self.token_class['SEQ'] = TextSeqNode
+ self.token_class['TEXT'] = TextTextNode
+ self.token_class['PRE'] = TextPreNode
+ self.token_class['PARA'] = TextParaNode
+ self.token_class['IT'] = TextItNode
+ self.token_class['BOLD'] = TextBoldNode
+ self.token_class['LINK'] = TextLinkNode
+ self.token_class['TMPL'] = TextTmplNode
+ self.token_class['BAR'] = TextBarNode
+ self.token_class['HDR'] = TextHdrNode
+ self.token_class['REF'] = TextRefNode
+ self.token_class['ENV'] = TextEnvNode
+ self.token_class['IND'] = TextIndNode
+ self.token_class['TAG'] = TextTagNode
+
def wiki_ns_name(self, str):
if str in wiki_ns[self.lang]:
return wiki_ns[self.lang][str]
diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py
index 026f1d7..ad0f675 100644
--- a/WikiTrans/wikimarkup.py
+++ b/WikiTrans/wikimarkup.py
@@ -149,7 +149,7 @@ class BaseWikiMarkup(object):
}
def __createWikiNode(self,**kwarg):
- return self.token_class[kwarg['type']](**kwarg)
+ return self.token_class[kwarg['type']](self, **kwarg)
def tokread(self):
line = None
@@ -362,8 +362,7 @@ class BaseWikiMarkup(object):
self.toklist[self.tokind-1] = tok
return tok
- def dump(self, tree, file=sys.stdout):
-
+ def dump(self, tree, file=sys.stdout):
for node in tree:
file.write(str(node))
file.write('\n')
@@ -785,7 +784,7 @@ class BaseWikiMarkup(object):