diff options
Diffstat (limited to 'wikitrans')
-rw-r--r-- | wikitrans/__init__.py | 26 | ||||
-rw-r--r-- | wikitrans/wiki2html.py | 320 | ||||
-rw-r--r-- | wikitrans/wiki2texi.py | 410 | ||||
-rw-r--r-- | wikitrans/wiki2text.py | 348 | ||||
-rw-r--r-- | wikitrans/wikidump.py | 77 | ||||
-rw-r--r-- | wikitrans/wikimarkup.py | 1285 | ||||
-rw-r--r-- | wikitrans/wikins.py | 3040 | ||||
-rw-r--r-- | wikitrans/wikitoken.py | 318 |
8 files changed, 5824 insertions, 0 deletions
diff --git a/wikitrans/__init__.py b/wikitrans/__init__.py new file mode 100644 index 0000000..5832e38 --- /dev/null +++ b/wikitrans/__init__.py @@ -0,0 +1,26 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (C) 2008-2018 Sergey Poznyakoff +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +__all__ = [ + "wikitoken", + "wikimarkup", + "wikidump", + "wiki2html", + "wiki2text", + "wiki2texi", + "wikins" +] diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py new file mode 100644 index 0000000..ce65bae --- /dev/null +++ b/wikitrans/wiki2html.py @@ -0,0 +1,320 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (C) 2008-2018 Sergey Poznyakoff +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Wiki markup to HTML translator. + +Classes: + +HtmlWikiMarkup -- Converts Wiki material to HTML. 
+HtmlWiktionaryMarkup -- Reserved for future use. Currently does the same as + HtmlWikiMarkup. + +""" + +from __future__ import print_function +from wikitrans.wikimarkup import * +from wikitrans.wikitoken import * +from wikitrans.wikins import wiki_ns_re, wiki_ns +import re +try: + from urllib import quote as url_quote +except ImportError: + from urllib.parse import quote as url_quote + +try: + from html import escape as html_escape +except ImportError: + from cgi import escape as html_escape + +__all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] + +class HtmlSeqNode(WikiSeqNode): + def format(self): + s = '' + for x in self.content: + s += x.format() + return s + +class HtmlLinkNode(HtmlSeqNode): + def format(self): + arg = self.content[0].format() + text = None + if len(self.content) > 1: + s = [x for x in map(lambda x: x.format(), self.content)] + if s[0] == 'disambigR' or s[0] == 'wikiquote': + return "" + elif len(s) > 1 and s[1] == 'thumb': + return "" + text = '<span class="template">' + s[1] + '</span>' + if self.type == 'TMPL': + if re.match("t[+-]$", s[0]): + if len(s) > 2: + text = s[2] + elif s[0] == "term": + text = self.parser.tmpl_term(s) + elif s[0] == "proto": + text = self.parser.tmpl_proto(s) + return text + + (qual,sep,tgt) = arg.partition(':') + if tgt != '': + ns = self.parser.wiki_ns_name(qual) + if ns: + if ns == 'NS_IMAGE': + return '' + elif ns == 'NS_MEDIA': + tgt = self.parser.media_base + '/' + tgt + else: + tgt = self.parser.mktgt(tgt) + elif self.type == 'LINK' and qual in self.parser.langtab: + tgt = self.parser.mktgt(tgt, qual) + if not text or text == '': + text = self.parser.langtab[qual] + else: + tgt = self.parser.mktgt(tgt) + else: + tgt = self.parser.mktgt(arg) + return "<a href=\"%s\">%s</a>" % (tgt, + text if (text and text != '') else arg) + +class HtmlRefNode(WikiRefNode): + def format(self): + target = self.ref + text = self.content.format() + return "<a href=\"%s\">%s</a>" % ( + target, + text if (text and text != '') 
else target + ) + +class HtmlFontNode(HtmlSeqNode): + def format(self): + comm = { 'IT': 'i', + 'BOLD': 'b' } + s = '<%s>' % comm[self.type] + for x in self.content: + s += x.format() + s += '</%s>' % comm[self.type] + return s + +class HtmlTextNode(HtmlSeqNode): + def format(self): + if isinstance(self.content,list): + s = ''.join(self.content) + else: + s = html_escape(self.content, quote=False) + return s + +class HtmlHdrNode(WikiHdrNode): + def format(self): + level = self.level + if level > 6: + level = 6 + return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) + +class HtmlBarNode(WikiNode): + def format(self): + return "<hr/>\n" + +class HtmlEnvNode(WikiEnvNode): + def format(self): + type = self.envtype + lev = self.level + if lev > 4: + lev = 2 + string = "" + for s in self.content: + n = s.subtype; + string += "<%s>%s</%s>" % (self.parser.envt[type]["elt"][n], + s.content.format(), + self.parser.envt[type]["elt"][n]) + return "<%s>%s</%s>" % (self.parser.envt[type]["hdr"], + string, + self.parser.envt[type]["hdr"]) + return string + +class HtmlTagNode(WikiTagNode): + def format(self): + if self.tag == 'code': + self.parser.nested += 1 + s = self.content.format() + self.parser.nested -= 1 + return '<pre><code>' + s + '</code></pre>' #FIXME + elif self.tag == 'ref': + n = self.idx+1 + return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n) + elif self.tag == 'references': + s = '<div class="references">\n' + s += '<ol class="references">\n' + n = 0 + for ref in self.parser.references: + n += 1 + s += ('<li id="cite_note-%d">' + + '<span class="mw-cite-backlink">' + + '<b><a href="#cite_ref-%d">^</a></b>' + + '</span>' + + '<span class="reference-text">' + + ref.content.format() + + '</span>' + + '</li>\n') % (n,n) + s += '</ol>\n</div>\n' + return s + else: + s = '<' + self.tag + if self.args: + s += ' ' + str(self.args) + s += '>' + s += self.content.format() + return s + '</' + 
self.tag + '>' + +class HtmlParaNode(HtmlSeqNode): + def format(self): + return "<p>" + super(HtmlParaNode, self).format() + "</p>\n" + +class HtmlPreNode(HtmlSeqNode): + def format(self): + s = super(HtmlPreNode, self).format() + if self.parser.nested: + return s + else: + return '<pre>' + s + '</pre>' + +class HtmlIndNode(WikiIndNode): + def format(self): + return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level + + +class HtmlWikiMarkup(WikiMarkup): + """A Wiki markup to HTML translator class. + + Usage: + + x = HtmlWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print it as HTML: + print(str(x)) + + Known bugs: + * [[official position]]s + Final 's' gets after closing </a> tag. Should be before. + """ + + nested = 0 + references = [] + def __init__(self, *args, **kwargs): + """Create a HtmlWikiMarkup object. + + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + lang=CODE + Specifies source language. Default is 'en'. This variable can be + referred to as '%(lang)s' in the keyword arguments below. + html_base=URL + Base URL for cross-references. Default is + 'http://%(lang)s.wiktionary.org/wiki/' + image_base=URL + Base URL for images. Default is + 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' + media_base=URL + Base URL for media files. 
Default is + 'http://www.mediawiki.org/xml/export-0.3' + """ + + super(HtmlWikiMarkup, self).__init__(*args, **kwargs) + self.token_class['LINK'] = HtmlLinkNode + self.token_class['TMPL'] = HtmlLinkNode + self.token_class['REF'] = HtmlRefNode + self.token_class['IT'] = HtmlFontNode + self.token_class['BOLD'] = HtmlFontNode + self.token_class['HDR'] = HtmlHdrNode + self.token_class['BAR'] = HtmlBarNode + self.token_class['ENV'] = HtmlEnvNode + self.token_class['TAG'] = HtmlTagNode + self.token_class['PARA'] = HtmlParaNode + self.token_class['PRE'] = HtmlPreNode + self.token_class['IND'] = HtmlIndNode + self.token_class['TEXT'] = HtmlTextNode + self.token_class['SEQ'] = HtmlSeqNode + + def wiki_ns_name(self, str): + if str in wiki_ns[self.lang]: + return wiki_ns[self.lang][str] + elif str in wiki_ns_re[self.lang]: + for elt in wiki_ns_re[self.lang][str]: + if str.beginswith(elt[0]) and str.endswith(elt[1]): + return elt[2] + return None + + envt = { "unnumbered": { "hdr": "ul", + "elt": ["li"] }, + "numbered": { "hdr": "ol", + "elt": ["li"] }, + "defn": { "hdr": "dl", + "elt": ["dt","dd"] } } + + def mktgt(self, tgt, lang = None): + if not lang: + lang = self.lang + return self.html_base % { 'lang' : lang } + url_quote(tgt) + + def tmpl_term(self, s): + if len(s) == 2: + return s[1] + text = None + trans = None + for x in s[1:]: + m = re.match('(\w+)=', x) + if m: + if m.group(1) == "tr": + trans = x[m.end(1)+1:] + elif not text: + text = x + if text: + if trans: + text += ' <span class="trans">[' + trans + ']</span>' + return text + + def tmpl_proto(self, s): + text = '<span class="proto-lang">Proto-' + s[1] + '</span>' + if len(s) >= 4: + n = 0 + for x in s[2:-2]: + if n > 0: + text += ',' + n += 1 + text += ' <span class="proto">' + x + '</span>' + text += ' <span class="meaning">(' + s[-2] + ')</span>' + return text + + def __str__(self): + str = "" + for elt in self.tree: + str += elt.format() + return str + +class HtmlWiktionaryMarkup(HtmlWikiMarkup): + """A 
class for translating Wiktionary articles into HTML. + + Reserved for future use. Currently does the same as HtmlWikiMarkup. + """ diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py new file mode 100644 index 0000000..d9e5f52 --- /dev/null +++ b/wikitrans/wiki2texi.py @@ -0,0 +1,410 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (C) 2015-2018 Sergey Poznyakoff +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Wiki markup to Texinfo translator. + +Classes: + +TexiWikiMarkup -- Converts Wiki material to Texinfo. 
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2015-2018 Sergey Poznyakoff
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
Wiki markup to Texinfo translator.

Classes:

TexiWikiMarkup       -- Converts Wiki material to Texinfo.

"""

from wikitrans.wikimarkup import *
from wikitrans.wikitoken import *
from wikitrans.wikins import wiki_ns_re, wiki_ns
import re
import urllib


class Acc(list):
    """Output accumulator: a list of string fragments.

    str() of the accumulator is the concatenation of its fragments.
    """

    def prepend(self, x):
        """Insert fragment X at the beginning."""
        self.insert(0, x)

    def is_empty(self):
        """Return True if the accumulator holds no fragments."""
        return len(self) == 0

    def clear(self):
        """Remove all fragments (in place)."""
        del self[:]

    def tail(self, n = 1):
        """Return up to N last characters of the accumulated text."""
        s = Acc()
        i = len(self)
        while i > 0 and n > 0:
            i -= 1
            elt = self[i]
            l = len(elt)
            if l == 0:
                continue
            s.prepend(elt[-n:])
            n -= min(l, n)
        return str(s)

    def trim(self, n):
        """Remove N last characters from the accumulated text."""
        while len(self) and n > 0:
            elt = self.pop()
            l = len(elt)
            if l == 0:
                continue
            elif l > n:
                # Keep the untrimmed prefix as a single fragment.
                self.append(elt[0:-n])
                break
            n -= l

    def trimnl(self):
        """Remove the trailing newline, if any."""
        if self.endswith('\n'):
            self.trim(1)

    def trimpara(self):
        """Remove the trailing empty line, if any."""
        if self.endswith('\n\n'):
            self.trim(2)

    def endswith(self, x):
        """Return True if the accumulated text ends with string X."""
        return self.tail(len(x)) == x

    def in_new_para(self):
        """Return True if the output is positioned at a paragraph start."""
        return self.is_empty() or self.endswith('\n\n')

    def __str__(self):
        return ''.join(self)


class TexiTextNode(WikiTextNode):
    """Node for plain text."""

    def format(self):
        """Print the text (escaped) to the parser's accumulator."""
        parser = self.parser
        if isinstance(self.content, list):
            for s in self.content:
                parser._print(s)
        else:
            parser._print(self.content)


class TexiTagNode(WikiTagNode):
    """Node for an inline HTML-like tag."""

    def format(self):
        """Print the Texinfo equivalent of the tag.

        'code' and 'tt' become @example or @code, 'div' with an id
        produces @anchor, 'ref' becomes @footnote, 'references' is
        ignored.  Any other tag is reproduced verbatim.
        """
        parser = self.parser
        if self.tag in ['code', 'tt']:
            save = parser._begin_print()
            parser.nested += 1
            self.content.format()
            parser.nested -= 1
            s = parser._end_print(save)
            if self.isblock:
                parser._print('@example', nl=True, escape=False)
                parser._print(s, escape=False)
                parser._print('@end example\n', nl=True, escape=False)
            else:
                parser._print('@code{%s}' % s, escape=False)
        elif self.tag == 'div':
            if self.args and 'id' in self.args:
                parser._print("@anchor{%s}\n" % self.args['id'],
                              nl=True, escape=False)
            self.content.format()
        elif self.tag == 'ref':
            parser._print('@footnote{', escape=False)
            self.content.format()
            parser._print('}', escape=False)
        elif self.tag == 'references':
            pass
        else:
            parser._print('<' + self.tag)
            if self.args:
                # args is used as a mapping in the 'div' branch above, so
                # it must be converted before concatenation.
                parser._print(' ' + str(self.args))
            parser._print('>')
            self.content.format()
            parser._print('</' + self.tag + '>')


class TexiParaNode(WikiSeqNode):
    """Node for a paragraph."""

    def format(self):
        """Print the children, separated from the surrounding text by
        empty lines."""
        parser = self.parser
        if not parser.acc.in_new_para():
            parser._print('\n', nl=True)
        for x in self.content:
            x.format()
        if not parser.acc.in_new_para():
            parser._print('\n', nl=True)


class TexiPreNode(WikiSeqNode):
    """Node for preformatted text."""

    def format(self):
        """Print the children within @example, unless nested in code."""
        parser = self.parser
        if not parser.nested:
            parser._print('@example\n', nl=True, escape=False)
        for x in self.content:
            x.format()
        if not parser.nested:
            parser._print('@end example\n', nl=True, escape=False)


class TexiFontNode(WikiSeqNode):
    """Node for italic ('IT') and bold ('BOLD') text."""

    def format(self):
        """Print the children within @i{...} or @b{...}."""
        parser = self.parser
        comm = { 'IT': 'i',
                 'BOLD': 'b' }
        parser._print('@%s{' % comm[self.type], escape=False)
        for x in self.content:
            x.format()
        parser._print('}', escape=False)


class TexiHdrNode(WikiHdrNode):
    """Node for a section heading."""

    def format(self):
        """Print the sectioning command for the heading.

        The command is selected from the parser's sectioning model by the
        heading level shifted by `sectioning_start'.  Headings that are
        too deep for the model are printed after a forced line break.
        """
        parser = self.parser
        sectcomm = parser.sectcomm[parser.sectioning_model]
        # Shift the level as described for the sectioning_start argument
        # of TexiWikiMarkup; this is the same index the bound check uses.
        index = self.level + parser.sectioning_start
        # FIXME
        if index > len(sectcomm) - 1:
            parser._print("@* ", nl=True, escape=False)
            self.content.format()
        else:
            parser._print(sectcomm[index] + " ", nl=True, escape=False)
            self.content.format()
            parser._print(None, nl=True)
            if sectcomm[0] == '@top':
                parser._print('@node ', nl=True, escape=False)
                self.content.format()
                parser._print('\n')
        parser._print(None, nl=True)


class TexiBarNode(WikiNode):
    """Node for a horizontal bar."""

    def format(self):
        self.parser._print("\n-----\n")


class TexiIndNode(WikiIndNode):
    """Node for indented text."""

    def format(self):
        """Print the content preceded by `level' fixed spaces."""
        parser = self.parser
        parser._print("@w{ }" * self.level, nl=True, escape=False)
        self.content.format()
        parser._print(None, nl=True)


class TexiEnvNode(WikiEnvNode):
    """Node for a list environment (unnumbered, numbered or definition)."""

    def format(self):
        """Print the list as @itemize, @enumerate or @table."""
        parser = self.parser
        if self.envtype == 'unnumbered':
            parser._print('@itemize @bullet\n', nl=True, escape=False)
            for s in self.content:
                parser._print('@item ', nl=True, escape=False)
                s.content.format()
                parser._print(None, nl=True)
                parser._print('\n')
            parser._print('@end itemize\n', nl=True, escape=False)
        elif self.envtype == 'numbered':
            parser._print('@enumerate\n', nl=True, escape=False)
            for s in self.content:
                parser._print('@item ', nl=True, escape=False)
                s.content.format()
                parser._print(None, nl=True)
                parser._print('\n')
            parser._print('@end enumerate\n', nl=True, escape=False)
        elif self.envtype == 'defn':
            parser._print('@table @asis\n', nl=True, escape=False)
            for s in self.content:
                if s.subtype == 0:
                    parser._print('@item ', nl=True, escape=False)
                    s.content.format()
                    parser._print(None, nl=True)
                else:
                    s.content.format()
                    parser._print(None, nl=True)
                    parser._print('\n')
            parser._print('@end table\n', nl=True, escape=False)


class TexiLinkNode(WikiSeqNode):
    """Node for an internal link."""

    def format(self):
        """Print the link as @ref.

        The first child is the link target, the second one, if present,
        is the display text.  Children are formatted into temporary
        accumulators; the original one is restored before printing.
        """
        parser = self.parser
        save = parser._begin_print()
        self.content[0].format()
        arg = parser._end_print()
        if len(self.content) > 1:
            s = []
            for x in self.content[0:2]:
                parser._begin_print()
                x.format()
                s.append(parser._end_print())
            text = s[1]
        else:
            s = None
            text = None

        parser._end_print(save)

        if s:
            if s[0] == 'disambigR' or s[0] == 'wikiquote':
                return
            if len(s) > 1 and s[1] == 'thumb':
                return

        qual = arg.partition(':')[0]
        if text:
            parser._print("@ref{%s,%s}" % (qual, text), escape=False)
        else:
            parser._print("@ref{%s}" % qual, escape=False)


class TexiRefNode(WikiRefNode):
    """Node for an external reference (URL with optional text)."""

    def format(self):
        """Print the reference as @uref."""
        parser = self.parser
        target = self.ref
        save = parser._begin_print()
        self.content.format()
        text = parser._end_print(save)
        if text:
            parser._print("@uref{%s,%s}" % (target, text), escape=False)
        else:
            parser._print("@uref{%s}" % target, escape=False)


class TexiWikiMarkup(WikiMarkup):
    """Wiki markup to Texinfo translator class.

    Usage:

      x = TexiWikiMarkup(file="input.wiki")
      # Parse the input:
      x.parse()
      # Print it as Texi:
      print(str(x))

    """

    # Nesting depth of code tags being formatted.
    nested = 0
    # Sectioning commands for each sectioning model, indexed by level.
    sectcomm = {
        'numbered': [
            '@top',
            '@chapter',
            '@section',
            '@subsection',
            '@subsubsection'
        ],
        'unnumbered': [
            '@top',
            '@unnumbered',
            '@unnumberedsec',
            '@unnumberedsubsec',
            '@unnumberedsubsubsec'
        ],
        'appendix': [
            '@top',
            '@appendix',
            '@appendixsec',
            '@appendixsubsec',
            '@appendixsubsubsec'
        ],
        'heading': [
            '@majorheading',
            '@chapheading',
            '@heading',
            '@subheading',
            '@subsubheading'
        ]
    }

    sectioning_model = 'numbered'
    sectioning_start = 0

    def __init__(self, *args, **keywords):
        """Create a TexiWikiMarkup object.

        Arguments:

        filename=FILE
          Read Wiki material from the file named FILE.
        file=FD
          Read Wiki material from file object FD.
        text=STRING
          Read Wiki material from STRING.

        sectioning_model=MODEL
          Select the Texinfo sectioning model for the output document. Possible
          values are:

          'numbered'
             Top of document is marked with "@top". Headings ("=", "==",
             "===", etc) produce "@chapter", "@section", "@subsection", etc.
          'unnumbered'
             Unnumbered sectioning: "@top", "@unnumbered", "@unnumberedsec",
             "@unnumberedsubsec".
          'appendix'
             Sectioning suitable for appendix entries: "@top", "@appendix",
             "@appendixsec", "@appendixsubsec", etc.
          'heading'
             Use heading directives to reflect sectioning: "@majorheading",
             "@chapheading", "@heading", "@subheading", etc.
        sectioning_start=N
          Shift resulting heading level by N positions. For example, supposing
          "sectioning_model='numbered'", "== A ==" normally produces
          "@section A" on output. Now, if given "sectioning_start=1", this
          directive will produce "@subsection A" instead.

        Raises ValueError if sectioning_model or sectioning_start is
        invalid.
        """

        super(TexiWikiMarkup, self).__init__(*args, **keywords)

        # Each translator gets its own accumulator, so that the class-level
        # default is never modified.
        self.acc = Acc()

        self.token_class['TEXT'] = TexiTextNode
        self.token_class['TAG'] = TexiTagNode
        self.token_class['PARA'] = TexiParaNode
        self.token_class['PRE'] = TexiPreNode
        self.token_class['IT'] = TexiFontNode
        self.token_class['BOLD'] = TexiFontNode
        self.token_class['HDR'] = TexiHdrNode
        self.token_class['BAR'] = TexiBarNode
        self.token_class['IND'] = TexiIndNode
        self.token_class['ENV'] = TexiEnvNode
        self.token_class['LINK'] = TexiLinkNode
        self.token_class['REF'] = TexiRefNode

        if "sectioning_model" in keywords:
            val = keywords["sectioning_model"]
            if val in self.sectcomm:
                self.sectioning_model = val
            else:
                raise ValueError("Invalid value for sectioning model: %s" % val)
        if "sectioning_start" in keywords:
            val = keywords["sectioning_start"]
            if val < 0 or val > 4:
                raise ValueError("Invalid value for sectioning start: %s" % val)
            else:
                self.sectioning_start = val

    # Characters that must be escaped in Texinfo output.
    replchars = re.compile(r'([@{}])')
    # Current output accumulator.
    acc = Acc()

    def _print(self, text, **kw):
        """Append TEXT to the current accumulator.

        Keyword arguments:

        nl=BOOL
          If True, make sure TEXT starts on a new line. Default is False.
        escape=BOOL
          If True (the default), escape '@', '{' and '}' in TEXT.

        TEXT can be None or empty, in which case only the nl handling
        is done.
        """
        nl = kw.pop('nl', False)
        escape = kw.pop('escape', True)
        if nl and not self.acc.endswith('\n'):
            self.acc.append('\n')
        if text:
            if escape:
                self.acc.append(self.replchars.sub(r'@\1', text))
            else:
                self.acc.append(text)

    def _begin_print(self):
        """Start a new accumulator and return the previous one."""
        s = self.acc
        self.acc = Acc()
        return s

    def _end_print(self, val = None):
        """Return the text of the current accumulator and replace the
        accumulator with VAL."""
        s = self.acc
        self.acc = val
        return str(s)

    def __str__(self):
        """Return the parsed document formatted as Texinfo."""
        save = self._begin_print()
        for elt in self.tree:
            elt.format()
        self.acc.trimpara()
        # Restore the previous accumulator instead of leaving None behind.
        return self._end_print(save)
Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Wiki markup to plain text translator. + +Classes: + +TextWikiMarkup -- Converts Wiki material to plain text. +TextWiktionaryMarkup -- Reserved for future use. Currently does the same as + TextWikiMarkup. + +""" + +from wikitrans.wikitoken import * +from wikitrans.wikimarkup import * +from wikitrans.wikins import wiki_ns_re, wiki_ns +import re +try: + from urllib import quote as url_quote +except ImportError: + from urllib.parse import quote as url_quote + +class TextSeqNode(WikiSeqNode): + def format(self): + string = "" + for x in self.content: + if len(string) > 1 and not string[-1].isspace(): + string += ' ' + string += x.format() + return string + +class TextTextNode(WikiTextNode): + def format(self): + if isinstance(self.content,list): + string = "" + for s in self.content: + if string: + if string.endswith("."): + string += " " + else: + string += " " + string += s + else: + string = self.content + return string + +class TextPreNode(WikiSeqNode): + def format(self): + string = "" + for x in self.content: + string += x.format() + string += '\n' + return string + +class TextParaNode(WikiSeqNode): + def format(self): + string = "" + for x in self.content: + string += x.format() + string = self.parser.fmtpara(string) + '\n\n' + return string + +class TextItNode(WikiSeqNode): + def format(self): + string = "" + for x in self.content: + s = x.format() + if s: + string += " " + s + return "_" + string.lstrip(" ") + "_" + +class TextBoldNode(WikiSeqNode): + def format(self): + string = "" + for 
x in self.content: + if string.endswith("."): + string += " " + else: + string += " " + string += x.format() + return string.upper() + +class TextLinkNode(WikiSeqNode): + def format(self): + arg = self.content[0].format() + if len(self.content) > 1: + s = [x for x in map(lambda x: x.format(), self.content)] + text = s[1] + else: + s = None + text = None + + if s: + if s[0] == 'disambigR' or s[0] == 'wikiquote': + return "" + if len(s) > 1 and s[1] == 'thumb': + return "" + (qual,sep,tgt) = arg.partition(':') + if tgt != '': + ns = self.parser.wiki_ns_name(qual) + if ns: + if ns == 'NS_IMAGE': + if not self.parser.show_urls: + return "" + text = "[%s: %s]" % (qual, text if text else arg) + tgt = "%s/%s/250px-%s" % (self.image_base, + url_quote(tgt), + url_quote(tgt)) + elif ns == 'NS_MEDIA': + text = "[%s]" % (qual) + else: + tgt = self.parser.mktgt(tgt) + elif self.type == 'LINK' and qual in self.parser.langtab: + text = self.parser.langtab[qual] + ": " + tgt + tgt = self.parser.mktgt(tgt, qual) + else: + tgt = self.parser.mktgt(tgt) + else: + tgt = self.parser.mktgt(arg) + if self.parser.show_urls: + return "%s (see %s) " % (text, tgt) + elif not text or text == '': + return arg + else: + return text + +class TextTmplNode(TextLinkNode): + def format(self): + return '[' + super(TextTmplNode, self).format() + ']' + +class TextBarNode(WikiNode): + def format(self): + w = self.parser.width + if w < 5: + w = 5 + return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" + +class TextHdrNode(WikiHdrNode): + def format(self): + return ("\n" + + ("*" * self.level) + + " " + + self.content.format().lstrip(" ") + + "\n\n") + +class TextRefNode(WikiRefNode): + def format(self): + text = self.content.format() + if text: + return "%s (see %s) " % (text, self.ref) + else: + return "see " + self.ref + +class TextEnvNode(WikiEnvNode): + def format(self): + type = self.envtype + lev = self.level + if lev > self.parser.width - 4: + lev = 1 + string = "" + n = 1 + for s in self.content: + 
if not string.endswith("\n"): + string += "\n" + x = s.content.format() + if type == "unnumbered": + string += self.parser.indent(lev, "- " + x.lstrip(" ")) + elif type == "numbered": + string += self.parser.indent(lev, "%d. %s" % (n, x)) + n += 1 + elif type == "defn": + if s.subtype == 0: + string += self.parser.indent(lev-1, x) + else: + string += self.parser.indent(lev+3, x) + + if not string.endswith("\n"): + string += "\n" + + return string + +class TextIndNode(WikiIndNode): + def format(self): + return (" " * self.level) + self.content.format() + '\n' + +class TextTagNode(WikiTagNode): + def format(self): + if self.tag == 'code': + self.parser.nested += 1 + s = self.content.format() + self.parser.nested -= 1 + elif self.tag == 'ref': + s = '[%d]' % (self.idx+1) + elif self.tag == 'references': + s = '\nReferences:\n' + for ref in self.parser.references: + s += ('[%d]. ' % (ref.idx+1)) + ref.content.format() + '\n' + else: + s = '<' + self.tag + if self.args: + s += ' ' + str(self.args) + s += '>' + self.content.format() + '</' + self.tag + '>' + return s + + +class TextWikiMarkup(WikiMarkup): + """A Wiki markup to plain text translator. + + Usage: + + x = TextWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print it as plain text: + print(str(x)) + + """ + + # Output width + width = 78 + # Do not show references. + show_urls = False + # Provide a minimum markup + markup = True + + # Number of current element in the environment + num = 0 + + # Array of footnote references + references = [] + + def __init__(self, *args, **keywords): + """Create a TextWikiMarkup object. + + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + + width=N + Limit output width to N columns. Default is 78. + show_urls=False + By default, the link URLs are displayed in parentheses next to the + link text. 
If this argument is given, only the link text will be + displayed. + """ + + super(TextWikiMarkup,self).__init__(*args, **keywords) + if 'width' in keywords: |