diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-16 13:20:06 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-16 13:26:15 +0300 |
commit | eaf9325ddcff786f3fcd5b9047327ef6e397e778 (patch) | |
tree | 2f0336efbb1deab9651c5eeb1b5dd753538a5c8e /wiki2html.py | |
parent | 8e11d7f20459697c883df1e421df02006f749792 (diff) | |
download | wikitrans-eaf9325ddcff786f3fcd5b9047327ef6e397e778.tar.gz wikitrans-eaf9325ddcff786f3fcd5b9047327ef6e397e778.tar.bz2 |
Restructure the package.
The idea is to switch from using this project as a git submodule
to having it distributed via PyPI. Since the name 'wit' is already
registered there, the package is renamed to 'wikitrans'.
* setup.py: Use setuptools
Rename package to wikitrans.
* wikicvt.py: Remove. Replaced with:
* bin/wikitrans: New file.
* __init__.py: Move to WikiTrans/__init__.py
* wiki2html.py: Move to WikiTrans/wiki2html.py
* wiki2texi.py: Move to WikiTrans/wiki2texi.py
* wiki2text.py: Move to WikiTrans/wiki2text.py
* wikimarkup.py: Move to WikiTrans/wikimarkup.py
* wikins.py: Move to WikiTrans/wikins.py
* test.py: Move to tests/test.py
* MANIFEST.in: New file.
* README.rst: New file.
* .gitignore: Update.
Diffstat (limited to 'wiki2html.py')
-rw-r--r-- | wiki2html.py | 281 |
1 files changed, 0 insertions, 281 deletions
diff --git a/wiki2html.py b/wiki2html.py deleted file mode 100644 index 05d4642..0000000 --- a/wiki2html.py +++ /dev/null @@ -1,281 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -# Copyright (C) 2008,2015 Sergey Poznyakoff -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -from wikimarkup import * -from wikins import wiki_ns_re, wiki_ns -import re -try: - from urllib import quote as url_quote -except ImportError: - from urllib.parse import quote as url_quote - -class HtmlWikiMarkup (WikiMarkup): - """ - A (hopefully) general-purpose Wiki->HTML translator class. - FIXME: 1. See WikiMarkup for a list - 2. [[official position]]s : final 's' gets after closing </a> tag. - Should be before. 
- """ - - def wiki_ns_name(self, str): - if str in wiki_ns[self.lang]: - return wiki_ns[self.lang][str] - elif str in wiki_ns_re[self.lang]: - for elt in wiki_ns_re[self.lang][str]: - if str.beginswith(elt[0]) and str.endswith(elt[1]): - return elt[2] - return None - - envt = { "unnumbered": { "hdr": "ul", - "elt": ["li"] }, - "numbered": { "hdr": "ol", - "elt": ["li"] }, - "defn": { "hdr": "dl", - "elt": ["dt","dd"] } } - - def mktgt(self, tgt, lang = None): - if not lang: - lang = self.lang - return self.html_base % { 'lang' : lang } + url_quote(tgt) - - def tmpl_term(self, s): - if len(s) == 2: - return s[1] - text = None - trans = None - for x in s[1:]: - m = re.match('(\w+)=', x) - if m: - if m.group(1) == "tr": - trans = x[m.end(1)+1:] - elif not text: - text = x - if text: - if trans: - text += ' <span class="trans">[' + trans + ']</span>' - return text - - def tmpl_proto(self, s): - text = '<span class="proto-lang">Proto-' + s[1] + '</span>' - if len(s) >= 4: - n = 0 - for x in s[2:-2]: - if n > 0: - text += ',' - n += 1 - text += ' <span class="proto">' + x + '</span>' - text += ' <span class="meaning">(' + s[-2] + ')</span>' - return text - - - def fmtlink(self, elt, istmpl): - arg = self.format(elt['content'][0]) - text = None - if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] - if s[0] == 'disambigR' or s[0] == 'wikiquote': - return "" - elif len(s) > 1 and s[1] == 'thumb': - return "" - text = '<span class="template">' + s[1] + '</span>' - if istmpl: - if re.match("t[+-]$", s[0]): - if len(s) > 2: - text = s[2] - elif s[0] == "term": - text = self.tmpl_term(s) - elif s[0] == "proto": - text = self.tmpl_proto(s) - return text - - (qual,sep,tgt) = arg.partition(':') - if tgt != '': - ns = self.wiki_ns_name(qual) - if ns: - if ns == 'NS_IMAGE': - return '' - elif ns == 'NS_MEDIA': - tgt = self.media_base + '/' + tgt - else: - tgt = self.mktgt(tgt) - elif not istmpl and qual in self.langtab: - tgt = self.mktgt(tgt, qual) - if 
not text or text == '': - text = self.langtab[qual] - else: - tgt = self.mktgt(tgt) - else: - tgt = self.mktgt(arg) - return "<a href=\"%s\">%s</a>" % (tgt, - text if (text and text != '') \ - else arg) - - def str_link(self, elt): - return self.fmtlink(elt, False) - - def str_tmpl(self, elt): - return self.fmtlink(elt, True) - - def str_ref(self, elt): - target = elt['ref'] - text = self.format(elt['content']) - return "<a href=\"%s\">%s</a>" % (target, - text if (text and text != '') \ - else target) - - def concat(self, eltlist): - string = "" - for x in eltlist: - string += self.format(x) - return string - - def str_it(self, elt): - return "<i>" + self.concat(elt['content']) + "</i>" - - def str_bold(self, elt): - return "<b>" + self.concat(elt['content']) + "</b>" - - def str_hdr(self, elt): - level = elt['level'] + 1 - if level > 4: - level = 4 - return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level) - - def str_bar(self): - return "<hr/>" - - def str_env(self, elt): - type = elt['envtype'] - lev = elt['level'] - if lev > 4: - lev = 2 - string = "" - for s in elt['content']: - n = s['subtype']; - string += "<%s>%s</%s>" % (self.envt[type]["elt"][n], - self.format(s['content']), - self.envt[type]["elt"][n]) - return "<%s>%s</%s>" % (self.envt[type]["hdr"], - string, - self.envt[type]["hdr"]) - return string - - def str_tag(self, elt): - if elt['tag'] == 'code': - self.nested += 1 - s = self.format(elt['content']) - self.nested -= 1 - return '<pre><code>' + s + '</code></pre>' #FIXME - else: - s = '<' + elt['tag'] - if elt['args']: - s += ' ' + str(elt['args']) - s += '>' - s += self.format(elt['content']) - return s + '</' + elt['tag'] + '>' - - def str_para(self, elt): - string = ""; - for x in elt['content']: - string += self.format(x) - return "<p>" + string + "</p>" - - def str_pre(self, elt): - string = ""; - for x in elt['content']: - string += self.format(x) - if self.nested: - return string - return '<pre>' + string + '</pre>' - - def 
str_ind(self, elt): - return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level'] - - def format(self, elt): - if elt['type'] == 'TEXT': - if isinstance(elt['content'],list): - string = "" - for s in elt['content']: - string += s - else: - string = elt['content'] - return string - elif elt['type'] == 'TAG': - return self.str_tag(elt) - elif elt['type'] == 'PARA': - return self.str_para(elt) - elif elt['type'] == 'PRE': - return self.str_pre(elt) - elif elt['type'] == 'IT': - return self.str_it(elt) - elif elt['type'] == 'BOLD': - return self.str_bold(elt) - elif elt['type'] == 'LINK': - return self.str_link(elt) - elif elt['type'] == 'TMPL': - return self.str_tmpl(elt) - elif elt['type'] == 'BAR': - return self.str_bar() - elif elt['type'] == 'HDR': - return self.str_hdr(elt) - elif elt['type'] == 'REF': - return self.str_ref(elt) - elif elt['type'] == 'ENV': - return self.str_env(elt) - elif elt['type'] == 'IND': - return self.str_ind(elt) - elif elt['type'] == 'SEQ': - string = "" - for x in elt['content']: - string += self.format(x) - return string - else: - return str(elt) - - def __str__(self): - str = "" - for elt in self.tree: - str += self.format(elt) - return str - -class HtmlWiktionaryMarkup (HtmlWikiMarkup): - """ - A class for translating Wiktionary articles into HTML. - This version does not do much, except that it tries to correctly - format templates. But "tries" does not mean "does". The heuristics - used here is clearly not enough to cope with it. - - 1. FIXME: - The right solution would be to have a database of templates with their - semantics and to decide on their rendering depending on that. E.g. - {{term}} in en.wiktionary means "replace this with the search term". - This, however, does not work in other wiktionaries. There are - also more complex templates, e.g.: {{t+|bg|врата|n|p|tr=vrata|sc=Cyrl}} - I don't know what it means. Couldn't find any documentation either. 
- Again, this template does not work in other dictionaries. - - 2. Capitulation notice: - Given the: - 1. vast amount of wiktionaries available, - 2. abundance of various templates for each wiktionary, - 3. apparent lack of documentation thereof, - 4. the lack of standardized language-independent templates, - I don't see any way to cope with the template-rendering task within a - reasonable amount of time. - - Feci quod potui, faciant meliora potentes. - """ |