summary | refs | log | tree | commit | diff
path: root/wiki2html.py
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-16 13:20:06 +0300
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-16 13:26:15 +0300
commit: eaf9325ddcff786f3fcd5b9047327ef6e397e778 (patch)
tree: 2f0336efbb1deab9651c5eeb1b5dd753538a5c8e /wiki2html.py
parent: 8e11d7f20459697c883df1e421df02006f749792 (diff)
download: wikitrans-eaf9325ddcff786f3fcd5b9047327ef6e397e778.tar.gz
download: wikitrans-eaf9325ddcff786f3fcd5b9047327ef6e397e778.tar.bz2
Restructure the package.
The idea is to switch from using this project as a git submodule to having it distributed via PyPI. Since the name 'wit' is already registered there, the package is renamed to 'wikitrans'.

* setup.py: Use setuptools. Rename package to wikitrans.
* wikicvt.py: Remove. Replaced with:
* bin/wikitrans: New file.
* __init__.py: Move to WikiTrans/__init__.py
* wiki2html.py: Move to WikiTrans/wiki2html.py
* wiki2texi.py: Move to WikiTrans/wiki2texi.py
* wiki2text.py: Move to WikiTrans/wiki2text.py
* wikimarkup.py: Move to WikiTrans/wikimarkup.py
* wikins.py: Move to WikiTrans/wikins.py
* test.py: Move to tests/test.py
* MANIFEST.in: New file.
* README.rst: New file.
* .gitignore: Update.
Diffstat (limited to 'wiki2html.py')
-rw-r--r-- wiki2html.py 281
1 files changed, 0 insertions, 281 deletions
diff --git a/wiki2html.py b/wiki2html.py
deleted file mode 100644
index 05d4642..0000000
--- a/wiki2html.py
+++ /dev/null
@@ -1,281 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-# Copyright (C) 2008,2015 Sergey Poznyakoff
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-from wikimarkup import *
-from wikins import wiki_ns_re, wiki_ns
-import re
-try:
- from urllib import quote as url_quote
-except ImportError:
- from urllib.parse import quote as url_quote
-
-class HtmlWikiMarkup(WikiMarkup):
-    """
-    A (hopefully) general-purpose Wiki->HTML translator class.
-
-    Every str_* method renders one kind of parse-tree element (a dict
-    carrying at least a 'type' key) into an HTML fragment; format()
-    selects the method from that key.
-
-    The attributes lang, html_base, media_base, langtab, nested and
-    tree are read here but never assigned in this class; presumably
-    WikiMarkup provides them (TODO: confirm against wikimarkup.py).
-
-    FIXME: 1. See WikiMarkup for a list
-           2. [[official position]]s : final 's' gets after closing
-              </a> tag.  Should be before.
-    """
-
-    # Environment type -> enclosing HTML tag ("hdr") and the item tags
-    # ("elt"), the latter indexed by the 'subtype' of each item.
-    envt = { "unnumbered": { "hdr": "ul",
-                             "elt": ["li"] },
-             "numbered":   { "hdr": "ol",
-                             "elt": ["li"] },
-             "defn":       { "hdr": "dl",
-                             "elt": ["dt","dd"] } }
-
-    # Element type -> name of the rendering method.  The method is
-    # looked up by name on the instance at call time, so overrides in
-    # subclasses are honoured.  'TEXT', 'BAR' and 'SEQ' are handled
-    # directly in format().
-    _formatters = { 'TAG':  'str_tag',
-                    'PARA': 'str_para',
-                    'PRE':  'str_pre',
-                    'IT':   'str_it',
-                    'BOLD': 'str_bold',
-                    'LINK': 'str_link',
-                    'TMPL': 'str_tmpl',
-                    'HDR':  'str_hdr',
-                    'REF':  'str_ref',
-                    'ENV':  'str_env',
-                    'IND':  'str_ind' }
-
-    def wiki_ns_name(self, str):
-        """
-        Translate a namespace prefix into its namespace identifier.
-
-        str -- the text found before ':' in a link target.  The name
-               shadows the builtin; it is kept so that keyword callers
-               continue to work.
-
-        Returns the entry found in wiki_ns or wiki_ns_re for the
-        current language (fmtlink compares it with 'NS_IMAGE' and
-        'NS_MEDIA'), or None when the prefix is not recognized.
-        """
-        if str in wiki_ns[self.lang]:
-            return wiki_ns[self.lang][str]
-        elif str in wiki_ns_re[self.lang]:
-            # NOTE(review): each entry is used as a
-            # (prefix, suffix, namespace) triple; the exact layout of
-            # wiki_ns_re is defined in wikins.py -- confirm there.
-            for elt in wiki_ns_re[self.lang][str]:
-                # The original called the non-existent str.beginswith(),
-                # which raised AttributeError whenever this branch ran.
-                if str.startswith(elt[0]) and str.endswith(elt[1]):
-                    return elt[2]
-        return None
-
-    def mktgt(self, tgt, lang = None):
-        """
-        Build the URL of article tgt.
-
-        self.html_base is %-formatted with a {'lang': ...} mapping and
-        the URL-quoted tgt is appended.  lang defaults to self.lang.
-        """
-        if not lang:
-            lang = self.lang
-        return self.html_base % { 'lang' : lang } + url_quote(tgt)
-
-    def tmpl_term(self, s):
-        """
-        Render the arguments of a "term" template.
-
-        s -- list of formatted template fields; s[0] is the template
-             name.
-
-        If there is exactly one argument it is returned unchanged.
-        Otherwise the first argument that does not start with "key="
-        becomes the text, and the value of a "tr=" argument, if any,
-        is appended to it in square brackets.  Returns None if no text
-        argument is present.
-        """
-        if len(s) == 2:
-            return s[1]
-        text = None
-        trans = None
-        for x in s[1:]:
-            # Raw string: '\w' in a plain literal is an invalid escape
-            # sequence for newer Python versions.
-            m = re.match(r'(\w+)=', x)
-            if m:
-                if m.group(1) == "tr":
-                    # Everything after "tr=".
-                    trans = x[m.end(1)+1:]
-            elif not text:
-                text = x
-        if text and trans:
-            text += ' <span class="trans">[' + trans + ']</span>'
-        return text
-
-    def tmpl_proto(self, s):
-        """
-        Render the arguments of a "proto" template.
-
-        s -- list of formatted template fields: s[1] is the language
-             name, s[2:-2] the proto forms and s[-2] the meaning.
-
-        Forms and meaning are emitted only if s has at least four
-        fields.
-        """
-        text = '<span class="proto-lang">Proto-' + s[1] + '</span>'
-        if len(s) >= 4:
-            # Forms are comma-separated; each one carries its own
-            # leading space.
-            text += ','.join([' <span class="proto">' + x + '</span>'
-                              for x in s[2:-2]])
-            text += ' <span class="meaning">(' + s[-2] + ')</span>'
-        return text
-
-    def fmtlink(self, elt, istmpl):
-        """
-        Render a link ([[...]]) or template ({{...}}) element.
-
-        elt    -- element whose 'content' is the list of its fields;
-        istmpl -- True for templates, False for links.
-
-        Returns an HTML fragment.  The empty string is returned for
-        elements that are dropped: the 'disambigR' and 'wikiquote'
-        templates, thumbnails and targets in the image namespace.
-        A template that has arguments yields only its text (possibly
-        None, see tmpl_term) and no hyperlink.
-        """
-        arg = self.format(elt['content'][0])
-        text = None
-        if len(elt['content']) > 1:
-            s = [self.format(x) for x in elt['content']]
-            if s[0] == 'disambigR' or s[0] == 'wikiquote':
-                return ""
-            elif s[1] == 'thumb':
-                return ""
-            text = '<span class="template">' + s[1] + '</span>'
-            if istmpl:
-                if re.match("t[+-]$", s[0]):
-                    # {{t+|lang|word|...}}: show the word if present.
-                    if len(s) > 2:
-                        text = s[2]
-                elif s[0] == "term":
-                    text = self.tmpl_term(s)
-                elif s[0] == "proto":
-                    text = self.tmpl_proto(s)
-                return text
-
-        (qual,sep,tgt) = arg.partition(':')
-        if tgt != '':
-            ns = self.wiki_ns_name(qual)
-            if ns:
-                if ns == 'NS_IMAGE':
-                    return ''
-                elif ns == 'NS_MEDIA':
-                    tgt = self.media_base + '/' + tgt
-                else:
-                    tgt = self.mktgt(tgt)
-            elif not istmpl and qual in self.langtab:
-                # Interwiki link: point to the article in the other
-                # language and, lacking a text, label it with the
-                # langtab entry of that language.
-                tgt = self.mktgt(tgt, qual)
-                if not text:
-                    text = self.langtab[qual]
-            else:
-                tgt = self.mktgt(tgt)
-        else:
-            tgt = self.mktgt(arg)
-        return "<a href=\"%s\">%s</a>" % (tgt, text or arg)
-
-    def str_link(self, elt):
-        """Render a LINK element."""
-        return self.fmtlink(elt, False)
-
-    def str_tmpl(self, elt):
-        """Render a TMPL element."""
-        return self.fmtlink(elt, True)
-
-    def str_ref(self, elt):
-        """
-        Render a REF element as a hyperlink to elt['ref'].  The link
-        text is the formatted content or, if that is empty, the target
-        itself.
-        """
-        target = elt['ref']
-        text = self.format(elt['content'])
-        return "<a href=\"%s\">%s</a>" % (target, text or target)
-
-    def concat(self, eltlist):
-        """Format each element of eltlist and return the joined result."""
-        return "".join([self.format(x) for x in eltlist])
-
-    def str_it(self, elt):
-        """Render an IT element as <i>...</i>."""
-        return "<i>" + self.concat(elt['content']) + "</i>"
-
-    def str_bold(self, elt):
-        """Render a BOLD element as <b>...</b>."""
-        return "<b>" + self.concat(elt['content']) + "</b>"
-
-    def str_hdr(self, elt):
-        """
-        Render a HDR element.  The HTML heading level is one more than
-        elt['level'] and is capped at 4.
-        """
-        level = min(elt['level'] + 1, 4)
-        return "<h%s>%s</h%s>" % (level, self.format(elt['content']), level)
-
-    def str_bar(self):
-        """Render a BAR element (horizontal rule)."""
-        return "<hr/>"
-
-    def str_env(self, elt):
-        """
-        Render an ENV element (a list).
-
-        elt['envtype'] selects the tags from envt; the 'subtype' of
-        each item in elt['content'] indexes the item tag list.
-        """
-        # The original also computed a level value it never used and
-        # ended with an unreachable second return; both are dropped.
-        env = self.envt[elt['envtype']]
-        items = ""
-        for s in elt['content']:
-            tag = env["elt"][s['subtype']]
-            items += "<%s>%s</%s>" % (tag, self.format(s['content']), tag)
-        return "<%s>%s</%s>" % (env["hdr"], items, env["hdr"])
-
-    def str_tag(self, elt):
-        """
-        Render a TAG element.
-
-        <code> is emitted as <pre><code>...</code></pre>; self.nested
-        is raised while its content is formatted so that str_pre does
-        not add a second <pre>.  Any other tag is copied through
-        together with its arguments.
-        """
-        if elt['tag'] == 'code':
-            self.nested += 1
-            try:
-                s = self.format(elt['content'])
-            finally:
-                # Restore the counter even if formatting fails.
-                self.nested -= 1
-            return '<pre><code>' + s + '</code></pre>' #FIXME
-        else:
-            s = '<' + elt['tag']
-            if elt['args']:
-                s += ' ' + str(elt['args'])
-            s += '>'
-            s += self.format(elt['content'])
-            return s + '</' + elt['tag'] + '>'
-
-    def str_para(self, elt):
-        """Render a PARA element as <p>...</p>."""
-        return "<p>" + self.concat(elt['content']) + "</p>"
-
-    def str_pre(self, elt):
-        """
-        Render a PRE element as <pre>...</pre>.  When self.nested is
-        non-zero the bare content is returned instead.
-        """
-        string = self.concat(elt['content'])
-        if self.nested:
-            return string
-        return '<pre>' + string + '</pre>'
-
-    def str_ind(self, elt):
-        """
-        Render an IND element: the content wrapped in elt['level']
-        nested <dl><dd> pairs.
-        """
-        return ("<dl><dd>" * elt['level']) + self.format(elt['content']) + "</dd></dl>" * elt['level']
-
-    def format(self, elt):
-        """
-        Render a single parse-tree element to HTML.
-
-        elt -- dict with a 'type' key.  TEXT content may be a string or
-               a list of strings.  An element of unknown type is
-               rendered as str(elt).
-        """
-        etype = elt['type']
-        if etype == 'TEXT':
-            content = elt['content']
-            if isinstance(content, list):
-                return "".join(content)
-            return content
-        if etype == 'BAR':
-            # str_bar() takes no element argument.
-            return self.str_bar()
-        if etype == 'SEQ':
-            return self.concat(elt['content'])
-        name = self._formatters.get(etype)
-        if name is not None:
-            return getattr(self, name)(elt)
-        return str(elt)
-
-    def __str__(self):
-        """Return the HTML rendering of the whole of self.tree."""
-        return self.concat(self.tree)
-
-class HtmlWiktionaryMarkup (HtmlWikiMarkup):
- """
- A class for translating Wiktionary articles into HTML.
- This version does not do much, except that it tries to correctly
- format templates. But "tries" does not mean "does". The heuristics
- used here are clearly not enough to cope with it.
-
- 1. FIXME:
- The right solution would be to have a database of templates with their
- semantics and to decide on their rendering depending on that. E.g.
- {{term}} in en.wiktionary means "replace this with the search term".
- This, however, does not work in other wiktionaries. There are
- also more complex templates, e.g.: {{t+|bg|врата|n|p|tr=vrata|sc=Cyrl}}
- I don't know what it means. Couldn't find any documentation either.
- Again, this template does not work in other dictionaries.
-
- 2. Capitulation notice:
- Given the:
- 1. vast number of wiktionaries available,
- 2. abundance of various templates for each wiktionary,
- 3. apparent lack of documentation thereof,
- 4. the lack of standardized language-independent templates,
- I don't see any way to cope with the template-rendering task within a
- reasonable amount of time.
-
- Feci quod potui, faciant meliora potentes.
- """

Return to:

Send suggestions and report system problems to the System administrator.