diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-07 16:19:56 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-07 16:19:56 +0300 |
commit | 323ba4ba7d144f3da5cf70ad64b3366eebca5f20 (patch) | |
tree | 922fc74662a809ecf4a416e5570dc3d6e7994492 | |
parent | edd43da79765b81d3f1b51c8d132196cb1429fff (diff) | |
download | wikitrans-323ba4ba7d144f3da5cf70ad64b3366eebca5f20.tar.gz wikitrans-323ba4ba7d144f3da5cf70ad64b3366eebca5f20.tar.bz2 |
Initial implementation of Texinfo translator class
* wiki2html.py (str_pre): Don't add <pre> tags if nested
* wiki2texi.py: New file.
* wikicvt.py: Add --type (--to, -t) and --input-type (-I) options.
* wikimarkup.py (BaseWikiMarkup): Use new object style.
(tokread): Remove 'extra' keyword for the sake of parse_env
-rw-r--r-- | wiki2html.py | 5 | ||||
-rw-r--r-- | wiki2texi.py | 249 | ||||
-rwxr-xr-x | wikicvt.py | 57 | ||||
-rw-r--r-- | wikimarkup.py | 5 |
4 files changed, 295 insertions, 21 deletions
diff --git a/wiki2html.py b/wiki2html.py index d9c049c..f8dfb63 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -96,13 +96,14 @@ class HtmlWikiMarkup (WikiMarkup): text = s[2] elif s[0] == "term": text = self.tmpl_term(s) elif s[0] == "proto": text = self.tmpl_proto(s) return text - + + print arg (qual,sep,tgt) = arg.partition(':') if tgt != '': ns = self.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': return '' @@ -199,12 +200,14 @@ class HtmlWikiMarkup (WikiMarkup): return "<p>" + string + "</p>" def str_pre(self, elt): string = ""; for x in elt['content']: string += self.format(x) + if self.nested: + return string return '<pre>' + string + '</pre>' def str_ind(self, elt): return (" " * 2 * elt['level']) + self.format(elt['content']) def format(self, elt): diff --git a/wiki2texi.py b/wiki2texi.py new file mode 100644 index 0000000..7cc67bd --- /dev/null +++ b/wiki2texi.py @@ -0,0 +1,249 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# Copyright (C) 2015 Sergey Poznyakoff +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
from wikimarkup import *
from types import TupleType
from wikins import wiki_ns_re, wiki_ns
import re
import urllib


class TexiWikiMarkup (WikiMarkup):
    """Render a parsed wiki markup tree as Texinfo source.

    All keyword arguments are forwarded to the base class constructor.
    Two of them are additionally interpreted here:

    sectioning-model -- key of `sectcomm' selecting the family of
                        sectioning commands (default: 'numbered').
    sectioning-start -- integer in the range 0..4 that is added to the
                        header level when picking the sectioning command
                        (default: 0).  Numeric strings are accepted.
    """

    # Sectioning commands per model, indexed by (shifted) header level.
    sectcomm = {
        'numbered': [
            '@top',
            '@chapter',
            '@section',
            '@subsection',
            '@subsubsection'
        ],
        'unnumbered': [
            '@top',
            '@unnumbered',
            '@unnumberedsec',
            '@unnumberedsubsec',
            '@unnumberedsubsubsec'
        ],
        'appendix': [
            '@top',
            '@appendix',
            '@appendixsec',
            '@appendixsubsec',
            '@appendixsubsubsec'
        ],
        'heading': [
            # The comma after '@majorheading' was missing, which silently
            # fused the first two entries into one bogus command.
            '@majorheading',
            '@chapheading',
            '@heading',
            '@subheading',
            '@subsubheading'
        ]
    }

    sectioning_model = 'numbered'
    sectioning_start = 0

    # Element type -> name of the method rendering it.  Methods are
    # looked up by name at call time so that subclass overrides are used.
    _formatters = {
        'TAG': 'str_tag',
        'PARA': 'str_para',
        'PRE': 'str_pre',
        'IT': 'str_it',
        'BOLD': 'str_bold',
        'LINK': 'str_link',
        'TMPL': 'str_tmpl',
        'HDR': 'str_hdr',
        'REF': 'str_ref',
        'ENV': 'str_env',
        'IND': 'str_ind'
    }

    def __init__(self, *args, **keywords):
        """Initialize the translator.

        Raises ValueError if `sectioning-model' is not a key of
        `sectcomm', or if `sectioning-start' is not an integer in 0..4.
        """
        super(TexiWikiMarkup, self).__init__(*args, **keywords)
        if "sectioning-model" in keywords:
            val = keywords["sectioning-model"]
            if val not in self.sectcomm:
                raise ValueError("Invalid value for sectioning model: %s" % val)
            self.sectioning_model = val
        if "sectioning-start" in keywords:
            val = keywords["sectioning-start"]
            # Option values given on the command line arrive as strings,
            # so convert before doing the range check.
            try:
                start = int(val)
            except (TypeError, ValueError):
                raise ValueError("Invalid value for sectioning start: %s" % val)
            if start < 0 or start > 4:
                raise ValueError("Invalid value for sectioning start: %s" % val)
            self.sectioning_start = start

    def __str__(self):
        """Return the Texinfo rendition of the whole parse tree."""
        return self.concat(self.tree)

    def format(self, elt):
        """Return the Texinfo text for a single tree element.

        Elements of unknown type are rendered as str(elt).
        """
        kind = elt['type']
        if kind == 'TEXT':
            content = elt['content']
            if isinstance(content, list):
                return "".join(content)
            return content
        if kind == 'SEQ':
            return self.concat(elt['content'])
        if kind == 'BAR':
            return self.str_bar()
        name = self._formatters.get(kind)
        if name is None:
            return str(elt)
        return getattr(self, name)(elt)

    supported_tags = [ 'nowiki', 'code' ]
    def input_tag(self, tag):
        """Return True if the tag named by tag['tag'] is supported."""
        return tag['tag'] in self.supported_tags

    def _example(self, text):
        """Wrap text in an @example block.

        A newline is appended to text if needed, because `@end example'
        must start on a line of its own.
        """
        if not text.endswith("\n"):
            text += "\n"
        return '@example\n' + text + '@end example\n'

    def str_tag(self, elt):
        """Render a TAG element.

        `nowiki' content is emitted verbatim and `code' content is run
        through a nested translator; both end up in an @example block.
        Any other tag falls back to str(elt), the same fallback format()
        uses, instead of returning None.
        """
        if elt['tag'] == 'nowiki':
            return self._example(elt['content'])
        elif elt['tag'] == 'code':
            kwdict = {
                'nested': self.nested + 1,
                'lang': self.lang,
                'text': elt['content'],
                'html_base': self.html_base,
                'image_base': self.image_base,
                'media_base': self.media_base }
            markup = TexiWikiMarkup(**kwdict)
            markup.debug_level = self.debug_level
            markup.parse()
            return self._example(str(markup))
        return str(elt)

    def str_para(self, elt):
        """Render a paragraph surrounded by newlines."""
        return "\n" + self.concat(elt['content']) + "\n"

    def str_pre(self, elt):
        """Render preformatted text.

        In a nested translator the text is returned bare, since the
        enclosing translator supplies the @example block.
        """
        string = self.concat(elt['content'])
        if self.nested:
            return string
        return self._example(string)

    def concat(self, eltlist):
        """Return the concatenated rendition of all elements in eltlist."""
        return "".join([self.format(x) for x in eltlist])

    def str_it(self, elt):
        """Render italic text."""
        return "@i{" + self.concat(elt['content']) + "}"

    def str_bold(self, elt):
        """Render bold text."""
        return "@b{" + self.concat(elt['content']) + "}"

    def nodename(self, elt):
        """Return the node name for a header content element."""
        return self.format(elt) # FIXME

    def str_hdr(self, elt):
        """Render a header.

        The sectioning command is selected by header level shifted by
        `sectioning_start'.  Levels beyond the available commands are
        rendered as a forced line break followed by the title.  For
        models whose first command is @top, a @node line is emitted
        before the sectioning command, as Texinfo requires.
        """
        commands = self.sectcomm[self.sectioning_model]
        # The index must agree with the range check below; the former
        # `level - start' contradicted it and could go negative.
        index = elt['level'] + self.sectioning_start
        title = self.format(elt['content'])
        if index < 0 or index >= len(commands):
            return "\n@* %s" % title + "\n"
        s = ""
        if commands[0] == '@top':
            s = "@node %s\n" % (self.nodename(elt['content']))
        s += commands[index] + " " + title + "\n"
        return s + "\n"

    def str_bar(self):
        """Render a horizontal bar."""
        return "\n-----\n" # FIXME

    def str_ind(self, elt):
        """Render indented text, using one @w{ } per indentation level."""
        return ("@w{ }" * elt['level']) + self.format(elt['content'])

    def str_env(self, elt):
        """Render a list environment.

        'unnumbered' becomes @itemize, 'numbered' becomes @enumerate and
        'defn' becomes @table.  Any other environment type is rendered
        as str(elt), like unknown element types in format().
        """
        envtype = elt['envtype']
        if envtype == 'unnumbered':
            string = '\n@itemize @bullet\n'
            for s in elt['content']:
                string += '@item ' + self.format(s['content']) + '\n\n'
            string += '@end itemize\n'
        elif envtype == 'numbered':
            string = '\n@enumerate\n'
            for s in elt['content']:
                string += '@item ' + self.format(s['content']) + '\n\n'
            string += '@end enumerate\n'
        elif envtype == 'defn':
            string = "\n@table @asis\n"
            for s in elt['content']:
                # Subtype 0 is the term being defined; anything else is
                # its description.
                if s['subtype'] == 0:
                    string += "@item " + self.format(s['content']) + '\n'
                else:
                    string += self.format(s['content']) + '\n'
            string += '@end table\n'
        else:
            # Previously `string' was left unbound here.
            string = str(elt)
        return string

    def str_link(self, elt):
        """Render a link as @ref.

        Links whose first part is 'disambigR' or 'wikiquote', and links
        whose second part is 'thumb', produce no output.
        """
        # FIXME: A very crude version
        parts = [self.format(x) for x in elt['content']]
        arg = parts[0]
        text = None
        if len(parts) > 1:
            if arg == 'disambigR' or arg == 'wikiquote':
                return ""
            if parts[1] == 'thumb':
                return ""
            text = parts[1]

        # Only the part before the first colon is used as the target.
        qual = arg.partition(':')[0]
        if text:
            return "@ref{%s,%s}" % (qual, text)
        else:
            return "@ref{%s}" % qual

    def str_tmpl(self, elt):
        """Render a template (not implemented yet)."""
        return "FIXME: str_tmpl not implemented\n"

    def str_ref(self, elt):
        """Render an external reference as @uref."""
        target = elt['ref']
        text = self.format(elt['content'])
        if text:
            return "@uref{%s,%s}" % (target, text)
        else:
            return "@uref{%s}" % target
import sys import getopt from wiki2html import * from wiki2text import * +from wiki2texi import * def usage(code=0): print """ -usage: %s [-hvt] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] - [--text] [--help] [--verbose] file +usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val] + [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file """ % (sys.argv[0]) sys.exit(code) +handlers = { + 'html': { + 'default': HtmlWikiMarkup, + 'wiktionary': HtmlWiktionaryMarkup + }, + 'text': { + 'default': TextWikiMarkup, + 'wiktionary': TextWiktionaryMarkup + }, + 'texi': { + 'default': TexiWikiMarkup + } +} + def main(): verbose_flag = 0 - html = 1 + itype = 'default' + otype = 'html' lang = "pl" kwdict = {} debug = 0 try: - opts, args = getopt.getopt(sys.argv[1:], "d:hl:o:tv", + opts, args = getopt.getopt(sys.argv[1:], "d:I:hl:o:t:v", ["debug=", "help", "lang=", "option=", - "text", "input-text", "verbose" ]) + "to", "type", "input-text", "input-type", + "verbose" ]) except getopt.GetoptError: usage(1) for o, a in opts: if o in ("-h", "--help"): usage() elif o in ("-v", "--verbose"): verbose_flag = verbose_flag + 1 - elif o in ("-t", "--text"): - html = 0 + elif o in ("-I", "--input-type"): + itype = a + elif o in ("-t", "--to", "--type"): + otype = a elif o in ("-l", "--lang"): lang = a elif o in ("-o", "--option"): (kw,sep,val) = a.partition('=') if val != '': - kwdict[kw] = eval(val) + kwdict[kw] = val elif o == "--input-text": input_text = True elif o in ("-d", "--debug"): debug = eval(a) if len(args) == 1: @@ -64,19 +83,23 @@ def main(): kwdict['file'] = sys.stdin else: kwdict['filename'] = args[0] else: usage(1) - kwdict['lang']=lang - if html: - markup = HtmlWiktionaryMarkup(**kwdict) + kwdict['lang']=lang + + if handlers.has_key(otype): + if handlers[otype].has_key(itype): + markup = handlers[otype][itype](**kwdict) + markup.debug_level = debug + markup.parse() + print str(markup) + exit(0) + else: + print "unsupported 
input type: %s" % (itype) else: - markup = TextWiktionaryMarkup(**kwdict) - markup.debug_level = debug - markup.parse() - print str(markup) -# if verbose_flag > 0: -# markup.output() + print "unsupported output type: %s" % (otype) + exit(1) if __name__ == '__main__': main() diff --git a/wikimarkup.py b/wikimarkup.py index aa15ab6..fde1ec1 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -36,13 +36,13 @@ close_delim = { envtypes = { "*": [ "unnumbered", 0 ], "#": [ "numbered", 0 ], ";": [ "defn", 0 ], ":": [ "defn", 1 ] } -class BaseWikiMarkup: +class BaseWikiMarkup(object): toklist = None tokind = 0 newline = 0 tree = None @@ -84,14 +84,13 @@ class BaseWikiMarkup: yield({'type': 'TEXT', 'content': line[pos:m.start(0)]}) pos = m.end(0) if envtypes.has_key(m.group(0)[0]) and line[pos] == ":": # FIXME? # FIXME: What's "extra"? yield({ 'type': 'DELIM', - 'content': m.group(0), - 'extra': True }) + 'content': m.group(0) }) pos += 1 else: yield({ 'type': 'DELIM', 'content': m.group(0) }) else: m = otag.match(line) |