summary | refs | log | tree | commit | diff | about
author Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-07 13:19:56 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-07 13:19:56 (GMT)
commit 323ba4ba7d144f3da5cf70ad64b3366eebca5f20 (patch) (side-by-side diff)
tree 922fc74662a809ecf4a416e5570dc3d6e7994492
parent edd43da79765b81d3f1b51c8d132196cb1429fff (diff)
download wit-323ba4ba7d144f3da5cf70ad64b3366eebca5f20.tar.gz
wit-323ba4ba7d144f3da5cf70ad64b3366eebca5f20.tar.bz2
Initial implementation of Texinfo translator class
* wiki2html.py (str_pre): Don't add <pre> tags if nested.
* wiki2texi.py: New file.
* wikicvt.py: Add --type (--to, -t) and --input-type (-I) options.
* wikimarkup.py (BaseWikiMarkup): Use new object style.
(tokread): Remove 'extra' keyword for the sake of parse_env.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- wiki2html.py 5
-rw-r--r-- wiki2texi.py 249
-rwxr-xr-x wikicvt.py 57
-rw-r--r-- wikimarkup.py 5
4 files changed, 295 insertions, 21 deletions
diff --git a/wiki2html.py b/wiki2html.py
index d9c049c..f8dfb63 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -99,7 +99,8 @@ class HtmlWikiMarkup (WikiMarkup):
elif s[0] == "proto":
text = self.tmpl_proto(s)
return text
-
+
+ print arg
(qual,sep,tgt) = arg.partition(':')
if tgt != '':
ns = self.wiki_ns_name(qual)
@@ -202,6 +203,8 @@ class HtmlWikiMarkup (WikiMarkup):
string = "";
for x in elt['content']:
string += self.format(x)
+ if self.nested:
+ return string
return '<pre>' + string + '</pre>'
def str_ind(self, elt):
diff --git a/wiki2texi.py b/wiki2texi.py
new file mode 100644
index 0000000..7cc67bd
--- a/dev/null
+++ b/wiki2texi.py
@@ -0,0 +1,249 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2015 Sergey Poznyakoff
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+from wikimarkup import *
+from types import TupleType
+from wikins import wiki_ns_re, wiki_ns
+import re
+import urllib
+
+class TexiWikiMarkup (WikiMarkup):
+ sectcomm = {
+ 'numbered': [
+ '@top',
+ '@chapter',
+ '@section',
+ '@subsection',
+ '@subsubsection'
+ ],
+ 'unnumbered': [
+ '@top',
+ '@unnumbered',
+ '@unnumberedsec',
+ '@unnumberedsubsec',
+ '@unnumberedsubsubsec'
+ ],
+ 'appendix': [
+ '@top',
+ '@appendix',
+ '@appendixsec',
+ '@appendixsubsec',
+ '@appendixsubsubsec'
+ ],
+ 'heading': [
+ '@majorheading'
+ '@chapheading',
+ '@heading',
+ '@subheading',
+ '@subsubheading'
+ ]
+ }
+
+ sectioning_model = 'numbered'
+ sectioning_start = 0
+
+ def __init__(self, *args, **keywords):
+ super(TexiWikiMarkup, self).__init__(*args, **keywords)
+ if keywords.has_key("sectioning-model"):
+ val = keywords["sectioning-model"]
+ if self.sectcomm.has_key(val):
+ self.sectioning_model = val
+ else:
+ raise ValueError("Invalid value for sectioning model: %s" % val)
+ if keywords.has_key("sectioning-start"):
+ val = keywords["sectioning-start"]
+ if val < 0 or val > 4:
+ raise ValueError("Invalid value for sectioning start: %s" % val)
+ else:
+ self.sectioning_start = val
+
+
+ def __str__(self):
+ str = ""
+ for elt in self.tree:
+ str += self.format(elt)
+ return str
+
+ def format(self, elt):
+ if elt['type'] == 'TEXT':
+ if isinstance(elt['content'],list):
+ string = ""
+ for s in elt['content']:
+ string += s
+ else:
+ string = elt['content']
+ return string
+ elif elt['type'] == 'TAG':
+ return self.str_tag(elt)
+ elif elt['type'] == 'PARA':
+ return self.str_para(elt)
+ elif elt['type'] == 'PRE':
+ return self.str_pre(elt)
+ elif elt['type'] == 'IT':
+ return self.str_it(elt)
+ elif elt['type'] == 'BOLD':
+ return self.str_bold(elt)
+ elif elt['type'] == 'LINK':
+ return self.str_link(elt)
+ elif elt['type'] == 'TMPL':
+ return self.str_tmpl(elt)
+ elif elt['type'] == 'BAR':
+ return self.str_bar()
+ elif elt['type'] == 'HDR':
+ return self.str_hdr(elt)
+ elif elt['type'] == 'REF':
+ return self.str_ref(elt)
+ elif elt['type'] == 'ENV':
+ return self.str_env(elt)
+ elif elt['type'] == 'IND':
+ return self.str_ind(elt)
+ elif elt['type'] == 'SEQ':
+ string = ""
+ for x in elt['content']:
+ string += self.format(x)
+ return string
+ else:
+ return str(elt)
+
+ supported_tags = [ 'nowiki', 'code' ]
+ def input_tag(self, tag):
+ return tag['tag'] in self.supported_tags
+
+ def str_tag(self, elt):
+ if elt['tag'] == 'nowiki':
+ return '@example\n' + elt['content'] + '@end example\n'
+ elif elt['tag'] == 'code':
+ kwdict = {
+ 'nested': self.nested + 1,
+ 'lang': self.lang,
+ 'text': elt['content'],
+ 'html_base': self.html_base,
+ 'image_base': self.image_base,
+ 'media_base': self.media_base }
+ markup = TexiWikiMarkup(**kwdict)
+ markup.debug_level = self.debug_level
+ markup.parse()
+ s = str(markup)
+ if not s.endswith("\n"):
+ s += "\n";
+ return '@example\n' + s + '@end example\n'
+
+ def str_para(self, elt):
+ string = "";
+ for x in elt['content']:
+ string += self.format(x)
+ return "\n" + string + "\n"
+
+ def str_pre(self, elt):
+ string = "";
+ for x in elt['content']:
+ string += self.format(x)
+ if self.nested:
+ return string
+ if not string.endswith("\n"):
+ string += "\n";
+ return '@example\n' + string + '@end example\n'
+
+ def concat(self, eltlist):
+ string = ""
+ for x in eltlist:
+ string += self.format(x)
+ return string
+
+ def str_it(self, elt):
+ return "@i{" + self.concat(elt['content']) + "}"
+
+ def str_bold(self, elt):
+ return "@b{" + self.concat(elt['content']) + "}"
+
+ def nodename(self, elt):
+ return self.format(elt) # FIXME
+
+ def str_hdr(self, elt):
+ level = elt['level']
+ if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
+ s ="\n@* %s" % (self.format(elt['content']))
+ else:
+ s = self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " " + self.format(elt['content']) + "\n"
+ if self.sectcomm[self.sectioning_model][0] == '@top':
+ s += "@node %s\n" % (self.nodename(elt['content']))
+ return s + "\n"
+
+ def str_bar(self):
+ return "\n-----\n" # FIXME
+
+ def str_ind(self, elt):
+ return ("@w{ }" * elt['level']) + self.format(elt['content'])
+
+ def str_env(self, elt):
+ if elt['envtype'] == 'unnumbered':
+ string = '\n@itemize @bullet\n'
+ for s in elt['content']:
+ string += '@item ' + self.format(s['content']) + '\n\n'
+ string += '@end itemize\n'
+ elif elt['envtype'] == 'numbered':
+ string = '\n@enumerate\n'
+ for s in elt['content']:
+ string += '@item ' + self.format(s['content']) + '\n\n'
+ string += '@end enumerate\n'
+ elif elt['envtype'] == 'defn':
+ string = "\n@table @asis\n"
+ for s in elt['content']:
+ if s['subtype'] == 0:
+ string += "@item " + self.format(s['content']) + '\n'
+ else:
+ string += self.format(s['content']) + '\n'
+ string += '@end table\n'
+ return string
+
+ def str_link(self, elt):
+ # FIXME: A very crude version
+ arg = self.format(elt['content'][0])
+ if len(elt['content']) > 1:
+ s = map(self.format, elt['content'])
+ text = s[1]
+ else:
+ s = None
+ text = None
+
+ if s:
+ if s[0] == 'disambigR' or s[0] == 'wikiquote':
+ return ""
+ if len(s) > 1 and s[1] == 'thumb':
+ return ""
+
+ (qual,sep,tgt) = arg.partition(':')
+ if text:
+ return "@ref{%s,%s}" % (qual, text)
+ else:
+ return "@ref{%s}" % qual
+
+ def str_tmpl(self, elt):
+ return "FIXME: str_tmpl not implemented\n"
+
+ def str_ref(self, elt):
+ target = elt['ref']
+ text = self.format(elt['content'])
+ if text and text != '':
+ return "@uref{%s,%s}" % (target, text)
+ else:
+ return "@uref{%s}" % target
+
+
+
+
+
diff --git a/wikicvt.py b/wikicvt.py
index 4806045..e61e28b 100755
--- a/wikicvt.py
+++ b/wikicvt.py
@@ -19,25 +19,42 @@ import sys
import getopt
from wiki2html import *
from wiki2text import *
+from wiki2texi import *
def usage(code=0):
print """
-usage: %s [-hvt] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val]
- [--text] [--help] [--verbose] file
+usage: %s [-hvt] [-I INTYPE] [-l lang] [-o kw=val] [--lang=lang] [--option kw=val]
+ [--input-type=INTYPE] [--type=OUTTYPE] [--help] [--verbose] file
""" % (sys.argv[0])
sys.exit(code)
+handlers = {
+ 'html': {
+ 'default': HtmlWikiMarkup,
+ 'wiktionary': HtmlWiktionaryMarkup
+ },
+ 'text': {
+ 'default': TextWikiMarkup,
+ 'wiktionary': TextWiktionaryMarkup
+ },
+ 'texi': {
+ 'default': TexiWikiMarkup
+ }
+}
+
def main():
verbose_flag = 0
- html = 1
+ itype = 'default'
+ otype = 'html'
lang = "pl"
kwdict = {}
debug = 0
try:
- opts, args = getopt.getopt(sys.argv[1:], "d:hl:o:tv",
+ opts, args = getopt.getopt(sys.argv[1:], "d:I:hl:o:t:v",
["debug=", "help", "lang=", "option=",
- "text", "input-text", "verbose" ])
+ "to", "type", "input-text", "input-type",
+ "verbose" ])
except getopt.GetoptError:
usage(1)
@@ -46,14 +63,16 @@ def main():
usage()
elif o in ("-v", "--verbose"):
verbose_flag = verbose_flag + 1
- elif o in ("-t", "--text"):
- html = 0
+ elif o in ("-I", "--input-type"):
+ itype = a
+ elif o in ("-t", "--to", "--type"):
+ otype = a
elif o in ("-l", "--lang"):
lang = a
elif o in ("-o", "--option"):
(kw,sep,val) = a.partition('=')
if val != '':
- kwdict[kw] = eval(val)
+ kwdict[kw] = val
elif o == "--input-text":
input_text = True
elif o in ("-d", "--debug"):
@@ -67,16 +86,20 @@ def main():
else:
usage(1)
- kwdict['lang']=lang
- if html:
- markup = HtmlWiktionaryMarkup(**kwdict)
+ kwdict['lang']=lang
+
+ if handlers.has_key(otype):
+ if handlers[otype].has_key(itype):
+ markup = handlers[otype][itype](**kwdict)
+ markup.debug_level = debug
+ markup.parse()
+ print str(markup)
+ exit(0)
+ else:
+ print "unsupported input type: %s" % (itype)
else:
- markup = TextWiktionaryMarkup(**kwdict)
- markup.debug_level = debug
- markup.parse()
- print str(markup)
-# if verbose_flag > 0:
-# markup.output()
+ print "unsupported output type: %s" % (otype)
+ exit(1)
if __name__ == '__main__':
main()
diff --git a/wikimarkup.py b/wikimarkup.py
index aa15ab6..fde1ec1 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -39,7 +39,7 @@ envtypes = { "*": [ "unnumbered", 0 ],
":": [ "defn", 1 ]
}
-class BaseWikiMarkup:
+class BaseWikiMarkup(object):
toklist = None
tokind = 0
@@ -87,8 +87,7 @@ class BaseWikiMarkup:
# FIXME?
# FIXME: What's "extra"?
yield({ 'type': 'DELIM',
- 'content': m.group(0),
- 'extra': True })
+ 'content': m.group(0) })
pos += 1
else:
yield({ 'type': 'DELIM',

Return to:

Send suggestions and report system problems to the System administrator.