summaryrefslogtreecommitdiffabout
path: root/wikitrans/wiki2texi.py
Side-by-side diff
Diffstat (limited to 'wikitrans/wiki2texi.py') (more/less context) (ignore whitespace changes)
-rw-r--r--wikitrans/wiki2texi.py410
1 files changed, 410 insertions, 0 deletions
diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py
new file mode 100644
index 0000000..d9e5f52
--- a/dev/null
+++ b/wikitrans/wiki2texi.py
@@ -0,0 +1,410 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright (C) 2015-2018 Sergey Poznyakoff
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""
+Wiki markup to Texinfo translator.
+
+Classes:
+
+TexiWikiMarkup -- Converts Wiki material to Texinfo.
+
+"""
+
+from wikitrans.wikimarkup import *
+from wikitrans.wikitoken import *
+from wikitrans.wikins import wiki_ns_re, wiki_ns
+import re
+import urllib
+
+class Acc(list):
+ def prepend(self,x):
+ self.insert(0,x)
+
+ def is_empty(self):
+ return len(self) == 0
+
+ def clear(self):
+ self = []
+
+ def tail(self, n = 1):
+ s = Acc()
+ i = len(self)
+ while i > 0 and n > 0:
+ elt = self[i-1]
+ l = len(elt)
+ if l == 0:
+ continue
+ elif l > n:
+ l = n
+ s.prepend(elt[-n:])
+ n -= l
+ i -= 1
+ return str(s)
+
+ def trim(self, n):
+ while len(self) and n > 0:
+ elt = self.pop()
+ l = len(elt)
+ if l == 0:
+ continue
+ elif l > n:
+ self += elt[0:-n]
+ break
+ n -= l
+
+ def trimnl(self):
+ if self.endswith('\n'):
+ self.trim(1)
+
+ def trimpara(self):
+ if self.endswith('\n\n'):
+ self.trim(2)
+
+ def endswith(self, x):
+ return self.tail(len(x)) == x
+
+ def in_new_para(self):
+ return self.is_empty() or self.endswith('\n\n')
+
+ def __str__(self):
+ return ''.join(self)
+
+class TexiTextNode(WikiTextNode):
+ def format(self):
+ parser = self.parser
+ if isinstance(self.content,list):
+ for s in self.content:
+ parser._print(s)
+ else:
+ parser._print(self.content)
+
+class TexiTagNode(WikiTagNode):
+ def format(self):
+ parser = self.parser
+ if self.tag in ['code', 'tt']:
+ save = parser._begin_print()
+ parser.nested += 1
+ self.content.format()
+ parser.nested -= 1
+ s = parser._end_print(save)
+ if self.isblock:
+ parser._print('@example', nl=True, escape=False)
+ parser._print(s, escape=False)
+ parser._print('@end example\n', nl=True, escape=False)
+ else:
+ parser._print('@code{%s}' % s, escape=False)
+ elif self.tag == 'div':
+ if self.args and 'id' in self.args:
+ parser._print("@anchor{%s}\n" % self.args['id'],
+ nl=True, escape=False)
+ self.content.format()
+ elif self.tag == 'ref':
+ parser._print('@footnote{', escape=False);
+ self.content.format();
+ parser._print('}', escape=False)
+ elif self.tag == 'references':
+ pass
+ else:
+ parser._print('<' + self.tag)
+ if self.args:
+ parser._print(' ' + self.args)
+ parser._print('>');
+ self.content.format()
+ parser._print('</' + self.tag + '>')
+
+class TexiParaNode(WikiSeqNode):
+ def format(self):
+ parser = self.parser
+ if not parser.acc.in_new_para():
+ parser._print('\n', nl=True)
+ for x in self.content:
+ x.format()
+ if not parser.acc.in_new_para():
+ parser._print('\n', nl=True)
+
+class TexiPreNode(WikiSeqNode):
+ def format(self):
+ parser = self.parser
+ if not parser.nested:
+ parser._print('@example\n', nl=True, escape=False)
+ for x in self.content:
+ x.format()
+ if not parser.nested:
+ parser._print('@end example\n', nl=True, escape=False)
+
+class TexiFontNode(WikiSeqNode):
+ def format(self):
+ parser = self.parser
+ comm = { 'IT': 'i',
+ 'BOLD': 'b' }
+ parser._print('@%s{' % comm[self.type], escape=False)
+ for x in self.content:
+ x.format()
+ parser._print('}', escape=False)
+
+class TexiHdrNode(WikiHdrNode):
+ def format(self):
+ parser = self.parser
+ level = self.level
+ # FIXME
+ if level > len(parser.sectcomm[parser.sectioning_model]) - 1 - parser.sectioning_start:
+ parser._print("@* ", nl=True, escape=False)
+ self.content.format()
+ else:
+ parser._print(parser.sectcomm[parser.sectioning_model][level - parser.sectioning_start] + " ", nl=True, escape=False)
+ self.content.format()
+ parser._print(None, nl=True)
+ if parser.sectcomm[parser.sectioning_model][0] == '@top':
+ parser._print('@node ', nl=True, escape=False)
+ self.content.format()
+ parser._print('\n')
+ parser._print(None, nl=True)
+
+class TexiBarNode(WikiNode):
+ def format(self):
+ self.parser._print("\n-----\n")
+
+class TexiIndNode(WikiIndNode):
+ def format(self):
+ parser = self.parser
+ parser._print("@w{ }" * self.level, nl=True, escape=False)
+ self.content.format()
+ parser._print(None, nl=True)
+
+class TexiEnvNode(WikiEnvNode):
+ def format(self):
+ parser = self.parser
+ if self.envtype == 'unnumbered':
+ parser._print('@itemize @bullet\n', nl=True, escape=False)
+ for s in self.content:
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end itemize\n', nl=True, escape=False)
+ elif self.envtype == 'numbered':
+ parser._print('@enumerate\n', nl=True, escape=False)
+ for s in self.content:
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end enumerate\n', nl=True, escape=False)
+ elif self.envtype == 'defn':
+ parser._print('@table @asis\n', nl=True, escape=False)
+ for s in self.content:
+ if s.subtype == 0:
+ parser._print('@item ', nl=True, escape=False)
+ s.content.format()
+ parser._print(None, nl=True)
+ else:
+ s.content.format()
+ parser._print(None, nl=True)
+ parser._print('\n')
+ parser._print('@end table\n', nl=True, escape=False)
+
+class TexiLinkNode(WikiSeqNode):
+ def format(self):
+ parser = self.parser
+ save = parser._begin_print()
+ self.content[0].format()
+ arg = parser._end_print()
+ if len(self.content) > 1:
+ s = []
+ for x in self.content[0:2]:
+ parser._begin_print()
+ x.format()
+ s.append(parser._end_print())
+ text = s[1]
+ else:
+ s = None
+ text = None
+
+ parser._end_print(save)
+
+ if s:
+ if s[0] == 'disambigR' or s[0] == 'wikiquote':
+ return
+ if len(s) > 1 and s[1] == 'thumb':
+ return
+
+ (qual,sep,tgt) = arg.partition(':')
+ if text:
+ parser._print("@ref{%s,%s}" % (qual, text), escape=False)
+ else:
+ parser._print("@ref{%s}" % qual, escape=False)
+
+class TexiRefNode(WikiRefNode):
+ def format(self):
+ parser = self.parser
+ target = self.ref
+ save = parser._begin_print()
+ self.content.format()
+ text = parser._end_print(save)
+ if text and text != '':
+ parser._print("@uref{%s,%s}" % (target, text), escape=False)
+ else:
+ parser._print("@uref{%s}" % target, escape=False)
+
+class TexiWikiMarkup(WikiMarkup):
+ """Wiki markup to Texinfo translator class.
+
+ Usage:
+
+ x = TexiWikiMarkup(file="input.wiki")
+ # Parse the input:
+ x.parse()
+ # Print it as Texi:
+ print(str(x))
+
+ """
+
+ nested = 0
+ sectcomm = {
+ 'numbered': [
+ '@top',
+ '@chapter',
+ '@section',
+ '@subsection',
+ '@subsubsection'
+ ],
+ 'unnumbered': [
+ '@top',
+ '@unnumbered',
+ '@unnumberedsec',
+ '@unnumberedsubsec',
+ '@unnumberedsubsubsec'
+ ],
+ 'appendix': [
+ '@top',
+ '@appendix',
+ '@appendixsec',
+ '@appendixsubsec',
+ '@appendixsubsubsec'
+ ],
+ 'heading': [
+ '@majorheading'
+ '@chapheading',
+ '@heading',
+ '@subheading',
+ '@subsubheading'
+ ]
+ }
+
+ sectioning_model = 'numbered'
+ sectioning_start = 0
+
+ def __init__(self, *args, **keywords):
+ """Create a TexiWikiMarkup object.
+
+ Arguments:
+
+ filename=FILE
+ Read Wiki material from the file named FILE.
+ file=FD
+ Read Wiki material from file object FD.
+ text=STRING
+ Read Wiki material from STRING.
+
+ sectioning_model=MODEL
+ Select the Texinfo sectioning model for the output document. Possible
+ values are:
+
+ 'numbered'
+ Top of document is marked with "@top". Headings ("=", "==",
+ "===", etc) produce "@chapter", "@section", "@subsection", etc.
+ 'unnumbered'
+ Unnumbered sectioning: "@top", "@unnumbered", "@unnumberedsec",
+ "@unnumberedsubsec".
+ 'appendix'
+ Sectioning suitable for appendix entries: "@top", "@appendix",
+ "@appendixsec", "@appendixsubsec", etc.
+ 'heading'
+ Use heading directives to reflect sectioning: "@majorheading",
+ "@chapheading", "@heading", "@subheading", etc.
+ sectioning_start=N
+ Shift resulting heading level by N positions. For example, supposing
+ "sectioning_model='numbered'", "== A ==" normally produces
+ "@section A" on output. Now, if given "sectioning_start=1", this
+ directive will produce "@subsection A" instead.
+ """
+
+ super(TexiWikiMarkup, self).__init__(*args, **keywords)
+
+ self.token_class['TEXT'] = TexiTextNode
+ self.token_class['TAG'] = TexiTagNode
+ self.token_class['PARA'] = TexiParaNode
+ self.token_class['PRE'] = TexiPreNode
+ self.token_class['IT'] = TexiFontNode
+ self.token_class['BOLD'] = TexiFontNode
+ self.token_class['HDR'] = TexiHdrNode
+ self.token_class['BAR'] = TexiBarNode
+ self.token_class['IND'] = TexiIndNode
+ self.token_class['ENV'] = TexiEnvNode
+ self.token_class['LINK'] = TexiLinkNode
+ self.token_class['REF'] = TexiRefNode
+
+ if "sectioning_model" in keywords:
+ val = keywords["sectioning_model"]
+ if val in self.sectcomm:
+ self.sectioning_model = val
+ else:
+ raise ValueError("Invalid value for sectioning model: %s" % val)
+ if "sectioning_start" in keywords:
+ val = keywords["sectioning_start"]
+ if val < 0 or val > 4:
+ raise ValueError("Invalid value for sectioning start: %s" % val)
+ else:
+ self.sectioning_start = val
+
+ replchars = re.compile(r'([@{}])')
+ acc = Acc()
+
+ def _print(self, text, **kw):
+ nl = kw.pop('nl', False)
+ escape = kw.pop('escape', True)
+ if nl and not self.acc.endswith('\n'):
+ self.acc += '\n'
+ if text:
+ if escape:
+ self.acc += self.replchars.sub(r'@\1', text)
+ else:
+ self.acc += text
+
+ def _begin_print(self):
+ s = self.acc
+ self.acc = Acc()
+ return s
+
+ def _end_print(self, val = None):
+ s = self.acc
+ self.acc = val
+ return str(s)
+
+ def __str__(self):
+ self._begin_print()
+ for elt in self.tree:
+ elt.format()
+ self.acc.trimpara()
+ return self._end_print()
+
+
+
+
+
+
+

Return to:

Send suggestions and report system problems to the System administrator.