summary | refs | log | tree | commit | diff
path: root/WikiTrans
diff options
context:
space:
mode:
author: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-17 16:24:15 +0300
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-17 18:21:23 +0300
commit: 63f5f9902f83bd65fd2a37239ab9d6e5876924fd (patch)
tree: 75320e385f5399bfdfbfd26dd2175a78ac326236 /WikiTrans
parent: dd481f6030fe140fa3e321cfe08a38f53e549aed (diff)
download: wikitrans-63f5f9902f83bd65fd2a37239ab9d6e5876924fd.tar.gz
download: wikitrans-63f5f9902f83bd65fd2a37239ab9d6e5876924fd.tar.bz2
wiki2texi: improve formatting
Diffstat (limited to 'WikiTrans')
-rw-r--r-- WikiTrans/wiki2texi.py 217
-rw-r--r-- WikiTrans/wikimarkup.py 6
2 files changed, 136 insertions, 87 deletions
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index 6e32c56..06e854a 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -70,180 +70,229 @@ class TexiWikiMarkup (WikiMarkup):
else:
self.sectioning_start = val
-
+ replchars = re.compile(r'([@{}])')
+ acc = None
+
+ def _print(self, text, **kw):
+ nl = kw.pop('nl', False)
+ escape = kw.pop('escape', True)
+ if nl and not self.acc.endswith('\n'):
+ self.acc += '\n'
+ if text:
+ if escape:
+ self.acc += self.replchars.sub(r'@\1', text)
+ else:
+ self.acc += text
+
+ def _begin_print(self, val = ''):
+ s = self.acc
+ self.acc = val
+ return s
+
+ def _end_print(self, val = None):
+ s = self.acc
+ self.acc = val
+ return s
+
def __str__(self):
- str = ""
+ self._begin_print()
for elt in self.tree:
- str += self.format(elt)
- return str
+ self.format(elt)
+ return self._end_print()
def format(self, elt):
if elt['type'] == 'TEXT':
if isinstance(elt['content'],list):
- string = ""
for s in elt['content']:
- string += s
+ self._print(s)
else:
- string = elt['content']
- return string
+ self._print(elt['content'])
elif elt['type'] == 'TAG':
- return self.str_tag(elt)
+ self.str_tag(elt)
elif elt['type'] == 'PARA':
- return self.str_para(elt)
+ self.str_para(elt)
elif elt['type'] == 'PRE':
- return self.str_pre(elt)
+ self.str_pre(elt)
elif elt['type'] == 'IT':
- return self.str_it(elt)
+ self.str_it(elt)
elif elt['type'] == 'BOLD':
- return self.str_bold(elt)
+ self.str_bold(elt)
elif elt['type'] == 'LINK':
- return self.str_link(elt)
+ self.str_link(elt)
elif elt['type'] == 'TMPL':
- return self.str_tmpl(elt)
+ self.str_tmpl(elt)
elif elt['type'] == 'BAR':
- return self.str_bar()
+ self.str_bar()
elif elt['type'] == 'HDR':
- return self.str_hdr(elt)
+ self.str_hdr(elt)
elif elt['type'] == 'REF':
- return self.str_ref(elt)
+ self.str_ref(elt)
elif elt['type'] == 'ENV':
- return self.str_env(elt)
+ self.str_env(elt)
elif elt['type'] == 'IND':
- return self.str_ind(elt)
+ self.str_ind(elt)
elif elt['type'] == 'SEQ':
- string = ""
for x in elt['content']:
- string += self.format(x)
- return string
+ self.format(x)
else:
- return str(elt)
+ self._print(str(elt))
def str_tag(self, elt):
if elt['tag'] == 'code':
+ save = self._begin_print()
self.nested += 1
- s = self.format(elt['content'])
+ self.format(elt['content'])
self.nested -= 1
- if not s.endswith("\n"):
- s += "\n"
- return '@example\n' + s + '@end example\n'
+ s = self._end_print(save)
+ if s.startswith('\n'):
+ self._print('@example', nl=True, escape=False)
+ self._print(s)
+ self._print('@end example\n', nl=True, escape=False)
+ else:
+ self._print('@code{%s}' % s, escape=False)
elif elt['tag'] == 'tt':
+ self._print('@code{', escape=False)
self.nested += 1
s = self.format(elt['content'])
self.nested -= 1
- return "@code{%s}" % s
+ self._print('}', escape=False)
elif elt['tag'] == 'div':
- s = ''
if 'args' in elt and 'id' in elt['args']:
- s += "\n@anchor{%s}\n" % elt['args']['id']
- s += self.format(elt['content'])
- return s
+ self._print("@anchor{%s}\n" % elt['args']['id'],
+ nl=True, escape=False)
+ self.format(elt['content'])
else:
- s = '<' + elt['tag']
+ self._print('<' + elt['tag'])
if elt['args']:
- s += ' ' + elt['args']
- s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
- return s
+ self._print(' ' + elt['args'])
+ self._print('>');
+ self.format(elt['content']);
+ self._print('</' + elt['tag'] + '>')
def str_para(self, elt):
- string = "";
+ if self.acc and not self.acc.endswith('\n\n'):
+ self._print('\n', nl=True)
for x in elt['content']:
- string += self.format(x)
- return "\n" + string + "\n"
-
+ self.format(x)
+ if self.acc and not self.acc.endswith('\n\n'):
+ self._print('\n', nl=True)
+
def str_pre(self, elt):
- string = "";
+ if not self.nested:
+ self._print('@example\n', nl=True, escape=False)
for x in elt['content']:
- string += self.format(x)
- if self.nested:
- return string
- if not string.endswith("\n"):
- string += "\n";
- return '\n@example\n' + string + '@end example\n'
+ self.format(x)
+ if not self.nested:
+ self._print('@end example\n', nl=True, escape=False)
def concat(self, eltlist):
- string = ""
for x in eltlist:
- string += self.format(x)
- return string
+ self.format(x)
def str_it(self, elt):
- return "@i{" + self.concat(elt['content']) + "}"
+ self._print('@i{', escape=False)
+ self.concat(elt['content'])
+ self._print('}', escape=False)
def str_bold(self, elt):
- return "@b{" + self.concat(elt['content']) + "}"
+ self._print('@b{', escape=False)
+ self.concat(elt['content'])
+ self._print('}', escape=False)
- def nodename(self, elt):
- return self.format(elt) # FIXME
-
def str_hdr(self, elt):
level = elt['level']
if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start:
- s ="\n@* %s" % (self.format(elt['content']))
+ self._print("@* ", nl=True, escape=False)
+ self.format(elt['content'])
else:
- s = self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " " + self.format(elt['content']) + "\n"
+ self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False)
+ self.format(elt['content'])
+ self._print(None, nl=True)
if self.sectcomm[self.sectioning_model][0] == '@top':
- s += "@node %s\n" % (self.nodename(elt['content']))
- return s + "\n"
+ self._print('@node ', nl=True, escape=False)
+ self.format(elt['content'])
+ self._print('\n')
+ self._print(None, nl=True)
def str_bar(self):
- return "\n-----\n" # FIXME
+ self._print("\n-----\n")
def str_ind(self, elt):
- return ("@w{ }" * elt['level']) + self.format(elt['content']) + '\n'
+ self._print("@w{ }" * elt['level'], nl=True, escape=False)
+ self.format(elt['content'])
+ self._print(None, nl=True)
def str_env(self, elt):
if elt['envtype'] == 'unnumbered':
- string = '\n@itemize @bullet\n'
+ self._print('@itemize @bullet\n', nl=True, escape=False)
for s in elt['content']:
- string += '@item ' + self.format(s['content']) + '\n\n'
- string += '@end itemize\n'
+ self._print('@item ', nl=True, escape=False)
+ self.format(s['content'])
+ self._print(None, nl=True)
+ self._print('\n')
+ self._print('@end itemize\n', nl=True, escape=False)
elif elt['envtype'] == 'numbered':
- string = '\n@enumerate\n'
+ self._print('@enumerate\n', nl=True, escape=False)
for s in elt['content']:
- string += '@item ' + self.format(s['content']) + '\n\n'
- string += '@end enumerate\n'
+ self._print('@item ', nl=True, escape=False)
+ self.format(s['content'])
+ self._print(None, nl=True)
+ self._print('\n')
+ self._print('@end enumerate\n', nl=True, escape=False)
elif elt['envtype'] == 'defn':
- string = "\n@table @asis\n"
+ self._print('@table @asis\n', nl=True, escape=False)
for s in elt['content']:
if s['subtype'] == 0:
- string += "@item " + self.format(s['content']) + '\n'
+ self._print('@item ', nl=True, escape=False)
+ self.format(s['content'])
+ self._print(None, nl=True)
else:
- string += self.format(s['content']) + '\n'
- string += '@end table\n'
- return string
+ self.format(s['content'])
+ self._print(None, nl=True)
+ self._print('\n')
+ self._print('@end table\n', nl=True, escape=False)
def str_link(self, elt):
- # FIXME: A very crude version
- arg = self.format(elt['content'][0])
+ save = self._begin_print()
+ self.format(elt['content'][0])
+ arg = self._end_print()
if len(elt['content']) > 1:
- s = [x for x in map(self.format, elt['content'])]
+ s = []
+ for x in elt['content'][0:2]:
+ self._begin_print()
+ self.format(x)
+ s.append(self._end_print())
text = s[1]
else:
s = None
text = None
+ self._end_print(save)
+
if s:
if s[0] == 'disambigR' or s[0] == 'wikiquote':
- return ""
+ return
if len(s) > 1 and s[1] == 'thumb':
- return ""
+ return
(qual,sep,tgt) = arg.partition(':')
if text:
- return "@ref{%s,%s}" % (qual, text)
+ self._print("@ref{%s,%s}" % (qual, text), escape=False)
else:
- return "@ref{%s}" % qual
+ self._print("@ref{%s}" % qual, escape=False)
def str_tmpl(self, elt):
- return "FIXME: str_tmpl not implemented\n"
+ self._print("FIXME: str_tmpl not implemented\n")
def str_ref(self, elt):
target = elt['ref']
- text = self.format(elt['content'])
+ save = self._begin_print()
+ self.format(elt['content'])
+ text = self._end_print(save)
if text and text != '':
- return "@uref{%s,%s}" % (target, text)
+ self._print("@uref{%s,%s}" % (target, text), escape=False)
else:
- return "@uref{%s}" % target
+ self._print("@uref{%s}" % target, escape=False)
diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py
index bccd73e..ba30269 100644
--- a/WikiTrans/wikimarkup.py
+++ b/WikiTrans/wikimarkup.py
@@ -86,7 +86,7 @@ class TagAttributes(object):
class BaseWikiMarkup(object):
- delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
+ delim = re.compile("^==+\s*|\s*==+\s*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^>]+))?\s*(?P<closed>/)?>")
ctag = re.compile("</(?P<tag>[a-zA-Z0-9_]+)\s*>")
refstart = re.compile("^https?://")
@@ -317,10 +317,10 @@ class BaseWikiMarkup(object):
pos += 1
else:
yield({ 'type': 'DELIM',
- 'content': content,
+ 'content': content.strip(),
'continuation': False})
continue
-
+
if line:
if line[-1] == '\n':
if line[pos:-1] != '':

Return to:

Send suggestions and report system problems to the System administrator.