diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-17 16:24:15 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-07-17 18:21:23 +0300 |
commit | 63f5f9902f83bd65fd2a37239ab9d6e5876924fd (patch) | |
tree | 75320e385f5399bfdfbfd26dd2175a78ac326236 /WikiTrans | |
parent | dd481f6030fe140fa3e321cfe08a38f53e549aed (diff) | |
download | wikitrans-63f5f9902f83bd65fd2a37239ab9d6e5876924fd.tar.gz wikitrans-63f5f9902f83bd65fd2a37239ab9d6e5876924fd.tar.bz2 |
wiki2texi: improve formatting
Diffstat (limited to 'WikiTrans')
-rw-r--r-- | WikiTrans/wiki2texi.py | 217 |
-rw-r--r-- | WikiTrans/wikimarkup.py | 6 |
2 files changed, 136 insertions, 87 deletions
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index 6e32c56..06e854a 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py @@ -70,180 +70,229 @@ class TexiWikiMarkup (WikiMarkup): else: self.sectioning_start = val - + replchars = re.compile(r'([@{}])') + acc = None + + def _print(self, text, **kw): + nl = kw.pop('nl', False) + escape = kw.pop('escape', True) + if nl and not self.acc.endswith('\n'): + self.acc += '\n' + if text: + if escape: + self.acc += self.replchars.sub(r'@\1', text) + else: + self.acc += text + + def _begin_print(self, val = ''): + s = self.acc + self.acc = val + return s + + def _end_print(self, val = None): + s = self.acc + self.acc = val + return s + def __str__(self): - str = "" + self._begin_print() for elt in self.tree: - str += self.format(elt) - return str + self.format(elt) + return self._end_print() def format(self, elt): if elt['type'] == 'TEXT': if isinstance(elt['content'],list): - string = "" for s in elt['content']: - string += s + self._print(s) else: - string = elt['content'] - return string + self._print(elt['content']) elif elt['type'] == 'TAG': - return self.str_tag(elt) + self.str_tag(elt) elif elt['type'] == 'PARA': - return self.str_para(elt) + self.str_para(elt) elif elt['type'] == 'PRE': - return self.str_pre(elt) + self.str_pre(elt) elif elt['type'] == 'IT': - return self.str_it(elt) + self.str_it(elt) elif elt['type'] == 'BOLD': - return self.str_bold(elt) + self.str_bold(elt) elif elt['type'] == 'LINK': - return self.str_link(elt) + self.str_link(elt) elif elt['type'] == 'TMPL': - return self.str_tmpl(elt) + self.str_tmpl(elt) elif elt['type'] == 'BAR': - return self.str_bar() + self.str_bar() elif elt['type'] == 'HDR': - return self.str_hdr(elt) + self.str_hdr(elt) elif elt['type'] == 'REF': - return self.str_ref(elt) + self.str_ref(elt) elif elt['type'] == 'ENV': - return self.str_env(elt) + self.str_env(elt) elif elt['type'] == 'IND': - return self.str_ind(elt) + self.str_ind(elt) elif 
elt['type'] == 'SEQ': - string = "" for x in elt['content']: - string += self.format(x) - return string + self.format(x) else: - return str(elt) + self._print(str(elt)) def str_tag(self, elt): if elt['tag'] == 'code': + save = self._begin_print() self.nested += 1 - s = self.format(elt['content']) + self.format(elt['content']) self.nested -= 1 - if not s.endswith("\n"): - s += "\n" - return '@example\n' + s + '@end example\n' + s = self._end_print(save) + if s.startswith('\n'): + self._print('@example', nl=True, escape=False) + self._print(s) + self._print('@end example\n', nl=True, escape=False) + else: + self._print('@code{%s}' % s, escape=False) elif elt['tag'] == 'tt': + self._print('@code{', escape=False) self.nested += 1 s = self.format(elt['content']) self.nested -= 1 - return "@code{%s}" % s + self._print('}', escape=False) elif elt['tag'] == 'div': - s = '' if 'args' in elt and 'id' in elt['args']: - s += "\n@anchor{%s}\n" % elt['args']['id'] - s += self.format(elt['content']) - return s + self._print("@anchor{%s}\n" % elt['args']['id'], + nl=True, escape=False) + self.format(elt['content']) else: - s = '<' + elt['tag'] + self._print('<' + elt['tag']) if elt['args']: - s += ' ' + elt['args'] - s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>' - return s + self._print(' ' + elt['args']) + self._print('>'); + self.format(elt['content']); + self._print('</' + elt['tag'] + '>') def str_para(self, elt): - string = ""; + if self.acc and not self.acc.endswith('\n\n'): + self._print('\n', nl=True) for x in elt['content']: - string += self.format(x) - return "\n" + string + "\n" - + self.format(x) + if self.acc and not self.acc.endswith('\n\n'): + self._print('\n', nl=True) + def str_pre(self, elt): - string = ""; + if not self.nested: + self._print('@example\n', nl=True, escape=False) for x in elt['content']: - string += self.format(x) - if self.nested: - return string - if not string.endswith("\n"): - string += "\n"; - return '\n@example\n' + string 
+ '@end example\n' + self.format(x) + if not self.nested: + self._print('@end example\n', nl=True, escape=False) def concat(self, eltlist): - string = "" for x in eltlist: - string += self.format(x) - return string + self.format(x) def str_it(self, elt): - return "@i{" + self.concat(elt['content']) + "}" + self._print('@i{', escape=False) + self.concat(elt['content']) + self._print('}', escape=False) def str_bold(self, elt): - return "@b{" + self.concat(elt['content']) + "}" + self._print('@b{', escape=False) + self.concat(elt['content']) + self._print('}', escape=False) - def nodename(self, elt): - return self.format(elt) # FIXME - def str_hdr(self, elt): level = elt['level'] if level > len(self.sectcomm[self.sectioning_model]) - 1 - self.sectioning_start: - s ="\n@* %s" % (self.format(elt['content'])) + self._print("@* ", nl=True, escape=False) + self.format(elt['content']) else: - s = self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " " + self.format(elt['content']) + "\n" + self._print(self.sectcomm[self.sectioning_model][level - self.sectioning_start] + " ", nl=True, escape=False) + self.format(elt['content']) + self._print(None, nl=True) if self.sectcomm[self.sectioning_model][0] == '@top': - s += "@node %s\n" % (self.nodename(elt['content'])) - return s + "\n" + self._print('@node ', nl=True, escape=False) + self.format(elt['content']) + self._print('\n') + self._print(None, nl=True) def str_bar(self): - return "\n-----\n" # FIXME + self._print("\n-----\n") def str_ind(self, elt): - return ("@w{ }" * elt['level']) + self.format(elt['content']) + '\n' + self._print("@w{ }" * elt['level'], nl=True, escape=False) + self.format(elt['content']) + self._print(None, nl=True) def str_env(self, elt): if elt['envtype'] == 'unnumbered': - string = '\n@itemize @bullet\n' + self._print('@itemize @bullet\n', nl=True, escape=False) for s in elt['content']: - string += '@item ' + self.format(s['content']) + '\n\n' - string += '@end itemize\n' + 
self._print('@item ', nl=True, escape=False) + self.format(s['content']) + self._print(None, nl=True) + self._print('\n') + self._print('@end itemize\n', nl=True, escape=False) elif elt['envtype'] == 'numbered': - string = '\n@enumerate\n' + self._print('@enumerate\n', nl=True, escape=False) for s in elt['content']: - string += '@item ' + self.format(s['content']) + '\n\n' - string += '@end enumerate\n' + self._print('@item ', nl=True, escape=False) + self.format(s['content']) + self._print(None, nl=True) + self._print('\n') + self._print('@end enumerate\n', nl=True, escape=False) elif elt['envtype'] == 'defn': - string = "\n@table @asis\n" + self._print('@table @asis\n', nl=True, escape=False) for s in elt['content']: if s['subtype'] == 0: - string += "@item " + self.format(s['content']) + '\n' + self._print('@item ', nl=True, escape=False) + self.format(s['content']) + self._print(None, nl=True) else: - string += self.format(s['content']) + '\n' - string += '@end table\n' - return string + self.format(s['content']) + self._print(None, nl=True) + self._print('\n') + self._print('@end table\n', nl=True, escape=False) def str_link(self, elt): - # FIXME: A very crude version - arg = self.format(elt['content'][0]) + save = self._begin_print() + self.format(elt['content'][0]) + arg = self._end_print() if len(elt['content']) > 1: - s = [x for x in map(self.format, elt['content'])] + s = [] + for x in elt['content'][0:2]: + self._begin_print() + self.format(x) + s.append(self._end_print()) text = s[1] else: s = None text = None + self._end_print(save) + if s: if s[0] == 'disambigR' or s[0] == 'wikiquote': - return "" + return if len(s) > 1 and s[1] == 'thumb': - return "" + return (qual,sep,tgt) = arg.partition(':') if text: - return "@ref{%s,%s}" % (qual, text) + self._print("@ref{%s,%s}" % (qual, text), escape=False) else: - return "@ref{%s}" % qual + self._print("@ref{%s}" % qual, escape=False) def str_tmpl(self, elt): - return "FIXME: str_tmpl not implemented\n" + 
self._print("FIXME: str_tmpl not implemented\n") def str_ref(self, elt): target = elt['ref'] - text = self.format(elt['content']) + save = self._begin_print() + self.format(elt['content']) + text = self._end_print(save) if text and text != '': - return "@uref{%s,%s}" % (target, text) + self._print("@uref{%s,%s}" % (target, text), escape=False) else: - return "@uref{%s}" % target + self._print("@uref{%s}" % target, escape=False) diff --git a/WikiTrans/wikimarkup.py b/WikiTrans/wikimarkup.py index bccd73e..ba30269 100644 --- a/WikiTrans/wikimarkup.py +++ b/WikiTrans/wikimarkup.py @@ -86,7 +86,7 @@ class TagAttributes(object): class BaseWikiMarkup(object): - delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<") + delim = re.compile("^==+\s*|\s*==+\s*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<") otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^>]+))?\s*(?P<closed>/)?>") ctag = re.compile("</(?P<tag>[a-zA-Z0-9_]+)\s*>") refstart = re.compile("^https?://") @@ -317,10 +317,10 @@ class BaseWikiMarkup(object): pos += 1 else: yield({ 'type': 'DELIM', - 'content': content, + 'content': content.strip(), 'continuation': False}) continue - + if line: if line[-1] == '\n': if line[pos:-1] != '': |