diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-18 17:43:36 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-18 17:43:36 +0300 |
commit | 8c0de7a515aa9d6db7b3ce4110a4e29261851abb (patch) | |
tree | 228c2bb1be5a907b373158c6c94cd8d51f23c419 | |
parent | 0c03a5a7b40b598b88f22f46b9e9086af6c59877 (diff) | |
download | wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.gz wikitrans-8c0de7a515aa9d6db7b3ce4110a4e29261851abb.tar.bz2 |
Minor stylistic fixes.
-rw-r--r-- | README.rst | 6 | ||||
-rwxr-xr-x | bin/wikitrans | 3 | ||||
-rw-r--r-- | wikitrans/wiki2html.py | 53 | ||||
-rw-r--r-- | wikitrans/wiki2texi.py | 82 | ||||
-rw-r--r-- | wikitrans/wiki2text.py | 66 | ||||
-rw-r--r-- | wikitrans/wikidump.py | 20 | ||||
-rw-r--r-- | wikitrans/wikimarkup.py | 46 | ||||
-rw-r--r-- | wikitrans/wikitoken.py | 72 |
8 files changed, 205 insertions, 143 deletions
@@ -121,13 +121,13 @@ supposed to provide a wiktionary-specific form of ``HtmlWikiMarkup``. Currently both classes are equivalent, except that the default value for ``html_base`` in ``HtmlWiktionaryMarkup`` is ``http://%(lang)s.wikipedia.org/wiki/``. The ``wikitrans`` utility ========================= -This command line utility converts the supplied text to a selected +This command line utility converts the supplied text to selected output format. The usage syntax is:: wikitrans [OPTIONS] ARG If ARG looks like a URL, the wiki text to be converted will be downloaded from that URL. @@ -157,17 +157,17 @@ Options are: ``-I ITYPE``, ``--input-type=ITYPE`` Set input document type. *ITYPE* is one of: ``default`` or ``wiktionary``. ``-t OTYPE``, ``--to=OTYPE``, ``--type=OTYPE`` Set output document type (``html`` (the default), ``texi``, ``text``, or ``dump``). ``-l LANG``, ``--lang=LANG`` - Set input document language + Set input document language. ``-o KW=VAL``, ``--option=KW=VAL`` Pass the keyword argument ``KW=VAL`` to the parser class construct. ``-d DEBUG``, ``--debug=DEBUG`` - Set debug level (0..100) + Set debug level (0..100). ``-D``, ``--dump`` Dump parse tree and exit; same as ``--type=dump``. ``-b URL``, ``--base-url=URL`` Set base url. Note: when using ``--base-url`` or passing URL as an argument (2nd and 3rd diff --git a/bin/wikitrans b/bin/wikitrans index caaa885..87de020 100755 --- a/bin/wikitrans +++ b/bin/wikitrans @@ -56,13 +56,14 @@ handlers = { }, 'text': { 'default': TextWikiMarkup, 'wiktionary': TextWiktionaryMarkup }, 'texi': { - 'default': TexiWikiMarkup + 'default': TexiWikiMarkup, + 'wiktionary': TextWikiMarkup } } def setkw(option, opt, value, parser): if not parser.values.kwdict: parser.values.kwdict = {} diff --git a/wikitrans/wiki2html.py b/wikitrans/wiki2html.py index fc6b142..0696dce 100644 --- a/wikitrans/wiki2html.py +++ b/wikitrans/wiki2html.py @@ -1,20 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ Wiki markup to HTML translator. @@ -37,22 +37,24 @@ except ImportError: from urllib.parse import quote as url_quote try: from html import escape as html_escape except ImportError: from cgi import escape as html_escape - + __all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] + class HtmlSeqNode(WikiSeqNode): def format(self): s = '' for x in self.content: s += x.format() return s - + + class HtmlLinkNode(HtmlSeqNode): def format(self): arg = self.content[0].format() text = None if len(self.content) > 1: s = [x for x in map(lambda x: x.format(), self.content)] @@ -67,14 +69,14 @@ class HtmlLinkNode(HtmlSeqNode): text = s[2] elif s[0] == "term": text = self.parser.tmpl_term(s) elif s[0] == "proto": text = self.parser.tmpl_proto(s) return text - - (qual,sep,tgt) = arg.partition(':') + + (qual, sep, tgt) = arg.partition(':') if tgt != '': ns = self.parser.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': return '' elif ns == 'NS_MEDIA': @@ -89,50 +91,56 @@ class HtmlLinkNode(HtmlSeqNode): tgt = self.parser.mktgt(tgt) else: tgt = self.parser.mktgt(arg) return "<a href=\"%s\">%s</a>" % (tgt, text if (text and text != '') else arg) + class HtmlRefNode(WikiRefNode): def format(self): target = self.ref text = self.content.format() return "<a href=\"%s\">%s</a>" % ( target, text if (text and text != '') else target ) + class HtmlFontNode(HtmlSeqNode): def format(self): comm = { 'IT': 'i', 'BOLD': 'b' } s = '<%s>' % comm[self.type] for x in self.content: s += x.format() s += '</%s>' % comm[self.type] return s + class HtmlTextNode(HtmlSeqNode): def format(self): - if isinstance(self.content,list): + if isinstance(self.content, list): s = ''.join(self.content) else: s = html_escape(self.content, quote=False) return s + class HtmlHdrNode(WikiHdrNode): def format(self): level = self.level if level > 6: level = 6 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) + class HtmlBarNode(WikiNode): def format(self): return "<hr/>\n" + class HtmlEnvNode(WikiEnvNode): def format(self): type = self.envtype lev = self.level if lev > 4: lev = 2 @@ -143,63 +151,67 @@ class HtmlEnvNode(WikiEnvNode): s.content.format(), self.parser.envt[type]["elt"][n]) return "<%s>%s</%s>" % (self.parser.envt[type]["hdr"], string, self.parser.envt[type]["hdr"]) return string - + + class HtmlTagNode(WikiTagNode): def format(self): if self.tag == 'code': self.parser.nested += 1 s = self.content.format() self.parser.nested -= 1 return '<pre><code>' + s + '</code></pre>' #FIXME elif self.tag == 'ref': n = self.idx+1 - return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n) + return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n, n, n, n) elif self.tag == 'references': s = '<div class="references">\n' s += '<ol class="references">\n' n = 0 for ref in self.parser.references: - n += 1 + n += 1 s += ('<li id="cite_note-%d">' + '<span class="mw-cite-backlink">' + '<b><a href="#cite_ref-%d">^</a></b>' + '</span>' + '<span class="reference-text">' + ref.content.format() + '</span>' - + '</li>\n') % (n,n) + + '</li>\n') % (n, n) s += '</ol>\n</div>\n' return s else: s = '<' + self.tag if self.args: s += ' ' + str(self.args) s += '>' s += self.content.format() return s + '</' + self.tag + '>' - + + class HtmlParaNode(HtmlSeqNode): def format(self): return "<p>" + super(HtmlParaNode, self).format() + "</p>\n" + class HtmlPreNode(HtmlSeqNode): def format(self): s = super(HtmlPreNode, self).format() if self.parser.nested: return s else: return '<pre>' + s + '</pre>' + class HtmlIndNode(WikiIndNode): def format(self): return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level - + class HtmlWikiMarkup(WikiMarkup): """A Wiki markup to HTML translator class. Usage: @@ -220,15 +232,15 @@ class HtmlWikiMarkup(WikiMarkup): """Create a HtmlWikiMarkup object. HtmlWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE], [html_base=URL],[image_base=URL],[media_base=URL]) The arguments have the same meaning as in the WikiMarkup constructor. - + """ - + super(HtmlWikiMarkup, self).__init__(*args, **kwargs) self.token_class['LINK'] = HtmlLinkNode self.token_class['TMPL'] = HtmlLinkNode self.token_class['REF'] = HtmlRefNode self.token_class['IT'] = HtmlFontNode self.token_class['BOLD'] = HtmlFontNode @@ -246,20 +258,20 @@ class HtmlWikiMarkup(WikiMarkup): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] - return None - + return None + envt = { "unnumbered": { "hdr": "ul", "elt": ["li"] }, "numbered": { "hdr": "ol", "elt": ["li"] }, "defn": { "hdr": "dl", - "elt": ["dt","dd"] } } + "elt": ["dt","dd"] } } def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + url_quote(tgt) @@ -288,19 +300,20 @@ class HtmlWikiMarkup(WikiMarkup): if n > 0: text += ',' n += 1 text += ' <span class="proto">' + x + '</span>' text += ' <span class="meaning">(' + s[-2] + ')</span>' return text - + def __str__(self): str = "" for elt in self.tree: str += elt.format() return str + class HtmlWiktionaryMarkup(HtmlWikiMarkup): """A class for translating Wiktionary articles into HTML. Reserved for future use. Currently does the same as HtmlWikiMarkup. """ diff --git a/wikitrans/wiki2texi.py b/wikitrans/wiki2texi.py index 55dffe2..936a133 100644 --- a/wikitrans/wiki2texi.py +++ b/wikitrans/wiki2texi.py @@ -1,20 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2015-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ Wiki markup to Texinfo translator. @@ -27,22 +27,23 @@ TexiWikiMarkup -- Converts Wiki material to Texinfo. from wikitrans.wikimarkup import * from wikitrans.wikitoken import * from wikitrans.wikins import wiki_ns_re, wiki_ns import re import urllib + class Acc(list): - def prepend(self,x): - self.insert(0,x) - + def prepend(self, x): + self.insert(0, x) + def is_empty(self): return len(self) == 0 - + def clear(self): self = [] - + def tail(self, n = 1): s = Acc() i = len(self) while i > 0 and n > 0: elt = self[i-1] l = len(elt) @@ -62,39 +63,41 @@ class Acc(list): if l == 0: continue elif l > n: self += elt[0:-n] break n -= l - + def trimnl(self): if self.endswith('\n'): self.trim(1) - + def trimpara(self): if self.endswith('\n\n'): self.trim(2) - + def endswith(self, x): return self.tail(len(x)) == x - + def in_new_para(self): return self.is_empty() or self.endswith('\n\n') - + def __str__(self): return ''.join(self) + class TexiTextNode(WikiTextNode): def format(self): parser = self.parser - if isinstance(self.content,list): + if isinstance(self.content, list): for s in self.content: parser._print(s) else: parser._print(self.content) + class TexiTagNode(WikiTagNode): def format(self): parser = self.parser if self.tag in ['code', 'tt']: save = parser._begin_print() parser.nested += 1 @@ -122,43 +125,47 @@ class TexiTagNode(WikiTagNode): parser._print('<' + self.tag) if self.args: parser._print(' ' + self.args) parser._print('>'); self.content.format() parser._print('</' + self.tag + '>') - + + class TexiParaNode(WikiSeqNode): - def format(self): + def format(self): parser = self.parser if not parser.acc.in_new_para(): parser._print('\n', nl=True) for x in self.content: x.format() if not parser.acc.in_new_para(): parser._print('\n', nl=True) - + + class TexiPreNode(WikiSeqNode): def format(self): parser = self.parser if not parser.nested: parser._print('@example\n', nl=True, escape=False) for x in self.content: x.format() if not parser.nested: parser._print('@end example\n', nl=True, escape=False) + class TexiFontNode(WikiSeqNode): def format(self): parser = self.parser comm = { 'IT': 'i', 'BOLD': 'b' } parser._print('@%s{' % comm[self.type], escape=False) for x in self.content: x.format() parser._print('}', escape=False) + class TexiHdrNode(WikiHdrNode): def format(self): parser = self.parser level = self.level # FIXME if level > len(parser.sectcomm[parser.sectioning_model]) - 1 - parser.sectioning_start: @@ -171,23 +178,26 @@ class TexiHdrNode(WikiHdrNode): if parser.sectcomm[parser.sectioning_model][0] == '@top': parser._print('@node ', nl=True, escape=False) self.content.format() parser._print('\n') parser._print(None, nl=True) + class TexiBarNode(WikiNode): def format(self): self.parser._print("\n-----\n") + class TexiIndNode(WikiIndNode): def format(self): parser = self.parser parser._print("@w{ }" * self.level, nl=True, escape=False) self.content.format() parser._print(None, nl=True) + class TexiEnvNode(WikiEnvNode): def format(self): parser = self.parser if self.envtype == 'unnumbered': parser._print('@itemize @bullet\n', nl=True, escape=False) for s in self.content: @@ -213,13 +223,14 @@ class TexiEnvNode(WikiEnvNode): parser._print(None, nl=True) else: s.content.format() parser._print(None, nl=True) parser._print('\n') parser._print('@end table\n', nl=True, escape=False) - + + class TexiLinkNode(WikiSeqNode): def format(self): parser = self.parser save = parser._begin_print() self.content[0].format() arg = parser._end_print() @@ -239,50 +250,52 @@ class TexiLinkNode(WikiSeqNode): if s: if s[0] == 'disambigR' or s[0] == 'wikiquote': return if len(s) > 1 and s[1] == 'thumb': return - (qual,sep,tgt) = arg.partition(':') + (qual, sep, tgt) = arg.partition(':') if text: parser._print("@ref{%s,%s}" % (qual, text), escape=False) else: parser._print("@ref{%s}" % qual, escape=False) + class TexiRefNode(WikiRefNode): def format(self): parser = self.parser target = self.ref save = parser._begin_print() self.content.format() text = parser._end_print(save) if text and text != '': parser._print("@uref{%s,%s}" % (target, text), escape=False) else: parser._print("@uref{%s}" % target, escape=False) - + + class TexiWikiMarkup(WikiMarkup): """Wiki markup to Texinfo translator class. - + Usage: x = TexiWikiMarkup(file="input.wiki") # Parse the input: x.parse() # Print it as Texi: print(str(x)) - + """ - + nested = 0 sectcomm = { 'numbered': [ '@top', - '@chapter', - '@section', - '@subsection', + '@chapter', + '@section', + '@subsection', '@subsubsection' ], 'unnumbered': [ '@top', '@unnumbered', '@unnumberedsec', @@ -314,13 +327,13 @@ class TexiWikiMarkup(WikiMarkup): TexiWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE], [html_base=URL],[image_base=URL],[media_base=URL], [sectioning_model=MODEL],[sectioning_start=N]) For a discussion of generic arguments, see the constructor of the WikiMarkup class. - + Additional arguments: sectioning_model=MODEL Select the Texinfo sectioning model for the output document. Possible values are: @@ -339,28 +352,28 @@ class TexiWikiMarkup(WikiMarkup): sectioning_start=N Shift resulting heading level by N positions. For example, supposing "sectioning_model='numbered'", "== A ==" normally produces "@section A" on output. Now, if given "sectioning_start=1", this directive will produce "@subsection A" instead. """ - + super(TexiWikiMarkup, self).__init__(*args, **keywords) - + self.token_class['TEXT'] = TexiTextNode self.token_class['TAG'] = TexiTagNode self.token_class['PARA'] = TexiParaNode self.token_class['PRE'] = TexiPreNode self.token_class['IT'] = TexiFontNode self.token_class['BOLD'] = TexiFontNode self.token_class['HDR'] = TexiHdrNode self.token_class['BAR'] = TexiBarNode self.token_class['IND'] = TexiIndNode self.token_class['ENV'] = TexiEnvNode self.token_class['LINK'] = TexiLinkNode self.token_class['REF'] = TexiRefNode - + if "sectioning_model" in keywords: val = keywords["sectioning_model"] if val in self.sectcomm: self.sectioning_model = val else: raise ValueError("Invalid value for sectioning model: %s" % val) @@ -391,20 +404,13 @@ class TexiWikiMarkup(WikiMarkup): return s def _end_print(self, val = None): s = self.acc self.acc = val return str(s) - + def __str__(self): self._begin_print() for elt in self.tree: elt.format() self.acc.trimpara() return self._end_print() - - - - - - - diff --git a/wikitrans/wiki2text.py b/wikitrans/wiki2text.py index 88e7610..7585bff 100644 --- a/wikitrans/wiki2text.py +++ b/wikitrans/wiki2text.py @@ -1,20 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ Wiki markup to plain text translator. @@ -32,72 +32,79 @@ from wikitrans.wikins import wiki_ns_re, wiki_ns import re try: from urllib import quote as url_quote except ImportError: from urllib.parse import quote as url_quote + class TextSeqNode(WikiSeqNode): def format(self): string = "" for x in self.content: if len(string) > 1 and not string[-1].isspace(): string += ' ' string += x.format() return string - + + class TextTextNode(WikiTextNode): def format(self): - if isinstance(self.content,list): + if isinstance(self.content, list): string = "" for s in self.content: if string: if string.endswith("."): string += " " else: string += " " string += s else: string = self.content return string + class TextPreNode(WikiSeqNode): def format(self): string = "" for x in self.content: string += x.format() string += '\n' return string + class TextParaNode(WikiSeqNode): def format(self): string = "" for x in self.content: string += x.format() string = self.parser.fmtpara(string) + '\n\n' return string + class TextItNode(WikiSeqNode): def format(self): string = "" for x in self.content: s = x.format() if s: string += " " + s return "_" + string.lstrip(" ") + "_" - + + class TextBoldNode(WikiSeqNode): def format(self): string = "" for x in self.content: if string.endswith("."): string += " " else: string += " " string += x.format() return string.upper() + class TextLinkNode(WikiSeqNode): def format(self): arg = self.content[0].format() if len(self.content) > 1: s = [x for x in map(lambda x: x.format(), self.content)] text = s[1] @@ -107,13 +114,13 @@ class TextLinkNode(WikiSeqNode): if s: if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" if len(s) > 1 and s[1] == 'thumb': return "" - (qual,sep,tgt) = arg.partition(':') + (qual, sep, tgt) = arg.partition(':') if tgt != '': ns = self.parser.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': if not self.parser.show_urls: return "" @@ -135,40 +142,45 @@ class TextLinkNode(WikiSeqNode): if self.parser.show_urls: return "%s (see %s) " % (text, tgt) elif not text or text == '': return arg else: return text - + + class TextTmplNode(TextLinkNode): def format(self): return '[' + super(TextTmplNode, self).format() + ']' - + + class TextBarNode(WikiNode): def format(self): w = self.parser.width if w < 5: w = 5 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" + class TextHdrNode(WikiHdrNode): def format(self): return ("\n" + ("*" * self.level) + " " + self.content.format().lstrip(" ") + "\n\n") - + + class TextRefNode(WikiRefNode): def format(self): text = self.content.format() if text: return "%s (see %s) " % (text, self.ref) else: return "see " + self.ref + class TextEnvNode(WikiEnvNode): def format(self): type = self.envtype lev = self.level if lev > self.parser.width - 4: lev = 1 @@ -185,22 +197,22 @@ class TextEnvNode(WikiEnvNode): n += 1 elif type == "defn": if s.subtype == 0: string += self.parser.indent(lev-1, x) else: string += self.parser.indent(lev+3, x) - if not string.endswith("\n"): string += "\n" - return string + class TextIndNode(WikiIndNode): def format(self): return (" " * self.level) + self.content.format() + '\n' + class TextTagNode(WikiTagNode): def format(self): if self.tag == 'code': self.parser.nested += 1 s = self.content.format() self.parser.nested -= 1 @@ -212,26 +224,26 @@ class TextTagNode(WikiTagNode): s += ('[%d]. ' % (ref.idx+1)) + ref.content.format() + '\n' else: s = '<' + self.tag if self.args: s += ' ' + str(self.args) s += '>' + self.content.format() + '</' + self.tag + '>' - return s - + return s + class TextWikiMarkup(WikiMarkup): """A Wiki markup to plain text translator. Usage: x = TextWikiMarkup(file="input.wiki") # Parse the input: x.parse() # Print it as plain text: print(str(x)) - + """ # Output width width = 78 # Do not show references. show_urls = False @@ -240,33 +252,33 @@ class TextWikiMarkup(WikiMarkup): # Number of current element in the environment num = 0 # Array of footnote references references = [] - + def __init__(self, *args, **keywords): """Create a TextWikiMarkup object. TextWikiMarkup([filename=FILE],[file=FD],[text=STRING],[lang=CODE], [html_base=URL],[image_base=URL],[media_base=URL], [width=N],[show_urls=False]) Most arguments have the same meaning as in the WikiMarkup constructor. Class-specific arguments: - + width=N - Limit output width to N columns. Default is 78. + Limit output width to N columns. Default is 78. show_urls=False By default, the link URLs are displayed in parentheses next to the link text. If this argument is given, only the link text will be displayed. """ - - super(TextWikiMarkup,self).__init__(*args, **keywords) + + super(TextWikiMarkup, self).__init__(*args, **keywords) if 'width' in keywords: self.width = keywords['width'] if 'show_urls' in keywords: self.show_urls = keywords['show_urls'] self.token_class['SEQ'] = TextSeqNode self.token_class['TEXT'] = TextTextNode @@ -280,39 +292,39 @@ class TextWikiMarkup(WikiMarkup): self.token_class['BAR'] = TextBarNode self.token_class['HDR'] = TextHdrNode self.token_class['REF'] = TextRefNode self.token_class['ENV'] = TextEnvNode self.token_class['IND'] = TextIndNode self.token_class['TAG'] = TextTagNode - + def wiki_ns_name(self, str): if str in wiki_ns[self.lang]: return wiki_ns[self.lang][str] elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None - + def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + url_quote(tgt) - + def indent(self, lev, text): if text.find('\n') == -1: - s = (" " * lev) + text + s = (" " * lev) + text else: s = "" for elt in text.split('\n'): if elt: s += (" " * lev) + elt + '\n' if not text.endswith('\n'): s = s.rstrip('\n') return s - + def fmtpara(self, input): output = "" linebuf = "" length = 0 for s in input.split(): wlen = len(s) @@ -328,21 +340,21 @@ class TextWikiMarkup(WikiMarkup): wsc = 0 length = 0 linebuf = "" linebuf += " " * wsc + s length += wsc + wlen return output + linebuf - + def __str__(self): str = "" for elt in self.tree: str += elt.format() return str + class TextWiktionaryMarkup(TextWikiMarkup): """A class for translating Wiktionary articles into plain text. Reserved for future use. Currently does the same as TextWikiMarkup. """ html_base='http://%(lang)s.wiktionary.org/wiki/' - diff --git a/wikitrans/wikidump.py b/wikitrans/wikidump.py index d5f651c..bc71876 100644 --- a/wikitrans/wikidump.py +++ b/wikitrans/wikidump.py @@ -1,19 +1,19 @@ # Wiki "dump" format. -*- coding: utf-8 -*- # Copyright (C) 2015-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. """ Print Wiki parse tree as JSON. @@ -25,12 +25,13 @@ DumpWikiMarkup from __future__ import print_function from wikitrans.wikitoken import * import json from wikitrans.wikimarkup import WikiMarkup + class DumpReferences(object): idx = 0 def __len__(self): return self.idx + 1 def append(self, obj): self.idx += 1 @@ -40,19 +41,20 @@ class DumpWikiMarkup(WikiMarkup): Usage: x = DumpWikiMarkup(file="input.wiki") # Parse the input: x.parse() - # Print a JSON dump of the parse tree + # Print a JSON dump of the parse tree print(str(x)) - + """ - + indent = None references = DumpReferences() + def __init__(self, **kwarg): """Create a DumpWikiMarkup object. Arguments: filename=FILE @@ -61,17 +63,17 @@ class DumpWikiMarkup(WikiMarkup): Read Wiki material from file object FD. text=STRING Read Wiki material from STRING. indent=N |