diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-16 15:45:00 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-17 13:17:11 +0300 |
commit | 7186dbab7f1c1227e9229866e086bc417e3e4e52 (patch) | |
tree | f29114e9ff7a7b023dd3d611a9bc8808f5cf5bbd | |
parent | d9e26129527ce84f626eb44ff95e4ecfbc5bc92a (diff) | |
download | wikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.gz wikitrans-7186dbab7f1c1227e9229866e086bc417e3e4e52.tar.bz2 |
Fix PEP 8 issues.
-rw-r--r-- | tests/test_html.py | 8 | ||||
-rw-r--r-- | tests/test_texi.py | 6 | ||||
-rw-r--r-- | tests/test_text.py | 8 | ||||
-rw-r--r-- | tests/wikitest.py (renamed from tests/WikiTest.py) | 6 | ||||
-rw-r--r-- | wikitrans/__init__.py (renamed from WikiTrans/__init__.py) | 0 | ||||
-rw-r--r-- | wikitrans/wiki2html.py (renamed from WikiTrans/wiki2html.py) | 115 | ||||
-rw-r--r-- | wikitrans/wiki2texi.py (renamed from WikiTrans/wiki2texi.py) | 63 | ||||
-rw-r--r-- | wikitrans/wiki2text.py (renamed from WikiTrans/wiki2text.py) | 74 | ||||
-rw-r--r-- | wikitrans/wikidump.py (renamed from WikiTrans/wikidump.py) | 41 | ||||
-rw-r--r-- | wikitrans/wikimarkup.py (renamed from WikiTrans/wikimarkup.py) | 784 | ||||
-rw-r--r-- | wikitrans/wikins.py (renamed from WikiTrans/wikins.py) | 0 | ||||
-rw-r--r-- | wikitrans/wikitoken.py (renamed from WikiTrans/wikitoken.py) | 154 |
12 files changed, 802 insertions, 457 deletions
diff --git a/tests/test_html.py b/tests/test_html.py index 3da57f6..5a15cb8 100644 --- a/tests/test_html.py +++ b/tests/test_html.py @@ -1,14 +1,14 @@ #!/usr/bin/python # -*- coding: utf-8 -*- from __future__ import print_function import unittest -from WikiTrans.wiki2html import HtmlWiktionaryMarkup -from WikiTest import populateMethods +from wikitrans.wiki2html import HtmlWikiMarkup +from wikitest import populate_methods -class TestWiktionaryMarkup (unittest.TestCase): +class TestWikiMarkup (unittest.TestCase): pass -populateMethods(TestWiktionaryMarkup, HtmlWiktionaryMarkup, '.html') +populate_methods(TestWikiMarkup, HtmlWikiMarkup, '.html') if __name__ == '__main__': unittest.main() diff --git a/tests/test_texi.py b/tests/test_texi.py index 75314c9..ddd26c7 100644 --- a/tests/test_texi.py +++ b/tests/test_texi.py @@ -1,14 +1,14 @@ #!/usr/bin/python # -*- coding: utf-8 -*- from __future__ import print_function import unittest -from WikiTrans.wiki2texi import TexiWikiMarkup -from WikiTest import populateMethods +from wikitrans.wiki2texi import TexiWikiMarkup +from wikitest import populate_methods class TestTexiWikiMarkup (unittest.TestCase): pass -populateMethods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi') +populate_methods(TestTexiWikiMarkup, TexiWikiMarkup, '.texi') if __name__ == '__main__': unittest.main() diff --git a/tests/test_text.py b/tests/test_text.py index a06f519..b3d0a12 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -1,14 +1,14 @@ #!/usr/bin/python # -*- coding: utf-8 -*- from __future__ import print_function import unittest -from WikiTrans.wiki2text import TextWiktionaryMarkup -from WikiTest import populateMethods +from wikitrans.wiki2text import TextWikiMarkup +from wikitest import populate_methods -class TestTextWiktionaryMarkup (unittest.TestCase): +class TestTextWikiMarkup (unittest.TestCase): pass -populateMethods(TestTextWiktionaryMarkup, TextWiktionaryMarkup, '.text') +populate_methods(TestTextWikiMarkup, TextWikiMarkup, 
'.text') if __name__ == '__main__': unittest.main() diff --git a/tests/WikiTest.py b/tests/wikitest.py index 1429f5e..ff26227 100644 --- a/tests/WikiTest.py +++ b/tests/wikitest.py @@ -1,34 +1,34 @@ #!/usr/bin/python # -*- coding: utf-8 -*- from __future__ import print_function from glob import glob import os.path -def MarkupTest(classname, name_in, name_out): +def wiki_markup_test(classname, name_in, name_out): fh = open(name_out) buf = ''.join(fh.readlines()).strip() fh.close() hwm = classname(filename=name_in, lang="en") hwm.parse() if str(hwm).strip() == buf: return True # fail print("\n>>>%s<<<" % buf) print(">>>%s<<<" % str(hwm).strip()) return False -def populateMethods(cls, wcls, suffix): +def populate_methods(cls, wcls, suffix): def settest(self, base, wiki_name, pat_name): def dyntest(self): - self.assertTrue(MarkupTest(wcls, wiki_name, pat_name)) + self.assertTrue(wiki_markup_test(wcls, wiki_name, pat_name)) meth = 'test_' + wcls.__name__ + '_' + base dyntest.__name__ = meth setattr(cls, meth, dyntest) for file in glob('testdata/*.wiki'): if os.path.isfile(file): patfile = file[:len(file) - 5] + suffix base, ext = os.path.splitext(os.path.basename(file)) if os.path.exists(patfile) and os.path.isfile(patfile): settest(cls, base, file, patfile) diff --git a/WikiTrans/__init__.py b/wikitrans/__init__.py index 5832e38..5832e38 100644 --- a/WikiTrans/__init__.py +++ b/wikitrans/__init__.py diff --git a/WikiTrans/wiki2html.py b/wikitrans/wiki2html.py index 6147642..ce65bae 100644 --- a/WikiTrans/wiki2html.py +++ b/wikitrans/wiki2html.py @@ -6,28 +6,39 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Wiki markup to HTML translator. + +Classes: + +HtmlWikiMarkup -- Converts Wiki material to HTML. +HtmlWiktionaryMarkup -- Reserved for future use. Currently does the same as + HtmlWikiMarkup. + +""" + from __future__ import print_function -from WikiTrans.wikimarkup import * -from WikiTrans.wikitoken import * -from WikiTrans.wikins import wiki_ns_re, wiki_ns +from wikitrans.wikimarkup import * +from wikitrans.wikitoken import * +from wikitrans.wikins import wiki_ns_re, wiki_ns import re try: from urllib import quote as url_quote except ImportError: from urllib.parse import quote as url_quote try: from html import escape as html_escape except ImportError: from cgi import escape as html_escape __all__ = [ "HtmlWikiMarkup", "HtmlWiktionaryMarkup" ] @@ -70,34 +81,34 @@ class HtmlLinkNode(HtmlSeqNode): tgt = self.parser.media_base + '/' + tgt else: tgt = self.parser.mktgt(tgt) elif self.type == 'LINK' and qual in self.parser.langtab: tgt = self.parser.mktgt(tgt, qual) if not text or text == '': text = self.parser.langtab[qual] else: tgt = self.parser.mktgt(tgt) else: tgt = self.parser.mktgt(arg) return "<a href=\"%s\">%s</a>" % (tgt, - text if (text and text != '') \ - else arg) + text if (text and text != '') else arg) class HtmlRefNode(WikiRefNode): def format(self): target = self.ref text = self.content.format() - return "<a href=\"%s\">%s</a>" % (target, - text if (text and text != '') \ - else target) + return "<a href=\"%s\">%s</a>" % ( + target, + text if (text and text != '') else target + ) class HtmlFontNode(HtmlSeqNode): def format(self): comm = { 'IT': 'i', 'BOLD': 'b' } s = '<%s>' % comm[self.type] for x in self.content: s += x.format() s += '</%s>' % comm[self.type] return s class HtmlTextNode(HtmlSeqNode): @@ -143,32 +154,32 @@ class HtmlTagNode(WikiTagNode): s = self.content.format() self.parser.nested -= 1 
return '<pre><code>' + s + '</code></pre>' #FIXME elif self.tag == 'ref': n = self.idx+1 return '<sup id="cite_ref-%d" class="reference"><a name="cite_ref-%d" href=#cite_note-%d">%d</a></sup>' % (n,n,n,n) elif self.tag == 'references': s = '<div class="references">\n' s += '<ol class="references">\n' n = 0 for ref in self.parser.references: n += 1 - s += ('<li id="cite_note-%d">' + \ - '<span class="mw-cite-backlink">' + \ - '<b><a href="#cite_ref-%d">^</a></b>' + \ - '</span>' + \ - '<span class="reference-text">' + \ - ref.content.format() + \ - '</span>' + \ - '</li>\n') % (n,n) + s += ('<li id="cite_note-%d">' + + '<span class="mw-cite-backlink">' + + '<b><a href="#cite_ref-%d">^</a></b>' + + '</span>' + + '<span class="reference-text">' + + ref.content.format() + + '</span>' + + '</li>\n') % (n,n) s += '</ol>\n</div>\n' return s else: s = '<' + self.tag if self.args: s += ' ' + str(self.args) s += '>' s += self.content.format() return s + '</' + self.tag + '>' class HtmlParaNode(HtmlSeqNode): def format(self): @@ -178,35 +189,67 @@ class HtmlPreNode(HtmlSeqNode): def format(self): s = super(HtmlPreNode, self).format() if self.parser.nested: return s else: return '<pre>' + s + '</pre>' class HtmlIndNode(WikiIndNode): def format(self): return ("<dl><dd>" * self.level) + self.content.format() + "</dd></dl>" * self.level -class HtmlWikiMarkup (WikiMarkup): - """ - A (hopefully) general-purpose Wiki->HTML translator class. - FIXME: 1. See WikiMarkup for a list - 2. [[official position]]s : final 's' gets after closing </a> tag. - Should be before. +class HtmlWikiMarkup(WikiMarkup): + """A Wiki markup to HTML translator class. + + Usage: + + x = HtmlWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print it as HTML: + print(str(x)) + + Known bugs: + * [[official position]]s + Final 's' gets after closing </a> tag. Should be before. """ nested = 0 references = [] def __init__(self, *args, **kwargs): + """Create a HtmlWikiMarkup object. 
+ + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + lang=CODE + Specifies source language. Default is 'en'. This variable can be + referred to as '%(lang)s' in the keyword arguments below. + html_base=URL + Base URL for cross-references. Default is + 'http://%(lang)s.wiktionary.org/wiki/' + image_base=URL + Base URL for images. Default is + 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' + media_base=URL + Base URL for media files. Default is + 'http://www.mediawiki.org/xml/export-0.3' + """ + super(HtmlWikiMarkup, self).__init__(*args, **kwargs) self.token_class['LINK'] = HtmlLinkNode self.token_class['TMPL'] = HtmlLinkNode self.token_class['REF'] = HtmlRefNode self.token_class['IT'] = HtmlFontNode self.token_class['BOLD'] = HtmlFontNode self.token_class['HDR'] = HtmlHdrNode self.token_class['BAR'] = HtmlBarNode self.token_class['ENV'] = HtmlEnvNode self.token_class['TAG'] = HtmlTagNode self.token_class['PARA'] = HtmlParaNode self.token_class['PRE'] = HtmlPreNode @@ -261,39 +304,17 @@ class HtmlWikiMarkup (WikiMarkup): text += ',' n += 1 text += ' <span class="proto">' + x + '</span>' text += ' <span class="meaning">(' + s[-2] + ')</span>' return text def __str__(self): str = "" for elt in self.tree: str += elt.format() return str -class HtmlWiktionaryMarkup (HtmlWikiMarkup): - """ - A class for translating Wiktionary articles into HTML. - This version does not do much, except that it tries to correctly - format templates. But "tries" does not mean "does". The heuristics - used here is clearly not enough to cope with it. - - 1. FIXME: - The right solution would be to have a database of templates with their - semantics and to decide on their rendering depending on that. E.g. - {{term}} in en.wiktionary means "replace this with the search term". - This, however, does not work in other wiktionaries. 
There are - also more complex templates, e.g.: {{t+|bg|врата|n|p|tr=vrata|sc=Cyrl}} - I don't know what it means. Couldn't find any documentation either. - Again, this template does not work in other dictionaries. +class HtmlWiktionaryMarkup(HtmlWikiMarkup): + """A class for translating Wiktionary articles into HTML. - 2. Capitulation notice: - Given the: - 1. vast amount of wiktionaries available, - 2. abundance of various templates for each wictionary, - 3. apparent lack of documentation thereof, - 4. the lack of standardized language-independent templates, - I dont see any way to cope with the template-rendering task within a - reasonable amount of time. - - Faeci quod potui, faciant meliora potentes. + Reserved for future use. Currently does the same as HtmlWikiMarkup. """ diff --git a/WikiTrans/wiki2texi.py b/wikitrans/wiki2texi.py index 7297195..d9e5f52 100644 --- a/WikiTrans/wiki2texi.py +++ b/wikitrans/wiki2texi.py @@ -6,27 +6,36 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from WikiTrans.wikimarkup import * -from WikiTrans.wikitoken import * -from WikiTrans.wikins import wiki_ns_re, wiki_ns +""" +Wiki markup to Texinfo translator. + +Classes: + +TexiWikiMarkup -- Converts Wiki material to Texinfo. 
+ +""" + +from wikitrans.wikimarkup import * +from wikitrans.wikitoken import * +from wikitrans.wikins import wiki_ns_re, wiki_ns import re import urllib class Acc(list): def prepend(self,x): self.insert(0,x) def is_empty(self): return len(self) == 0 def clear(self): self = [] @@ -242,25 +251,37 @@ class TexiLinkNode(WikiSeqNode): class TexiRefNode(WikiRefNode): def format(self): parser = self.parser target = self.ref save = parser._begin_print() self.content.format() text = parser._end_print(save) if text and text != '': parser._print("@uref{%s,%s}" % (target, text), escape=False) else: parser._print("@uref{%s}" % target, escape=False) -class TexiWikiMarkup (WikiMarkup): +class TexiWikiMarkup(WikiMarkup): + """Wiki markup to Texinfo translator class. + + Usage: + + x = TexiWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print it as Texi: + print(str(x)) + + """ + nested = 0 sectcomm = { 'numbered': [ '@top', '@chapter', '@section', '@subsection', '@subsubsection' ], 'unnumbered': [ '@top', '@unnumbered', @@ -279,24 +300,58 @@ class TexiWikiMarkup (WikiMarkup): '@majorheading' '@chapheading', '@heading', '@subheading', '@subsubheading' ] } sectioning_model = 'numbered' sectioning_start = 0 def __init__(self, *args, **keywords): + """Create a TexiWikiMarkup object. + + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + + sectioning_model=MODEL + Select the Texinfo sectioning model for the output document. Possible + values are: + + 'numbered' + Top of document is marked with "@top". Headings ("=", "==", + "===", etc) produce "@chapter", "@section", "@subsection", etc. + 'unnumbered' + Unnumbered sectioning: "@top", "@unnumbered", "@unnumberedsec", + "@unnumberedsubsec". + 'appendix' + Sectioning suitable for appendix entries: "@top", "@appendix", + "@appendixsec", "@appendixsubsec", etc. 
+ 'heading' + Use heading directives to reflect sectioning: "@majorheading", + "@chapheading", "@heading", "@subheading", etc. + sectioning_start=N + Shift resulting heading level by N positions. For example, supposing + "sectioning_model='numbered'", "== A ==" normally produces + "@section A" on output. Now, if given "sectioning_start=1", this + directive will produce "@subsection A" instead. + """ + super(TexiWikiMarkup, self).__init__(*args, **keywords) self.token_class['TEXT'] = TexiTextNode self.token_class['TAG'] = TexiTagNode self.token_class['PARA'] = TexiParaNode self.token_class['PRE'] = TexiPreNode self.token_class['IT'] = TexiFontNode self.token_class['BOLD'] = TexiFontNode self.token_class['HDR'] = TexiHdrNode self.token_class['BAR'] = TexiBarNode self.token_class['IND'] = TexiIndNode self.token_class['ENV'] = TexiEnvNode diff --git a/WikiTrans/wiki2text.py b/wikitrans/wiki2text.py index cb3a183..1fbc61b 100644 --- a/WikiTrans/wiki2text.py +++ b/wikitrans/wiki2text.py @@ -6,27 +6,38 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from WikiTrans.wikitoken import * -from WikiTrans.wikimarkup import * -from WikiTrans.wikins import wiki_ns_re, wiki_ns +""" +Wiki markup to plain text translator. + +Classes: + +TextWikiMarkup -- Converts Wiki material to plain text. +TextWiktionaryMarkup -- Reserved for future use. Currently does the same as + TextWikiMarkup. 
+ +""" + +from wikitrans.wikitoken import * +from wikitrans.wikimarkup import * +from wikitrans.wikins import wiki_ns_re, wiki_ns import re try: from urllib import quote as url_quote except ImportError: from urllib.parse import quote as url_quote class TextSeqNode(WikiSeqNode): def format(self): string = "" for x in self.content: if len(string) > 1 and not string[-1].isspace(): string += ' ' @@ -98,27 +109,27 @@ class TextLinkNode(WikiSeqNode): if s[0] == 'disambigR' or s[0] == 'wikiquote': return "" if len(s) > 1 and s[1] == 'thumb': return "" (qual,sep,tgt) = arg.partition(':') if tgt != '': ns = self.parser.wiki_ns_name(qual) if ns: if ns == 'NS_IMAGE': if not self.parser.show_urls: return "" text = "[%s: %s]" % (qual, text if text else arg) - tgt = self.image_base + '/' + \ - url_quote(tgt) + \ - '/250px-' + url_quote(tgt) + tgt = "%s/%s/250px-%s" % (self.image_base, + url_quote(tgt), + url_quote(tgt)) elif ns == 'NS_MEDIA': text = "[%s]" % (qual) else: tgt = self.parser.mktgt(tgt) elif self.type == 'LINK' and qual in self.parser.langtab: text = self.parser.langtab[qual] + ": " + tgt tgt = self.parser.mktgt(tgt, qual) else: tgt = self.parser.mktgt(tgt) else: tgt = self.parser.mktgt(arg) if self.parser.show_urls: @@ -132,26 +143,29 @@ class TextTmplNode(TextLinkNode): def format(self): return '[' + super(TextTmplNode, self).format() + ']' class TextBarNode(WikiNode): def format(self): w = self.parser.width if w < 5: w = 5 return "\n" + ("-" * (w - 5)).center(w - 1) + "\n" class TextHdrNode(WikiHdrNode): def format(self): - return "\n" + ("*" * self.level) + " " + \ - self.content.format().lstrip(" ") + "\n\n" + return ("\n" + + ("*" * self.level) + + " " + + self.content.format().lstrip(" ") + + "\n\n") class TextRefNode(WikiRefNode): def format(self): text = self.content.format() if text: return "%s (see %s) " % (text, self.ref) else: return "see " + self.ref class TextEnvNode(WikiEnvNode): def format(self): type = self.envtype @@ -195,43 +209,70 @@ class 
TextTagNode(WikiTagNode): elif self.tag == 'references': s = '\nReferences:\n' for ref in self.parser.references: s += ('[%d]. ' % (ref.idx+1)) + ref.content.format() + '\n' else: s = '<' + self.tag if self.args: s += ' ' + str(self.args) s += '>' + self.content.format() + '</' + self.tag + '>' return s -class TextWikiMarkup (WikiMarkup): - """ - A (general-purpose Wiki->Text translator class. +class TextWikiMarkup(WikiMarkup): + """A Wiki markup to plain text translator. + + Usage: + + x = TextWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print it as plain text: + print(str(x)) + """ # Output width width = 78 # Do not show references. show_urls = False # Provide a minimum markup markup = True # Number of current element in the environment num = 0 # Array of footnote references references = [] def __init__(self, *args, **keywords): + """Create a TextWikiMarkup object. + + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + + width=N + Limit output width to N columns. Default is 78. + show_urls=False + By default, the link URLs are displayed in parentheses next to the + link text. If this argument is given, only the link text will be + displayed. 
+ """ + super(TextWikiMarkup,self).__init__(*args, **keywords) if 'width' in keywords: self.width = keywords['width'] if 'show_urls' in keywords: self.show_urls = keywords['show_urls'] self.token_class['SEQ'] = TextSeqNode self.token_class['TEXT'] = TextTextNode self.token_class['PRE'] = TextPreNode self.token_class['PARA'] = TextParaNode self.token_class['SEQ'] = TextSeqNode self.token_class['IT'] = TextItNode self.token_class['BOLD'] = TextBoldNode @@ -249,25 +290,25 @@ class TextWikiMarkup (WikiMarkup): return wiki_ns[self.lang][str] elif str in wiki_ns_re[self.lang]: for elt in wiki_ns_re[self.lang][str]: if str.beginswith(elt[0]) and str.endswith(elt[1]): return elt[2] return None def mktgt(self, tgt, lang = None): if not lang: lang = self.lang return self.html_base % { 'lang' : lang } + url_quote(tgt) - def indent (self, lev, text): + def indent(self, lev, text): if text.find('\n') == -1: s = (" " * lev) + text else: s = "" for elt in text.split('\n'): if elt: s += (" " * lev) + elt + '\n' if not text.endswith('\n'): s = s.rstrip('\n') return s def fmtpara(self, input): @@ -289,18 +330,19 @@ class TextWikiMarkup (WikiMarkup): length = 0 linebuf = "" linebuf += " " * wsc + s length += wsc + wlen return output + linebuf def __str__(self): str = "" for elt in self.tree: str += elt.format() return str -class TextWiktionaryMarkup (TextWikiMarkup): - """ - See documentation for HtmlWiktionaryMarkup +class TextWiktionaryMarkup(TextWikiMarkup): + """A class for translating Wiktionary articles into plain text. + + Reserved for future use. Currently does the same as TextWikiMarkup. """ - # FIXME: It is supposed to do something about templates + diff --git a/WikiTrans/wikidump.py b/wikitrans/wikidump.py index 7457dfa..d5f651c 100644 --- a/WikiTrans/wikidump.py +++ b/wikitrans/wikidump.py @@ -5,38 +5,73 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Print Wiki parse tree as JSON. + +Classes: + +DumpWikiMarkup + +""" + from __future__ import print_function -from WikiTrans.wikitoken import * +from wikitrans.wikitoken import * import json -from WikiTrans.wikimarkup import WikiMarkup +from wikitrans.wikimarkup import WikiMarkup class DumpReferences(object): idx = 0 def __len__(self): return self.idx + 1 def append(self, obj): self.idx += 1 class DumpWikiMarkup(WikiMarkup): + """Produce a JSON dump of the Wiki markup parse tree. + + Usage: + + x = DumpWikiMarkup(file="input.wiki") + # Parse the input: + x.parse() + # Print a JSON dump of the parse tree + print(str(x)) + + """ + indent = None references = DumpReferences() def __init__(self, **kwarg): + """Create a DumpWikiMarkup object. + + Arguments: + + filename=FILE + Read Wiki material from the file named FILE. + file=FD + Read Wiki material from file object FD. + text=STRING + Read Wiki material from STRING. + indent=N + Basic indent offset for JSON objects. 
+ """ + n = kwarg.pop('indent', None) if n != None: self.indent = int(n) - WikiMarkup.__init__(self, **kwarg) + super(DumpWikiMarkup,self).__init__(self, **kwarg) def __str__(self): return json.dumps(self.tree, cls=WikiNodeEncoder, indent=self.indent, separators=(',',': '), sort_keys=True) diff --git a/WikiTrans/wikimarkup.py b/wikitrans/wikimarkup.py index 6cbf5de..77c3b30 100644 --- a/WikiTrans/wikimarkup.py +++ b/wikitrans/wikimarkup.py @@ -1,49 +1,74 @@ #!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff -# +# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. +""" +Wiki markup parser. + +This module provides two classes: + +WikiMarkupParser: + An abstract parser class, which serves as a base class for all markup + classes in this package. + +WikiMarkup + A subclass of the above, providing basic input method.
+ +""" + from __future__ import print_function import sys import re from types import * -from WikiTrans.wikitoken import * +from wikitrans.wikitoken import * -__all__ = [ "BaseWikiMarkup", "WikiMarkup", - "TagAttributes", "TagAttributeSyntax" ] +__all__ = [ "WikiMarkupParser", "WikiMarkup", + "TagAttributes", "TagAttributeSyntaxError" ] -class UnexpectedToken(Exception): +class UnexpectedTokenError(Exception): def __init__(self, value): self.value = value -class TagAttributeSyntax(Exception): +class TagAttributeSyntaxError(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class TagAttributes(object): + """A dictionary-like collection of tag attributes. + + Example: + + attr = TagAttributes('href="foo" length=2') + if 'href' in attr: + print(attr['href']) # prints "foo" + for a in attr: + ... + """ + attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?") valseg = re.compile("^[^\\\"]+") tab = {} printable = None def __init__(self, string): if not string: self.printable = '' return self.printable = string s = string self.tab = {} while s != '': @@ -59,706 +84,778 @@ class TagAttributes(object): val += m.group(0) s = s[m.end(0):] if s[0] == '\\': val += s[1] s += 2 elif s[0] == '"': s = s[1:] break else: val = 1 self.tab[name] = val else: - raise TagAttributeSyntax(s) + raise TagAttributeSyntaxError(s) def __len__(self): return len(self.tab) def __getitem__(self, key): return self.tab[key] def __contains__(self, key): return key in self.tab def __iter__(self): for key in self.tab: yield(key) def has_key(self, key): return self.__contains__(key) def __setitem__(self, key, value): self.tab[key] = value def __delitem__(self, key): del self.tab[key] def __str__(self): return self.printable def __repr__(self): return self.printable -class BaseWikiMarkup(object): +class WikiMarkupParser(object): + """Parser for Wiki markup language. + + Given input in Wiki markup language creates an abstract parse tree for it.
+ This is a base class for actual parsers. The subclasses must provide the + input |