summaryrefslogtreecommitdiff
path: root/wikitrans/wikitoken.py
diff options
context:
space:
mode:
Diffstat (limited to 'wikitrans/wikitoken.py')
-rw-r--r--wikitrans/wikitoken.py318
1 files changed, 318 insertions, 0 deletions
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py
new file mode 100644
index 0000000..49c6c68
--- /dev/null
+++ b/wikitrans/wikitoken.py
@@ -0,0 +1,318 @@
1# Wiki tokens. -*- coding: utf-8 -*-
2# Copyright (C) 2015-2018 Sergey Poznyakoff
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3, or (at your option)
7# any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17"""
18Wiki markup tokens and associated classes.
19
20This module defines classes for the basic nodes of the Wiki markup parse tree:
21
22WikiNode -- Abstract parse tree node.
23WikiContentNode -- A node associated with some content.
24WikiSeqNode -- A sequence of nodes.
25WikiTextNode -- Textual content.
26WikiDelimNode -- Delimiter.
27WikiTagNode -- Tag (e.g. <tt>, </tt>, <tt />, etc.)
28WikiRefNode -- Wiki reference (e.g. [target|name])
29WikiHdrNode -- Heading (e.g. == Section ==)
30WikiEltNode -- Environment element.
31WikiEnvNode -- Environment (numbered or unnumbered list, definition, etc.)
32WikiIndNode -- Indent node.
33
34Auxiliary classes:
35
36WikiNodeEncoder -- Custom JSONEncoder subclass for serializing objects of the
37 above classes.
38"""
39
40from __future__ import print_function
41import re
42import json
43
class WikiNodeEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize WikiNode objects.

    Intended to be passed as the ``cls`` argument of json.dumps.
    """

    def default(self, obj):
        """Return a JSON-serializable representation of obj.

        WikiNode instances (including subclasses) are converted by calling
        their jsonEncode method.  Any other object is handed over to the
        default() method of the base JSONEncoder.
        """
        if not isinstance(obj, WikiNode):
            return super(WikiNodeEncoder, self).default(obj)
        return obj.jsonEncode()
50
def jsonencoder(func):
    """Decorator for jsonEncode methods of node classes.

    The decorated method must take only self and return a dictionary.
    The wrapper calls it and then stores two extra keys in the result:

      'wikinode' -- name of the class of the object being encoded
      'type'     -- value of the object's type attribute

    Existing values under these keys are overwritten.  The same dictionary
    object that the decorated method returned is returned to the caller.
    """
    # Imported locally so this function does not depend on the module-level
    # import list.
    import functools

    @functools.wraps(func)  # keep the name and docstring of the method
    def _mkencoder(self):
        # Not named 'json', to avoid shadowing the json module.
        encoded = func(self)
        encoded['wikinode'] = self.__class__.__name__
        encoded['type'] = self.type
        return encoded
    return _mkencoder
58
class WikiNode(object):
    """Generic parse tree node.

    Attributes:

      type   -- actual type of this object (string)
      parser -- parser instance that owns this node
    """

    type = 'UNDEF'
    parser = None

    def __init__(self, parser, **kwargs):
        """Create a node owned by parser.

        Each keyword argument sets the instance attribute of the same name.
        Only names that already exist on the object (normally as class-level
        defaults) are accepted; any other name raises AttributeError.
        """
        self.parser = parser
        for key in kwargs:
            if not hasattr(self, key):
                raise AttributeError("'%s' has no attribute '%s'"
                                     % (self.__class__.__name__, key))
            # Store directly in the instance dictionary, shadowing the
            # class-level default.
            self.__dict__[key] = kwargs[key]

    def __str__(self):
        """Return the node serialized as JSON text with sorted keys."""
        return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary of the instance attributes of this node.

        Only attributes stored on the instance itself are included.  The
        'parser' attribute, names starting with an underscore and attributes
        holding callables are left out.
        """
        ret = {}
        for name in sorted(self.__dict__):
            if name == 'parser' or name.startswith('_'):
                continue
            value = self.__dict__[name]
            # The original test compared type(name) with the string
            # 'function', which can never be true; check the value instead.
            if callable(value):
                continue
            ret[name] = value
        return ret

    def format(self):
        """Abstract formatting function.

        Derived classes must override it.
        """
        pass
98
class WikiContentNode(WikiNode):
    """Generic content node.

    Attributes:

      content -- Actual content
    """

    content = None

    def format(self):
        """Do nothing; this class provides no formatting of its own."""
        pass

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary with the encoded content of this node.

        The 'content' key holds:

          None                   -- if content is empty or unset (any falsy
                                    value)
          the content as is      -- for nodes of type 'TEXT'
          list of dictionaries   -- if content is a list; each element is
                                    encoded with its jsonEncode method
          dictionary             -- if content is a single WikiNode
          the content as is      -- in any other case
        """
        ret = {}
        if self.content:
            if self.type == 'TEXT':
                ret['content'] = self.content
            elif isinstance(self.content, list):
                # A real list, not map(): on Python 3 map() returns an
                # iterator, which the json module cannot serialize.
                ret['content'] = [x.jsonEncode() for x in self.content]
            elif isinstance(self.content, WikiNode):
                ret['content'] = self.content.jsonEncode()
            else:
                ret['content'] = self.content
        else:
            ret['content'] = None
        return ret
127
class WikiSeqNode(WikiContentNode):
    """Generic sequence of nodes.

    Attributes:

      content -- list of nodes.
    """

    def format(self):
        """Call format() on each node in content, in order.

        Does nothing if content is empty or unset.
        """
        # content defaults to None; treat that as an empty sequence, the
        # same way jsonEncode does.
        for x in self.content or ():
            x.format()

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary with the encoded content of this node.

        The 'content' key holds None if content is empty or unset, a list of
        encoded elements if content is a list, the encoded node if content
        is a single WikiNode, and the content as is otherwise.
        """
        ret = {}
        if not self.content:
            ret['content'] = None
        elif isinstance(self.content, list):
            # A real list, not map(): on Python 3 map() returns an
            # iterator, which the json module cannot serialize.
            ret['content'] = [x.jsonEncode() for x in self.content]
        elif isinstance(self.content, WikiNode):
            ret['content'] = self.content.jsonEncode()
        else:
            ret['content'] = self.content
        return ret
152
153
154# ##############
155
class WikiTextNode(WikiContentNode):
    """Node holding a piece of text.

    Attributes:

      type    -- 'TEXT'
      content -- string
    """

    type = 'TEXT'

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary holding the content of this node unchanged."""
        encoded = dict(content=self.content)
        return encoded
172
class WikiDelimNode(WikiContentNode):
    """Node representing a delimiter.

    Attributes:

      type         -- 'DELIM'
      content      -- actual delimiter string
      isblock      -- boolean indicating whether it is a block delimiter
      continuation -- True if continuation is expected
    """

    type = 'DELIM'
    isblock = False
    continuation = False
187
class WikiTagNode(WikiContentNode):
    """Node representing a Wiki tag.

    Attributes:

      tag     -- actual tag name (with '<', '>', and eventual '/' stripped)
      isblock -- True if this is a block tag
      args    -- List of tag arguments
      idx     -- If this is a "see also" reference, index of this ref in the
                 list of references.
                 FIXME: Perhaps this merits a subclass?
    """

    tag = None
    isblock = False
    args = None
    idx = None

    def __init__(self, *args, **keywords):
        """Create the node and, for <ref> tags, register it with the parser.

        All arguments are passed on to the base class constructor.  If the
        resulting node has type 'TAG' and tag 'ref', and the parser has a
        'references' attribute, the node is appended to that list and idx
        is set to its position in it.
        """
        super(WikiTagNode, self).__init__(*args, **keywords)
        if self.type != 'TAG' or self.tag != 'ref':
            return
        owner = self.parser
        if not hasattr(owner, 'references'):
            return
        self.idx = len(owner.references)
        owner.references.append(self)

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary describing this tag.

        'args' is the tab attribute of the args object, and 'content' is the
        encoded content node; each is None when the corresponding attribute
        is empty or unset.
        """
        if self.args:
            tag_args = self.args.tab
        else:
            tag_args = None
        if self.content:
            body = self.content.jsonEncode()
        else:
            body = None
        return {
            'tag': self.tag,
            'isblock': self.isblock,
            'args': tag_args,
            'content': body,
            'idx': self.idx
        }
221
222class WikiRefNode(WikiContentNode):
223 """Reference node.
224
225 This class represents a wiki reference, such as "[ref|content]".
226
227 Attributes:
228
229 ref -- actual reference
230 content -- content string
231 """
232
233 type = 'REF'
234 ref = None
235 @jsonencoder
236 def jsonEncode(self):
237 return {
238 'ref': self.ref,
239 'content': self.content.jsonEncode()
240 }