diff options
Diffstat (limited to 'wikitrans/wikitoken.py')
-rw-r--r-- | wikitrans/wikitoken.py | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/wikitrans/wikitoken.py b/wikitrans/wikitoken.py new file mode 100644 index 0000000..49c6c68 --- /dev/null +++ b/wikitrans/wikitoken.py | |||
@@ -0,0 +1,318 @@ | |||
1 | # Wiki tokens. -*- coding: utf-8 -*- | ||
2 | # Copyright (C) 2015-2018 Sergey Poznyakoff | ||
3 | # | ||
4 | # This program is free software; you can redistribute it and/or modify | ||
5 | # it under the terms of the GNU General Public License as published by | ||
6 | # the Free Software Foundation; either version 3, or (at your option) | ||
7 | # any later version. | ||
8 | # | ||
9 | # This program is distributed in the hope that it will be useful, | ||
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | # GNU General Public License for more details. | ||
13 | # | ||
14 | # You should have received a copy of the GNU General Public License | ||
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
16 | |||
17 | """ | ||
18 | Wiki markup tokens and associated classes. | ||
19 | |||
20 | This module defines classes for the basic nodes of the Wiki markup parse tree: | ||
21 | |||
22 | WikiNode -- Abstract parse tree node. | ||
23 | WikiContentNode -- A node associated with some content. | ||
24 | WikiSeqNode -- A sequence of nodes. | ||
25 | WikiTextNode -- Textual content. | ||
26 | WikiDelimNode -- Delimiter. | ||
27 | WikiTagNode -- Tag (e.g. <tt>, </tt>, <tt />, etc.) | ||
28 | WikiRefNode -- Wiki reference (e.g. [target|name]) | ||
29 | WikiHdrNode -- Heading (e.g. == Section ==) | ||
30 | WikiEltNode -- Environment element. | ||
31 | WikiEnvNode -- Environment (numbered or unnumbered list, definition, etc.) | ||
32 | WikiIndNode -- Indent node. | ||
33 | |||
34 | Auxiliary classes: | ||
35 | |||
36 | WikiNodeEncoder -- Custom JSONEncoder subclass for serializing objects of the | ||
37 | above classes. | ||
38 | """ | ||
39 | |||
40 | from __future__ import print_function | ||
41 | import re | ||
42 | import json | ||
43 | |||
class WikiNodeEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialize WikiNode objects.

    Intended to be passed as the ``cls`` argument of json.dumps.
    """

    def default(self, obj):
        """Return a JSON-serializable representation of obj.

        WikiNode instances (including subclasses) are converted by calling
        their jsonEncode method.  Every other object is handed over to the
        stock JSONEncoder.default implementation.
        """
        if not isinstance(obj, WikiNode):
            return json.JSONEncoder.default(self, obj)
        return obj.jsonEncode()
50 | |||
def jsonencoder(func):
    """Decorate a jsonEncode method so that its result identifies the node.

    The wrapped method must return a dictionary.  The decorator adds two
    keys to that dictionary before returning it:

      wikinode -- name of the class of the object being encoded
      type     -- value of the object's 'type' attribute

    Arguments:

      func -- method taking a single argument (self) and returning a dict

    Returns the decorated method.
    """
    # Imported here so that the decorator does not depend on a module-level
    # import being present.
    import functools

    # functools.wraps keeps the name and docstring of the wrapped method.
    @functools.wraps(func)
    def _mkencoder(self):
        # Use a name that does not shadow the imported 'json' module.
        encoded = func(self)
        encoded['wikinode'] = self.__class__.__name__
        encoded['type'] = self.type
        return encoded
    return _mkencoder
58 | |||
class WikiNode(object):
    """Generic parse tree node.

    Attributes:

      type   -- actual type of this object (string)
      parser -- parser instance that owns this node
    """

    type = 'UNDEF'
    parser = None

    def __init__(self, parser, **kwargs):
        """Create a node owned by parser.

        Each keyword argument must name an attribute that already exists on
        the object (normally a class-level default); its value is stored as
        an instance attribute.  Unknown names raise AttributeError.
        """
        self.parser = parser
        for key, value in kwargs.items():
            if not hasattr(self, key):
                raise AttributeError("'%s' has no attribute '%s'" % (self.__class__.__name__, key))
            self.__dict__[key] = value

    def __str__(self):
        """Return the node serialized as a JSON string with sorted keys."""
        return json.dumps(self, cls=WikiNodeEncoder, sort_keys=True)

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary of the instance attributes of this node.

        The 'parser' attribute, attributes whose names start with an
        underscore, and attributes holding callables are left out.
        """
        # The original test, type(x) == 'function', compared the type of the
        # attribute *name* with a string and so was never true.  Checking the
        # attribute value is what actually filters functions out.
        return {
            name: value
            for name, value in self.__dict__.items()
            if name != 'parser'
            and not name.startswith('_')
            and not callable(value)
        }

    def format(self):
        """Abstract formatting function.

        Derived classes must override it.
        """
        pass
98 | |||
class WikiContentNode(WikiNode):
    """Generic content node.

    Attributes:

      content -- Actual content
    """

    content = None

    def format(self):
        """Do nothing; this class supplies no formatting of its own."""
        pass

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary with the single key 'content'.

        Empty (false) content is encoded as None.  For nodes of type 'TEXT'
        the content is stored as is.  Otherwise a list is encoded element by
        element, a WikiNode is encoded via its own jsonEncode method, and
        anything else is stored as is.
        """
        ret = {}
        if self.content:
            if self.type == 'TEXT':
                ret['content'] = self.content
            elif isinstance(self.content, list):
                # A real list is needed here: on Python 3 map() returns an
                # iterator, which json.dumps cannot serialize.
                ret['content'] = [x.jsonEncode() for x in self.content]
            elif isinstance(self.content, WikiNode):
                ret['content'] = self.content.jsonEncode()
            else:
                ret['content'] = self.content
        else:
            ret['content'] = None
        return ret
127 | |||
class WikiSeqNode(WikiContentNode):
    """Generic sequence of nodes.

    Attributes:

      content -- list of nodes.
    """

    def format(self):
        """Call format() on each node of the sequence, in order.

        A node without content (content is None or empty) formats nothing.
        """
        # content defaults to None; jsonEncode accepts that, so format must
        # not fail on it either.
        for x in self.content or ():
            x.format()

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary with the single key 'content'.

        Empty (false) content is encoded as None, a list is encoded element
        by element, a WikiNode is encoded via its own jsonEncode method, and
        anything else is stored as is.
        """
        ret = {}
        if not self.content:
            ret['content'] = None
        elif isinstance(self.content, list):
            # A real list is needed here: on Python 3 map() returns an
            # iterator, which json.dumps cannot serialize.
            ret['content'] = [x.jsonEncode() for x in self.content]
        elif isinstance(self.content, WikiNode):
            ret['content'] = self.content.jsonEncode()
        else:
            ret['content'] = self.content
        return ret
152 | |||
153 | |||
154 | # ############## | ||
155 | |||
class WikiTextNode(WikiContentNode):
    """Node holding a piece of text.

    Attributes:

      type    -- 'TEXT'
      content -- string
    """

    type = 'TEXT'

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary holding the content of the node unchanged."""
        encoded = dict(content=self.content)
        return encoded
172 | |||
class WikiDelimNode(WikiContentNode):
    """Node representing a delimiter.

    Attributes:

      type         -- 'DELIM'
      content      -- actual delimiter string
      isblock      -- boolean indicating whether it is a block delimiter
                      (False by default)
      continuation -- True if continuation is expected (False by default)
    """

    type = 'DELIM'
    isblock = False
    continuation = False
187 | |||
class WikiTagNode(WikiContentNode):
    """Node representing a Wiki tag.

    Attributes:

      tag     -- actual tag name (with '<', '>', and eventual '/' stripped)
      isblock -- True if this is a block tag
      args    -- List of tag arguments
      idx     -- If this is a "see also" reference, index of this ref in the
                 list of references.
                 FIXME: Perhaps this merits a subclass?
    """

    tag = None
    isblock = False
    args = None
    idx = None

    def __init__(self, *args, **keywords):
        """Create the node and register it with the parser if it is a ref.

        All arguments are passed to the base class constructor.  A node of
        type 'TAG' whose tag is 'ref' is appended to the parser's
        'references' list, provided the parser has one, and remembers its
        position in that list in 'idx'.
        """
        super(WikiTagNode, self).__init__(*args, **keywords)
        if self.type != 'TAG' or self.tag != 'ref':
            return
        if not hasattr(self.parser, 'references'):
            return
        # The index is taken before appending, so it is the position this
        # node occupies in the list.
        self.idx = len(self.parser.references)
        self.parser.references.append(self)

    @jsonencoder
    def jsonEncode(self):
        """Return a dictionary describing the tag.

        'args' holds the 'tab' attribute of the argument object and
        'content' the encoded content; either is None when the
        corresponding attribute is empty.
        """
        encoded_args = None
        if self.args:
            encoded_args = self.args.tab
        encoded_content = None
        if self.content:
            encoded_content = self.content.jsonEncode()
        return {
            'tag': self.tag,
            'isblock': self.isblock,
            'args': encoded_args,
            'content': encoded_content,
            'idx': self.idx
        }
221 | |||
222 | class WikiRefNode(WikiContentNode): | ||
223 | """Reference node. | ||
224 | |||
225 | This class represents a wiki reference, such as "[ref|content]". | ||
226 | |||
227 | Attributes: | ||
228 | |||
229 | ref -- actual reference | ||
230 | content -- content string | ||
231 | """ | ||
232 | |||
233 | type = 'REF' | ||
234 | ref = None | ||
235 | @jsonencoder | ||
236 | def jsonEncode(self): | ||
237 | return { | ||
238 | 'ref': self.ref, | ||
239 | 'content': self.content.jsonEncode() | ||
240 | } | ||