diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-05 20:25:52 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2009-03-05 20:25:52 +0200 |
commit | a8cd24f0f5cbefccdefd2a4a5166b89c6c8f7a54 (patch) | |
tree | f9dfb0da028c041fe06b1baa29a995fa90c2162a | |
parent | 7153f67c6ea71d684d412efd032c309cd273ed19 (diff) | |
download | wikitrans-a8cd24f0f5cbefccdefd2a4a5166b89c6c8f7a54.tar.gz wikitrans-a8cd24f0f5cbefccdefd2a4a5166b89c6c8f7a54.tar.bz2 |
Avoid losing newlines while parsing the input stream. Provide some rudimentary parsing for Wiktionary templates.
-rw-r--r-- | wiki2html.py | 53 | ||||
-rwxr-xr-x[-rw-r--r--] | wikicvt.py | 0 | ||||
-rw-r--r-- | wikimarkup.py | 6 |
3 files changed, 52 insertions, 7 deletions
diff --git a/wiki2html.py b/wiki2html.py index 5a8fdcc..faab18b 100644 --- a/wiki2html.py +++ b/wiki2html.py | |||
@@ -18,6 +18,7 @@ | |||
18 | from wikimarkup import * | 18 | from wikimarkup import * |
19 | from types import TupleType | 19 | from types import TupleType |
20 | from wikins import wiki_ns_re, wiki_ns | 20 | from wikins import wiki_ns_re, wiki_ns |
21 | import re | ||
21 | import urllib | 22 | import urllib |
22 | 23 | ||
23 | class HtmlWikiMarkup (WikiMarkup): | 24 | class HtmlWikiMarkup (WikiMarkup): |
@@ -45,13 +46,53 @@ class HtmlWikiMarkup (WikiMarkup): | |||
45 | if not lang: | 46 | if not lang: |
46 | lang = self.lang | 47 | lang = self.lang |
47 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) | 48 | return self.html_base % { 'lang' : lang } + urllib.quote(tgt) |
49 | |||
50 | def tmpl_term(self, s): | ||
51 | if len(s) == 2: | ||
52 | return s[1] | ||
53 | text = None | ||
54 | trans = None | ||
55 | for x in s[1:]: | ||
56 | m = re.match('(\w+)=', x) | ||
57 | if m: | ||
58 | if m.group(1) == "tr": | ||
59 | trans = x[m.end(1)+1:] | ||
60 | elif not text: | ||
61 | text = x | ||
62 | if text: | ||
63 | if trans: | ||
64 | text += ' <span class="trans">[' + trans + ']</span>' | ||
65 | return text | ||
66 | |||
67 | def tmpl_proto(self, s): | ||
68 | text = '<span class="proto-lang">Proto-' + s[1] + '</span>' | ||
69 | if len(s) >= 4: | ||
70 | n = 0 | ||
71 | for x in s[2:-2]: | ||
72 | if n > 0: | ||
73 | text += ',' | ||
74 | n += 1 | ||
75 | text += ' <span class="proto">' + x + '</span>' | ||
76 | text += ' <span class="meaning">(' + s[-2] + ')</span>' | ||
77 | return text | ||
78 | |||
48 | 79 | ||
49 | def fmtlink(self, elt, istmpl): | 80 | def fmtlink(self, elt, istmpl): |
50 | arg = self.format(elt[1][0]) | 81 | arg = self.format(elt[1][0]) |
82 | text = None | ||
51 | if len(elt[1]) > 1: | 83 | if len(elt[1]) > 1: |
52 | text = self.format(elt[1][1]) | 84 | text = '<span class="template">' + self.format(elt[1][1]) + '</span>' |
53 | else: | 85 | if istmpl: |
54 | text = None | 86 | s = map(self.format, elt[1]) |
87 | if re.match("t[+-]$", s[0]): | ||
88 | if len(s) > 2: | ||
89 | text = s[2] | ||
90 | elif s[0] == "term": | ||
91 | text = self.tmpl_term(s) | ||
92 | elif s[0] == "proto": | ||
93 | text = self.tmpl_proto(s) | ||
94 | return text | ||
95 | |||
55 | (qual,sep,tgt) = arg.partition(':') | 96 | (qual,sep,tgt) = arg.partition(':') |
56 | if tgt != '': | 97 | if tgt != '': |
57 | ns = self.wiki_ns_name(qual) | 98 | ns = self.wiki_ns_name(qual) |
@@ -89,10 +130,10 @@ class HtmlWikiMarkup (WikiMarkup): | |||
89 | else arg) | 130 | else arg) |
90 | 131 | ||
91 | def str_link(self, elt): | 132 | def str_link(self, elt): |
92 | return self.fmtlink(elt, False) + " " | 133 | return self.fmtlink(elt, False) |
93 | 134 | ||
94 | def str_tmpl(self, elt): | 135 | def str_tmpl(self, elt): |
95 | return self.fmtlink(elt, True) + " " | 136 | return self.fmtlink(elt, True) |
96 | 137 | ||
97 | def str_ref(self, elt): | 138 | def str_ref(self, elt): |
98 | target = elt[1] | 139 | target = elt[1] |
@@ -104,7 +145,7 @@ class HtmlWikiMarkup (WikiMarkup): | |||
104 | def concat(self, eltlist): | 145 | def concat(self, eltlist): |
105 | string = "" | 146 | string = "" |
106 | for x in eltlist: | 147 | for x in eltlist: |
107 | string += " " + self.format(x) | 148 | string += self.format(x) |
108 | return string | 149 | return string |
109 | 150 | ||
110 | def str_it(self, elt): | 151 | def str_it(self, elt): |
diff --git a/wikicvt.py b/wikicvt.py index a2e95e4..a2e95e4 100644..100755 --- a/wikicvt.py +++ b/wikicvt.py | |||
diff --git a/wikimarkup.py b/wikimarkup.py index 716bc4a..a340628 100644 --- a/wikimarkup.py +++ b/wikimarkup.py | |||
@@ -183,6 +183,7 @@ class BaseWikiMarkup: | |||
183 | if self.peektkn()[0] == NL: | 183 | if self.peektkn()[0] == NL: |
184 | self.dprint(80, "LEAVE parse_fontmod=None") | 184 | self.dprint(80, "LEAVE parse_fontmod=None") |
185 | return None | 185 | return None |
186 | seq.append((TEXT, '\n')) | ||
186 | else: | 187 | else: |
187 | self.dprint(80, "LEAVE parse_fontmod=None") | 188 | self.dprint(80, "LEAVE parse_fontmod=None") |
188 | return None | 189 | return None |
@@ -243,6 +244,7 @@ class BaseWikiMarkup: | |||
243 | elif tok[0] == TEXT: | 244 | elif tok[0] == TEXT: |
244 | list.append(tok) | 245 | list.append(tok) |
245 | elif tok[0] == NL: | 246 | elif tok[0] == NL: |
247 | list.append((TEXT, '\n')) | ||
246 | continue | 248 | continue |
247 | else: | 249 | else: |
248 | self.dprint(80, "LEAVE parse_ref=%s", "None") | 250 | self.dprint(80, "LEAVE parse_ref=%s", "None") |
@@ -299,6 +301,7 @@ class BaseWikiMarkup: | |||
299 | self.ungetkn() | 301 | self.ungetkn() |
300 | if self.is_block_delim(tok): | 302 | if self.is_block_delim(tok): |
301 | break | 303 | break |
304 | textlist.append('\n') | ||
302 | elif tok[0] == NIL: | 305 | elif tok[0] == NIL: |
303 | break | 306 | break |
304 | elif tok[0] == DELIM: | 307 | elif tok[0] == DELIM: |
@@ -431,7 +434,8 @@ class BaseWikiMarkup: | |||
431 | self.ungetkn() | 434 | self.ungetkn() |
432 | return self.parse_para() | 435 | return self.parse_para() |
433 | elif toktype == NL: | 436 | elif toktype == NL: |
434 | return self.parse0() | 437 | return (TEXT, '\n') |
438 | # return self.parse0() | ||
435 | 439 | ||
436 | def parse(self): | 440 | def parse(self): |
437 | if not self.toklist: | 441 | if not self.toklist: |