summary | refs | log | tree | commit | diff | about
author Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-15 20:03:03 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua> 2015-07-15 20:06:59 (GMT)
commit 6f963022315d4306f50f2e7046f2872cfd3c0500 (patch) (side-by-side diff)
tree 4197bb5a9d8b03d8d0b100aece83d9c973b00d8e
parent b3aa1433e6cd41cdb3a6212ad60d5468d6f6d649 (diff)
download wit-6f963022315d4306f50f2e7046f2872cfd3c0500.tar.gz
download wit-6f963022315d4306f50f2e7046f2872cfd3c0500.tar.bz2
Parse tag attributes
* wikimarkup.py (TagAttributes) (TagAttributeSyntax): New classes.
(BaseWikiMarkup): Store a TagAttributes object in tag['args'] of an XML tag.
* wiki2html.py (str_tag): convert tag['args'] to string
* wiki2text.py: Likewise.
* wiki2texi.py (str_tag): Handle <div>
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- wiki2html.py 2
-rw-r--r-- wiki2texi.py 6
-rw-r--r-- wiki2text.py 2
-rw-r--r-- wikimarkup.py 86
4 files changed, 84 insertions, 12 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 10a3e1b..0330b92 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -181,7 +181,7 @@ class HtmlWikiMarkup (WikiMarkup):
else:
s = '<' + elt['tag']
if elt['args']:
- s += ' ' + elt['args']
+ s += ' ' + str(elt['args'])
s += '>'
s += self.format(elt['content'])
return s + '</' + elt['tag'] + '>'
diff --git a/wiki2texi.py b/wiki2texi.py
index 3d74f80..a7b5e92 100644
--- a/wiki2texi.py
+++ b/wiki2texi.py
@@ -132,6 +132,12 @@ class TexiWikiMarkup (WikiMarkup):
s = self.format(elt['content'])
self.nested -= 1
return "@code{%s}" % s
+ elif elt['tag'] == 'div':
+ s = ''
+ if 'args' in elt and 'id' in elt['args']:
+ s += "\n@anchor{%s}\n" % elt['args']['id']
+ s += self.format(elt['content'])
+ return s
else:
s = '<' + elt['tag']
if elt['args']:
diff --git a/wiki2text.py b/wiki2text.py
index c92dbc0..6308da1 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -151,7 +151,7 @@ class TextWikiMarkup (WikiMarkup):
else:
s = '<' + elt['tag']
if elt['args']:
- s += ' ' + elt['args']
+ s += ' ' + str(elt['args'])
s += '>' + self.format(elt['content']) + '</' + elt['tag'] + '>'
return s
diff --git a/wikimarkup.py b/wikimarkup.py
index 18e9a21..adaa1a2 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -19,12 +19,74 @@ import sys
import re
from types import *
-__all__ = [ "BaseWikiMarkup", "WikiMarkup" ]
+__all__ = [ "BaseWikiMarkup", "WikiMarkup",
+ "TagAttributes", "TagAttributeSyntax" ]
+
+class TagAttributeSyntax(Exception):
+ def __init__(self, value):
+ self.value = value
+ def __str__(self):
+ return repr(self.value)
+
+class TagAttributes(object):
+ attrstart = re.compile("^(?P<attr>[a-zA-Z0-9_-]+)(?P<eq>=\")?")
+ valseg = re.compile("^[^\\\"]+")
+ tab = {}
+ printable = None
+ def __init__(self, string):
+ if not string:
+ self.printable = ''
+ return
+ self.printable = string
+ s = string
+ self.tab = {}
+ while s != '':
+ s = s.strip()
+ m = self.attrstart.match(s)
+ if m:
+ name = m.group('attr')
+ val = ''
+ s = s[m.end(0):]
+ if m.group('eq'):
+ while 1:
+ m = self.valseg.match(s)
+ val += m.group(0)
+ s = s[m.end(0):]
+ if s[0] == '\\':
+ val += s[1]
+ s += 2
+ elif s[0] == '"':
+ s = s[1:]
+ break
+ else:
+ val = 1
+ self.tab[name] = val
+ else:
+ raise TagAttributeSyntax(s)
+ def __len__(self):
+ return len(self.tab)
+ def __getitem__(self, key):
+ return self.tab[key]
+ def __contains__(self, key):
+ return key in self.tab
+ def __iter__(self):
+ for key in self.tab:
+ yield(key)
+ def has_key(self, key):
+ return self.__contains__(key)
+ def __setitem__(self, key, value):
+ self.tab[key] = value
+ def __delitem__(self, key):
+ del self.tab[key]
+ def __str__(self):
+ return self.printable
+ def __repr__(self):
+ return self.printable
class BaseWikiMarkup(object):
delim = re.compile("^==+|==+[ \\t]*$|(^----$)|^\\*+|^#+|^[;:]+|(\\[\\[)|\\[|(\\{\\{)|(\\]\\])|\\]|(\\}\\})|\\||(\\'\\'\\'?)|<")
- otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>.+))?\s*(?P<closed>/)?>")
+ otag = re.compile("<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^>]+))?\s*(?P<closed>/)?>")
ctag = re.compile("</(?P<tag>[a-zA-Z0-9_]+)\s*>")
refstart = re.compile("^https?://")
@@ -154,7 +216,7 @@ class BaseWikiMarkup(object):
def dump(self, tree, level=0, file=sys.stdout):
for node in tree:
self.dump_node(node, level, file)
-
+
def tokread(self):
line = None
pos = 0
@@ -208,13 +270,17 @@ class BaseWikiMarkup(object):
break
continue
elif m.group('tag') in self.tags:
- t = { 'type': 'OTAG',
- 'tag': m.group('tag'),
- 'args': m.group('args') }
- yield(t)
- if m.group('closed'):
- t['type'] = 'CTAG'
+ try:
+ t = { 'type': 'OTAG',
+ 'tag': m.group('tag'),
+ 'args': TagAttributes(m.group('args')) }
yield(t)
+ if m.group('closed'):
+ t['type'] = 'CTAG'
+ yield(t)
+ except TagAttributeSyntax:
+ yield({'type': 'TEXT',
+ 'content': m.group(0)})
continue
else:
m = self.ctag.match(line, pos)
@@ -664,7 +730,7 @@ class BaseWikiMarkup(object):
self.tokind = save
s = '<' + tag['tag']
if 'args' in tag and tag['args']:
- s += ' ' + tag['args']
+ s += ' ' + str(tag['args'])
del tag['args']
s += '>'
if 'content' in tag:

Return to:

Send suggestions and report system problems to the System administrator.