1 files changed, 192 insertions, 8 deletions
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index ee1748c..b5bd708 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -15,11 +15,186 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+from wikitoken import *
 from wikimarkup import *
 from wikins import wiki_ns_re, wiki_ns
 import re
 import urllib
 
+class TextSeqNode(WikiSeqNode):
+    def format(self):
+        """Join the children's text, adding a blank when the text so far is longer than one character and does not end in whitespace."""
+        out = ""
+        for node in self.content:
+            sep = ' ' if len(out) > 1 and not out[-1].isspace() else ''
+            out += sep + node.format()
+        return out
+    
+class TextTextNode(WikiTextNode):
+    def format(self):
+        """Return the node's text.
+        A list of fragments is joined with one blank, or two blanks when the
+        text so far ends in a full stop; any other content is returned as is.
+        """
+        if not isinstance(self.content, list):
+            return self.content
+        string = ""
+        for s in self.content:
+            if string:
+                string += "  " if string.endswith(".") else " "
+            string += s
+        return string
+
+class TextPreNode(WikiSeqNode):
+    def format(self):
+        """Return the children's text concatenated as is, ending in a newline."""
+        string = ""
+        for x in self.content:
+            string += x.format()
+        return string + '\n'
+
+class TextParaNode(WikiSeqNode):
+    def format(self):
+        """Return the children's text run through parser.fmtpara, plus a blank line."""
+        string = ""
+        for x in self.content:
+            string += x.format()
+        return self.parser.fmtpara(string) + '\n\n'
+
+class TextItNode(WikiSeqNode):
+    def format(self):
+        """Return the non-empty child texts, blank-separated, between underscores."""
+        string = ""
+        for s in (x.format() for x in self.content):
+            if s:
+                string += " " + s
+        return "_" + string.lstrip(" ") + "_"
+        
+class TextBoldNode(WikiSeqNode):
+    def format(self):
+        """Return the children's text in upper case, each child preceded by one
+        blank (two when the text so far ends in a full stop)."""
+        string = ""
+        for x in self.content:
+            # The separator also precedes the first child, so the result starts with a blank.
+            string += "  " if string.endswith(".") else " "
+            string += x.format()
+        return string.upper()
+
+class TextLinkNode(WikiSeqNode):
+    def format(self):
+        """Return the plain-text rendering of a link.
+        content[0] is the target, optionally prefixed by "qualifier:";
+        content[1], when present, is used as the label.  With parser.references
+        set the result is an xref ("label (see target) "), otherwise just the
+        label, or the raw target text when there is no label.
+        """
+        arg = self.content[0].format()
+        s = [x.format() for x in self.content] if len(self.content) > 1 else None
+        text = s[1] if s else None
+        if s:
+            if s[0] == 'disambigR' or s[0] == 'wikiquote':
+                return ""
+            if s[1] == 'thumb':
+                return ""
+        (qual,sep,tgt) = arg.partition(':')
+        if tgt != '':
+            ns = self.parser.wiki_ns_name(qual)
+            if ns:
+                if ns == 'NS_IMAGE':
+                    if not self.parser.references:
+                        return ""
+                    text = "[%s: %s]" % (qual, text if text else arg)
+                    # NOTE(review): image_base is read from the node, not self.parser -- confirm.
+                    tgt = self.image_base + '/' + \
+                                 urllib.quote(tgt) + \
+                                 '/250px-' + urllib.quote(tgt)
+                elif ns == 'NS_MEDIA':
+                    text = "[%s]" % (qual)
+                else:
+                    tgt = self.parser.mktgt(tgt)
+            elif self.type == 'LINK' and qual in self.parser.langtab:
+                text = self.parser.langtab[qual] + ": " + tgt
+                tgt = self.parser.mktgt(tgt, qual)
+            else:
+                tgt = self.parser.mktgt(tgt)
+        else:
+            tgt = self.parser.mktgt(arg)
+        if self.parser.references:
+            # Same wording as TextRefNode; avoids printing "None (see ...)".
+            return "%s (see %s) " % (text, tgt) if text else "see " + tgt
+        return text if text else arg
+        
+class TextTmplNode(TextLinkNode):
+    def format(self):  # template: the TextLinkNode rendering wrapped in [ ]
+        return '[' + super(TextTmplNode, self).format() + ']'
+        
+class TextBarNode(WikiNode):
+    def format(self):
+        """Return a rule of dashes centred within the parser's output width."""
+        # Widths below 5 are treated as 5, so the dash count is never negative.
+        width = max(self.parser.width, 5)
+        return "\n" + ("-" * (width - 5)).center(width - 1) + "\n"
+
+class TextHdrNode(WikiHdrNode):
+    def format(self):
+        """Return the heading as a line of self.level asterisks followed by its text."""
+        return "\n" + ("*" * self.level) + " " + self.content.format().lstrip(" ") + "\n\n"
+        
+class TextRefNode(WikiRefNode):
+    def format(self):
+        """Return "label (see ref) ", or "see ref" when the label is empty."""
+        label = self.content.format()
+        if not label:
+            return "see " + self.ref
+        return "%s (see %s) " % (label, self.ref)
+
+class TextEnvNode(WikiEnvNode):
+    def format(self):
+        """Render every item of a list environment, each ending in a newline."""
+        kind = self.envtype
+        lev = self.level
+        if lev > self.parser.width - 4:
+            lev = 1
+        string = ""
+        n = 1
+        for s in self.content:
+            if not string.endswith("\n"):
+                string += "\n"
+            x = s.content.format()
+            if kind == "unnumbered":
+                string += self.parser.fmtpara(self.parser.indent(lev, "- " + x.lstrip(" ")))
+            elif kind == "numbered":
+                string += self.parser.fmtpara(self.parser.indent(lev, "%d. %s" % (n, x)))
+                n += 1
+            elif kind == "defn":
+                if s.subtype == 0:
+                    string += self.parser.indent(lev-1, x)
+                else:
+                    string += self.parser.indent(lev+3, x)
+            if not string.endswith("\n"):
+                string += "\n"
+        # Return after the loop so all items are rendered (and empty input gives "").
+        return string
+
+class TextIndNode(WikiIndNode):
+    def format(self):  # content prefixed by self.level blanks, newline-terminated
+        return (" " * self.level) + self.content.format() + '\n'
+
+class TextTagNode(WikiTagNode):
+    def format(self):
+        """Render <code> as its bare content; echo any other tag around its content."""
+        if self.tag == 'code':
+            self.parser.nested += 1
+            s = self.content.format()
+            self.parser.nested -= 1
+            return s
+        s = '<' + self.tag
+        if self.args:
+            s += ' ' + str(self.args)
+        return s + '>' + self.content.format() + '</' + self.tag + '>'
+    
+
 class TextWikiMarkup (WikiMarkup):
     """
     A (general-purpose Wiki->Text translator class.
@@ -36,20 +211,29 @@ class TextWikiMarkup (WikiMarkup):
     num = 0
     
     def __init__(self, *args, **keywords):
-        WikiMarkup.__init__(self, *args, **keywords)
+        super(TextWikiMarkup,self).__init__(*args, **keywords)
         if 'width' in keywords:
             self.width = keywords['width']
         if 'refs' in keywords:
             self.references = keywords['refs']
         if 'markup' in keywords:
             self.markup = keywords['markup']
-
-    def xref(self, text, target):
-        if text:
-            return "%s (see %s) " % (text, target)
-        else:
-            return "see " + target
-
+        # Register the text-rendering node classes in the token table.
+        self.token_class['SEQ'] = TextSeqNode
+        self.token_class['TEXT'] = TextTextNode
+        self.token_class['PRE'] = TextPreNode
+        self.token_class['PARA'] = TextParaNode
+        self.token_class['IT'] = TextItNode
+        self.token_class['BOLD'] = TextBoldNode
+        self.token_class['LINK'] = TextLinkNode
+        self.token_class['TMPL'] = TextTmplNode
+        self.token_class['BAR'] = TextBarNode
+        self.token_class['HDR'] = TextHdrNode
+        self.token_class['REF'] = TextRefNode
+        self.token_class['ENV'] = TextEnvNode
+        self.token_class['IND'] = TextIndNode
+        self.token_class['TAG'] = TextTagNode
+    
     def wiki_ns_name(self, str):
         if str in wiki_ns[self.lang]:
             return wiki_ns[self.lang][str]