#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2008-2018 Sergey Poznyakoff
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function
from __future__ import unicode_literals
import sys
import re
import tempfile
import xml.etree.ElementTree as etree
from optparse import OptionParser
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
from WikiTrans.wiki2html  import HtmlWikiMarkup, HtmlWiktionaryMarkup
from WikiTrans.wiki2text  import TextWikiMarkup, TextWiktionaryMarkup
from WikiTrans.wiki2texi  import TexiWikiMarkup
from WikiTrans.wikimarkup import WikiMarkup
from WikiTrans.wikidump   import DumpWikiMarkup

# Python 2.7 only: make UTF-8 the interpreter-wide default encoding.
# Doing this via encode('utf_8')/unicode, which is supposed to be the
# right way, does not work in Python 2.7.  Calling
# sys.setdefaultencoding directly is not possible either, because
# site.py deletes that symbol from the sys namespace after setting the
# default encoding.  Reloading sys brings the symbol back.
try:
    reload(sys)
    sys.setdefaultencoding('utf-8')
except (NameError, AttributeError):
    # Python 3: reload() is not a builtin (NameError) and
    # sys.setdefaultencoding does not exist (AttributeError).
    # Nothing to do there, so only these two errors are ignored.
    pass

# Converter classes, keyed first by output document type and then by
# input document type.  An input type that is absent from an inner
# table is not supported for that output type.
handlers = {
    'dump': {'default': DumpWikiMarkup, 'wiktionary': DumpWikiMarkup},
    'html': {'default': HtmlWikiMarkup, 'wiktionary': HtmlWiktionaryMarkup},
    'text': {'default': TextWikiMarkup, 'wiktionary': TextWiktionaryMarkup},
    'texi': {'default': TexiWikiMarkup},
}

def setkw(option, opt, value, parser):
    """optparse callback: record a KEY=VALUE setting in parser.values.kwdict.

    option, opt -- supplied by optparse; not used here.
    value       -- the option argument, expected in the form KEY=VALUE.
    parser      -- the parser being run; its values.kwdict is updated.

    A missing or empty kwdict is replaced by a fresh dictionary first.
    Arguments without '=' or with an empty value are ignored.
    """
    settings = parser.values
    if not settings.kwdict:
        settings.kwdict = {}
    name, _, setting = value.partition('=')
    if not setting:
        return
    settings.kwdict[name] = setting

def getwiki(url):
    """Download a MediaWiki XML export from url and return its wiki text.

    The document must contain a page/revision/text element.  If the root
    element has a "version" attribute, the elements are looked up in the
    http://www.mediawiki.org/xml/export-VERSION/ namespace; otherwise
    they are looked up without a namespace.

    Returns the content of the text element encoded as UTF-8 (an empty
    byte string if the element has no content).

    If the text element is missing, the program is terminated: the
    message is written to standard error and the exit status is non-zero.
    """
    # The URL opener lives in different modules in Python 2 and 3.
    if sys.version_info[0] > 2:
        from urllib.request import urlopen
    else:
        from urllib import urlopen

    # Parse straight from memory; no temporary file is needed.
    response = urlopen(url)
    try:
        root = etree.fromstring(response.read())
    finally:
        response.close()

    version = root.attrib.get('version')
    if version is None:
        # No version, hence no export namespace to qualify the path with.
        text = root.find('page/revision/text')
    else:
        ns = {'wiki': 'http://www.mediawiki.org/xml/export-%s/' % version}
        text = root.find('wiki:page/wiki:revision/wiki:text', ns)

    if text is None:
        # sys.exit with a string prints it to stderr and exits with 1.
        sys.exit("no page/revision/text element in the downloaded page")
    # text.text is None for an empty element.
    return (text.text or '').encode('utf-8')
        
def _make_option_parser():
    """Build and return the OptionParser describing the command line."""
    usage = '%prog [OPTIONS] ARG'
    version = '%prog 1.0'
    description = """Translates MediaWiki documents markup to various other formats.
If ARG looks like a URL, the wiki text to be converted will be downloaded
from that URL.
Otherwise, if --base-url is given, ARG is treated as the name of the page to
get from the WikiMedia istallation at that URL.
Otherwise, ARG is name of the file to read wiki material from.    
"""
    epilog = "Report bugs to: <gray+wikitrans@gnu.org.ua>"

    parser = OptionParser(usage=usage,
                          version=version,
                          description=description,
                          epilog=epilog)
    parser.add_option('-v', '--verbose',
                      action="count", dest="verbose",
                      help="verbose operation")
    parser.add_option('-I', '--input-type',
                      action='store', type='string', dest='itype',
                      default='default',
                      help='set input document type ("default" or "wiktionary")')
    parser.add_option('-t', '--to', '--type',
                      action='store', type='string', dest='otype',
                      default='html',
                      help='set output document type ("html" (default), "texi" or "text")')
    parser.add_option('-l', '--lang',
                      action='store', type='string', dest='lang',
                      default='pl',
                      help='set input document language')
    parser.add_option('-o', '--option',
                      action='callback', callback=setkw,
                      type='string', dest='kwdict',
                      default={},
                      help='set keyword option for the parser class')
    parser.add_option('-d', '--debug',
                      action='store', type='int', dest='debug',
                      default=0,
                      help='set debug level (0..100)')
    parser.add_option('-D', '--dump',
                      action='store_const', const='dump',
                      dest='otype',
                      help='dump parse tree and exit; similar to --type=dump')
    parser.add_option('-b', '--base-url',
                      action='store', type='string', dest='base_url',
                      help='set base url')
    return parser


def main():
    """Command line entry point: convert one wiki document and print it.

    Exactly one positional argument is required.  As the help text
    states, it is interpreted in this order:

      1. a http(s)/ftp(s) URL: the export is downloaded from it;
      2. if --base-url is given: the name of a page to export from there;
      3. "-": the wiki text is read from standard input;
      4. anything else: the name of a file holding the wiki text.

    The converted document is written to standard output.  Always
    terminates through SystemExit: status 0 on success, 1 if the
    requested output/input type combination is not supported (the
    message goes to standard error), and the optparse status for command
    line errors.
    """
    parser = _make_option_parser()
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("bad number of arguments")  # does not return

    arg = args[0]
    # A URL takes precedence over --base-url, as the help text promises.
    if re.match(r'^(http|ftp)s?://', arg):
        options.kwdict['text'] = getwiki(arg)
    elif options.base_url:
        options.kwdict['text'] = getwiki(options.base_url + '/wiki/Special:Export/' + arg)
    elif arg == '-':
        options.kwdict['file'] = sys.stdin
    else:
        options.kwdict['filename'] = arg

    options.kwdict['lang'] = options.lang # FIXME

    # Dumps are indented by default unless the user chose otherwise.
    if options.otype == 'dump' and 'indent' not in options.kwdict:
        options.kwdict['indent'] = 2

    # Diagnostics go to stderr so they never mix with converted output.
    if options.otype not in handlers:
        print("unsupported output type: %s" % options.otype, file=sys.stderr)
        sys.exit(1)
    if options.itype not in handlers[options.otype]:
        print("input type %s is not supported for %s output" % (options.itype, options.otype),
              file=sys.stderr)
        sys.exit(1)

    markup = handlers[options.otype][options.itype](**options.kwdict)
    markup.debug_level = options.debug
    markup.parse()
    print("%s" % str(markup))
    sys.exit(0)

# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()