#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2018 Sergey Poznyakoff # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . from __future__ import print_function import sys import re from types import * from WikiTrans.wikitoken import * __all__ = [ "BaseWikiMarkup", "WikiMarkup", "TagAttributes", "TagAttributeSyntax" ] class UnexpectedToken(Exception): def __init__(self, value): self.value = value class TagAttributeSyntax(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class TagAttributes(object): attrstart = re.compile("^(?P[a-zA-Z0-9_-]+)(?P=\")?") valseg = re.compile("^[^\\\"]+") tab = {} printable = None def __init__(self, string): if not string: self.printable = '' return self.printable = string s = string self.tab = {} while s != '': s = s.strip() m = self.attrstart.match(s) if m: name = m.group('attr') val = '' s = s[m.end(0):] if m.group('eq'): while 1: m = self.valseg.match(s) val += m.group(0) s = s[m.end(0):] if s[0] == '\\': val += s[1] s += 2 elif s[0] == '"': s = s[1:] break else: val = 1 self.tab[name] = val else: raise TagAttributeSyntax(s) def __len__(self): return len(self.tab) def __getitem__(self, key): return self.tab[key] def __contains__(self, key): return key in self.tab def __iter__(self): for key in self.tab: yield(key) def has_key(self, key): return self.__contains__(key) def __setitem__(self, key, value): self.tab[key] = value def 
class BaseWikiMarkup(object):
    """Parse Wiki markup into a tree of Wiki nodes.

    Subclasses must override input() to supply the text, one line per
    call.  Call parse() and inspect the resulting self.tree.
    """

    # NOTE(review): the named groups in otag/ctag ("(?P<tag>" etc.) and the
    # "</" prefix of ctag had been stripped by an HTML-removing pass;
    # restored, since the code below calls m.group('tag'), m.group('args')
    # and m.group('closed').
    delim = re.compile(r"^==+[ \t]*|[ \t]*==+[ \t]*$|(^----$)|^\*+|^#+|^[;:]+|(\[\[)|\[|(\{\{)|(\]\])|\]|(\}\})|\||(\'\'\'?)|<")
    otag = re.compile(r"<(?P<tag>[a-zA-Z0-9_]+)(?:\s+(?P<args>[^>]+))?\s*(?P<closed>/)?>")
    ctag = re.compile(r"</(?P<tag>[a-zA-Z0-9_]+)\s*>")
    refstart = re.compile(r"^https?://")

    # Closing counterpart for each opening inline delimiter.
    close_delim = {
        '[': ']',
        '[[': ']]',
        '{{': '}}'
    }

    # Environment types: delimiter -> [environment name, element subtype]
    envtypes = {
        "*": ["unnumbered", 0],
        "#": ["numbered", 0],
        ";": ["defn", 0],
        ":": ["defn", 1]
    }

    toklist = None   # list of tokens produced by tokenize()
    tokind = 0       # index of the current token in toklist
    newline = 0      # true when the current token starts a line
    tree = None      # parse tree built by parse()

    # HTML-like tags recognized by the tokenizer.
    tags = ['code', 'nowiki', 'tt', 'div']

    debug_level = 0

    def dprint(self, lev, fmt, *argv):
        """Print a debugging message if debug_level is at least lev."""
        if self.debug_level >= lev:
            for l in (fmt % argv).split('\n'):
                print("[DEBUG] %s" % l)

    inline_delims = ["''", "'''", "[", "]", "[[", "]]", "{{", "}}", "|"]

    # Token/node type -> node class factory.
    token_class = {
        'NIL': WikiNode,
        'NL': WikiNode,
        'OTAG': WikiTagNode,
        'CTAG': WikiTagNode,
        'TAG': WikiTagNode,
        'DELIM': WikiDelimNode,
        'TEXT': WikiTextNode,
        'PRE': WikiContentNode,
        'PARA': WikiSeqNode,
        'BAR': WikiNode,
        'SEQ': WikiSeqNode,
        'IND': WikiIndNode,
        'REF': WikiRefNode,
        'TMPL': WikiSeqNode,
        'IT': WikiSeqNode,
        'BOLD': WikiSeqNode,
        'ELT': WikiEltNode,
        'ENV': WikiEnvNode,
        'LINK': WikiSeqNode,
        'HDR': WikiHdrNode
    }

    def __createWikiNode(self, **kwarg):
        """Instantiate the node class registered for kwarg['type']."""
        return self.token_class[kwarg['type']](self, **kwarg)

    def tokread(self):
        """Read input line by line and generate tokens.

        Yields NIL at end of input, NL for empty lines, and TEXT,
        DELIM, OTAG and CTAG tokens otherwise.
        """
        line = None
        pos = 0
        while 1:
            if not line or pos == len(line):
                try:
                    line = self.input()
                    pos = 0
                except StopIteration:
                    line = u''

            if not line or line == "":
                yield(self.__createWikiNode(type='NIL'))
                break

            if line == '\n':
                yield(self.__createWikiNode(type='NL'))
                line = None
                continue

            self.dprint(100, "LINE: %s", line[pos:])
            m = self.delim.search(line, pos)
            if m:
                if pos < m.start(0):
                    yield(self.__createWikiNode(type='TEXT',
                                                content=line[pos:m.start(0)]))
                    pos = m.start(0)
                if line[m.start(0)] == '<':
                    m = self.otag.match(line, pos)
                    if m:
                        pos = m.end(0)
                        if m.group('tag') == 'nowiki':
                            # Pass everything up to </nowiki> through
                            # verbatim, possibly spanning several lines.
                            if not m.group('closed'):
                                while 1:
                                    try:
                                        m = self.ctag.search(line, pos)
                                        if m and m.group('tag') == 'nowiki':
                                            yield(self.__createWikiNode(
                                                type='TEXT',
                                                content=line[pos:m.start(0)]))
                                            pos = m.end(0)
                                            break

                                        yield(self.__createWikiNode(
                                            type='TEXT',
                                            content=line[pos:]))

                                        line = self.input()
                                        pos = 0
                                    except StopIteration:
                                        break
                            continue
                        elif m.group('tag') in self.tags:
                            try:
                                yield(self.__createWikiNode(
                                    type='OTAG',
                                    tag=m.group('tag'),
                                    # Guard pos < len(line): the tag may end
                                    # the line without a trailing newline.
                                    isblock=(pos < len(line)
                                             and line[pos] == '\n'),
                                    args=TagAttributes(m.group('args'))))
                                if m.group('closed'):
                                    yield(self.__createWikiNode(
                                        type='CTAG',
                                        tag=m.group('tag')))
                            except TagAttributeSyntax:
                                yield(self.__createWikiNode(
                                    type='TEXT', content=m.group(0)))
                            continue
                        else:
                            yield(self.__createWikiNode(
                                type='TEXT', content=m.group(0)))
                            continue
                    else:
                        m = self.ctag.match(line, pos)
                        if m:
                            if m.group('tag') in self.tags:
                                yield(self.__createWikiNode(
                                    type='CTAG', tag=m.group('tag')))
                                pos = m.end(0)
                                continue
                        else:
                            # A lone '<': emit it as text and move on.
                            yield(self.__createWikiNode(
                                type='TEXT', content=line[pos:pos+1]))
                            pos += 1
                            continue
                else:
                    pos = m.end(0)
                    content = m.group(0)
                    if content[0] in self.envtypes:
                        node = self.__createWikiNode(
                            type='DELIM',
                            content=content,
                            isblock=True,
                            continuation=(pos < len(line)
                                          and line[pos] == ":"))
                        if node.continuation:
                            node.content += node.content[0]
                            pos += 1
                        yield(node)
                        while pos < len(line) and line[pos] in [' ', '\t']:
                            pos += 1
                    else:
                        yield(self.__createWikiNode(
                            type='DELIM',
                            isblock=(content.strip()
                                     not in self.inline_delims),
                            content=content.strip()))
                    continue

            # No further delimiter on this line: flush the remainder.
            if line:
                if line[-1] == '\n':
                    if line[pos:-1] != '':
                        yield(self.__createWikiNode(type='TEXT',
                                                    content=line[pos:-1]))
                    yield(self.__createWikiNode(type='NL'))
                else:
                    yield(self.__createWikiNode(type='TEXT',
                                                content=line[pos:]))
                line = None

    def input(self):
        """Return the next line of input; overridden by subclasses."""
        return None

    def swaptkn(self, i, j):
        """Swap the tokens at indices i and j of toklist."""
        self.dprint(80, "SWAPPING %s <-> %s", i, j)
        x = self.toklist[i]
        self.toklist[i] = self.toklist[j]
        self.toklist[j] = x

    def tokenize(self):
        """Fill toklist with tokens and normalize bold/italic markers."""
        self.toklist = []
        for tok in self.tokread():
            self.dprint(100, "TOK: %s", tok)
            self.toklist.append(tok)
        # Determine and fix up the ordering of bold and italic markers
        # There are three possible cases:
        #
        # 1a. '''a b ''c'' d'''
        # 1b. ''a b '''c''' d''
        #
        # 2a. '''''a b'' c d'''
        # 2b. '''''a b''' c d''
        #
        # 3a. '''a b ''c d'''''
        # 3b. ''a b '''c d'''''
        stack = []
        for i in range(0, len(self.toklist)):
            if self.toklist[i].type == 'DELIM' \
               and (self.toklist[i].content == "''"
                    or self.toklist[i].content == "'''"):
                if len(stack) > 0:
                    if self.toklist[stack[-1]].content \
                       == self.toklist[i].content:
                        # Case 1: just pop the matching delimiter off
                        # the stack
                        stack.pop()
                    elif len(stack) == 2 and stack[-2] + 1 == stack[-1]:
                        # Case 2: swap delimiters saved on stack ...
                        self.swaptkn(stack[-2], stack[-1])
                        # and pop off the matching one
                        stack.pop()
                    elif i + 1 < len(self.toklist) \
                         and self.toklist[i+1].type == 'DELIM' \
                         and self.toklist[stack[-1]].content \
                             == self.toklist[i+1].content:
                        # Case 3: swap current and next tokens.
                        # (The guard must be "i + 1 < len": the old
                        # "i < len" test was always true and let
                        # toklist[i+1] raise IndexError on the last token.)
                        self.swaptkn(i, i+1)
                        # and pop off the matching one
                        stack.pop()
                    else:
                        # Push the token on stack
                        stack.append(i)
                else:
                    # Push the token on stack
                    stack.append(i)
        # Redefine all non-matched tokens as TEXT
        for i in stack:
            self.toklist[i].type = 'TEXT'

    # FIXME: class-level mutable — shared between instances; subclass
    # constructors do not call super().__init__(), so it cannot simply be
    # moved there without auditing callers.
    mark = []

    def push_mark(self):
        """Save the current token position for possible backtracking."""
        self.mark.append(self.tokind)

    def pop_mark(self):
        """Backtrack to the most recently saved token position."""
        self.tokind = self.mark.pop()

    def clear_mark(self):
        """Discard the most recently saved token position."""
        self.mark.pop()

    def lookahead(self, off=0):
        """Return the token at offset off from the current one."""
        tok = self.toklist[self.tokind + off]
        self.dprint(20, "lookahead(%s): %s", off, tok)
        return tok

    def setkn(self, val):
        """Replace the current token."""
        self.toklist[self.tokind] = val

    def getkn(self):
        """Return the current token and advance; NIL at end of list."""
        self.newline = (self.tokind == 0
                        or self.toklist[self.tokind-1].type == 'NL')
        if self.tokind == len(self.toklist):
            return self.__createWikiNode(type='NIL')
        tok = self.toklist[self.tokind]
        self.tokind = self.tokind + 1
        self.dprint(20, "getkn: %s", tok)
        return tok

    def ungetkn(self, tok=None):
        """Step back one token, optionally replacing it with tok."""
        self.tokind = self.tokind - 1
        self.newline = (self.tokind == 0
                        or self.toklist[self.tokind-1].type == 'NL')
        if tok:
            self.toklist[self.tokind] = tok
        self.dprint(20, "ungetkn: %s", tok)
        return self.toklist[self.tokind]

    def fixuptkn(self, tok):
        """Replace the most recently read token with tok."""
        if self.tokind == 0:
            raise IndexError('wikimarkup.fixuptkn called at start of input')
        self.toklist[self.tokind-1] = tok
        return tok

    def dump(self, tree, file=sys.stdout):
        """Write a string dump of each node of tree to file."""
        for node in tree:
            file.write(str(node))
            file.write('\n')

    def is_block_end(self, tok):
        """Return True if tok terminates the current block."""
        if tok.type == 'NIL':
            return True
        elif tok.type == 'NL':
            if self.lookahead().type == 'NIL':
                return True
            elif self.lookahead().type == 'NL':
                self.getkn()
                return True
        elif tok.type in ['DELIM', 'CTAG', 'TAG']:
            if tok.isblock:
                self.ungetkn(tok)
                return True
        return False

    def parse_para(self, tok):
        """Parse a PARA (or indented PRE) block starting at tok."""
        self.dprint(80, "ENTER parse_para: %s", tok)

        acc = {'seq': [], 'textlist': []}

        def flush():
            # Collapse accumulated text fragments into a single TEXT node.
            if acc['textlist']:
                acc['seq'].append(
                    self.__createWikiNode(type='TEXT',
                                          content=''.join(acc['textlist'])))
                acc['textlist'] = []

        if isinstance(tok, WikiContentNode) \
           and isinstance(tok.content, str) \
           and re.match("^[ \t]", tok.content):
            type = 'PRE'
            rx = re.compile(r"^\S")
        else:
            type = 'PARA'
            rx = re.compile("^[ \t]")

        while not self.is_block_end(tok):
            if tok.type == 'TEXT':
                # A change of indentation at line start ends the block.
                if rx and self.newline and rx.match(tok.content):
                    self.ungetkn()
                    break
                acc['textlist'].append(tok.content)
            elif tok.type == 'NL':
                acc['textlist'].append('\n')
            elif tok.type == 'OTAG':
                flush()
                acc['seq'].append(self.parse_tag(tok))
            elif tok.type == 'DELIM':
                flush()
                acc['seq'].append(self.parse_inline_delim(tok))
            else:
                raise UnexpectedToken(tok)
            tok = self.getkn()
        flush()

        if acc['seq']:
            tok = self.__createWikiNode(type=type, content=acc['seq'])
        else:
            tok = None
        self.dprint(80, "LEAVE parse_para=%s", tok)
        return tok

    def parse_block_delim(self, tok):
        """Parse a block construct introduced by the delimiter tok.

        Returns the parsed node, or None if tok does not actually start
        a block (the token is then pushed back).
        """
        self.dprint(80, "ENTER parse_block_delim")
        assert(tok.type == 'DELIM')
        if tok.content == "----":
            node = self.__createWikiNode(type='BAR')
        elif tok.content[0:2] == "==":
            node = self.parse_header(tok)
            if not node:
                tok = self.ungetkn(self.__createWikiNode(type='TEXT',
                                                         content=tok.content))
        elif tok.content[0] in self.envtypes:
            node = None
            if tok.content[0] == ':':
                # ':' right after ';' is a definition, not an indent.
                t = self.lookahead(-2)
                if not (t.type == 'DELIM' and t.content == ';'):
                    node = self.parse_indent(tok)
            if not node:
                node = self.parse_env(tok)
        else:
            self.ungetkn(tok)
            node = None
        self.dprint(80, "LEAVE parse_block_delim=%s", node)
        return node

    def parse_line(self):
        """Parse tokens up to the end of line into a SEQ node."""
        self.dprint(80, "ENTER parse_line")
        list = []
        while True:
            tok = self.getkn()
            if tok.type == 'NL' or tok.type == 'NIL':
                break
            elif tok.type == 'TEXT':
                list.append(tok)
            elif tok.type == 'DELIM':
                if tok.isblock:
                    # Block delimiters are literal text inside a line.
                    tok = self.__createWikiNode(type='TEXT',
                                                content=tok.content)
                    self.fixuptkn(tok)
                    list.append(tok)
                elif tok.content[0] == ":":
                    # FIXME
                    list.append(self.parse_indent(tok))
                    break
                else:
                    x = self.parse_inline_delim(tok)
                    if x:
                        list.append(x)
                    else:
                        list.append(self.fixuptkn(
                            self.__createWikiNode(type='TEXT',
                                                  content=tok.content)))
            elif tok.type == 'OTAG':
                if tok.isblock:
                    self.ungetkn()
                    break
                list.append(self.parse_tag(tok))
            else:
                list.append(tok)
        ret = self.__createWikiNode(type='SEQ', content=list)
        self.dprint(80, "LEAVE parse_line=%s", ret)
        return ret

    def parse_indent(self, tok):
        """Parse an indented (':'-prefixed) line into an IND node."""
        lev = len(tok.content)
        self.dprint(80, "ENTER parse_indent(%s)", lev)
        x = self.__createWikiNode(type='IND', level=lev,
                                  content=self.parse_line())
        self.dprint(80, "LEAVE parse_indent=%s", x)
        return x

    def parse_fontmod(self, delim, what):
        """Parse an italic or bold span delimited by delim.

        what is the resulting node type ('IT' or 'BOLD').  Returns None
        if no matching closing delimiter is found.
        """
        self.dprint(80, "ENTER parse_fontmod(%s,%s), tok %s",
                    delim, what, self.lookahead())
        seq = []
        text = ''
        while True:
            tok = self.getkn()
            if tok.type == 'TEXT':
                text += tok.content
            elif self.is_block_end(tok):
                self.dprint(80, "LEAVE parse_fontmod=%s", "None")
                return None
            elif tok.type == 'DELIM':
                if tok.content == delim:
                    break
                else:
                    if text:
                        seq.append(self.__createWikiNode(type='TEXT',
                                                         content=text))
                        text = ''
                    x = self.parse_inline_delim(tok)
                    if x:
                        seq.append(x)
                    else:
                        self.dprint(80, "LEAVE parse_fontmod=%s", "None")
                        return None
            elif tok.type == 'NL':
                seq.append(self.__createWikiNode(type='TEXT', content='\n'))
            else:
                self.dprint(80, "LEAVE parse_fontmod=None")
                return None
        if text:
            seq.append(self.__createWikiNode(type='TEXT', content=text))
        res = self.__createWikiNode(type=what, content=seq)
        self.dprint(80, "LEAVE parse_fontmod=%s", res)
        return res

    def parse_ref(self):
        """Parse an external reference '[url text]' into a REF node.

        Returns None (after which delimiter recovery runs) when the
        bracketed text is not a reference.
        """
        self.dprint(80, "ENTER parse_ref")
        tok = self.getkn()
        if not (tok.type == 'TEXT' and self.refstart.match(tok.content)):
            self.dprint(80, "LEAVE parse_ref=None")
            return None

        seq = []
        (ref, sep, text) = tok.content.partition(' ')
        if text:
            seq.insert(0, self.__createWikiNode(type='TEXT', content=text))

        while True:
            tok = self.getkn()
            if tok.type == 'NIL':
                self.dprint(80, "LEAVE parse_ref=None")
                return None
            elif self.is_block_end(tok):
                self.dprint(80, "LEAVE parse_ref=None")
                return None
            elif tok.type == 'DELIM':
                if tok.content == ']':
                    break
                else:
                    tok = self.parse_inline_delim(tok)
                    if tok:
                        seq.append(tok)
                    else:
                        self.dprint(80, "LEAVE parse_ref=None")
                        return None
            elif tok.type == 'OTAG':
                # Was "list.append(...)": 'list' is the builtin here; the
                # accumulator in this method is 'seq'.
                seq.append(self.parse_tag(tok))
            else:
                seq.append(tok)

        ret = self.__createWikiNode(
            type='REF',
            ref=ref,
            content=self.__createWikiNode(type='SEQ', content=seq))
        self.dprint(80, "LEAVE parse_ref= %s", ret)
        return ret

    def parse_link(self, type, delim):
        """Parse a '[[...]]' link or '{{...}}' template body.

        type is the resulting node type ('LINK' or 'TMPL'); delim is the
        closing delimiter.  '|' separates the parts.  Returns None on
        malformed input.
        """
        self.dprint(80, "ENTER parse_link(%s,%s)", type, delim)
        subtree = []
        list = []
        while True:
            tok = self.getkn()
            if tok.type == 'NIL':
                self.dprint(80, "LEAVE parse_link=None [EOF]")
                return None
            if tok.type == 'DELIM':
                if tok.content == delim:
                    if list:
                        subtree.append(self.__createWikiNode(type='SEQ',
                                                             content=list))
                    break
                elif tok.content == "|":
                    if len(list) > 1:
                        subtree.append(self.__createWikiNode(type='SEQ',
                                                             content=list))
                    elif list:
                        subtree.append(list[0])
                    list = []
                else:
                    x = self.parse_inline_delim(tok)
                    if x:
                        list.append(x)
                    else:
                        self.dprint(80, "LEAVE parse_link=None [bad inline]")
                        return None
            elif tok.type == 'TEXT':
                list.append(tok)
            else:
                self.dprint(80, "LEAVE parse_link=None [unexpected token]")
                return None
        ret = self.__createWikiNode(type=type, content=subtree)
        self.dprint(80, "LEAVE parse_link=%s", ret)
        return ret

    def parse_inline_delim(self, tok):
        """Parse the inline construct opened by delimiter tok.

        On failure, backtrack and degrade the delimiter (and its would-be
        closing counterpart) to plain text.
        """
        self.dprint(80, "ENTER parse_inline_delim")
        assert(tok.type == 'DELIM')
        self.push_mark()
        if tok.content == "''":
            x = self.parse_fontmod(tok.content, 'IT')
        elif tok.content == "'''":
            x = self.parse_fontmod(tok.content, 'BOLD')
        elif tok.content == "[":
            x = self.parse_ref()
        elif tok.content == "[[":
            x = self.parse_link('LINK', "]]")
        elif tok.content == "{{":
            x = self.parse_link('TMPL', "}}")
        else:
            x = None

        if x:
            self.clear_mark()
        else:
            # Delimiter recovery: treat the opening delimiter as text and
            # convert its matching closing delimiter (if any) to text too.
            self.dprint(80, "BEGIN DELIMITER RECOVERY: %s", tok)
            self.pop_mark()
            x = self.fixuptkn(self.__createWikiNode(type='TEXT',
                                                    content=tok.content))
            od = tok.content
            if od in self.close_delim:
                cd = self.close_delim[od]
                lev = 0
                for i, tok in enumerate(self.toklist[self.tokind+1:]):
                    if tok.type == 'NIL':
                        break
                    elif tok.type == 'DELIM':
                        if tok.content == od:
                            lev += 1
                        elif tok.content == cd:
                            # NOTE(review): nesting logic reconstructed from
                            # mangled source; at level 0 the close delimiter
                            # belongs to us and becomes text.
                            if lev == 0:
                                tok = self.__createWikiNode(
                                    type='TEXT', content=tok.content)
                                self.toklist[self.tokind+1+i] = tok
                                break
                            lev -= 1
            self.dprint(80, "END DELIMITER RECOVERY: %s", tok)
        self.dprint(80, "LEAVE parse_inline_delim=%s", x)
        return x

    def parse_tag(self, tag):
        """Parse the contents of an HTML-like tag into a TAG node.

        If the closing tag is missing, backtrack and return the opening
        tag as plain text.
        """
        self.dprint(80, "ENTER parse_tag")
        list = []
        self.push_mark()
        while True:
            tok = self.getkn()
            if tok.type == 'NIL':
                # No closing tag: degrade the opening tag to text.
                self.pop_mark()
                s = '<' + tag.tag
                if tag.args:
                    s += ' ' + str(tag.args)
                s += '>'
                node = self.__createWikiNode(type='TEXT', content=s)
                if tag.content:
                    self.tree[self.tokind:self.tokind] = tag.content
                self.dprint(80, "LEAVE parse_tag = %s (tree modified)", node)
                return node
            elif tok.type == 'DELIM':
                if tok.isblock:
                    tok = self.parse_block_delim(tok)
                else:
                    tok = self.parse_inline_delim(tok)
                if not tok:
                    tok = self.getkn()
            elif tok.type == 'CTAG':
                if tag.tag == tok.tag:
                    break
                # Foreign closing tag: keep it as literal text.
                # NOTE(review): the "</" + ... + ">" expression had been
                # stripped to '' by an HTML-removing pass; restored.
                s = '</' + tok.tag + '>'
                tok = self.fixuptkn(self.__createWikiNode(type='TEXT',
                                                          content=s))
            elif tok.type == 'NL':
                tok = self.__createWikiNode(type='TEXT', content='\n')
            list.append(tok)
        self.clear_mark()
        ret = self.__createWikiNode(
            type='TAG',
            tag=tag.tag,
            args=tag.args,
            isblock=tag.isblock,
            content=self.__createWikiNode(type='SEQ', content=list))
        self.dprint(80, "LEAVE parse_tag = %s", ret)
        return ret

    def parse_env(self, tok):
        """Parse a list environment (*, #, ;, :) into an ENV node."""
        type = self.envtypes[tok.content[0]][0]
        lev = len(tok.content)
        self.dprint(80, "ENTER parse_env(%s,%s)", type, lev)
        list = []
        while True:
            if tok.type == 'DELIM' \
               and tok.content[0] in self.envtypes \
               and type == self.envtypes[tok.content[0]][0]:
                if len(tok.content) < lev:
                    self.ungetkn()
                    break
                elif len(tok.content) > lev:
                    elt = self.parse_env(tok)
                else:
                    elt = self.parse_line()
                if not tok.continuation:
                    list.append(self.__createWikiNode(
                        type='ELT',
                        subtype=self.envtypes[tok.content[0]][1],
                        content=elt))
                    tok = self.getkn()
                    continue
                # Continuation line: append to the previous element.
                if list:
                    if list[-1].content.type != 'SEQ':
                        x = list[-1].content.content
                        # FIXME:
                        list[-1].content = self.__createWikiNode(
                            type='SEQ', content=[x])
                    list[-1].content.content.append(elt)
            else:
                self.ungetkn()
                break
            tok = self.getkn()
        ret = self.__createWikiNode(type='ENV',
                                    envtype=type,
                                    level=lev,
                                    content=list)
        self.dprint(80, "LEAVE parse_env=%s", ret)
        return ret

    def parse_header(self, tok):
        """Parse a '== ... ==' heading into an HDR node.

        Returns None (with the token position restored) if the heading
        is not well-formed.
        """
        self.dprint(80, "ENTER parse_header")
        self.push_mark()
        list = []
        delim = tok.content
        while True:
            tok = self.getkn()
            if tok.type == 'NL':
                self.pop_mark()
                self.dprint(80, "LEAVE parse_header=None")
                return None
            elif tok.type == 'TEXT':
                list.append(tok)
            elif tok.type == 'DELIM':
                if tok.content == delim:
                    if self.lookahead().type == 'NL':
                        self.getkn()
                        if self.lookahead().type == 'NL':
                            self.getkn()
                        break
                    else:
                        self.pop_mark()
                        self.dprint(80, "LEAVE parse_header=None")
                        return None
                elif tok.isblock:
                    self.pop_mark()
                    self.dprint(80, "LEAVE parse_header=None")
                    return None
                else:
                    list.append(self.parse_inline_delim(tok))
            elif tok.type == 'OTAG':
                if tok.isblock:
                    self.pop_mark()
                    self.dprint(80, "LEAVE parse_header=None")
                    return None
                list.append(self.parse_tag(tok))
        self.clear_mark()
        ret = self.__createWikiNode(
            type='HDR',
            level=len(delim),
            content=self.__createWikiNode(type='SEQ', content=list))
        self.dprint(80, "LEAVE parse_header=%s", ret)
        return ret

    def parse_block(self):
        """Parse and return the next block, or None at end of input."""
        tok = self.getkn()
        while tok.type == 'NL':
            tok = self.getkn()
        if tok is None or tok.type == 'NIL':
            return None
        elif tok.type == 'DELIM':
            tok = self.parse_block_delim(tok)
            if tok:
                return tok
            else:
                tok = self.getkn()
        elif tok.type == 'OTAG' and tok.isblock:
            return self.parse_tag(tok)
        return self.parse_para(tok)

    def parse(self):
        """Tokenize the input (if needed) and build self.tree."""
        if not self.toklist:
            self.tokenize()
        if self.debug_level >= 90:
            print("TOKEN DUMP BEGIN")
            self.dump(self.toklist)
            print("TOKEN DUMP END")
        self.tokind = 0
        self.tree = []
        while 1:
            subtree = self.parse_block()
            if subtree is None:
                break
            self.tree.append(subtree)
        if self.debug_level >= 70:
            print("TREE DUMP BEGIN")
            self.dump(self.tree)
            print("TREE DUMP END")

    def __str__(self):
        return str(self.tree)


class WikiMarkup(BaseWikiMarkup):
    """
    A derived class, that supplies a basic input method.

    Three types of inputs are available:

    1. filename=<name>
       The file <name> is opened and used for input.
    2. file=<file>
       The already opened file <file> is used for input.
    3. text=<string>
       Input is taken from <string>, line by line.

    Usage:

      obj = WikiMarkup(arg=val)
      obj.parse()
      ... Do whatever you need with obj.tree ...
    """
""" file = None text = None lang = 'en' html_base = 'http://%(lang)s.wiktionary.org/wiki/' image_base = 'http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf' media_base = 'http://www.mediawiki.org/xml/export-0.3' def __init__(self, *args, **keywords): for kw in keywords: if kw == 'file': self.file = keywords[kw] elif kw == 'filename': self.file = open(keywords[kw]) elif kw == 'text': self.text = keywords[kw].split("\n") elif kw == 'lang': self.lang = keywords[kw] elif kw == 'html_base': self.html_base = keywords[kw] elif kw == 'image_base': self.image_base = keywords[kw] elif kw == 'media_base': self.media_base = keywords[kw] def __del__(self): if self.file: self.file.close() def input(self): if self.file: return self.file.readline() elif self.text: return self.text.pop(0) + '\n' else: return None def is_lang_link(self, elt): if elt.type == 'LINK' \ and isinstance(elt.content, list) \ and len(elt.content) == 1: if elt.content[0].type == TEXT: m = re.match('([\w-]+):', elt.content[0].content) if m: # and m.group(1) in self.langtab: return True elif elt.content[0].type == 'SEQ' \ and len(elt.content[0].content) == 1 and\ elt.content[0].content[0].type == TEXT: m = re.match('([\w-]+):',elt.content[0].content[0].content) if m: # and m.group(1) in self.langtab: return True return False def is_empty_text(self, elt): if elt.type == 'TEXT': if re.search('\w', elt.content): return False return True return False def is_empty_para(self, seq): for x in seq: if not (self.is_lang_link(x) or self.is_empty_text(x)): return False return True # ISO 639 langtab = { "aa": "Afar", # Afar "ab": "Аҧсуа", # Abkhazian "ae": None, # Avestan "af": "Afrikaans", # Afrikaans "ak": "Akana", # Akan "als": "Alemannisch", "am": "አማርኛ", # Amharic "an": "Aragonés", # Aragonese "ang": "Englisc", "ar": "العربية" , # Arabic "arc": "ܐܪܡܝܐ", "as": "অসমীয়া", # Assamese "ast": "Asturian", "av": "Авар", # Avaric "ay": "Aymara", # Aymara "az": "Azərbaycan" , # Azerbaijani "ba": "Башҡорт", # Bashkir 
"bar": "Boarisch", "bat-smg": "Žemaitėška", "bcl": "Bikol", "be": "Беларуская", # Byelorussian; Belarusian "be-x-old": "Беларуская (тарашкевіца)", "bg": "Български", # Bulgarian "bh": "भोजपुरी", # Bihari "bi": "Bislama", # Bislama "bm": "Bamanankan", # Bambara "bn": "বাংলা" , # Bengali; Bangla "bo": "བོད་སྐད", # Tibetan "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" , "br": "Brezhoneg" , # Breton "bs": "Bosanski" , # Bosnian "bug": "Basa Ugi", "bxr": "Буряад", "ca": "Català" , # Catalan "cbk-zam": "Chavacano de Zamboanga", "cdo": "Mìng-dĕ̤ng-ngṳ̄", "cho": "Choctaw", "ce": "Нохчийн", # Chechen "ceb": "Sinugboanong Binisaya" , # Cebuano "ch": "Chamor", # Chamorro "chr": "ᏣᎳᎩ", "chy": "Tsetsêhestâhese", "co": "Cors", # Corsican "cr": "Nehiyaw", # Cree "crh": "Qırımtatarca", "cs": "Česky" , # Czech "csb": "Kaszëbsczi", "c": "Словѣньскъ", # Church Slavic "cv": "Чăваш", # Chuvash "cy": "Cymraeg" , # Welsh "da": "Dansk" , # Danish "de": "Deutsch" , # German "diq": "Zazaki", # Dimli (Southern Zazaki) "dsb": "Dolnoserbski", "dv": "ދިވެހިބަސް", # Divehi "dz": "ཇོང་ཁ", # Dzongkha; Bhutani "ee": "Eʋegbe", # Ewe "el": "Ελληνικά" , # Greek "eml": "Emiliàn e rumagnòl", "en": "English" , # English "eo": "Esperanto" , "es": "Español" , # Spanish "et": "Eesti" , # Estonian "eu": "Euskara" , # Basque "ext": "Estremeñ", "fa": "فارسی" , # Persian "ff": "Fulfulde", # Fulah "fi": "Suomi" , # Finnish "fiu-vro": "Võro", "fj": "Na Vosa Vakaviti",# Fijian; Fiji "fo": "Føroyskt" , # Faroese "fr": "Français" , # French "frp": "Arpitan", "fur": "Furlan", "fy": "Frysk", # Frisian "ga": "Gaeilge", # Irish "gan": "贛語 (Gànyŭ)", "gd": "Gàidhlig", # Scots; Gaelic "gl": "Gallego" , # Gallegan; Galician "glk": "گیلکی", "got": "𐌲𐌿𐍄𐌹𐍃𐌺𐍉𐍂𐌰𐌶𐌳𐌰", "gn": "Avañe'ẽ", # Guarani "g": "ગુજરાતી", # Gujarati "gv": "Gaelg", # Manx "ha": "هَوُسَ", # Hausa "hak": "Hak-kâ-fa / 客家話", "haw": "Hawai`i", "he": "עברית" , # Hebrew (formerly iw) "hi": "हिन्दी" , # Hindi "hif": "Fiji Hindi", "ho": "Hiri Mot", # Hiri Motu "hr": 
"Hrvatski" , # Croatian "hsb": "Hornjoserbsce", "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole "hu": "Magyar" , # Hungarian "hy": "Հայերեն", # Armenian "hz": "Otsiherero", # Herero "ia": "Interlingua", "ie": "Interlingue", "id": "Bahasa Indonesia",# Indonesian (formerly in) "ig": "Igbo", # Igbo "ii": "ꆇꉙ ", # Sichuan Yi "ik": "Iñupiak", # Inupiak "ilo": "Ilokano", "io": "Ido" , "is": "Íslenska" , # Icelandic "it": "Italiano" , # Italian "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut "ja": "日本語", # Japanese "jbo": "Lojban", "jv": "Basa Jawa", # Javanese "ka": "ქართული" , # Georgian "kaa": "Qaraqalpaqsha", "kab": "Taqbaylit", "kg": "KiKongo", # Kongo "ki": "Gĩkũyũ", # Kikuyu "kj": "Kuanyama", # Kuanyama "kk": "Қазақша", # Kazakh "kl": "Kalaallisut", # Kalaallisut; Greenlandic "km": "ភាសាខ្មែរ", # Khmer; Cambodian "kn": "ಕನ್ನಡ", # Kannada "ko": "한국어" , # Korean "kr": "Kanuri", # Kanuri "ks": "कश्मीरी / كشميري", # Kashmiri "ksh": "Ripoarisch", "ku": "Kurdî / كوردی", # Kurdish "kv": "Коми", # Komi "kw": "Kernewek/Karnuack", # Cornish "ky": "Кыргызча", # Kirghiz "la": "Latina" , # Latin "lad": "Dzhudezmo", "lb": "Lëtzebuergesch" , # Letzeburgesch "lbe": "Лакку", "lg": "Luganda", # Ganda "li": "Limburgs", # Limburgish; Limburger; Limburgan "lij": "Lígur", "ln": "Lingala", # Lingala "lmo": "Lumbaart", "lo": "ລາວ", # Lao; Laotian "lt": "Lietuvių" , # Lithuanian "lua": "Luba", # Luba "lv": "Latvieš" , # Latvian; Lettish "map-bms": "Basa Banyumasan", "mdf": "Мокшень (Mokshanj Kälj)", "mg": "Malagasy", # Malagasy "mh": "Ebon", # Marshall "mi": "Māori", # Maori "mk": "Македонски" , # Macedonian "ml": None, # Malayalam "mn": "Монгол", # Mongolian "mo": "Молдовеняскэ", # Moldavian "mr": "मराठी" , # Marathi "ms": "Bahasa Melay" , # Malay "mt": "Malti", # Maltese "mus": "Muskogee", "my": "မ္ရန္‌မာစာ", # Burmese "myv": "Эрзянь (Erzjanj Kelj)", "mzn": "مَزِروني", "na": "dorerin Naoero", # Nauru "nah": "Nāhuatl", "nap": "Nnapulitano", "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l "nd": None, # 
Ndebele, North "nds": "Plattdüütsch", "nds-nl": "Nedersaksisch", "ne": "नेपाली", # Nepali "new": "नेपाल भाषा" , # Nepal Bhasa "ng": "Oshiwambo", # Ndonga "nl": "Nederlands" , # Dutch "nn": "Nynorsk", # Norwegian Nynorsk "no": "Norsk (Bokmål)" , # Norwegian "nov": "Novial", "nr": None, # Ndebele, South "nrm": "Nouormand/Normaund", "nv": "Diné bizaad", # Navajo "ny": "Chi-Chewa", # Chichewa; Nyanja "oc": "Occitan", # Occitan; Proven@,{c}al "oj": None, # Ojibwa "om": "Oromoo", # (Afan) Oromo "or": "ଓଡ଼ିଆ", # Oriya "os": "Иронау", # Ossetian; Ossetic "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi "pag": "Pangasinan", "pam": "Kapampangan", "pap": "Papiament", "pdc": "Deitsch", "pi": "पाऴि", # Pali "pih": "Norfuk", "pl": "Polski" , # Polish "pms": "Piemontèis" , "ps": "پښتو", # Pashto, Pushto "pt": "Português" , # Portuguese "q": "Runa Simi" , # Quechua "rm": "Rumantsch", # Rhaeto-Romance "rmy": "romani - रोमानी", "rn": "Kirundi", # Rundi; Kirundi "ro": "Română" , # Romanian "roa-rup": "Armãneashce", "roa-tara": "Tarandíne", "ru": "Русский" , # Russian "rw": "Ikinyarwanda", # Kinyarwanda "sa": "संस्कृतम्", # Sanskrit "sah": "Саха тыла (Saxa Tyla)", "sc": "Sardu", # Sardinian "scn": "Sicilian", "sco": "Scots", "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi "se": "Sámegiella", # Northern Sami "sg": "Sängö", # Sango; Sangro "sh": "Srpskohrvatski / Српскохрватски" , "si": "සිංහල", "simple": "Simple English" , "sk": "Slovenčina" , # Slovak "sl": "Slovenščina" , # Slovenian "sm": "Gagana Samoa", # Samoan "sn": "chiShona", # Shona "so": "Soomaaliga", # Somali "sr": "Српски / Srpski", # Serbian "srn": "Sranantongo", "ss": "SiSwati", # Swati; Siswati "st": "Sesotho", # Sesotho; Sotho, Southern "stk": "Seeltersk", "s": "Basa Sunda", # Sundanese "sq": "Shqip" , # Albanian "szl": "Ślůnski", "sv": "Svenska" , # Swedish "sw": "Kiswahili", # Swahili "ta": "தமிழ்" , # Tamil "te": "తెలుగు" , # Telugu "tet": "Tetun", "tg": "Тоҷикӣ", # Tajik "th": "ไทย" , # Thai "ti": "ትግርኛ", # Tigrinya "tk": "تركمن / 
Туркмен", # Turkmen "tl": "Tagalog" , # Tagalog "tn": "Setswana", # Tswana; Setswana "to": "faka Tonga", # Tonga (?) # Also ZW ; MW "tokipona": "Tokipona", "tpi": "Tok Pisin", "tr": "Türkçe" , # Turkish "ts": "Xitsonga", # Tsonga "tt": "Tatarça / Татарча", # Tatar "tum": "chiTumbuka", "tw": "Twi", # Twi "ty": "Reo Mā`ohi", # Tahitian "udm": "Удмурт кыл", "ug": "Oyghurque", # Uighur "uk": "Українська" , # Ukrainian "ur": "اردو", # Urdu "uz": "O‘zbek", # Uzbek "ve": "Tshivenda", # Venda "vec": "Vèneto", "vi": "Tiếng Việt" , # Vietnamese "vls": "West-Vlams", "vo": "Volapük" , "wa": "Walon", # Walloon "war": "Winaray", "wo": "Wolof", # Wolof "w": "吴语", "xal": "Хальмг", "xh": "isiXhosa", # Xhosa "yi": "ייִדיש", # Yiddish "yo": "Yorùbá", # Yoruba "za": "Cuengh", # Zhuang "zea": "Zeêuws", "zh": "中文" , # Chinese "zh-classical": "古文 / 文言文", "zm-min-nan": "Bân-lâm-gú", "zh-yue": "粵語", "zu": "isiZulu" # Zulu }