summary | refs | log | tree | commit | diff | about
author: Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-26 10:25:45 (GMT)
committer: Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-26 10:25:45 (GMT)
commit: 67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04 (patch) (side-by-side diff)
tree: 6a211974fc0a7c99720fe1b4af52f54bfe6370ce
parent: bd79a17ca5082789d4cf82f62a6afc0baaca90e8 (diff)
download: wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.gz
download: wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.bz2
Implement paragraphs
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- wiki2html.py 73
-rw-r--r-- wiki2text.py 3
-rw-r--r-- wikimarkup.py 371
3 files changed, 258 insertions, 189 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 907e3b1..7fa97b7 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -26,12 +26,44 @@ class HtmlWikiMarkup (WikiMarkup):
2. [[official position]]s : final 's' gets after closing </a> tag.
Should be before.
"""
+
+ # FIXME: Awful kludge
+ image_kw = [ 'Image',
+ 'Grafika',
+ 'Bild',
+ 'Εικόνα',
+ 'Dosiero',
+ 'Slika',
+ 'Resim'
+ ]
+
+ ST_INIT = 0
+ ST_PARA = 1
+ ST_OPEN = 2
+
+ state = []
+
+ def opara(self):
+ if self.state[-1] == self.ST_PARA:
+ self.state[-1] = self.ST_OPEN
+ return "<p>"
+ else:
+ return ""
+ def cpara(self):
+ state = self.state.pop();
+ self.state.append(self.ST_INIT)
+ if state == self.ST_OPEN:
+ return "</p>"
+ else:
+ return ""
+
+
def target(self, t):
(qual,sep,tgt) = t.partition(':')
r = None
if tgt != '':
- if qual in ('Image', 'Grafika'):
+ if qual in self.image_kw:
t = self.image_base + urllib.quote(tgt) + '/250px-' + urllib.quote(tgt)
elif qual == "Media":
t = self.media_base + '/' + tgt
@@ -96,11 +128,30 @@ class HtmlWikiMarkup (WikiMarkup):
self.fmtok(tok[1], env),
self.envel[env[1]])
- def str_seq(self, tok, env):
+ def str_para(self, tok, env):
+ s = self.cpara()
+ self.state.append(self.ST_PARA)
+ return s
+
+ def fmtok(self, tok, env):
+ if type(tok) != TupleType:
+ return ""
+ if tok[0] in [ self.ENV, self.HDR ]:
+ s = self.cpara()
+ elif tok[0] == self.BAR:
+ s = self.str_para(tok, env)
+ elif tok[0] in [ self.NIL, self.SEQ ]:
s = ""
- for t in tok[1:]:
- s += self.fmtok(t, env)
- return s
+ else:
+ s = self.opara()
+ s1 = WikiMarkup.fmtok(self, tok, env)
+ if s1:
+ s += s1
+ return s
+
+ def __str__(self):
+ self.state = [ self.ST_PARA ]
+ return WikiMarkup.__str__(self) + self.cpara()
@@ -109,7 +160,7 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup):
A class for translating Wiktionary articles into HTML.
This version does not do much, except that it tries to correctly
format templates. But "tries" does not mean "does". The heuristics
- used here is clearly not enogh to cope with it.
+ used here is clearly not enough to cope with it.
1. FIXME:
The right solution would be to have a database of templates with their
@@ -134,12 +185,14 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup):
seq_pos = 0
def str_seq(self, tok, env):
- s = ""
+ str = ""
self.seq_pos=0
for t in tok[1:]:
- s += self.fmtok(t, env)
- self.seq_pos += 1
- return s
+ s = self.fmtok(t, env)
+ if s:
+ str += s
+ self.seq_pos += 1
+ return str
def str_tmpl(self, tok, env):
arg = self.fmtok(tok[1], env)
diff --git a/wiki2text.py b/wiki2text.py
index e943f32..3669bd7 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -136,6 +136,9 @@ class TextWikiMarkup (WikiMarkup):
return "" + self.indent(lev,
"- " + self.fmtok(tok[1], env))
+ def str_para(self, tok, env):
+ return "\n"
+
def __str__(self):
return self.fmtok(self.tree, None)
diff --git a/wikimarkup.py b/wikimarkup.py
index d9ae7cc..e2a1cab 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -35,10 +35,11 @@ class BaseWikiMarkup:
"""
A base class for handling Wiki markups.
It handles:
- 1. basic block markup (headers, numbered and unnumbered lists,
+ 1. paragraphs;
+ 2. basic block markup (headers, numbered and unnumbered lists,
indentations);
- 2. basic inline markup (bold, italic);
- 3. basic reference markup (links, templates, external links).
+ 3. basic inline markup (bold, italic);
+ 4. basic reference markup (links, templates, external links).
It does NOT handle:
1. pseudo-html markup (<nowiki></nowiki>, and similar);
2. leading spaces meaning ``preserve formatting'';
@@ -90,6 +91,8 @@ It handles:
ITEM = 10
# Sequence: seq
SEQ = 11
+ # Paragraph
+ PARA = 12
# Environment types:
# Unnumbered list
@@ -130,6 +133,10 @@ It handles:
self.putback(line)
break
+ if line == '\n':
+ yield(self.PARA,)
+ continue
+
m = eltbeg.match(line)
if m:
if m.group(0)[0] in self.envtypes:
@@ -247,8 +254,6 @@ It handles:
return toktype, self.expandtok(tok[1])
elif toktype == self.HDR:
return toktype, tok[1], self.expandtok(tok[2])
- elif toktype == self.BAR:
- return tok
elif toktype == self.ENV:
return toktype,tok[1],tok[2],self.expandtok(tok[3])
elif toktype == self.SEQ:
@@ -264,6 +269,8 @@ It handles:
subtree.append(x)
return tuple(subtree) if len(subtree) > 2 else \
subtree[1] if len(subtree) == 2 else None
+ else:
+ return tok
def parse(self):
tree = [self.SEQ]
@@ -314,6 +321,8 @@ It handles:
elif toktype == self.ITEM:
print "ITEM"
self.prtok(tok[1], indent+1)
+ elif toktype == self.PARA:
+ print "PARA"
def output(self):
self.prtok(self.tree, 0)
@@ -377,314 +386,316 @@ class WikiMarkup (BaseWikiMarkup):
# ISO 639
langtab = {
- "aa": "Afar", # Afar
- "ab": "Аҧсуа", # Abkhazian
- "ae": None, # Avestan
- "af": "Afrikaans", # Afrikaans
- "ak": "Akana", # Akan # or ak_CI
+ "aa": "Afar", # Afar
+ "ab": "Аҧсуа", # Abkhazian
+ "ae": None, # Avestan
+ "af": "Afrikaans", # Afrikaans
+ "ak": "Akana", # Akan
"als": "Alemannisch",
- "am": "አማርኛ", # Amharic
- "an": "Aragonés", # Aragonese
+ "am": "አማርኛ", # Amharic
+ "an": "Aragonés", # Aragonese
"ang": "Englisc",
- "ar": "العربية" , # Arabic
+ "ar": "العربية" , # Arabic
"arc": "ܐܪܡܝܐ",
- "as": "অসমীয়া", # Assamese
+ "as": "অসমীয়া", # Assamese
"ast": "Asturian",
- "av": "Авар", # Avaric # Spoken mainly in Dagestan
- "ay": "Aymar", # Aymara
- "az": "Azərbaycan" , # Azerbaijani
+ "av": "Авар", # Avaric
+ "ay": "Aymar", # Aymara
+ "az": "Azərbaycan" , # Azerbaijani
- "ba": "Башҡорт", # Bashkir
+ "ba": "Башҡорт", # Bashkir
"bar": "Boarisch",
"bat-smg": "Žemaitėška",
"bcl": "Bikol",
- "be": "Беларуская", # Byelorussian; Belarusian
+ "be": "Беларуская", # Byelorussian; Belarusian
"be-x-old": "Беларуская (тарашкевіца)",
- "bg": "Български", # Bulgarian
- "bh": "भोजपुरी", # Bihari
- "bi": "Bislama", # Bislama
- "bm": "Bamanankan", # Bambara
- "bn": "বাংলা" , # Bengali; Bangla
- "bo": "བོད་སྐད", # Tibetan
+ "bg": "Български", # Bulgarian
+ "bh": "भोजपुरी", # Bihari
+ "bi": "Bislama", # Bislama
+ "bm": "Bamanankan", # Bambara
+ "bn": "বাংলা" , # Bengali; Bangla
+ "bo": "བོད་སྐད", # Tibetan
"bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" ,
- "br": "Brezhoneg" , # Breton
- "bs": "Bosanski" , # Bosnian
+ "br": "Brezhoneg" , # Breton
+ "bs": "Bosanski" , # Bosnian
"bug": "Basa Ugi",
"bxr": "Буряад",
- "ca": "Català" , # Catalan
+ "ca": "Català" , # Catalan
"cbk-zam": "Chavacano de Zamboanga",
"cdo": "Mìng-dĕ̤ng-ngṳ̄",
"cho": "Choctaw",
- "ce": "Нохчийн", # Chechen
+ "ce": "Нохчийн", # Chechen
"ceb": "Sinugboanong Binisaya" , # Cebuano
- "ch": "Chamor", # Chamorro
+ "ch": "Chamor", # Chamorro
"chr": "ᏣᎳᎩ",
"chy": "Tsetsêhestâhese",
- "co": "Cors", # Corsican
- "cr": "Nehiyaw", # Cree
+ "co": "Cors", # Corsican
+ "cr": "Nehiyaw", # Cree
"crh": "Qırımtatarca",
- "cs": "Česky" , # Czech
+ "cs": "Česky" , # Czech
"csb": "Kaszëbsczi",
- "c": "Словѣньскъ", # Church Slavic
- "cv": "Чăваш", # Chuvash
- "cy": "Cymraeg" , # Welsh
+ "c": "Словѣньскъ", # Church Slavic
+ "cv": "Чăваш", # Chuvash
+ "cy": "Cymraeg" , # Welsh
- "da": "Dansk" , # Danish
- "de": "Deutsch" , # German
- "diq": "Zazaki", # Dimli (Southern Zazaki)
+ "da": "Dansk" , # Danish
+ "de": "Deutsch" , # German
+ "diq": "Zazaki", # Dimli (Southern Zazaki)
"dsb": "Dolnoserbski",
- "dv": "ދިވެހިބަސް", # Divehi
- "dz": "ཇོང་ཁ", # Dzongkha; Bhutani
+ "dv": "ދިވެހިބަސް", # Divehi
+ "dz": "ཇོང་ཁ", # Dzongkha; Bhutani
- "ee": "Eʋegbe", # Ewe
- "el": "Ελληνικά" , # Greek
+ "ee": "Eʋegbe", # Ewe
+ "el": "Ελληνικά" , # Greek
"eml": "Emiliàn e rumagnòl",
- "en": "English" , # English
+ "en": "English" , # English
"eo": "Esperanto" ,
- "es": "Español" , # Spanish
- "et": "Eesti" , # Estonian
- "e": "Euskara" , # Basque
+ "es": "Español" , # Spanish
+ "et": "Eesti" , # Estonian
+ "e": "Euskara" , # Basque
"ext": "Estremeñ",
- "fa": "فارسی" , # Persian
- "ff": "Fulfulde", # Fulah # Also NG, MR, and many others
- "fi": "Suomi" , # Finnish
+ "fa": "فارسی" , # Persian
+ "ff": "Fulfulde", # Fulah
+ "fi": "Suomi" , # Finnish
"fiu-vro": "Võro",
- "fj": "Na Vosa Vakaviti", # Fijian; Fiji
- "fo": "Føroyskt" , # Faroese
- "fr": "Français" , # French
+ "fj": "Na Vosa Vakaviti",# Fijian; Fiji
+ "fo": "Føroyskt" , # Faroese
+ "fr": "Français" , # French
"frp": "Arpitan",
"fur": "Furlan",
- "fy": "Frysk", # Frisian
+ "fy": "Frysk", # Frisian
- "ga": "Gaeilge", # Irish
+ "ga": "Gaeilge", # Irish
"gan": "贛語 (Gànyŭ)",
- "gd": "Gàidhlig", # Scots; Gaelic
- "gl": "Gallego" , # Gallegan; Galician
+ "gd": "Gàidhlig", # Scots; Gaelic
+ "gl": "Gallego" , # Gallegan; Galician
"glk": "گیلکی",
"got": "𐌲Œ„𐌹𐌺 ",
- "gn": "Avañe'ẽ", # Guarani
- "g": "ગુજરાતી", # Gujarati
- "gv": "Gaelg", # Manx
+ "gn": "Avañe'ẽ", # Guarani
+ "g": "ગુજરાતી", # Gujarati
+ "gv": "Gaelg", # Manx
- "ha": "هَوُسَ", # Hausa
+ "ha": "هَوُسَ", # Hausa
"hak": "Hak-kâ-fa / 客家話",
"haw": "Hawai`i",
- "he": "עברית" , # Hebrew (formerly iw)
- "hi": "हिन्दी" , # Hindi
+ "he": "עברית" , # Hebrew (formerly iw)
+ "hi": "हिन्दी" , # Hindi
"hif": "Fiji Hindi",
- "ho": "Hiri Mot", # Hiri Motu
- "hr": "Hrvatski" , # Croatian
+ "ho": "Hiri Mot", # Hiri Motu
+ "hr": "Hrvatski" , # Croatian
"hsb": "Hornjoserbsce",
"ht": "Krèyol ayisyen" , # Haitian; Haitian Creole
- "hu": "Magyar" , # Hungarian
- "hy": "Հայերեն", # Armenian
- "hz": "Otsiherero", # Herero
+ "hu": "Magyar" , # Hungarian
+ "hy": "Հայերեն", # Armenian
+ "hz": "Otsiherero", # Herero
"ia": "Interlingua",
"ie": "Interlingue",
- "id": "Bahasa Indonesia", # Indonesian (formerly in)
- "ig": "Igbo", # Igbo
- "ii": "ꆇꉙ ", # Sichuan Yi
- "ik": "Iñupiak", # Inupiak
+ "id": "Bahasa Indonesia",# Indonesian (formerly in)
+ "ig": "Igbo", # Igbo
+ "ii": "ꆇꉙ ", # Sichuan Yi
+ "ik": "Iñupiak", # Inupiak
"ilo": "Ilokano",
"io": "Ido" ,
- "is": "Íslenska" , # Icelandic
- "it": "Italiano" , # Italian
- "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut
+ "is": "Íslenska" , # Icelandic
+ "it": "Italiano" , # Italian
+ "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut
- "ja": "日本語", # Japanese
+ "ja": "日本語", # Japanese
"jbo": "Lojban",
- "jv": "Basa Jawa", # Javanese
+ "jv": "Basa Jawa", # Javanese
- "ka": "ქართული" , # Georgian
+ "ka": "ქართული" , # Georgian
"kaa": "Qaraqalpaqsha",
"kab": "Taqbaylit",
- "kg": "KiKongo", # Kongo # also CD and AO
- "ki": "Gĩkũyũ", # Kikuyu
- "kj": "Kuanyama", # Kuanyama
- "kk": "Қазақша", # Kazakh
- "kl": "Kalaallisut", # Kalaallisut; Greenlandic
- "km": "ភាសាខ្មែរ", # Khmer; Cambodian
- "kn": "ಕನ್ನಡ", # Kannada
- "ko": "한국어" , # Korean
- "kr": "Kanuri", # Kanuri
+ "kg": "KiKongo", # Kongo
+ "ki": "Gĩkũyũ", # Kikuyu
+ "kj": "Kuanyama", # Kuanyama
+ "kk": "Қазақша", # Kazakh
+ "kl": "Kalaallisut", # Kalaallisut; Greenlandic
+ "km": "ភាសាខ្មែរ", # Khmer; Cambodian
+ "kn": "ಕನ್ನಡ", # Kannada
+ "ko": "한국어" , # Korean
+ "kr": "Kanuri", # Kanuri
"ks": "कश्मीरी / كشميري", # Kashmiri
"ksh": "Ripoarisch",
- "ku": "Kurdî / كوردی", # Kurdish
- "kv": "Коми", # Komi
+ "ku": "Kurdî / كوردی", # Kurdish
+ "kv": "Коми", # Komi
"kw": "Kernewek/Karnuack", # Cornish
- "ky": "Кыргызча", # Kirghiz
+ "ky": "Кыргызча", # Kirghiz
- "la": "Latina" , # Latin
+ "la": "Latina" , # Latin
"lad": "Dzhudezmo",
"lb": "Lëtzebuergesch" , # Letzeburgesch
"lbe": "Лакку",
- "lg": "Luganda", # Ganda
- "li": "Limburgs", # Limburgish; Limburger; Limburgan
+ "lg": "Luganda", # Ganda
+ "li": "Limburgs", # Limburgish; Limburger; Limburgan
"lij": "Lígur",
"ln": "Lingala", # Lingala
"lmo": "Lumbaart",
- "lo": "ລາວ", # Lao; Laotian
- "lt": "Lietuvių" , # Lithuanian
- "l": None, # Luba-Katanga
- "lv": "Latvieš" , # Latvian; Lettish
+ "lo": "ລາວ", # Lao; Laotian
+ "lt": "Lietuvių" , # Lithuanian
+ "lua": "Luba", # Luba
+ "lv": "Latvieš" , # Latvian; Lettish
"map-bms": "Basa Banyumasan",
"mdf": "Мокшень (Mokshanj Kälj)",
- "mg": "Malagasy", # Malagasy
- "mh": "Ebon", # Marshall
- "mi": "Māori", # Maori
- "mk": "Македонски" , # Macedonian
- "ml": None, # Malayalam
- "mn": "Монгол", # Mongolian
- "mo": "Молдовеняскэ", # Moldavian
- "mr": "मराठी" , # Marathi
- "ms": "Bahasa Melay" , # Malay
- "mt": "Malti", # Maltese
+ "mg": "Malagasy", # Malagasy
+ "mh": "Ebon", # Marshall
+ "mi": "Māori", # Maori
+ "mk": "Македонски" , # Macedonian
+ "ml": None, # Malayalam
+ "mn": "Монгол", # Mongolian
+ "mo": "Молдовеняскэ", # Moldavian
+ "mr": "मराठी" , # Marathi
+ "ms": "Bahasa Melay" , # Malay
+ "mt": "Malti", # Maltese
"mus": "Muskogee",
- "my": "မ္ရန္‌မာစာ", # Burmese
+ "my": "မ္ရန္‌မာစာ", # Burmese
"myv": "Эрзянь (Erzjanj Kelj)",
"mzn": "مَزِروني",
- "na": "dorerin Naoero", # Nauru
+ "na": "dorerin Naoero", # Nauru
"nah": "Nāhuatl",
"nap": "Nnapulitano",
- "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l
- "nd": None,# Ndebele, North
+ "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l
+ "nd": None, # Ndebele, North
"nds": "Plattdüütsch",
"nds-nl": "Nedersaksisch",
- "ne": "नेपाली", # Nepali
+ "ne": "नेपाली", # Nepali
"new": "नेपाल भाषा" , # Nepal Bhasa
- "ng": "Oshiwambo", # Ndonga
- "nl": "Nederlands" , # Dutch
- "nn": "Nynorsk", # Norwegian Nynorsk
+ "ng": "Oshiwambo", # Ndonga
+ "nl": "Nederlands" , # Dutch
+ "nn": "Nynorsk", # Norwegian Nynorsk
"no": "Norsk (Bokmål)" , # Norwegian
"nov": "Novial",
- "nr": None, # Ndebele, South
+ "nr": None, # Ndebele, South
"nrm": "Nouormand/Normaund",
- "nv": "Diné bizaad", # Navajo
- "ny": "Chi-Chewa", # Chichewa; Nyanja
+ "nv": "Diné bizaad", # Navajo
+ "ny": "Chi-Chewa", # Chichewa; Nyanja
- "oc": "Occitan", # Occitan; Proven@,{c}al
- "oj": None, # Ojibwa
- "om": "Oromoo", # (Afan) Oromo
- "or": "ଓଡ଼ିଆ", # Oriya
- "os": "Иронау", # Ossetian; Ossetic
+ "oc": "Occitan", # Occitan; Proven@,{c}al
+ "oj": None, # Ojibwa
+ "om": "Oromoo", # (Afan) Oromo
+ "or": "ଓଡ଼ିଆ", # Oriya
+ "os": "Иронау", # Ossetian; Ossetic
- "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi
+ "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi
"pag": "Pangasinan",
"pam": "Kapampangan",
"pap": "Papiament",
"pdc": "Deitsch",
- "pi": "पाऴि", # Pali
+ "pi": "पाऴि", # Pali
"pih": "Norfuk",
- "pl": "Polski" , # Polish
+ "pl": "Polski" , # Polish
"pms": "Piemontèis" ,
- "ps": "پښتو", # Pashto, Pushto
- "pt": "Português" , # Portuguese
+ "ps": "پښتو", # Pashto, Pushto
+ "pt": "Português" , # Portuguese
- "q": "Runa Simi" , # Quechua
+ "q": "Runa Simi" , # Quechua
- "rm": "Rumantsch", # Rhaeto-Romance
+ "rm": "Rumantsch", # Rhaeto-Romance
"rmy": "romani - रोमानी",
- "rn": "Kirundi", # Rundi; Kirundi
- "ro": "Română" , # Romanian
+ "rn": "Kirundi", # Rundi; Kirundi
+ "ro": "Română" , # Romanian
"roa-rup": "Armãneashce",
"roa-tara": "Tarandíne",
- "ru": "Русский" , # Russian
- "rw": "Ikinyarwanda", # Kinyarwanda
+ "ru": "Русский" , # Russian
+ "rw": "Ikinyarwanda", # Kinyarwanda
"sa": "संस्कृतम्", # Sanskrit
"sah": "Саха тыла (Saxa Tyla)",
- "sc": "Sard", # Sardinian
+ "sc": "Sardu", # Sardinian
"scn": "Sicilian",
"sco": "Scots",
"sd": "سنڌي، سندھی ، सिन्ध", # Sindhi
- "se": "Sámegiella", # Northern Sami
- "sg": "Sängö", # Sango; Sangro
+ "se": "Sámegiella", # Northern Sami
+ "sg": "Sängö", # Sango; Sangro
"sh": "Srpskohrvatski / Српскохрватски" ,
"si": "සිංහල",
"simple": "Simple English" ,
- "sk": "Slovenčina" , # Slovak
- "sl": "Slovenščina" , # Slovenian
- "sm": "Gagana Samoa", # Samoan
- "sn": "chiShona", # Shona
- "so": "Soomaaliga", # Somali
- "sr": "Српски / Srpski" , # Serbian
+ "sk": "Slovenčina" , # Slovak
+ "sl": "Slovenščina" , # Slovenian
+ "sm": "Gagana Samoa", # Samoan
+ "sn": "chiShona", # Shona
+ "so": "Soomaaliga", # Somali
+ "sr": "Српски / Srpski", # Serbian
"srn": "Sranantongo",
- "ss": "SiSwati", # Swati; Siswati
- "st": "Sesotho", # Sesotho; Sotho, Southern
+ "ss": "SiSwati", # Swati; Siswati
+ "st": "Sesotho", # Sesotho; Sotho, Southern
"stk": "Seeltersk",
- "s": "Basa Sunda", # Sundanese
- "sq": "Shqip" , # Albanian
+ "s": "Basa Sunda", # Sundanese
+ "sq": "Shqip" , # Albanian
"szl": "Ślůnski",
- "sv": "Svenska" , # Swedish
- "sw": "Kiswahili", # Swahili # Also KE
+ "sv": "Svenska" , # Swedish
+ "sw": "Kiswahili", # Swahili
- "ta": "தமிழ்" , # Tamil
- "te": "తెలుగు" , # Telugu
+ "ta": "தமிழ்" , # Tamil
+ "te": "తెలుగు" , # Telugu
"tet": "Tetun",
- "tg": "Тоҷикӣ", # Tajik
- "th": "ไทย" , # Thai
- "ti": "ትግርኛ", # Tigrinya
+ "tg": "Тоҷикӣ", # Tajik
+ "th": "ไทย" , # Thai
+ "ti": "ትግርኛ", # Tigrinya
"tk": "تركمن / Туркмен", # Turkmen
- "tl": "Tagalog" , # Tagalog
- "tn": "Setswana", # Tswana; Setswana
- "to": "faka Tonga", # Tonga (?) # Also ZW ; MW
+ "tl": "Tagalog" , # Tagalog
+ "tn": "Setswana", # Tswana; Setswana
+ "to": "faka Tonga", # Tonga (?) # Also ZW ; MW
"tokipona": "Tokipona",
"tpi": "Tok Pisin",
- "tr": "Türkçe" , # Turkish
- "ts": "Xitsonga", # Tsonga # ZA SZ XW
+ "tr": "Türkçe" , # Turkish
+ "ts": "Xitsonga", # Tsonga
"tt": "Tatarça / Татарча", # Tatar
"tum": "chiTumbuka",
- "tw": "Twi", # Twi
- "ty": "Reo Mā`ohi", # Tahitian
+ "tw": "Twi", # Twi
+ "ty": "Reo Mā`ohi", # Tahitian
"udm": "Удмурт кыл",
- "ug": "Oyghurque", # Uighur
- "uk": "Українська" , # Ukrainian
- "ur": "اردو", # Urdu
- "uz": "O‘zbek", # Uzbek
+ "ug": "Oyghurque", # Uighur
+ "uk": "Українська" , # Ukrainian
+ "ur": "اردو", # Urdu
+ "uz": "O‘zbek", # Uzbek
- "ve": "Tshivenda", # Venda
+ "ve": "Tshivenda", # Venda
"vec": "Vèneto",
- "vi": "Tiếng Việt" , # Vietnamese
+ "vi": "Tiếng Việt" , # Vietnamese
"vls": "West-Vlams",
"vo": "Volapük" ,
- "wa": "Walon", # Walloon
+ "wa": "Walon", # Walloon
"war": "Winaray",
- "wo": "Wolof", # Wolof
+ "wo": "Wolof", # Wolof
"w": "吴语",
"xal": "Хальмг",
- "xh": "isiXhosa", # Xhosa
+ "xh": "isiXhosa", # Xhosa
- "yi": "ייִדיש", # Yiddish (formerly ji)
- "yo": "Yorùbá", # Yoruba
+ "yi": "ייִדיש", # Yiddish
+ "yo": "Yorùbá", # Yoruba
- "za": "Cuengh", # Zhuang
+ "za": "Cuengh", # Zhuang
"zea": "Zeêuws",
- "zh": "中文" , # Chinese
+ "zh": "中文" , # Chinese
"zh-classical": "古文 / 文言文",
"zm-min-nan": "Bân-lâm-gú",
"zh-yue": "粵語",
- "zu": "isiZulu" # Zulu
+ "zu": "isiZulu" # Zulu
}
def str_nil(self, tok, env):
- return ""
+ return None
def str_text(self, tok, env):
return tok[1]
def str_seq(self, tok, env):
- s = ""
+ str = ""
for t in tok[1:]:
- s += self.fmtok(t, env)
- return s
+ s = self.fmtok(t, env)
+ if s:
+ str += s
+ return str
def fmtok(self, tok, env):
if type(tok) != TupleType:
@@ -714,6 +725,8 @@ class WikiMarkup (BaseWikiMarkup):
return self.str_item(tok, env)
elif toktype == self.SEQ:
return self.str_seq(tok, env)
+ elif toktype == self.PARA:
+ return self.str_para(tok, env)
def __str__(self):
return self.fmtok(self.tree, None)

Return to:

Send suggestions and report system problems to the System administrator.