diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2008-11-26 12:25:45 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2008-11-26 12:25:45 +0200 |
commit | 67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04 (patch) | |
tree | 6a211974fc0a7c99720fe1b4af52f54bfe6370ce | |
parent | bd79a17ca5082789d4cf82f62a6afc0baaca90e8 (diff) | |
download | wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.gz wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.bz2 |
Implement paragraphs
-rw-r--r-- | wiki2html.py | 73 | ||||
-rw-r--r-- | wiki2text.py | 3 | ||||
-rw-r--r-- | wikimarkup.py | 371 |
3 files changed, 258 insertions, 189 deletions
diff --git a/wiki2html.py b/wiki2html.py index 907e3b1..7fa97b7 100644 --- a/wiki2html.py +++ b/wiki2html.py @@ -26,12 +26,44 @@ class HtmlWikiMarkup (WikiMarkup): 2. [[official position]]s : final 's' gets after closing </a> tag. Should be before. """ + + # FIXME: Awful kludge + image_kw = [ 'Image', + 'Grafika', + 'Bild', + 'Εικόνα', + 'Dosiero', + 'Slika', + 'Resim' + ] + + ST_INIT = 0 + ST_PARA = 1 + ST_OPEN = 2 + + state = [] + + def opara(self): + if self.state[-1] == self.ST_PARA: + self.state[-1] = self.ST_OPEN + return "<p>" + else: + return "" + def cpara(self): + state = self.state.pop(); + self.state.append(self.ST_INIT) + if state == self.ST_OPEN: + return "</p>" + else: + return "" + + def target(self, t): (qual,sep,tgt) = t.partition(':') r = None if tgt != '': - if qual in ('Image', 'Grafika'): + if qual in self.image_kw: t = self.image_base + urllib.quote(tgt) + '/250px-' + urllib.quote(tgt) elif qual == "Media": t = self.media_base + '/' + tgt @@ -96,11 +128,30 @@ class HtmlWikiMarkup (WikiMarkup): self.fmtok(tok[1], env), self.envel[env[1]]) - def str_seq(self, tok, env): + def str_para(self, tok, env): + s = self.cpara() + self.state.append(self.ST_PARA) + return s + + def fmtok(self, tok, env): + if type(tok) != TupleType: + return "" + if tok[0] in [ self.ENV, self.HDR ]: + s = self.cpara() + elif tok[0] == self.BAR: + s = self.str_para(tok, env) + elif tok[0] in [ self.NIL, self.SEQ ]: s = "" - for t in tok[1:]: - s += self.fmtok(t, env) - return s + else: + s = self.opara() + s1 = WikiMarkup.fmtok(self, tok, env) + if s1: + s += s1 + return s + + def __str__(self): + self.state = [ self.ST_PARA ] + return WikiMarkup.__str__(self) + self.cpara() @@ -109,7 +160,7 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup): A class for translating Wiktionary articles into HTML. This version does not do much, except that it tries to correctly format templates. But "tries" does not mean "does". The heuristics - used here is clearly not enogh to cope with it. + used here is clearly not enough to cope with it. 1. FIXME: The right solution would be to have a database of templates with their @@ -134,12 +185,14 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup): seq_pos = 0 def str_seq(self, tok, env): - s = "" + str = "" self.seq_pos=0 for t in tok[1:]: - s += self.fmtok(t, env) - self.seq_pos += 1 - return s + s = self.fmtok(t, env) + if s: + str += s + self.seq_pos += 1 + return str def str_tmpl(self, tok, env): arg = self.fmtok(tok[1], env) diff --git a/wiki2text.py b/wiki2text.py index e943f32..3669bd7 100644 --- a/wiki2text.py +++ b/wiki2text.py @@ -136,6 +136,9 @@ class TextWikiMarkup (WikiMarkup): return "" + self.indent(lev, "- " + self.fmtok(tok[1], env)) + def str_para(self, tok, env): + return "\n" + def __str__(self): return self.fmtok(self.tree, None) diff --git a/wikimarkup.py b/wikimarkup.py index d9ae7cc..e2a1cab 100644 --- a/wikimarkup.py +++ b/wikimarkup.py @@ -35,10 +35,11 @@ class BaseWikiMarkup: """ A base class for handling Wiki markups. It handles: - 1. basic block markup (headers, numbered and unnumbered lists, + 1. paragraphs; + 2. basic block markup (headers, numbered and unnumbered lists, indentations); - 2. basic inline markup (bold, italic); - 3. basic reference markup (links, templates, external links). + 3. basic inline markup (bold, italic); + 4. basic reference markup (links, templates, external links). It does NOT handle: 1. pseudo-html markup (<nowiki></nowiki>, and similar); 2. leading spaces meaning ``preserve formatting''; @@ -90,6 +91,8 @@ It handles: ITEM = 10 # Sequence: seq SEQ = 11 + # Paragraph + PARA = 12 # Environment types: # Unnumbered list @@ -130,6 +133,10 @@ It handles: self.putback(line) break + if line == '\n': + yield(self.PARA,) + continue + m = eltbeg.match(line) if m: if m.group(0)[0] in self.envtypes: @@ -247,8 +254,6 @@ It handles: return toktype, self.expandtok(tok[1]) elif toktype == self.HDR: return toktype, tok[1], self.expandtok(tok[2]) - elif toktype == self.BAR: - return tok elif toktype == self.ENV: return toktype,tok[1],tok[2],self.expandtok(tok[3]) elif toktype == self.SEQ: @@ -264,6 +269,8 @@ It handles: subtree.append(x) return tuple(subtree) if len(subtree) > 2 else \ subtree[1] if len(subtree) == 2 else None + else: + return tok def parse(self): tree = [self.SEQ] @@ -314,6 +321,8 @@ It handles: elif toktype == self.ITEM: print "ITEM" self.prtok(tok[1], indent+1) + elif toktype == self.PARA: + print "PARA" def output(self): self.prtok(self.tree, 0) @@ -377,314 +386,316 @@ class WikiMarkup (BaseWikiMarkup): # ISO 639 langtab = { - "aa": "Afar", # Afar - "ab": "Аҧсуа", # Abkhazian - "ae": None, # Avestan - "af": "Afrikaans", # Afrikaans - "ak": "Akana", # Akan # or ak_CI + "aa": "Afar", # Afar + "ab": "Аҧсуа", # Abkhazian + "ae": None, # Avestan + "af": "Afrikaans", # Afrikaans + "ak": "Akana", # Akan "als": "Alemannisch", - "am": "አማርኛ", # Amharic - "an": "Aragonés", # Aragonese + "am": "አማርኛ", # Amharic + "an": "Aragonés", # Aragonese "ang": "Englisc", - "ar": "العربية" , # Arabic + "ar": "العربية" , # Arabic "arc": "ܐܪܡܝܐ", - "as": "অসমীয়া", # Assamese + "as": "অসমীয়া", # Assamese "ast": "Asturian", - "av": "Авар", # Avaric # Spoken mainly in Dagestan - "ay": "Aymar", # Aymara - "az": "Azərbaycan" , # Azerbaijani + "av": "Авар", # Avaric + "ay": "Aymar", # Aymara + "az": "Azərbaycan" , # Azerbaijani - "ba": "Башҡорт", # Bashkir + "ba": "Башҡорт", # Bashkir "bar": "Boarisch", "bat-smg": "Žemaitėška", "bcl": "Bikol", - "be": "Беларуская", # Byelorussian; Belarusian + "be": "Беларуская", # Byelorussian; Belarusian "be-x-old": "Беларуская (тарашкевіца)", - "bg": "Български", # Bulgarian - "bh": "भोजपुरी", # Bihari - "bi": "Bislama", # Bislama - "bm": "Bamanankan", # Bambara - "bn": "বাংলা" , # Bengali; Bangla - "bo": "བོད་སྐད", # Tibetan + "bg": "Български", # Bulgarian + "bh": "भोजपुरी", # Bihari + "bi": "Bislama", # Bislama + "bm": "Bamanankan", # Bambara + "bn": "বাংলা" , # Bengali; Bangla + "bo": "བོད་སྐད", # Tibetan "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" , - "br": "Brezhoneg" , # Breton - "bs": "Bosanski" , # Bosnian + "br": "Brezhoneg" , # Breton + "bs": "Bosanski" , # Bosnian "bug": "Basa Ugi", "bxr": "Буряад", - "ca": "Català" , # Catalan + "ca": "Català" , # Catalan "cbk-zam": "Chavacano de Zamboanga", "cdo": "Mìng-dĕ̤ng-ngṳ̄", "cho": "Choctaw", - "ce": "Нохчийн", # Chechen + "ce": "Нохчийн", # Chechen "ceb": "Sinugboanong Binisaya" , # Cebuano - "ch": "Chamor", # Chamorro + "ch": "Chamor", # Chamorro "chr": "ᏣᎳᎩ", "chy": "Tsetsêhestâhese", - "co": "Cors", # Corsican - "cr": "Nehiyaw", # Cree + "co": "Cors", # Corsican + "cr": "Nehiyaw", # Cree "crh": "Qırımtatarca", - "cs": "Česky" , # Czech + "cs": "Česky" , # Czech "csb": "Kaszëbsczi", - "c": "Словѣньскъ", # Church Slavic - "cv": "Чăваш", # Chuvash - "cy": "Cymraeg" , # Welsh + "c": "Словѣньскъ", # Church Slavic + "cv": "Чăваш", # Chuvash + "cy": "Cymraeg" , # Welsh - "da": "Dansk" , # Danish - "de": "Deutsch" , # German - "diq": "Zazaki", # Dimli (Southern Zazaki) + "da": "Dansk" , # Danish + "de": "Deutsch" , # German + "diq": "Zazaki", # Dimli (Southern Zazaki) "dsb": "Dolnoserbski", - "dv": "ދިވެހިބަސް", # Divehi - "dz": "ཇོང་ཁ", # Dzongkha; Bhutani + "dv": "ދިވެހިބަސް", # Divehi + "dz": "ཇོང་ཁ", # Dzongkha; Bhutani - "ee": "Eʋegbe", # Ewe - "el": "Ελληνικά" , # Greek + "ee": "Eʋegbe", # Ewe + "el": "Ελληνικά" , # Greek "eml": "Emiliàn e rumagnòl", - "en": "English" , # English + "en": "English" , # English "eo": "Esperanto" , - "es": "Español" , # Spanish - "et": "Eesti" , # Estonian - "e": "Euskara" , # Basque + "es": "Español" , # Spanish + "et": "Eesti" , # Estonian + "e": "Euskara" , # Basque "ext": "Estremeñ", - "fa": "فارسی" , # Persian - "ff": "Fulfulde", # Fulah # Also NG, MR, and many others - "fi": "Suomi" , # Finnish + "fa": "فارسی" , # Persian + "ff": "Fulfulde", # Fulah + "fi": "Suomi" , # Finnish "fiu-vro": "Võro", - "fj": "Na Vosa Vakaviti", # Fijian; Fiji - "fo": "Føroyskt" , # Faroese - "fr": "Français" , # French + "fj": "Na Vosa Vakaviti",# Fijian; Fiji + "fo": "Føroyskt" , # Faroese + "fr": "Français" , # French "frp": "Arpitan", "fur": "Furlan", - "fy": "Frysk", # Frisian + "fy": "Frysk", # Frisian - "ga": "Gaeilge", # Irish + "ga": "Gaeilge", # Irish "gan": "贛語 (Gànyŭ)", - "gd": "Gàidhlig", # Scots; Gaelic - "gl": "Gallego" , # Gallegan; Galician + "gd": "Gàidhlig", # Scots; Gaelic + "gl": "Gallego" , # Gallegan; Galician "glk": "گیلکی", "got": "𐌲𐌹𐌺 ", - "gn": "Avañe'ẽ", # Guarani - "g": "ગુજરાતી", # Gujarati - "gv": "Gaelg", # Manx + "gn": "Avañe'ẽ", # Guarani + "g": "ગુજરાતી", # Gujarati + "gv": "Gaelg", # Manx - "ha": "هَوُسَ", # Hausa + "ha": "هَوُسَ", # Hausa "hak": "Hak-kâ-fa / 客家話", "haw": "Hawai`i", - "he": "עברית" , # Hebrew (formerly iw) - "hi": "हिन्दी" , # Hindi + "he": "עברית" , # Hebrew (formerly iw) + "hi": "हिन्दी" , # Hindi "hif": "Fiji Hindi", - "ho": "Hiri Mot", # Hiri Motu - "hr": "Hrvatski" , # Croatian + "ho": "Hiri Mot", # Hiri Motu + "hr": "Hrvatski" , # Croatian "hsb": "Hornjoserbsce", "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole - "hu": "Magyar" , # Hungarian - "hy": "Հայերեն", # Armenian - "hz": "Otsiherero", # Herero + "hu": "Magyar" , # Hungarian + "hy": "Հայերեն", # Armenian + "hz": "Otsiherero", # Herero "ia": "Interlingua", "ie": "Interlingue", - "id": "Bahasa Indonesia", # Indonesian (formerly in) - "ig": "Igbo", # Igbo - "ii": "ꆇꉙ ", # Sichuan Yi - "ik": "Iñupiak", # Inupiak + "id": "Bahasa Indonesia",# Indonesian (formerly in) + "ig": "Igbo", # Igbo + "ii": "ꆇꉙ ", # Sichuan Yi + "ik": "Iñupiak", # Inupiak "ilo": "Ilokano", "io": "Ido" , - "is": "Íslenska" , # Icelandic - "it": "Italiano" , # Italian - "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut + "is": "Íslenska" , # Icelandic + "it": "Italiano" , # Italian + "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut - "ja": "日本語", # Japanese + "ja": "日本語", # Japanese "jbo": "Lojban", - "jv": "Basa Jawa", # Javanese + "jv": "Basa Jawa", # Javanese - "ka": "ქართული" , # Georgian + "ka": "ქართული" , # Georgian "kaa": "Qaraqalpaqsha", "kab": "Taqbaylit", - "kg": "KiKongo", # Kongo # also CD and AO - "ki": "Gĩkũyũ", # Kikuyu - "kj": "Kuanyama", # Kuanyama - "kk": "Қазақша", # Kazakh - "kl": "Kalaallisut", # Kalaallisut; Greenlandic - "km": "ភាសាខ្មែរ", # Khmer; Cambodian - "kn": "ಕನ್ನಡ", # Kannada - "ko": "한국어" , # Korean - "kr": "Kanuri", # Kanuri + "kg": "KiKongo", # Kongo + "ki": "Gĩkũyũ", # Kikuyu + "kj": "Kuanyama", # Kuanyama + "kk": "Қазақша", # Kazakh + "kl": "Kalaallisut", # Kalaallisut; Greenlandic + "km": "ភាសាខ្មែរ", # Khmer; Cambodian + "kn": "ಕನ್ನಡ", # Kannada + "ko": "한국어" , # Korean + "kr": "Kanuri", # Kanuri "ks": "कश्मीरी / كشميري", # Kashmiri "ksh": "Ripoarisch", - "ku": "Kurdî / كوردی", # Kurdish - "kv": "Коми", # Komi + "ku": "Kurdî / كوردی", # Kurdish + "kv": "Коми", # Komi "kw": "Kernewek/Karnuack", # Cornish - "ky": "Кыргызча", # Kirghiz + "ky": "Кыргызча", # Kirghiz - "la": "Latina" , # Latin + "la": "Latina" , # Latin "lad": "Dzhudezmo", "lb": "Lëtzebuergesch" , # Letzeburgesch "lbe": "Лакку", - "lg": "Luganda", # Ganda - "li": "Limburgs", # Limburgish; Limburger; Limburgan + "lg": "Luganda", # Ganda + "li": "Limburgs", # Limburgish; Limburger; Limburgan "lij": "Lígur", "ln": "Lingala", # Lingala "lmo": "Lumbaart", - "lo": "ລາວ", # Lao; Laotian - "lt": "Lietuvių" , # Lithuanian - "l": None, # Luba-Katanga - "lv": "Latvieš" , # Latvian; Lettish + "lo": "ລາວ", # Lao; Laotian + "lt": "Lietuvių" , # Lithuanian + "lua": "Luba", # Luba + "lv": "Latvieš" , # Latvian; Lettish "map-bms": "Basa Banyumasan", "mdf": "Мокшень (Mokshanj Kälj)", - "mg": "Malagasy", # Malagasy - "mh": "Ebon", # Marshall - "mi": "Māori", # Maori - "mk": "Македонски" , # Macedonian - "ml": None, # Malayalam - "mn": "Монгол", # Mongolian - "mo": "Молдовеняскэ", # Moldavian - "mr": "मराठी" , # Marathi - "ms": "Bahasa Melay" , # Malay - "mt": "Malti", # Maltese + "mg": "Malagasy", # Malagasy + "mh": "Ebon", # Marshall + "mi": "Māori", # Maori + "mk": "Македонски" , # Macedonian + "ml": None, # Malayalam + "mn": "Монгол", # Mongolian + "mo": "Молдовеняскэ", # Moldavian + "mr": "मराठी" , # Marathi + "ms": "Bahasa Melay" , # Malay + "mt": "Malti", # Maltese "mus": "Muskogee", - "my": "မ္ရန္မာစာ", # Burmese + "my": "မ္ရန္မာစာ", # Burmese "myv": "Эрзянь (Erzjanj Kelj)", "mzn": "مَزِروني", - "na": "dorerin Naoero", # Nauru + "na": "dorerin Naoero", # Nauru "nah": "Nāhuatl", "nap": "Nnapulitano", - "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l - "nd": None,# Ndebele, North + "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l + "nd": None, # Ndebele, North "nds": "Plattdüütsch", "nds-nl": "Nedersaksisch", - "ne": "नेपाली", # Nepali + "ne": "नेपाली", # Nepali "new": "नेपाल भाषा" , # Nepal Bhasa - "ng": "Oshiwambo", # Ndonga - "nl": "Nederlands" , # Dutch - "nn": "Nynorsk", # Norwegian Nynorsk + "ng": "Oshiwambo", # Ndonga + "nl": "Nederlands" , # Dutch + "nn": "Nynorsk", # Norwegian Nynorsk "no": "Norsk (Bokmål)" , # Norwegian "nov": "Novial", - "nr": None, # Ndebele, South + "nr": None, # Ndebele, South "nrm": "Nouormand/Normaund", - "nv": "Diné bizaad", # Navajo - "ny": "Chi-Chewa", # Chichewa; Nyanja + "nv": "Diné bizaad", # Navajo + "ny": "Chi-Chewa", # Chichewa; Nyanja - "oc": "Occitan", # Occitan; Proven@,{c}al - "oj": None, # Ojibwa - "om": "Oromoo", # (Afan) Oromo - "or": "ଓଡ଼ିଆ", # Oriya - "os": "Иронау", # Ossetian; Ossetic + "oc": "Occitan", # Occitan; Proven@,{c}al + "oj": None, # Ojibwa + "om": "Oromoo", # (Afan) Oromo + "or": "ଓଡ଼ିଆ", # Oriya + "os": "Иронау", # Ossetian; Ossetic - "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi + "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi "pag": "Pangasinan", "pam": "Kapampangan", "pap": "Papiament", "pdc": "Deitsch", - "pi": "पाऴि", # Pali + "pi": "पाऴि", # Pali "pih": "Norfuk", - "pl": "Polski" , # Polish + "pl": "Polski" , # Polish "pms": "Piemontèis" , - "ps": "پښتو", # Pashto, Pushto - "pt": "Português" , # Portuguese + "ps": "پښتو", # Pashto, Pushto + "pt": "Português" , # Portuguese - "q": "Runa Simi" , # Quechua + "q": "Runa Simi" , # Quechua - "rm": "Rumantsch", # Rhaeto-Romance + "rm": "Rumantsch", # Rhaeto-Romance "rmy": "romani - रोमानी", - "rn": "Kirundi", # Rundi; Kirundi - "ro": "Română" , # Romanian + "rn": "Kirundi", # Rundi; Kirundi + "ro": "Română" , # Romanian "roa-rup": "Armãneashce", "roa-tara": "Tarandíne", - "ru": "Русский" , # Russian - "rw": "Ikinyarwanda", # Kinyarwanda + "ru": "Русский" , # Russian + "rw": "Ikinyarwanda", # Kinyarwanda "sa": "संस्कृतम्", # Sanskrit "sah": "Саха тыла (Saxa Tyla)", - "sc": "Sard", # Sardinian + "sc": "Sardu", # Sardinian "scn": "Sicilian", "sco": "Scots", "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi - "se": "Sámegiella", # Northern Sami - "sg": "Sängö", # Sango; Sangro + "se": "Sámegiella", # Northern Sami + "sg": "Sängö", # Sango; Sangro "sh": "Srpskohrvatski / Српскохрватски" , "si": "සිංහල", "simple": "Simple English" , - "sk": "Slovenčina" , # Slovak - "sl": "Slovenščina" , # Slovenian - "sm": "Gagana Samoa", # Samoan - "sn": "chiShona", # Shona - "so": "Soomaaliga", # Somali - "sr": "Српски / Srpski" , # Serbian + "sk": "Slovenčina" , # Slovak + "sl": "Slovenščina" , # Slovenian + "sm": "Gagana Samoa", # Samoan + "sn": "chiShona", # Shona + "so": "Soomaaliga", # Somali + "sr": "Српски / Srpski", # Serbian "srn": "Sranantongo", - "ss": "SiSwati", # Swati; Siswati - "st": "Sesotho", # Sesotho; Sotho, Southern + "ss": "SiSwati", # Swati; Siswati + "st": "Sesotho", # Sesotho; Sotho, Southern "stk": "Seeltersk", - "s": "Basa Sunda", # Sundanese - "sq": "Shqip" , # Albanian + "s": "Basa Sunda", # Sundanese + "sq": "Shqip" , # Albanian "szl": "Ślůnski", - "sv": "Svenska" , # Swedish - "sw": "Kiswahili", # Swahili # Also KE + "sv": "Svenska" , # Swedish + "sw": "Kiswahili", # Swahili - "ta": "தமிழ்" , # Tamil - "te": "తెలుగు" , # Telugu + "ta": "தமிழ்" , # Tamil + "te": "తెలుగు" , # Telugu "tet": "Tetun", - "tg": "Тоҷикӣ", # Tajik - "th": "ไทย" , # Thai - "ti": "ትግርኛ", # Tigrinya + "tg": "Тоҷикӣ", # Tajik + "th": "ไทย" , # Thai + "ti": "ትግርኛ", # Tigrinya "tk": "تركمن / Туркмен", # Turkmen - "tl": "Tagalog" , # Tagalog - "tn": "Setswana", # Tswana; Setswana - "to": "faka Tonga", # Tonga (?) # Also ZW ; MW + "tl": "Tagalog" , # Tagalog + "tn": "Setswana", # Tswana; Setswana + "to": "faka Tonga", # Tonga (?) # Also ZW ; MW "tokipona": "Tokipona", "tpi": "Tok Pisin", - "tr": "Türkçe" , # Turkish - "ts": "Xitsonga", # Tsonga # ZA SZ XW + "tr": "Türkçe" , # Turkish + "ts": "Xitsonga", # Tsonga "tt": "Tatarça / Татарча", # Tatar "tum": "chiTumbuka", - "tw": "Twi", # Twi - "ty": "Reo Mā`ohi", # Tahitian + "tw": "Twi", # Twi + "ty": "Reo Mā`ohi", # Tahitian "udm": "Удмурт кыл", - "ug": "Oyghurque", # Uighur - "uk": "Українська" , # Ukrainian - "ur": "اردو", # Urdu - "uz": "O‘zbek", # Uzbek + "ug": "Oyghurque", # Uighur + "uk": "Українська" , # Ukrainian + "ur": "اردو", # Urdu + "uz": "O‘zbek", # Uzbek - "ve": "Tshivenda", # Venda + "ve": "Tshivenda", # Venda "vec": "Vèneto", - "vi": "Tiếng Việt" , # Vietnamese + "vi": "Tiếng Việt" , # Vietnamese "vls": "West-Vlams", "vo": "Volapük" , - "wa": "Walon", # Walloon + "wa": "Walon", # Walloon "war": "Winaray", - "wo": "Wolof", # Wolof + "wo": "Wolof", # Wolof "w": "吴语", "xal": "Хальмг", - "xh": "isiXhosa", # Xhosa + "xh": "isiXhosa", # Xhosa - "yi": "ייִדיש", # Yiddish (formerly ji) - "yo": "Yorùbá", # Yoruba + "yi": "ייִדיש", # Yiddish + "yo": "Yorùbá", # Yoruba - "za": "Cuengh", # Zhuang + "za": "Cuengh", # Zhuang "zea": "Zeêuws", - "zh": "中文" , # Chinese + "zh": "中文" , # Chinese "zh-classical": "古文 / 文言文", "zm-min-nan": "Bân-lâm-gú", "zh-yue": "粵語", - "zu": "isiZulu" # Zulu + "zu": "isiZulu" # Zulu } def str_nil(self, tok, env): - return "" + return None def str_text(self, tok, env): return tok[1] def str_seq(self, tok, env): - s = "" + str = "" for t in tok[1:]: - s += self.fmtok(t, env) - return s + s = self.fmtok(t, env) + if s: + str += s + return str def fmtok(self, tok, env): if type(tok) != TupleType: @@ -714,6 +725,8 @@ class WikiMarkup (BaseWikiMarkup): return self.str_item(tok, env) elif toktype == self.SEQ: return self.str_seq(tok, env) + elif toktype == self.PARA: + return self.str_para(tok, env) def __str__(self): return self.fmtok(self.tree, None) |