summary | refs | log | tree | commit | diff | about
author Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-26 10:25:45 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua> 2008-11-26 10:25:45 (GMT)
commit 67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04 (patch) (unidiff)
tree 6a211974fc0a7c99720fe1b4af52f54bfe6370ce
parent bd79a17ca5082789d4cf82f62a6afc0baaca90e8 (diff)
download wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.gz
wikitrans-67cd79c3c64d6dfe73ff5dd80ba51d0bf9267b04.tar.bz2
Implement paragraphs
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- wiki2html.py 73
-rw-r--r-- wiki2text.py 3
-rw-r--r-- wikimarkup.py 371
3 files changed, 258 insertions, 189 deletions
diff --git a/wiki2html.py b/wiki2html.py
index 907e3b1..7fa97b7 100644
--- a/wiki2html.py
+++ b/wiki2html.py
@@ -26,12 +26,44 @@ class HtmlWikiMarkup (WikiMarkup):
26 2. [[official position]]s : final 's' gets after closing </a> tag. 26 2. [[official position]]s : final 's' gets after closing </a> tag.
27 Should be before. 27 Should be before.
28 """ 28 """
29
30 # FIXME: Awful kludge
31 image_kw = [ 'Image',
32 'Grafika',
33 'Bild',
34 'Εικόνα',
35 'Dosiero',
36 'Slika',
37 'Resim'
38 ]
39
40 ST_INIT = 0
41 ST_PARA = 1
42 ST_OPEN = 2
43
44 state = []
45
46 def opara(self):
47 if self.state[-1] == self.ST_PARA:
48 self.state[-1] = self.ST_OPEN
49 return "<p>"
50 else:
51 return ""
29 52
53 def cpara(self):
54 state = self.state.pop();
55 self.state.append(self.ST_INIT)
56 if state == self.ST_OPEN:
57 return "</p>"
58 else:
59 return ""
60
61
30 def target(self, t): 62 def target(self, t):
31 (qual,sep,tgt) = t.partition(':') 63 (qual,sep,tgt) = t.partition(':')
32 r = None 64 r = None
33 if tgt != '': 65 if tgt != '':
34 if qual in ('Image', 'Grafika'): 66 if qual in self.image_kw:
35 t = self.image_base + urllib.quote(tgt) + '/250px-' + urllib.quote(tgt) 67 t = self.image_base + urllib.quote(tgt) + '/250px-' + urllib.quote(tgt)
36 elif qual == "Media": 68 elif qual == "Media":
37 t = self.media_base + '/' + tgt 69 t = self.media_base + '/' + tgt
@@ -96,11 +128,30 @@ class HtmlWikiMarkup (WikiMarkup):
96 self.fmtok(tok[1], env), 128 self.fmtok(tok[1], env),
97 self.envel[env[1]]) 129 self.envel[env[1]])
98 130
99 def str_seq(self, tok, env): 131 def str_para(self, tok, env):
132 s = self.cpara()
133 self.state.append(self.ST_PARA)
134 return s
135
136 def fmtok(self, tok, env):
137 if type(tok) != TupleType:
138 return ""
139 if tok[0] in [ self.ENV, self.HDR ]:
140 s = self.cpara()
141 elif tok[0] == self.BAR:
142 s = self.str_para(tok, env)
143 elif tok[0] in [ self.NIL, self.SEQ ]:
100 s = "" 144 s = ""
101 for t in tok[1:]: 145 else:
102 s += self.fmtok(t, env) 146 s = self.opara()
103 return s 147 s1 = WikiMarkup.fmtok(self, tok, env)
148 if s1:
149 s += s1
150 return s
151
152 def __str__(self):
153 self.state = [ self.ST_PARA ]
154 return WikiMarkup.__str__(self) + self.cpara()
104 155
105 156
106 157
@@ -109,7 +160,7 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup):
109 A class for translating Wiktionary articles into HTML. 160 A class for translating Wiktionary articles into HTML.
110 This version does not do much, except that it tries to correctly 161 This version does not do much, except that it tries to correctly
111 format templates. But "tries" does not mean "does". The heuristics 162 format templates. But "tries" does not mean "does". The heuristics
112 used here is clearly not enogh to cope with it. 163 used here is clearly not enough to cope with it.
113 164
114 1. FIXME: 165 1. FIXME:
115 The right solution would be to have a database of templates with their 166 The right solution would be to have a database of templates with their
@@ -134,12 +185,14 @@ class HtmlWiktionaryMarkup (HtmlWikiMarkup):
134 seq_pos = 0 185 seq_pos = 0
135 186
136 def str_seq(self, tok, env): 187 def str_seq(self, tok, env):
137 s = "" 188 str = ""
138 self.seq_pos=0 189 self.seq_pos=0
139 for t in tok[1:]: 190 for t in tok[1:]:
140 s += self.fmtok(t, env) 191 s = self.fmtok(t, env)
141 self.seq_pos += 1 192 if s:
142 return s 193 str += s
194 self.seq_pos += 1
195 return str
143 196
144 def str_tmpl(self, tok, env): 197 def str_tmpl(self, tok, env):
145 arg = self.fmtok(tok[1], env) 198 arg = self.fmtok(tok[1], env)
diff --git a/wiki2text.py b/wiki2text.py
index e943f32..3669bd7 100644
--- a/wiki2text.py
+++ b/wiki2text.py
@@ -136,6 +136,9 @@ class TextWikiMarkup (WikiMarkup):
136 return "" + self.indent(lev, 136 return "" + self.indent(lev,
137 "- " + self.fmtok(tok[1], env)) 137 "- " + self.fmtok(tok[1], env))
138 138
139 def str_para(self, tok, env):
140 return "\n"
141
139 def __str__(self): 142 def __str__(self):
140 return self.fmtok(self.tree, None) 143 return self.fmtok(self.tree, None)
141 144
diff --git a/wikimarkup.py b/wikimarkup.py
index d9ae7cc..e2a1cab 100644
--- a/wikimarkup.py
+++ b/wikimarkup.py
@@ -35,10 +35,11 @@ class BaseWikiMarkup:
35 """ 35 """
36A base class for handling Wiki markups. 36A base class for handling Wiki markups.
37It handles: 37It handles:
38 1. basic block markup (headers, numbered and unnumbered lists, 38 1. paragraphs;
39 2. basic block markup (headers, numbered and unnumbered lists,
39 indentations); 40 indentations);
40 2. basic inline markup (bold, italic); 41 3. basic inline markup (bold, italic);
41 3. basic reference markup (links, templates, external links). 42 4. basic reference markup (links, templates, external links).
42 It does NOT handle: 43 It does NOT handle:
43 1. pseudo-html markup (<nowiki></nowiki>, and similar); 44 1. pseudo-html markup (<nowiki></nowiki>, and similar);
44 2. leading spaces meaning ``preserve formatting''; 45 2. leading spaces meaning ``preserve formatting'';
@@ -90,6 +91,8 @@ It handles:
90 ITEM = 10 91 ITEM = 10
91 # Sequence: seq 92 # Sequence: seq
92 SEQ = 11 93 SEQ = 11
94 # Paragraph
95 PARA = 12
93 96
94 # Environment types: 97 # Environment types:
95 # Unnumbered list 98 # Unnumbered list
@@ -130,6 +133,10 @@ It handles:
130 self.putback(line) 133 self.putback(line)
131 break 134 break
132 135
136 if line == '\n':
137 yield(self.PARA,)
138 continue
139
133 m = eltbeg.match(line) 140 m = eltbeg.match(line)
134 if m: 141 if m:
135 if m.group(0)[0] in self.envtypes: 142 if m.group(0)[0] in self.envtypes:
@@ -247,8 +254,6 @@ It handles:
247 return toktype, self.expandtok(tok[1]) 254 return toktype, self.expandtok(tok[1])
248 elif toktype == self.HDR: 255 elif toktype == self.HDR:
249 return toktype, tok[1], self.expandtok(tok[2]) 256 return toktype, tok[1], self.expandtok(tok[2])
250 elif toktype == self.BAR:
251 return tok
252 elif toktype == self.ENV: 257 elif toktype == self.ENV:
253 return toktype,tok[1],tok[2],self.expandtok(tok[3]) 258 return toktype,tok[1],tok[2],self.expandtok(tok[3])
254 elif toktype == self.SEQ: 259 elif toktype == self.SEQ:
@@ -264,6 +269,8 @@ It handles:
264 subtree.append(x) 269 subtree.append(x)
265 return tuple(subtree) if len(subtree) > 2 else \ 270 return tuple(subtree) if len(subtree) > 2 else \
266 subtree[1] if len(subtree) == 2 else None 271 subtree[1] if len(subtree) == 2 else None
272 else:
273 return tok
267 274
268 def parse(self): 275 def parse(self):
269 tree = [self.SEQ] 276 tree = [self.SEQ]
@@ -314,6 +321,8 @@ It handles:
314 elif toktype == self.ITEM: 321 elif toktype == self.ITEM:
315 print "ITEM" 322 print "ITEM"
316 self.prtok(tok[1], indent+1) 323 self.prtok(tok[1], indent+1)
324 elif toktype == self.PARA:
325 print "PARA"
317 326
318 def output(self): 327 def output(self):
319 self.prtok(self.tree, 0) 328 self.prtok(self.tree, 0)
@@ -377,314 +386,316 @@ class WikiMarkup (BaseWikiMarkup):
377 386
378 # ISO 639 387 # ISO 639
379 langtab = { 388 langtab = {
380 "aa": "Afar", # Afar 389 "aa": "Afar", # Afar
381 "ab": "Аҧсуа", # Abkhazian 390 "ab": "Аҧсуа", # Abkhazian
382 "ae": None, # Avestan 391 "ae": None, # Avestan
383 "af": "Afrikaans", # Afrikaans 392 "af": "Afrikaans", # Afrikaans
384 "ak": "Akana", # Akan # or ak_CI 393 "ak": "Akana", # Akan
385 "als": "Alemannisch", 394 "als": "Alemannisch",
386 "am": "አማርኛ", # Amharic 395 "am": "አማርኛ", # Amharic
387 "an": "Aragonés", # Aragonese 396 "an": "Aragonés", # Aragonese
388 "ang": "Englisc", 397 "ang": "Englisc",
389 "ar": "العربية" , # Arabic 398 "ar": "العربية" , # Arabic
390 "arc": "ܐܪܡܝܐ", 399 "arc": "ܐܪܡܝܐ",
391 "as": "অসমীয়া", # Assamese 400 "as": "অসমীয়া", # Assamese
392 "ast": "Asturian", 401 "ast": "Asturian",
393 "av": "Авар", # Avaric # Spoken mainly in Dagestan 402 "av": "Авар", # Avaric
394 "ay": "Aymar", # Aymara 403 "ay": "Aymar", # Aymara
395 "az": "Azərbaycan" , # Azerbaijani 404 "az": "Azərbaycan" , # Azerbaijani
396 405
397 "ba": "Башҡорт", # Bashkir 406 "ba": "Башҡорт", # Bashkir
398 "bar": "Boarisch", 407 "bar": "Boarisch",
399 "bat-smg": "Žemaitėška", 408 "bat-smg": "Žemaitėška",
400 "bcl": "Bikol", 409 "bcl": "Bikol",
401 "be": "Беларуская", # Byelorussian; Belarusian 410 "be": "Беларуская", # Byelorussian; Belarusian
402 "be-x-old": "Беларуская (тарашкевіца)", 411 "be-x-old": "Беларуская (тарашкевіца)",
403 "bg": "Български", # Bulgarian 412 "bg": "Български", # Bulgarian
404 "bh": "भोजपुरी", # Bihari 413 "bh": "भोजपुरी", # Bihari
405 "bi": "Bislama", # Bislama 414 "bi": "Bislama", # Bislama
406 "bm": "Bamanankan", # Bambara 415 "bm": "Bamanankan", # Bambara
407 "bn": "বাংলা" , # Bengali; Bangla 416 "bn": "বাংলা" , # Bengali; Bangla
408 "bo": "བོད་སྐད", # Tibetan 417 "bo": "བོད་སྐད", # Tibetan
409 "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" , 418 "bpy": "ইমার ঠার/বিষ্ণুপ্রিয়া মণিপুরী" ,
410 "br": "Brezhoneg" , # Breton 419 "br": "Brezhoneg" , # Breton
411 "bs": "Bosanski" , # Bosnian 420 "bs": "Bosanski" , # Bosnian
412 "bug": "Basa Ugi", 421 "bug": "Basa Ugi",
413 "bxr": "Буряад", 422 "bxr": "Буряад",
414 423
415 "ca": "Català" , # Catalan 424 "ca": "Català" , # Catalan
416 "cbk-zam": "Chavacano de Zamboanga", 425 "cbk-zam": "Chavacano de Zamboanga",
417 "cdo": "Mìng-dĕ̤ng-ngṳ̄", 426 "cdo": "Mìng-dĕ̤ng-ngṳ̄",
418 "cho": "Choctaw", 427 "cho": "Choctaw",
419 "ce": "Нохчийн", # Chechen 428 "ce": "Нохчийн", # Chechen
420 "ceb": "Sinugboanong Binisaya" , # Cebuano 429 "ceb": "Sinugboanong Binisaya" , # Cebuano
421 "ch": "Chamor", # Chamorro 430 "ch": "Chamor", # Chamorro
422 "chr": "ᏣᎳᎩ", 431 "chr": "ᏣᎳᎩ",
423 "chy": "Tsetsêhestâhese", 432 "chy": "Tsetsêhestâhese",
424 "co": "Cors", # Corsican 433 "co": "Cors", # Corsican
425 "cr": "Nehiyaw", # Cree 434 "cr": "Nehiyaw", # Cree
426 "crh": "Qırımtatarca", 435 "crh": "Qırımtatarca",
427 "cs": "Česky" , # Czech 436 "cs": "Česky" , # Czech
428 "csb": "Kaszëbsczi", 437 "csb": "Kaszëbsczi",
429 "c": "Словѣньскъ", # Church Slavic 438 "c": "Словѣньскъ", # Church Slavic
430 "cv": "Чăваш", # Chuvash 439 "cv": "Чăваш", # Chuvash
431 "cy": "Cymraeg" , # Welsh 440 "cy": "Cymraeg" , # Welsh
432 441
433 "da": "Dansk" , # Danish 442 "da": "Dansk" , # Danish
434 "de": "Deutsch" , # German 443 "de": "Deutsch" , # German
435 "diq": "Zazaki", # Dimli (Southern Zazaki) 444 "diq": "Zazaki", # Dimli (Southern Zazaki)
436 "dsb": "Dolnoserbski", 445 "dsb": "Dolnoserbski",
437 "dv": "ދިވެހިބަސް", # Divehi 446 "dv": "ދިވެހިބަސް", # Divehi
438 "dz": "ཇོང་ཁ", # Dzongkha; Bhutani 447 "dz": "ཇོང་ཁ", # Dzongkha; Bhutani
439 448
440 "ee": "Eʋegbe", # Ewe 449 "ee": "Eʋegbe", # Ewe
441 "el": "Ελληνικά" , # Greek 450 "el": "Ελληνικά" , # Greek
442 "eml": "Emiliàn e rumagnòl", 451 "eml": "Emiliàn e rumagnòl",
443 "en": "English" , # English 452 "en": "English" , # English
444 "eo": "Esperanto" , 453 "eo": "Esperanto" ,
445 "es": "Español" , # Spanish 454 "es": "Español" , # Spanish
446 "et": "Eesti" , # Estonian 455 "et": "Eesti" , # Estonian
447 "e": "Euskara" , # Basque 456 "e": "Euskara" , # Basque
448 "ext": "Estremeñ", 457 "ext": "Estremeñ",
449 458
450 "fa": "فارسی" , # Persian 459 "fa": "فارسی" , # Persian
451 "ff": "Fulfulde", # Fulah # Also NG, MR, and many others 460 "ff": "Fulfulde", # Fulah
452 "fi": "Suomi" , # Finnish 461 "fi": "Suomi" , # Finnish
453 "fiu-vro": "Võro", 462 "fiu-vro": "Võro",
454 "fj": "Na Vosa Vakaviti", # Fijian; Fiji 463 "fj": "Na Vosa Vakaviti",# Fijian; Fiji
455 "fo": "Føroyskt" , # Faroese 464 "fo": "Føroyskt" , # Faroese
456 "fr": "Français" , # French 465 "fr": "Français" , # French
457 "frp": "Arpitan", 466 "frp": "Arpitan",
458 "fur": "Furlan", 467 "fur": "Furlan",
459 "fy": "Frysk", # Frisian 468 "fy": "Frysk", # Frisian
460 469
461 "ga": "Gaeilge", # Irish 470 "ga": "Gaeilge", # Irish
462 "gan": "贛語 (Gànyŭ)", 471 "gan": "贛語 (Gànyŭ)",
463 "gd": "Gàidhlig", # Scots; Gaelic 472 "gd": "Gàidhlig", # Scots; Gaelic
464 "gl": "Gallego" , # Gallegan; Galician 473 "gl": "Gallego" , # Gallegan; Galician
465 "glk": "گیلکی", 474 "glk": "گیلکی",
466 "got": "𐌲Œ„𐌹𐌺 ", 475 "got": "𐌲Œ„𐌹𐌺 ",
467 "gn": "Avañe'ẽ", # Guarani 476 "gn": "Avañe'ẽ", # Guarani
468 "g": "ગુજરાતી", # Gujarati 477 "g": "ગુજરાતી", # Gujarati
469 "gv": "Gaelg", # Manx 478 "gv": "Gaelg", # Manx
470 479
471 "ha": "هَوُسَ", # Hausa 480 "ha": "هَوُسَ", # Hausa
472 "hak": "Hak-kâ-fa / 客家話", 481 "hak": "Hak-kâ-fa / 客家話",
473 "haw": "Hawai`i", 482 "haw": "Hawai`i",
474 "he": "עברית" , # Hebrew (formerly iw) 483 "he": "עברית" , # Hebrew (formerly iw)
475 "hi": "हिन्दी" , # Hindi 484 "hi": "हिन्दी" , # Hindi
476 "hif": "Fiji Hindi", 485 "hif": "Fiji Hindi",
477 "ho": "Hiri Mot", # Hiri Motu 486 "ho": "Hiri Mot", # Hiri Motu
478 "hr": "Hrvatski" , # Croatian 487 "hr": "Hrvatski" , # Croatian
479 "hsb": "Hornjoserbsce", 488 "hsb": "Hornjoserbsce",
480 "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole 489 "ht": "Krèyol ayisyen" , # Haitian; Haitian Creole
481 "hu": "Magyar" , # Hungarian 490 "hu": "Magyar" , # Hungarian
482 "hy": "Հայերեն", # Armenian 491 "hy": "Հայերեն", # Armenian
483 "hz": "Otsiherero", # Herero 492 "hz": "Otsiherero", # Herero
484 493
485 "ia": "Interlingua", 494 "ia": "Interlingua",
486 "ie": "Interlingue", 495 "ie": "Interlingue",
487 "id": "Bahasa Indonesia", # Indonesian (formerly in) 496 "id": "Bahasa Indonesia",# Indonesian (formerly in)
488 "ig": "Igbo", # Igbo 497 "ig": "Igbo", # Igbo
489 "ii": "ꆇꉙ ", # Sichuan Yi 498 "ii": "ꆇꉙ ", # Sichuan Yi
490 "ik": "Iñupiak", # Inupiak 499 "ik": "Iñupiak", # Inupiak
491 "ilo": "Ilokano", 500 "ilo": "Ilokano",
492 "io": "Ido" , 501 "io": "Ido" ,
493 "is": "Íslenska" , # Icelandic 502 "is": "Íslenska" , # Icelandic
494 "it": "Italiano" , # Italian 503 "it": "Italiano" , # Italian
495 "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut 504 "i": "ᐃᓄᒃᑎᑐᑦ", # Inuktitut
496 505
497 "ja": "日本語", # Japanese 506 "ja": "日本語", # Japanese
498 "jbo": "Lojban", 507 "jbo": "Lojban",
499 "jv": "Basa Jawa", # Javanese 508 "jv": "Basa Jawa", # Javanese
500 509
501 "ka": "ქართული" , # Georgian 510 "ka": "ქართული" , # Georgian
502 "kaa": "Qaraqalpaqsha", 511 "kaa": "Qaraqalpaqsha",
503 "kab": "Taqbaylit", 512 "kab": "Taqbaylit",
504 "kg": "KiKongo", # Kongo # also CD and AO 513 "kg": "KiKongo", # Kongo
505 "ki": "Gĩkũyũ", # Kikuyu 514 "ki": "Gĩkũyũ", # Kikuyu
506 "kj": "Kuanyama", # Kuanyama 515 "kj": "Kuanyama", # Kuanyama
507 "kk": "Қазақша", # Kazakh 516 "kk": "Қазақша", # Kazakh
508 "kl": "Kalaallisut", # Kalaallisut; Greenlandic 517 "kl": "Kalaallisut", # Kalaallisut; Greenlandic
509 "km": "ភាសាខ្មែរ", # Khmer; Cambodian 518 "km": "ភាសាខ្មែរ", # Khmer; Cambodian
510 "kn": "ಕನ್ನಡ", # Kannada 519 "kn": "ಕನ್ನಡ", # Kannada
511 "ko": "한국어" , # Korean 520 "ko": "한국어" , # Korean
512 "kr": "Kanuri", # Kanuri 521 "kr": "Kanuri", # Kanuri
513 "ks": "कश्मीरी / كشميري", # Kashmiri 522 "ks": "कश्मीरी / كشميري", # Kashmiri
514 "ksh": "Ripoarisch", 523 "ksh": "Ripoarisch",
515 "ku": "Kurdî / كوردی", # Kurdish 524 "ku": "Kurdî / كوردی", # Kurdish
516 "kv": "Коми", # Komi 525 "kv": "Коми", # Komi
517 "kw": "Kernewek/Karnuack", # Cornish 526 "kw": "Kernewek/Karnuack", # Cornish
518 "ky": "Кыргызча", # Kirghiz 527 "ky": "Кыргызча", # Kirghiz
519 528
520 "la": "Latina" , # Latin 529 "la": "Latina" , # Latin
521 "lad": "Dzhudezmo", 530 "lad": "Dzhudezmo",
522 "lb": "Lëtzebuergesch" , # Letzeburgesch 531 "lb": "Lëtzebuergesch" , # Letzeburgesch
523 "lbe": "Лакку", 532 "lbe": "Лакку",
524 "lg": "Luganda", # Ganda 533 "lg": "Luganda", # Ganda
525 "li": "Limburgs", # Limburgish; Limburger; Limburgan 534 "li": "Limburgs", # Limburgish; Limburger; Limburgan
526 "lij": "Lígur", 535 "lij": "Lígur",
527 "ln": "Lingala", # Lingala 536 "ln": "Lingala", # Lingala
528 "lmo": "Lumbaart", 537 "lmo": "Lumbaart",
529 "lo": "ລາວ", # Lao; Laotian 538 "lo": "ລາວ", # Lao; Laotian
530 "lt": "Lietuvių" , # Lithuanian 539 "lt": "Lietuvių" , # Lithuanian
531 "l": None, # Luba-Katanga 540 "lua": "Luba", # Luba
532 "lv": "Latvieš" , # Latvian; Lettish 541 "lv": "Latvieš" , # Latvian; Lettish
533 542
534 "map-bms": "Basa Banyumasan", 543 "map-bms": "Basa Banyumasan",
535 "mdf": "Мокшень (Mokshanj Kälj)", 544 "mdf": "Мокшень (Mokshanj Kälj)",
536 "mg": "Malagasy", # Malagasy 545 "mg": "Malagasy", # Malagasy
537 "mh": "Ebon", # Marshall 546 "mh": "Ebon", # Marshall
538 "mi": "Māori", # Maori 547 "mi": "Māori", # Maori
539 "mk": "Македонски" , # Macedonian 548 "mk": "Македонски" , # Macedonian
540 "ml": None, # Malayalam 549 "ml": None, # Malayalam
541 "mn": "Монгол", # Mongolian 550 "mn": "Монгол", # Mongolian
542 "mo": "Молдовеняскэ", # Moldavian 551 "mo": "Молдовеняскэ", # Moldavian
543 "mr": "मराठी" , # Marathi 552 "mr": "मराठी" , # Marathi
544 "ms": "Bahasa Melay" , # Malay 553 "ms": "Bahasa Melay" , # Malay
545 "mt": "Malti", # Maltese 554 "mt": "Malti", # Maltese
546 "mus": "Muskogee", 555 "mus": "Muskogee",
547 "my": "မ္ရန္‌မာစာ", # Burmese 556 "my": "မ္ရန္‌မာစာ", # Burmese
548 "myv": "Эрзянь (Erzjanj Kelj)", 557 "myv": "Эрзянь (Erzjanj Kelj)",
549 "mzn": "مَزِروني", 558 "mzn": "مَزِروني",
550 559
551 "na": "dorerin Naoero", # Nauru 560 "na": "dorerin Naoero", # Nauru
552 "nah": "Nāhuatl", 561 "nah": "Nāhuatl",
553 "nap": "Nnapulitano", 562 "nap": "Nnapulitano",
554 "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l 563 "nb": "Norsk (Bokmål)", # Norwegian Bokm@aa{}l
555 "nd": None,# Ndebele, North 564 "nd": None, # Ndebele, North
556 "nds": "Plattdüütsch", 565 "nds": "Plattdüütsch",
557 "nds-nl": "Nedersaksisch", 566 "nds-nl": "Nedersaksisch",
558 "ne": "नेपाली", # Nepali 567 "ne": "नेपाली", # Nepali
559 "new": "नेपाल भाषा" , # Nepal Bhasa 568 "new": "नेपाल भाषा" , # Nepal Bhasa
560 "ng": "Oshiwambo", # Ndonga 569 "ng": "Oshiwambo", # Ndonga
561 "nl": "Nederlands" , # Dutch 570 "nl": "Nederlands" , # Dutch
562 "nn": "Nynorsk", # Norwegian Nynorsk 571 "nn": "Nynorsk", # Norwegian Nynorsk
563 "no": "Norsk (Bokmål)" , # Norwegian 572 "no": "Norsk (Bokmål)" , # Norwegian
564 "nov": "Novial", 573 "nov": "Novial",
565 "nr": None, # Ndebele, South 574 "nr": None, # Ndebele, South
566 "nrm": "Nouormand/Normaund", 575 "nrm": "Nouormand/Normaund",
567 "nv": "Diné bizaad", # Navajo 576 "nv": "Diné bizaad", # Navajo
568 "ny": "Chi-Chewa", # Chichewa; Nyanja 577 "ny": "Chi-Chewa", # Chichewa; Nyanja
569 578
570 "oc": "Occitan", # Occitan; Proven@,{c}al 579 "oc": "Occitan", # Occitan; Proven@,{c}al
571 "oj": None, # Ojibwa 580 "oj": None, # Ojibwa
572 "om": "Oromoo", # (Afan) Oromo 581 "om": "Oromoo", # (Afan) Oromo
573 "or": "ଓଡ଼ିଆ", # Oriya 582 "or": "ଓଡ଼ିଆ", # Oriya
574 "os": "Иронау", # Ossetian; Ossetic 583 "os": "Иронау", # Ossetian; Ossetic
575 584
576 "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi 585 "pa": "ਪੰਜਾਬੀ" , # Panjabi; Punjabi
577 "pag": "Pangasinan", 586 "pag": "Pangasinan",
578 "pam": "Kapampangan", 587 "pam": "Kapampangan",
579 "pap": "Papiament", 588 "pap": "Papiament",
580 "pdc": "Deitsch", 589 "pdc": "Deitsch",
581 "pi": "पाऴि", # Pali 590 "pi": "पाऴि", # Pali
582 "pih": "Norfuk", 591 "pih": "Norfuk",
583 "pl": "Polski" , # Polish 592 "pl": "Polski" , # Polish
584 "pms": "Piemontèis" , 593 "pms": "Piemontèis" ,
585 "ps": "پښتو", # Pashto, Pushto 594 "ps": "پښتو", # Pashto, Pushto
586 "pt": "Português" , # Portuguese 595 "pt": "Português" , # Portuguese
587 596
588 "q": "Runa Simi" , # Quechua 597 "q": "Runa Simi" , # Quechua
589 598
590 "rm": "Rumantsch", # Rhaeto-Romance 599 "rm": "Rumantsch", # Rhaeto-Romance
591 "rmy": "romani - रोमानी", 600 "rmy": "romani - रोमानी",
592 "rn": "Kirundi", # Rundi; Kirundi 601 "rn": "Kirundi", # Rundi; Kirundi
593 "ro": "Română" , # Romanian 602 "ro": "Română" , # Romanian
594 "roa-rup": "Armãneashce", 603 "roa-rup": "Armãneashce",
595 "roa-tara": "Tarandíne", 604 "roa-tara": "Tarandíne",
596 "ru": "Русский" , # Russian 605 "ru": "Русский" , # Russian
597 "rw": "Ikinyarwanda", # Kinyarwanda 606 "rw": "Ikinyarwanda", # Kinyarwanda
598 607
599 "sa": "संस्कृतम्", # Sanskrit 608 "sa": "संस्कृतम्", # Sanskrit
600 "sah": "Саха тыла (Saxa Tyla)", 609 "sah": "Саха тыла (Saxa Tyla)",
601 "sc": "Sard", # Sardinian 610 "sc": "Sardu", # Sardinian
602 "scn": "Sicilian", 611 "scn": "Sicilian",
603 "sco": "Scots", 612 "sco": "Scots",
604 "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi 613 "sd": "سنڌي، سندھی ، सिन्ध", # Sindhi
605 "se": "Sámegiella", # Northern Sami 614 "se": "Sámegiella", # Northern Sami
606 "sg": "Sängö", # Sango; Sangro 615 "sg": "Sängö", # Sango; Sangro
607 "sh": "Srpskohrvatski / Српскохрватски" , 616 "sh": "Srpskohrvatski / Српскохрватски" ,
608 "si": "සිංහල", 617 "si": "සිංහල",
609 "simple": "Simple English" , 618 "simple": "Simple English" ,
610 "sk": "Slovenčina" , # Slovak 619 "sk": "Slovenčina" , # Slovak
611 "sl": "Slovenščina" , # Slovenian 620 "sl": "Slovenščina" , # Slovenian
612 "sm": "Gagana Samoa", # Samoan 621 "sm": "Gagana Samoa", # Samoan
613 "sn": "chiShona", # Shona 622 "sn": "chiShona", # Shona
614 "so": "Soomaaliga", # Somali 623 "so": "Soomaaliga", # Somali
615 "sr": "Српски / Srpski" , # Serbian 624 "sr": "Српски / Srpski", # Serbian
616 "srn": "Sranantongo", 625 "srn": "Sranantongo",
617 "ss": "SiSwati", # Swati; Siswati 626 "ss": "SiSwati", # Swati; Siswati
618 "st": "Sesotho", # Sesotho; Sotho, Southern 627 "st": "Sesotho", # Sesotho; Sotho, Southern
619 "stk": "Seeltersk", 628 "stk": "Seeltersk",
620 "s": "Basa Sunda", # Sundanese 629 "s": "Basa Sunda", # Sundanese
621 "sq": "Shqip" , # Albanian 630 "sq": "Shqip" , # Albanian
622 "szl": "Ślůnski", 631 "szl": "Ślůnski",
623 "sv": "Svenska" , # Swedish 632 "sv": "Svenska" , # Swedish
624 "sw": "Kiswahili", # Swahili # Also KE 633 "sw": "Kiswahili", # Swahili
625 634
626 "ta": "தமிழ்" , # Tamil 635 "ta": "தமிழ்" , # Tamil
627 "te": "తెలుగు" , # Telugu 636 "te": "తెలుగు" , # Telugu
628 "tet": "Tetun", 637 "tet": "Tetun",
629 "tg": "Тоҷикӣ", # Tajik 638 "tg": "Тоҷикӣ", # Tajik
630 "th": "ไทย" , # Thai 639 "th": "ไทย" , # Thai
631 "ti": "ትግርኛ", # Tigrinya 640 "ti": "ትግርኛ", # Tigrinya
632 "tk": "تركمن / Туркмен", # Turkmen 641 "tk": "تركمن / Туркмен", # Turkmen
633 "tl": "Tagalog" , # Tagalog 642 "tl": "Tagalog" , # Tagalog
634 "tn": "Setswana", # Tswana; Setswana 643 "tn": "Setswana", # Tswana; Setswana
635 "to": "faka Tonga", # Tonga (?) # Also ZW ; MW 644 "to": "faka Tonga", # Tonga (?) # Also ZW ; MW
636 "tokipona": "Tokipona", 645 "tokipona": "Tokipona",
637 "tpi": "Tok Pisin", 646 "tpi": "Tok Pisin",
638 "tr": "Türkçe" , # Turkish 647 "tr": "Türkçe" , # Turkish
639 "ts": "Xitsonga", # Tsonga # ZA SZ XW 648 "ts": "Xitsonga", # Tsonga
640 "tt": "Tatarça / Татарча", # Tatar 649 "tt": "Tatarça / Татарча", # Tatar
641 "tum": "chiTumbuka", 650 "tum": "chiTumbuka",
642 "tw": "Twi", # Twi 651 "tw": "Twi", # Twi
643 "ty": "Reo Mā`ohi", # Tahitian 652 "ty": "Reo Mā`ohi", # Tahitian
644 653
645 "udm": "Удмурт кыл", 654 "udm": "Удмурт кыл",
646 "ug": "Oyghurque", # Uighur 655 "ug": "Oyghurque", # Uighur
647 "uk": "Українська" , # Ukrainian 656 "uk": "Українська" , # Ukrainian
648 "ur": "اردو", # Urdu 657 "ur": "اردو", # Urdu
649 "uz": "O‘zbek", # Uzbek 658 "uz": "O‘zbek", # Uzbek
650 659
651 "ve": "Tshivenda", # Venda 660 "ve": "Tshivenda", # Venda
652 "vec": "Vèneto", 661 "vec": "Vèneto",
653 "vi": "Tiếng Việt" , # Vietnamese 662 "vi": "Tiếng Việt" , # Vietnamese
654 "vls": "West-Vlams", 663 "vls": "West-Vlams",
655 "vo": "Volapük" , 664 "vo": "Volapük" ,
656 665
657 "wa": "Walon", # Walloon 666 "wa": "Walon", # Walloon
658 "war": "Winaray", 667 "war": "Winaray",
659 "wo": "Wolof", # Wolof 668 "wo": "Wolof", # Wolof
660 "w": "吴语", 669 "w": "吴语",
661 670
662 "xal": "Хальмг", 671 "xal": "Хальмг",
663 "xh": "isiXhosa", # Xhosa 672 "xh": "isiXhosa", # Xhosa
664 673
665 "yi": "ייִדיש", # Yiddish (formerly ji) 674 "yi": "ייִדיש", # Yiddish
666 "yo": "Yorùbá", # Yoruba 675 "yo": "Yorùbá", # Yoruba
667 676
668 "za": "Cuengh", # Zhuang 677 "za": "Cuengh", # Zhuang
669 "zea": "Zeêuws", 678 "zea": "Zeêuws",
670 "zh": "中文" , # Chinese 679 "zh": "中文" , # Chinese
671 "zh-classical": "古文 / 文言文", 680 "zh-classical": "古文 / 文言文",
672 "zm-min-nan": "Bân-lâm-gú", 681 "zm-min-nan": "Bân-lâm-gú",
673 "zh-yue": "粵語", 682 "zh-yue": "粵語",
674 "zu": "isiZulu" # Zulu 683 "zu": "isiZulu" # Zulu
675 } 684 }
676 685
677 def str_nil(self, tok, env): 686 def str_nil(self, tok, env):
678 return "" 687 return None
679 688
680 def str_text(self, tok, env): 689 def str_text(self, tok, env):
681 return tok[1] 690 return tok[1]
682 691
683 def str_seq(self, tok, env): 692 def str_seq(self, tok, env):
684 s = "" 693 str = ""
685 for t in tok[1:]: 694 for t in tok[1:]:
686 s += self.fmtok(t, env) 695 s = self.fmtok(t, env)
687 return s 696 if s:
697 str += s
698 return str
688 699
689 def fmtok(self, tok, env): 700 def fmtok(self, tok, env):
690 if type(tok) != TupleType: 701 if type(tok) != TupleType:
@@ -714,6 +725,8 @@ class WikiMarkup (BaseWikiMarkup):
714 return self.str_item(tok, env) 725 return self.str_item(tok, env)
715 elif toktype == self.SEQ: 726 elif toktype == self.SEQ:
716 return self.str_seq(tok, env) 727 return self.str_seq(tok, env)
728 elif toktype == self.PARA:
729 return self.str_para(tok, env)
717 730
718 def __str__(self): 731 def __str__(self):
719 return self.fmtok(self.tree, None) 732 return self.fmtok(self.tree, None)

Return to:

Send suggestions and report system problems to the System administrator.