diff options
author | Sergey Poznyakoff <gray@gnu.org> | 2018-08-06 23:38:37 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org> | 2018-08-06 23:38:37 +0300 |
commit | ad2bfbae4f654253fcaab12534c910ba14093e92 (patch) | |
tree | fe7626eabae6d71921e4cb7ceaa97f6ae0676ed9 | |
parent | eb81aad9d5c9f42b480a52c22ea7e26e0ea5d4c2 (diff) | |
download | wikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.gz wikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.bz2 |
Write README.rst
-rw-r--r-- | README.rst | 168 | ||||
-rw-r--r-- | WikiTrans/wiki2html.py | 4 | ||||
-rw-r--r-- | WikiTrans/wiki2texi.py | 8 | ||||
-rw-r--r-- | WikiTrans/wiki2text.py | 12 |
4 files changed, 178 insertions, 14 deletions
@@ -1,3 +1,169 @@ | |||
1 | MediaWiki Markup Translator | 1 | MediaWiki Markup Translator |
2 | =========================== | 2 | =========================== |
3 | FIXME | 3 | This package provides a Python framework for translating MediaWiki |
4 | articles to various formats. The present version supports | ||
5 | conversions to plain text, HTML, and Texinfo formats. | ||
6 | |||
7 | A command line converter utility is included. | ||
8 | |||
9 | Classes | ||
10 | ======= | ||
11 | |||
12 | class ``WikiMarkup`` | ||
13 | -------------------- | ||
14 | A base class for all translator classes. Unless you plan to extend | ||
15 | wikitrans, you will never have to create objects of this | ||
16 | class. Instead, you will be using one of its derived classes. | ||
17 | |||
18 | Constructor arguments common to all derived classes: | ||
19 | |||
20 | filename = *name* | ||
21 | The file *name* is opened and used for input. | ||
22 | file = *fd* | ||
23 | An already opened file *fd* is used for input. | ||
24 | text = *string* | ||
25 | Input is taken from *string*, line by line. | ||
26 | |||
27 | lang = *code* | ||
28 | Specifies language version. Default is ``en``. This variable can be | ||
29 | referred to as ``%(lang)s`` in the keyword arguments below. | ||
30 | html_base = *url* | ||
31 | Base URL for cross-references. Default is | ||
32 | ``http://%(lang)s.wiktionary.org/wiki/``. | ||
33 | image_base = *url* | ||
34 | Base URL for images. Default is | ||
35 | ``http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf`` | ||
36 | media_base = *url* | ||
37 | Base URL for media files. Default is | ||
38 | ``http://www.mediawiki.org/xml/export-0.3`` | ||
39 | |||
40 | |||
41 | class ``TextWikiMarkup`` | ||
42 | ------------------------ | ||
43 | Translates material in Wiki markup language to plain text. Usage:: | ||
44 | |||
45 | from WikiTrans.wiki2text import TextWikiMarkup | ||
46 | |||
47 | markup = TextWikiMarkup(filename='input.txt') | ||
48 | markup.parse() | ||
49 | print(str(markup)) | ||
50 | |||
51 | Specific constructor arguments: | ||
52 | |||
53 | width = *N* | ||
54 | Limit output width to *N* columns. Default is 78. | ||
55 | show_urls = *bool* | ||
56 | Whether or not to show the URLs links refer to. If *bool* is | ||
57 | ``True`` (the default), a URL will be displayed in parentheses next | ||
58 | to the link text. If ``False``, only the link text will be displayed. | ||
59 | |||
60 | class ``TextWiktionaryMarkup`` | ||
61 | ------------------------------ | ||
62 | Translate material from wiktionary to plain text form. This is | ||
63 | supposed to provide a wiktionary-specific form of | ||
64 | ``TextWikiMarkup``. Currently both classes are entirely equivalent. | ||
65 | |||
66 | class ``TexiWikiMarkup`` | ||
67 | ------------------------ | ||
68 | Translate Wiki markup to Texinfo source. Usage:: | ||
69 | |||
70 | from WikiTrans.wiki2texi import TexiWikiMarkup | ||
71 | |||
72 | markup = TexiWikiMarkup(filename='input.txt') | ||
73 | markup.parse() | ||
74 | print(str(markup)) | ||
75 | |||
76 | Two markup-specific keywords control the sectioning model used. | ||
77 | |||
78 | sectioning_model = *model* | ||
79 | Selects the Texinfo sectioning model for the output | ||
80 | document. Possible values are: | ||
81 | |||
82 | ``numbered`` | ||
83 | Top of document is marked with ``@top``. Headings (``=``, ``==``, | ||
84 | ``===``, etc) produce ``@chapter``, | ||
85 | ``@section``, ``@subsection``, etc. | ||
86 | ``unnumbered`` | ||
87 | Unnumbered sectioning: ``@top``, ``@unnumbered``, ``@unnumberedsec``, | ||
88 | ``@unnumberedsubsec``. | ||
89 | ``appendix`` | ||
90 | Sectioning suitable for appendix entries: ``@top``, ``@appendix``, | ||
91 | ``@appendixsec``, ``@appendixsubsec``, etc. | ||
92 | ``heading`` | ||
93 | Use heading directives to reflect sectioning: ``@majorheading``, | ||
94 | ``@chapheading``, ``@heading``, ``@subheading``, etc. | ||
95 | |||
96 | sectioning_start = *n* | ||
97 | Shift resulting heading level by *n* positions. For example, supposing | ||
98 | ``sectioning_model=numbered``, ``== A ==`` will produce ``@section | ||
99 | A`` on output. If ``sectioning_start=1`` is also given, this | ||
100 | directive will produce ``@subsection A`` instead. | ||
101 | |||
102 | class ``HtmlWikiMarkup`` | ||
103 | ------------------------ | ||
104 | Translates Wiki markup to HTML. Usage:: | ||
105 | |||
106 | from WikiTrans.wiki2html import HtmlWikiMarkup | ||
107 | |||
108 | markup = HtmlWikiMarkup(filename='input.txt') | ||
109 | markup.parse() | ||
110 | print(str(markup)) | ||
111 | |||
112 | Supported keywords are the same as for the ``WikiMarkup`` class. | ||
113 | |||
114 | class ``HtmlWiktionaryMarkup`` | ||
115 | ------------------------------ | ||
116 | Translate material from wiktionary to HTML form. This is | ||
117 | supposed to provide a wiktionary-specific form of | ||
118 | ``HtmlWikiMarkup``. Currently both classes are equivalent. | ||
119 | |||
120 | The ``wikitrans`` utility | ||
121 | ========================= | ||
122 | This command line utility converts the supplied text to a selected | ||
123 | output format. The usage syntax is:: | ||
124 | |||
125 | wikitrans [OPTIONS] ARG | ||
126 | |||
127 | If ARG looks like a URL, the wiki text to be converted will be | ||
128 | downloaded from that URL. | ||
129 | |||
130 | Otherwise, if the ``--base-url=URL`` option is given, ARG is treated as | ||
131 | the name of the page to get from the MediaWiki installation at ``URL``. | ||
132 | |||
133 | Otherwise, ARG is treated as the name of the file to read wiki | ||
134 | material from. | ||
135 | |||
136 | Examples:: | ||
137 | |||
138 | wikitrans text.wiki | ||
139 | |||
140 | wikitrans --base-url http://en.wiktionary.org door | ||
141 | |||
142 | wikitrans https://en.wiktionary.org/wiki/Special:Export/door | ||
143 | |||
144 | Options are: | ||
145 | |||
146 | ``--version`` | ||
147 | Show program's version number and exit. | ||
148 | ``-h``, ``--help`` | ||
149 | Show a short usage summary and exit. | ||
150 | ``-v``, ``--verbose`` | ||
151 | Verbose operation. | ||
152 | ``-I ITYPE``, ``--input-type=ITYPE`` | ||
153 | Set input document type. *ITYPE* is one of: ``default`` or ``wiktionary``. | ||
154 | ``-t OTYPE``, ``--to=OTYPE``, ``--type=OTYPE`` | ||
155 | Set output document type (``html`` (the default), ``texi``, | ||
156 | ``text``, or ``dump``). | ||
157 | ``-l LANG``, ``--lang=LANG`` | ||
158 | Set input document language. | ||
159 | ``-o KW=VAL``, ``--option=KW=VAL`` | ||
160 | Pass the keyword argument ``KW=VAL`` to the parser class constructor. | ||
161 | ``-d DEBUG``, ``--debug=DEBUG`` | ||
162 | Set debug level (0..100). | ||
163 | ``-D``, ``--dump`` | ||
164 | Dump parse tree and exit; same as ``--type=dump``. | ||
165 | ``-b URL``, ``--base-url=URL`` | ||
166 | Set base URL. | ||
167 | |||
168 | |||
169 | |||
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py index 00f02b5..6147642 100644 --- a/WikiTrans/wiki2html.py +++ b/WikiTrans/wiki2html.py | |||
@@ -111,8 +111,8 @@ class HtmlTextNode(HtmlSeqNode): | |||
111 | class HtmlHdrNode(WikiHdrNode): | 111 | class HtmlHdrNode(WikiHdrNode): |
112 | def format(self): | 112 | def format(self): |
113 | level = self.level | 113 | level = self.level |
114 | if level > 4: | 114 | if level > 6: |
115 | level = 4 | 115 | level = 6 |
116 | return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) | 116 | return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) |
117 | 117 | ||
118 | class HtmlBarNode(WikiNode): | 118 | class HtmlBarNode(WikiNode): |
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py index 39c70c6..7297195 100644 --- a/WikiTrans/wiki2texi.py +++ b/WikiTrans/wiki2texi.py | |||
@@ -303,14 +303,14 @@ class TexiWikiMarkup (WikiMarkup): | |||
303 | self.token_class['LINK'] = TexiLinkNode | 303 | self.token_class['LINK'] = TexiLinkNode |
304 | self.token_class['REF'] = TexiRefNode | 304 | self.token_class['REF'] = TexiRefNode |
305 | 305 | ||
306 | if "sectioning-model" in keywords: | 306 | if "sectioning_model" in keywords: |
307 | val = keywords["sectioning-model"] | 307 | val = keywords["sectioning_model"] |
308 | if val in self.sectcomm: | 308 | if val in self.sectcomm: |
309 | self.sectioning_model = val | 309 | self.sectioning_model = val |
310 | else: | 310 | else: |
311 | raise ValueError("Invalid value for sectioning model: %s" % val) | 311 | raise ValueError("Invalid value for sectioning model: %s" % val) |
312 | if "sectioning-start" in keywords: | 312 | if "sectioning_start" in keywords: |
313 | val = keywords["sectioning-start"] | 313 | val = keywords["sectioning_start"] |
314 | if val < 0 or val > 4: | 314 | if val < 0 or val > 4: |
315 | raise ValueError("Invalid value for sectioning start: %s" % val) | 315 | raise ValueError("Invalid value for sectioning start: %s" % val) |
316 | else: | 316 | else: |
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py index dc2e003..cb3a183 100644 --- a/WikiTrans/wiki2text.py +++ b/WikiTrans/wiki2text.py | |||
@@ -104,7 +104,7 @@ class TextLinkNode(WikiSeqNode): | |||
104 | ns = self.parser.wiki_ns_name(qual) | 104 | ns = self.parser.wiki_ns_name(qual) |
105 | if ns: | 105 | if ns: |
106 | if ns == 'NS_IMAGE': | 106 | if ns == 'NS_IMAGE': |
107 | if not self.parser.showrefs: | 107 | if not self.parser.show_urls: |
108 | return "" | 108 | return "" |
109 | text = "[%s: %s]" % (qual, text if text else arg) | 109 | text = "[%s: %s]" % (qual, text if text else arg) |
110 | tgt = self.image_base + '/' + \ | 110 | tgt = self.image_base + '/' + \ |
@@ -121,7 +121,7 @@ class TextLinkNode(WikiSeqNode): | |||
121 | tgt = self.parser.mktgt(tgt) | 121 | tgt = self.parser.mktgt(tgt) |
122 | else: | 122 | else: |