summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2018-08-06 23:38:37 +0300
committerSergey Poznyakoff <gray@gnu.org>2018-08-06 23:38:37 +0300
commitad2bfbae4f654253fcaab12534c910ba14093e92 (patch)
treefe7626eabae6d71921e4cb7ceaa97f6ae0676ed9
parenteb81aad9d5c9f42b480a52c22ea7e26e0ea5d4c2 (diff)
downloadwikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.gz
wikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.bz2
Write README.rst
-rw-r--r--README.rst168
-rw-r--r--WikiTrans/wiki2html.py4
-rw-r--r--WikiTrans/wiki2texi.py8
-rw-r--r--WikiTrans/wiki2text.py12
4 files changed, 178 insertions, 14 deletions
diff --git a/README.rst b/README.rst
index 587526a..40e3d5d 100644
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,169 @@
1MediaWiki Markup Translator 1MediaWiki Markup Translator
2=========================== 2===========================
3FIXME 3This package provides a Python framework for translating WikiMedia
4articles to various formats. The present version supports
5conversions to plain text, HTML, and Texinfo formats.
6
7A command line convertor utility is included.
8
9Classes
10=======
11
12class ``WikiMarkup``
13--------------------
14A base class for all translator classes. Unless you plan to extend
15wikitrans, you will never have to create objects of this
16class. Instead, you will be using one of its derived classes.
17
18Constructor arguments common for all derived classes:
19
20filename = *name*
21 The file *name* is opened and used for input.
22file = *fd*
23 An already opened file *fd* is used for input.
24text = *string*
25 Input is taken from *string*, line by line.
26
27lang = *code*
28 Specifies language version. Default is ``en``. This variable can be
29 referred to as ``%(lang)s`` in the keyword arguments below.
30html_base = *url*
31 Base URL for cross-references. Default is
32 ``http://%(lang)s.wiktionary.org/wiki/``.
33image_base = *url*
34 Base URL for images. Default is
35 ``http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf``
36media_base = *url*
37 Base URL for media files. Default is
38 ``http://www.mediawiki.org/xml/export-0.3``
39
40
41class ``TextWikiMarkup``
42------------------------
43Translates material in Wiki markup language to plain text. Usage::
44
45 from WikiTrans.wiki2text import TextWikiMarkup
46
47 markup = TextWikiMarkup(filename='input.txt')
48 markup.parse()
49 print(str(markup))
50
51Specific constructor arguments:
52
53width = *N*
54 Limit output width to *N* columns. Default is 78.
55show_urls = *bool*
56 Whether or not to show the URLs links refer to. If *bool* is
57 ``True`` (the default), a URL will be displayed in parentheses next
58 to the link text. If ``False``, only the link text will be displayed.
59
60class ``TextWiktionaryMarkup``
61------------------------------
62Translate material from wiktionary to plain text form. This is
63supposed to provide a wiktionary-specific form of
64``TextWikiMarkup``. Currently both classes are entirely equivalent.
65
66class ``TexiWikiMarkup``
67------------------------
68Translate Wiki markup to Texinfo source. Usage::
69
70 from WikiTrans.wiki2texi import TexiWikiMarkup
71
72 markup = TexiWikiMarkup(filename='input.txt')
73 markup.parse()
74 print(str(markup))
75
76Two markup-specific keywords control the sectioning model used.
77
78sectioning_model = *model*
79 Selects the Texinfo sectioning model for the output
80 document. Possible values are:
81
82 ``numbered``
83 Top of document is marked with ``@top``. Headings (``=``, ``==``,
84 ``===``, etc) produce ``@chapter``,
85 ``@section``, ``@subsection``, etc.
86 ``unnumbered``
87 Unnumbered sectioning: ``@top``, ``@unnumbered``, ``@unnumberedsec``,
88 ``@unnumberedsubsec``.
89 ``appendix``
90 Sectioning suitable for appendix entries: ``@top``, ``@appendix``,
91 ``@appendixsec``, ``@appendixsubsec``, etc.
92 ``heading``
93 Use heading directives to reflect sectioning: ``@majorheading``,
94 ``@chapheading``, ``@heading``, ``@subheading``, etc.
95
96sectioning_start = *n*
97 Shift resulting heading level by *n* positions. For example, supposing
98 ``sectioning_model=numbered``, ``== A ==`` will produce ``@section
99 A`` on output. If ``sectioning_start=1`` is also given, this
100 directive will produce ``@subsection A`` instead.
101
102class ``HtmlWikiMarkup``
103------------------------
104Translates Wiki markup to HTML. Usage::
105
106 from WikiTrans.wiki2html import HtmlWikiMarkup
107
108 markup = HtmlWikiMarkup(filename='input.txt')
109 markup.parse()
110 print(str(markup))
111
112Supported keywords are the same as for the ``WikiMarkup`` class.
113
114class ``HtmlWiktionaryMarkup``
115------------------------------
116Translate material from wiktionary to HTML form. This is
117supposed to provide a wiktionary-specific form of
118``HtmlWikiMarkup``. Currently both classes are equivalent.
119
120The ``wikitrans`` utility
121=========================
122This command line utility converts the supplied text to a selected
123output format. The usage syntax is::
124
125 wikitrans [OPTIONS] ARG
126
127If ARG looks like a URL, the wiki text to be converted will be
128downloaded from that URL.
129
130Otherwise, if the ``--base-url=URL`` option is given, ARG is treated as
131the name of the page to get from the WikiMedia installation at ``URL``.
132
133Otherwise, ARG is treated as the name of the file to read wiki
134material from.
135
136Examples::
137
138 wikitrans text.wiki
139
140 wikitrans --base-url http://en.wiktionary.org door
141
142 wikitrans https://en.wiktionary.org/wiki/Special:Export/door
143
144Options are:
145
146``--version``
147 Show program's version number and exit.
148``-h``, ``--help``
149 Show a short usage summary and exit.
150``-v``, ``--verbose``
151 Verbose operation.
152``-I ITYPE``, ``--input-type=ITYPE``
153 Set input document type. *ITYPE* is one of: ``default`` or ``wiktionary``.
154``-t OTYPE``, ``--to=OTYPE``, ``--type=OTYPE``
155 Set output document type (``html`` (the default), ``texi``,
156 ``text``, or ``dump``).
157``-l LANG``, ``--lang=LANG``
158 Set input document language.
159``-o KW=VAL``, ``--option=KW=VAL``
160 Pass the keyword argument ``KW=VAL`` to the parser class constructor.
161``-d DEBUG``, ``--debug=DEBUG``
162 Set debug level (0..100).
163``-D``, ``--dump``
164 Dump parse tree and exit; same as ``--type=dump``.
165``-b URL``, ``--base-url=URL``
166 Set base url.
167
168
169
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 00f02b5..6147642 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -111,8 +111,8 @@ class HtmlTextNode(HtmlSeqNode):
111class HtmlHdrNode(WikiHdrNode): 111class HtmlHdrNode(WikiHdrNode):
112 def format(self): 112 def format(self):
113 level = self.level 113 level = self.level
114 if level > 4: 114 if level > 6:
115 level = 4 115 level = 6
116 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level) 116 return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
117 117
118class HtmlBarNode(WikiNode): 118class HtmlBarNode(WikiNode):
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index 39c70c6..7297195 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -303,14 +303,14 @@ class TexiWikiMarkup (WikiMarkup):
303 self.token_class['LINK'] = TexiLinkNode 303 self.token_class['LINK'] = TexiLinkNode
304 self.token_class['REF'] = TexiRefNode 304 self.token_class['REF'] = TexiRefNode
305 305
306 if "sectioning-model" in keywords: 306 if "sectioning_model" in keywords:
307 val = keywords["sectioning-model"] 307 val = keywords["sectioning_model"]
308 if val in self.sectcomm: 308 if val in self.sectcomm:
309 self.sectioning_model = val 309 self.sectioning_model = val
310 else: 310 else:
311 raise ValueError("Invalid value for sectioning model: %s" % val) 311 raise ValueError("Invalid value for sectioning model: %s" % val)
312 if "sectioning-start" in keywords: 312 if "sectioning_start" in keywords:
313 val = keywords["sectioning-start"] 313 val = keywords["sectioning_start"]
314 if val < 0 or val > 4: 314 if val < 0 or val > 4:
315 raise ValueError("Invalid value for sectioning start: %s" % val) 315 raise ValueError("Invalid value for sectioning start: %s" % val)
316 else: 316 else:
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index dc2e003..cb3a183 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -104,7 +104,7 @@ class TextLinkNode(WikiSeqNode):
104 ns = self.parser.wiki_ns_name(qual) 104 ns = self.parser.wiki_ns_name(qual)
105 if ns: 105 if ns:
106 if ns == 'NS_IMAGE': 106 if ns == 'NS_IMAGE':
107 if not self.parser.showrefs: 107 if not self.parser.show_urls:
108 return "" 108 return ""
109 text = "[%s: %s]" % (qual, text if text else arg) 109 text = "[%s: %s]" % (qual, text if text else arg)
110 tgt = self.image_base + '/' + \ 110 tgt = self.image_base + '/' + \
@@ -121,7 +121,7 @@ class TextLinkNode(WikiSeqNode):
121 tgt = self.parser.mktgt(tgt) 121 tgt = self.parser.mktgt(tgt)
122 else: 122 else: