summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org>2018-08-06 23:38:37 +0300
committerSergey Poznyakoff <gray@gnu.org>2018-08-06 23:38:37 +0300
commitad2bfbae4f654253fcaab12534c910ba14093e92 (patch)
treefe7626eabae6d71921e4cb7ceaa97f6ae0676ed9
parenteb81aad9d5c9f42b480a52c22ea7e26e0ea5d4c2 (diff)
downloadwikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.gz
wikitrans-ad2bfbae4f654253fcaab12534c910ba14093e92.tar.bz2
Write README.rst
-rw-r--r--README.rst168
-rw-r--r--WikiTrans/wiki2html.py4
-rw-r--r--WikiTrans/wiki2texi.py8
-rw-r--r--WikiTrans/wiki2text.py12
4 files changed, 178 insertions, 14 deletions
diff --git a/README.rst b/README.rst
index 587526a..40e3d5d 100644
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,169 @@
MediaWiki Markup Translator
===========================
-FIXME
+This package provides a Python framework for translating WikiMedia
+articles to various formats. The present version supports
+conversions to plain text, HTML, and Texinfo formats.
+
+A command line convertor utility is included.
+
+Classes
+=======
+
+class ``WikiMarkup``
+--------------------
+A base class for all translator classes. Unless you plan to extend
+wikitrans, you will never have to create objects of this
+class. Instead, you will be using one of its derived classes.
+
+Constructor arguments common to all derived classes:
+
+filename = *name*
+ The file *name* is opened and used for input.
+file = *fd*
+ An already opened file *fd* is used for input.
+text = *string*
+ Input is taken from *string*, line by line.
+
+lang = *code*
+ Specifies language version. Default is ``en``. This variable can be
+ referred to as ``%(lang)s`` in the keyword arguments below.
+html_base = *url*
+ Base URL for cross-references. Default is
+ ``http://%(lang)s.wiktionary.org/wiki/``.
+image_base = *url*
+ Base URL for images. Default is
+ ``http://upload.wikimedia.org/wikipedia/commons/thumb/a/bf``
+media_base = *url*
+ Base URL for media files. Default is
+ ``http://www.mediawiki.org/xml/export-0.3``
+
+
+class ``TextWikiMarkup``
+------------------------
+Translates material in Wiki markup language to plain text. Usage::
+
+ from WikiTrans.wiki2text import TextWikiMarkup
+
+ markup = TextWikiMarkup(filename='input.txt')
+ markup.parse()
+ print(str(markup))
+
+Specific constructor arguments:
+
+width = *N*
+ Limit output width to *N* columns. Default is 78.
+show_urls = *bool*
+ Whether or not to show the URLs links refer to. If *bool* is
+ ``True``, a URL will be displayed in parentheses next to the link
+ text. If ``False`` (the default), only the link text will be displayed.
+
+class ``TextWiktionaryMarkup``
+------------------------------
+Translate material from wiktionary to plain text form. This is
+supposed to provide a wiktionary-specific form of
+``TextWikiMarkup``. Currently both classes are entirely equivalent.
+
+class ``TexiWikiMarkup``
+------------------------
+Translate Wiki markup to Texinfo source. Usage::
+
+ from WikiTrans.wiki2texi import TexiWikiMarkup
+
+ markup = TexiWikiMarkup(filename='input.txt')
+ markup.parse()
+ print(str(markup))
+
+Two markup-specific keywords control the sectioning model used.
+
+sectioning_model = *model*
+ Selects the Texinfo sectioning model for the output
+ document. Possible values are:
+
+ ``numbered``
+ Top of document is marked with ``@top``. Headings (``=``, ``==``,
+ ``===``, etc) produce ``@chapter``,
+ ``@section``, ``@subsection``, etc.
+ ``unnumbered``
+ Unnumbered sectioning: ``@top``, ``@unnumbered``, ``@unnumberedsec``,
+ ``@unnumberedsubsec``.
+ ``appendix``
+ Sectioning suitable for appendix entries: ``@top``, ``@appendix``,
+ ``@appendixsec``, ``@appendixsubsec``, etc.
+ ``heading``
+ Use heading directives to reflect sectioning: ``@majorheading``,
+ ``@chapheading``, ``@heading``, ``@subheading``, etc.
+
+sectioning_start = *n*
+ Shift resulting heading level by *n* positions. For example, supposing
+ ``sectioning_model=numbered``, ``== A ==`` will produce ``@section
+ A`` on output. If ``sectioning_start=1`` is also given, this
+ directive will produce ``@subsection A`` instead.
+
+class ``HtmlWikiMarkup``
+------------------------
+Translates Wiki markup to HTML. Usage::
+
+ from WikiTrans.wiki2html import HtmlWikiMarkup
+
+ markup = HtmlWikiMarkup(filename='input.txt')
+ markup.parse()
+ print(str(markup))
+
+Supported keywords are the same as for the ``WikiMarkup`` class.
+
+class ``HtmlWiktionaryMarkup``
+------------------------------
+Translate material from wiktionary to HTML form. This is
+supposed to provide a wiktionary-specific form of
+``HtmlWikiMarkup``. Currently both classes are equivalent.
+
+The ``wikitrans`` utility
+=========================
+This command line utility converts the supplied text to a selected
+output format. The usage syntax is::
+
+ wikitrans [OPTIONS] ARG
+
+If ARG looks like a URL, the wiki text to be converted will be
+downloaded from that URL.
+
+Otherwise, if the ``--base-url=URL`` option is given, ARG is treated as
+the name of the page to get from the MediaWiki installation at ``URL``.
+
+Otherwise, ARG is treated as the name of the file to read wiki
+material from.
+
+Examples::
+
+ wikitrans text.wiki
+
+ wikitrans --base-url http://en.wiktionary.org door
+
+ wikitrans https://en.wiktionary.org/wiki/Special:Export/door
+
+Options are:
+
+``--version``
+ Show program's version number and exit.
+``-h``, ``--help``
+ Show a short usage summary and exit.
+``-v``, ``--verbose``
+ Verbose operation.
+``-I ITYPE``, ``--input-type=ITYPE``
+ Set input document type. *ITYPE* is one of: ``default`` or ``wiktionary``.
+``-t OTYPE``, ``--to=OTYPE``, ``--type=OTYPE``
+ Set output document type (``html`` (the default), ``texi``,
+ ``text``, or ``dump``).
+``-l LANG``, ``--lang=LANG``
+ Set input document language.
+``-o KW=VAL``, ``--option=KW=VAL``
+ Pass the keyword argument ``KW=VAL`` to the parser class constructor.
+``-d DEBUG``, ``--debug=DEBUG``
+ Set debug level (0..100).
+``-D``, ``--dump``
+ Dump parse tree and exit; same as ``--type=dump``.
+``-b URL``, ``--base-url=URL``
+ Set base url.
+
+
+
diff --git a/WikiTrans/wiki2html.py b/WikiTrans/wiki2html.py
index 00f02b5..6147642 100644
--- a/WikiTrans/wiki2html.py
+++ b/WikiTrans/wiki2html.py
@@ -111,8 +111,8 @@ class HtmlTextNode(HtmlSeqNode):
class HtmlHdrNode(WikiHdrNode):
def format(self):
level = self.level
- if level > 4:
- level = 4
+ if level > 6:
+ level = 6
return "<h%s>%s</h%s>\n\n" % (level, self.content.format(), level)
class HtmlBarNode(WikiNode):
diff --git a/WikiTrans/wiki2texi.py b/WikiTrans/wiki2texi.py
index 39c70c6..7297195 100644
--- a/WikiTrans/wiki2texi.py
+++ b/WikiTrans/wiki2texi.py
@@ -303,14 +303,14 @@ class TexiWikiMarkup (WikiMarkup):
self.token_class['LINK'] = TexiLinkNode
self.token_class['REF'] = TexiRefNode
- if "sectioning-model" in keywords:
- val = keywords["sectioning-model"]
+ if "sectioning_model" in keywords:
+ val = keywords["sectioning_model"]
if val in self.sectcomm:
self.sectioning_model = val
else:
raise ValueError("Invalid value for sectioning model: %s" % val)
- if "sectioning-start" in keywords:
- val = keywords["sectioning-start"]
+ if "sectioning_start" in keywords:
+ val = keywords["sectioning_start"]
if val < 0 or val > 4:
raise ValueError("Invalid value for sectioning start: %s" % val)
else:
diff --git a/WikiTrans/wiki2text.py b/WikiTrans/wiki2text.py
index dc2e003..cb3a183 100644
--- a/WikiTrans/wiki2text.py
+++ b/WikiTrans/wiki2text.py
@@ -104,7 +104,7 @@ class TextLinkNode(WikiSeqNode):
ns = self.parser.wiki_ns_name(qual)
if ns:
if ns == 'NS_IMAGE':
- if not self.parser.showrefs:
+ if not self.parser.show_urls:
return ""
text = "[%s: %s]" % (qual, text if text else arg)
tgt = self.image_base + '/' + \
@@ -121,7 +121,7 @@ class TextLinkNode(WikiSeqNode):
tgt = self.parser.mktgt(tgt)
else:
tgt = self.parser.mktgt(arg)
- if self.parser.showrefs:
+ if self.parser.show_urls:
return "%s (see %s) " % (text, tgt)
elif not text or text == '':
return arg
@@ -212,7 +212,7 @@ class TextWikiMarkup (WikiMarkup):
# Output width
width = 78
# Do not show references.
- showrefs = False
+ show_urls = False
# Provide a minimum markup
markup = True
@@ -226,10 +226,8 @@ class TextWikiMarkup (WikiMarkup):
super(TextWikiMarkup,self).__init__(*args, **keywords)
if 'width' in keywords:
self.width = keywords['width']
- if 'refs' in keywords:
- self.showrefs = keywords['refs']
- if 'markup' in keywords:
- self.markup = keywords['markup']
+ if 'show_urls' in keywords:
+ self.show_urls = keywords['show_urls']
self.token_class['SEQ'] = TextSeqNode
self.token_class['TEXT'] = TextTextNode
self.token_class['PRE'] = TextPreNode

Return to:

Send suggestions and report system problems to the System administrator.