summaryrefslogtreecommitdiffabout
authorSergey Poznyakoff <gray@gnu.org.ua>2010-03-19 14:55:04 (GMT)
committer Sergey Poznyakoff <gray@gnu.org.ua>2010-03-19 15:29:49 (GMT)
commit004dc35b5d6c451184ed0983caee65e47c6b4091 (patch) (side-by-side diff)
tree695091a2645b1d4d0b7375aa731b2cba818cd91a
parent5c3643f95a9d5568b64875ddf96a7e007050de72 (diff)
downloadgamma-004dc35b5d6c451184ed0983caee65e47c6b4091.tar.gz
gamma-004dc35b5d6c451184ed0983caee65e47c6b4091.tar.bz2
Further improvements in expat.
* src/expat.sci (xml-make-parser): Fix definition. (xml-error-descr): New syntax. * src/gamma-expat.c (xml-expat-version-string) (xml-expat-version) (xml-default-current) (xml-error-string) (xml-current-line-number) (xml-current-byte-count): New functions. (xml-primitive-make-parser): Include context data int the error information. (generic_xml_decl_handler): Account for possible NULLs. * examples/xml-check.scm: New file. * examples/xml-struct.scm: New file. * examples/xmlck.scm: New file. * examples/expat-info.scm: New file. * examples/Makefile.am, examples/README: Update. * doc/expat.texi: Update.
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r--doc/expat.texi661
-rw-r--r--examples/Makefile.am8
-rw-r--r--examples/README31
-rw-r--r--examples/expat-info.scm12
-rw-r--r--examples/xml-check.scm47
-rw-r--r--examples/xml-struct.scm63
-rw-r--r--examples/xmlck.scm20
-rw-r--r--src/expat.sci43
-rw-r--r--src/gamma-expat.c123
-rw-r--r--src/gamma-expat.h2
10 files changed, 865 insertions, 145 deletions
diff --git a/doc/expat.texi b/doc/expat.texi
index e52a636..08b4137 100644
--- a/doc/expat.texi
+++ b/doc/expat.texi
@@ -4,8 +4,10 @@
@c *******************************************************************
@WRITEME
-The @samp{(gamma expat)} module provides interface with
+The @samp{(gamma expat)} module provides interface to
@command{libexpat}, a library for parsing @acronym{XML} documents.
+See @uref{http://expat.sourceforge.net}, for a description of the
+library.
Usage:
@@ -14,14 +16,130 @@ Usage:
@end lisp
@menu
-* primitives:: Expat Primitives
+* expat basics::
+* creating parsers::
+* parsing::
+* errors::
* handlers::
@end menu
-@node primitives
-@section Expat Primitives
+@node expat basics
+@section Expat Basics
+
+Parsing of @acronym{XML} documents using Expat is based on
+user-defined callback functions. You create a @dfn{parser}
+object, and associate @dfn{callback} (or @dfn{handler}) functions with
+the events he is interested in. Such events may be, for instance,
+encountering of a open or closing tag, encountering of a comment
+block, etc. Once the parser object is ready, you start feeding the
+document to it. As the parser recognizes @acronym{XML} constructs, it
+calls the callbacks that are registered for them.
+
+Parsers are created using @code{xml-make-parser} function. In the
+simplest case, it takes no arguments, e.g.:
+
+@lisp
+(let ((parser (xml-make-parser)))
+ @dots{}
+@end lisp
+
+The function @code{xml-parse} takes the parser as its argument, reads
+the document from the current input stream and feeds it to the parser.
+Thus, the simplest program for parsing @acronym{XML} documents is:
+
+@lisp
+(use-modules ((gamma expat)))
+(xml-parse (xml-make-parser))
+@end lisp
+
+This program is perhaps not so useful, but you may already use it to
+check whether its input is a correctly formed @acronym{XML} document.
+If @code{xml-parse} encounters an error, it signals the
+@code{gamma-xml-error} error. @xref{errors, error handling}, for a
+discussion on how to handle it.
+
+The @code{xml-make-parser} function takes optional arguments, which
+allow to set callback functions for the new parser. For example, the
+following code sets function @samp{elt-start} as a handler for
+start elements:
+
+@lisp
+(xml-make-parser #:start-element-handler elt-start)
+@end lisp
+
+The @code{#:start-element-handler} keyword informs the function that
+the argument following it is a handler for start @acronym{XML} documents.
+Any number of handlers may be set this way, e.g.:
+
+@lisp
+(xml-make-parser #:start-element-handler elt-start
+ #:end-element-handler elt-end
+ #:comment-handler comment)
+@end lisp
+
+Definitions of particular handler functions differ depending on their
+purpose, i.e. on the event they are defined to handle. For example,
+a start element handler must be defined as having two arguments.
+First of them is the name of the tag, and the second one is a list of
+attributes supplied for that tag. Thus, for example, the following
+start handler prints the tag and the number of attributes:
+
+@lisp
+(define (elt-start name attrs)
+ (format #t "~A (~A)~%" name (length attrs)))
+@end lisp
+
+For a detailed description of all available handlers and handler
+keywords, see @ref{handlers}.
+
+To further improve our example, suppose you need a program that will
+take an @acronym{XML} document as its input and create a description
+of its structure on output, showing element nesting levels by
+indenting their description. Here is how to write it.
+
+First, define handlers for start and end elements. Start element
+handler will print two indenting spaces for each level of ancestor
+elements, followed by the element name and its attributes and a
+newline. It will then increase the global level variable:
+
+@lisp
+(define level 0)
+
+(define (elt-start name attrs)
+ (display (make-string (* 2 level) #\space))
+ (display name)
+ (for-each
+ (lambda (x)
+ (display " ")
+ (display (car x))
+ (display "=")
+ (display (cdr x)))
+ attrs)
+ (newline)
+ (set! level (1+ level)))
+@end lisp
+
+The handler for end tags is simpler: it must only decrease the level:
+
+@lisp
+(define (elt-end name)
+ (set! level (1- level)))
+@end lisp
+
+Finally, create a parser and parse the input:
+
+@lisp
+(xml-parse (xml-make-parser #:start-element-handler elt-start
+ #:end-element-handler elt-end))
+@end lisp
+
+
+
+@node creating parsers
+@section Creating XML Parsers
@WRITEME
+@anchor{xml-primitive-make-parser}
@deffn {Scheme procedure} xml-primitive-make-parser enc sep
Return a new @acronym{XML} parser. If @var{enc} is given, it must be one of:
@samp{US-ASCII}, @samp{UTF-8}, @samp{UTF-16}, @samp{ISO-8859-1}. If @var{sep}
@@ -63,23 +181,6 @@ and to:
@end lisp
@end deffn
-@deffn {Scheme procedure} xml-primitive-parse parser input isfinal
-Parse next piece of input. Arguments are:
-
-@table @var
-@item parser
-A parser returned from a previous call to
-@code{xml-primitive-make-parser} or @code{xml-make-parser}.
-
-@item input
-A piece of input text.
-
-@item isfinal
-Boolean value indicating whether @var{input} is the last part of
-input.
-@end table
-@end deffn
-
@deffn {Scheme procedure} xml-primitive-set-handler parser key handler
Set @acronym{XML} handler for an event. Arguments are:
@@ -87,90 +188,77 @@ Set @acronym{XML} handler for an event. Arguments are:
@item parser
A valid @acronym{XML} parser
+@anchor{handler-keyword}
@item key
-A key, identifying an event.
-
-@table @asis
-@kwindex start-element-handler
-@item #:start-element-handler
-
-@kwindex end-element-handler
-@item #:end-element-handler
-
-@kwindex character-data-handler
-@item #:character-data-handler
-
-@kwindex processing-instruction-handler
-@item #:processing-instruction-handler
-
-@kwindex comment-handler
-@item #:comment-handler
-
-@kwindex start-cdata-section-handler
-@item #:start-cdata-section-handler
+A key, identifying the event. For example,
+@samp{#:start-element-handler} sets handler which is called for start
+tags.
-@kwindex end-cdata-section-handler
-@item #:end-cdata-section-handler
+@xref{handlers}, for its values and their meaning.
-@kwindex default-handler
-@item #:default-handler
-
-@kwindex default-handler-expand
-@item #:default-handler-expand
-
-@kwindex external-entity-ref-handler
-@item #:external-entity-ref-handler
-
-@kwindex skipped-entity-handler
-@item #:skipped-entity-handler
-
-@kwindex unknown-encoding-handler
-@item #:unknown-encoding-handler
-
-@kwindex start-namespace-decl-handler
-@item #:start-namespace-decl-handler
+@item handler
+Handler procedure.
+@end table
-@kwindex end-namespace-decl-handler
-@item #:end-namespace-decl-handler
+@end deffn
-@kwindex xml-decl-handler
-@item #:xml-decl-handler
+@deffn {Scheme function} xml-set-handler parser args@dots{}
+Sets several handlers at once. Optional arguments (@var{args})
+are constracted of keywords (as described in
+@pxref{handler-keyword}), followed by their arguments, for example:
-@kwindex start-doctype-decl-handler
-@item #:start-doctype-decl-handler
+@lisp
+(xml-set-handler parser
+ #:start-element-handler elt-start
+ #:end-element-handler elt-end)
+@end lisp
+@end deffn
-@kwindex end-doctype-decl-handler
-@item #:end-doctype-decl-handler
+@deffn {Scheme function} xml-make-parser [enc [sep]] args@dots{}
+Create a parser and set its handlers. Optional @var{enc} and
+@var{sep} have the same meaning as in @ref{xml-primitive-make-parser}.
+The rest of arguments define handlers for the new parser. They must
+be supplied in pairs: a keyword (as described in
+@pxref{handler-keyword}), followed by its argument. For example:
-@kwindex element-decl-handler
-@item #:element-decl-handler
+@lisp
+(xml-make-parser "US-ASCII"
+ #:start-element-handler elt-start
+ #:end-element-handler elt-end)
+@end lisp
-@kwindex attlist-decl-handler
-@item #:attlist-decl-handler
+This call creates a new parser for documents in @samp{US-ASCII}
+encoding and sets two handlers: for element start and for element end.
+This call is equivalent to:
-@kwindex entity-decl-handler
-@item #:entity-decl-handler
+@lisp
+(let ((p (xml-primitive-make-parser "US-ASCII")))
+ (xml-primitive-set-handler p #:start-element-handler elt-start)
+ (xml-primitive-set-handler p #:end-element-handler elt-end)
+ @dots{}
+@end lisp
+@end deffn
-@kwindex unparsed-entity-decl-handler
-@item #:unparsed-entity-decl-handler
+@node parsing
+@section Parser Functions
-@kwindex notation-decl-handler
-@item #:notation-decl-handler
+@deffn {Scheme procedure} xml-primitive-parse parser input isfinal
+Parse next piece of input. Arguments are:
-@kwindex not-standalone-handler
-@item #:not-standalone-handler
-@end table
+@table @var
+@item parser
+A parser returned from a previous call to
+@code{xml-primitive-make-parser} or @code{xml-make-parser}.
+@item input
+A piece of input text.
-@item handler
-Handler procedure.
+@item isfinal
+Boolean value indicating whether @var{input} is the last part of
+input.
@end table
@end deffn
-@deffn {Scheme function} xml-make-parser args@dots{}
-@FIXME
-@end deffn
-
@deffn {Scheme function} xml-parse-more parser input
Equivalent to:
@@ -188,98 +276,421 @@ is equivalent to:
@end deffn
@deffn {Scheme function} xml-parse parser
-@FIXME
+Reads @acronym{XML} input from the standard input port and parses it
+using @code{xml-primitive-parse}.
@end deffn
-@deffn {Scheme function} xml-set-handler parser args@dots{}
-@FIXME
-@end deffn
+@node errors
+@section Error Handling
+@WRITEME
@node handlers
@section Expat Handlers
@WRITEME
+@menu
+* start-element-handler::
+* end-element-handler::
+* character-data-handler::
+* processing-instruction-handler::
+* comment-handler::
+* start-cdata-section-handler::
+* end-cdata-section-handler::
+* default-handler::
+* default-handler-expand::
+* skipped-entity-handler::
+* start-namespace-decl-handler::
+* end-namespace-decl-handler::
+* xml-decl-handler::
+* start-doctype-decl-handler::
+* end-doctype-decl-handler::
+* attlist-decl-handler::
+* entity-decl-handler::
+* notation-decl-handler::
+* not-standalone-handler::
+@end menu
+
+@node start-element-handler
+@subsection start-element-handler
+
+@defvr {Handler Keyword} #:start-element-handler
+Sets handler for start (and empty) tags.
+@end defvr
+
+The hanlder must be defined as follows:
+
@deffn {Handler prototype} start-element name attrs
-@FIXME
+Arguments:
+
+@table @var
+@item name
+Element name.
+
+@item attrs
+A list of element attributes. Each attribute is represented by a cons
+(@samp{car} holds attribute name, @samp{cdr} holds its value).
+@end table
@end deffn
+@node end-element-handler
+@subsection end-element-handler
+
+@defvr {Handler Keyword} #:end-element-handler
+Sets handler for end (and empty) tags. An empty tag generates a call
+to both start and end handlers (in that order).
+@end defvr
+
+The hanlder must be defined as follows:
+
@deffn {Handler prototype} end-element name
-@FIXME
+Arguments:
+
+@table @var
+@item name
+Element name
+@end table
@end deffn
+@node character-data-handler
+@subsection character-data-handler
+
+@defvr {Handler Keyword} #:character-data-handler
+Sets a text handler. A single block of contiguous text free of markup
+may result in a sequence of calls to this handler. So, if you are
+searching for a pattern in the text, it may be split across calls to
+this handler.
+@end defvr
+
+The handler itself is defined as:
+
@deffn {Handler prototype} character-data text
-@FIXME
+Arguments:
+
+@table @var
+@item text
+The text.
+@end table
@end deffn
+@node processing-instruction-handler
+@subsection processing-instruction-handler
+
+@defvr {Handler Keyword} #:processing-instruction-handler
+Set a handler for @dfn{processing instructions}.
+@end defvr
+
@deffn {Handler prototype} processing-instruction target data
-@FIXME
+Arguments are:
+
+@table @var
+@item target
+First word in the processing instruction.
+
+@item data
+The rest of the characters in the processing instruction, after
+@var{target} and whitespace following it.
+@end table
@end deffn
+@node comment-handler
+@subsection comment-handler
+
+@defvr {Handler Keyword} #:comment-handler
+Sets a handler for comments.
+@end defvr
+
@deffn {Handler prototype} comment text
-@FIXME
+@table @var
+@item text
+The text inside the comment delimiters.
+@end table
@end deffn
+@node start-cdata-section-handler
+@subsection start-cdata-section-handler
+
+@defvr {Handler Keyword} #:start-cdata-section-handler
+Sets a handler that gets called at the beginning of a CDATA section.
+@end defvr
+
+The handler is defined as follows:
+
@deffn {Handler prototype} start-cdata-section
-@FIXME
@end deffn
+@node end-cdata-section-handler
+@subsection end-cdata-section-handler
+
+@defvr {Handler Keyword} #:end-cdata-section-handler
+Sets a handler that gets called at the end of a CDATA section.
+@end defvr
+
+The handler is defined as:
+
@deffn {Handler prototype} end-cdata-section
-@FIXME
@end deffn
+@node default-handler
+@subsection default-handler
+
+@defvr {Handler Keyword} #:default-handler
+Sets a handler for any characters in the document which wouldn't
+otherwise be handled. This includes both data for which no handlers
+can be set (like some kinds of @acronym{DTD} declarations) and data
+which could be reported but which currently has no handler set.
+@end defvr
+
@deffn {Handler prototype} default text
-@FIXME
+@table @var
+@item text
+A string containing all non-handled characters, which are passed
+exactly as they were present in the input @acronym{XML} document
+except that they will be encoded in UTF-8 or UTF-16. Line boundaries
+are not normalized. Note that a byte order mark character is not
+passed to the default handler. There are no guarantees about how
+characters are divided between calls to the default handler: for
+example, a comment might be split between multiple calls. Setting the
+@samp{default} handler has the side effect of turning off expansion of
+references to internally defined general entities. Such references
+are passed to the default handler verbatim.
+@end table
@end deffn
-@deffn {Handler prototype} default-expand text
-@FIXME
-@end deffn
+@node default-handler-expand
+@subsection default-handler-expand
-@deffn {Handler prototype} unparsed-entity-decl entity-name base @
- system-id public-id notation-name
-@FIXME
-@end deffn
+@defvr {Handler Keyword} #:default-handler-expand
+This sets a default handler as above, but does not inhibit the
+expansion of internal entity references. Any entity references are not
+passed to the handler.
+@end defvr
-@deffn {Handler prototype} notation-decl notation-name base @
- system-id public-id
-@FIXME
-@end deffn
+The handler prototype is the same as in @ref{default-handler}.
-@deffn {Handler prototype} start-namespace prefix uri
-@FIXME
-@end deffn
+@node skipped-entity-handler
+@subsection skipped-entity-handler
-@deffn {Handler prototype} end-namespace prefix
-@FIXME
-@end deffn
+@defvr {Handler Keyword} #:skipped-entity-handler
+Set a skipped entity handler, i.e. a handler which is called if:
+
+@itemize @bullet
+@item An entity reference is encountered for which no declaration has
+been read and this is not an error.
+@item An internal entity reference is read, but not expanded, because
+a @samp{#:default-handler} has been set.
+@end itemize
+@end defvr
@deffn {Handler prototype} skipped-entity entity-name parameter?
-@FIXME
+Arguments are:
+
+@table @var
+@item entity-name
+Name of the entity.
+
+@item parameter?
+This argument is @code{#t} if the entity is a parameter, and @code{#f}
+otherwise.
+@end table
@end deffn
-@deffn {Handler prototype} not-standalone
-@FIXME
+@node start-namespace-decl-handler
+@subsection start-namespace-decl-handler
+
+@defvr {Handler Keyword} #:start-namespace-decl-handler
+Set a handler to be called when a namespace is declared.
+@end defvr
+
+@deffn {Handler prototype} start-namespace-decl prefix uri
+Arguments:
+
+@table @var
+@item prefix
+Namespace prefix.
+
+@item uri
+Namespace @acronym{URI}.
+@end table
@end deffn
-@deffn {Handler prototype} xml-decl version encoding detail
-@FIXME
+@node end-namespace-decl-handler
+@subsection end-namespace-decl-handler
+
+@defvr {Handler Keyword} #:end-namespace-decl-handler
+Set a handler to be called when leaving the scope of a namespace
+declaration. This will be called, for each namespace declaration,
+after the handler for the end tag of the element in which the
+namespace was declared.
+@end defvr
+
+The handler prototype is:
+
+@deffn {Handler prototype} end-namespace-decl prefix
@end deffn
+@node xml-decl-handler
+@subsection xml-decl-handler
+
+@defvr {Handler Keyword} #:xml-decl-handler
+Sets a handler that is called for @acronym{XML} declarations and also
+for text declarations discovered in external entities.
+@end defvr
+
+@deffn {Handler prototype} xml-decl version encoding . detail
+Arguments:
+
+@table @var
+@item version
+Version specification (string), or @code{#f}, for text declarations.
+
+@item encoding
+Encoding. May be @code{#f}.
+
+@item detail
+@samp{Unspecified}, if there was no standalone parameter in the
+declaration. Otherwise, @code{#t} or @code{#f} depending on whether
+it was given as @samp{yes} or @samp{no}.
+@end table
+@end deffn
+
+@node start-doctype-decl-handler
+@subsection start-doctype-decl-handler
+
+@defvr {Handler Keyword} #:start-doctype-decl-handler
+Set a handler that is called at the start of a @samp{DOCTYPE} declaration,
+before any external or internal subset is parsed.
+@end defvr
+
@deffn {Handler prototype} start-doctype-decl name sysid pubid @
has-internal-subset?
-@FIXME
+Arguments:
+
+@table @var
+@item name
+Declaration name.
+
+@item sysid
+System @acronym{ID}. May be @code{#f}.
+
+@item pubid
+Public @acronym{ID}. May be @code{#f}.
+
+@item has-internal-subset?
+@code{#t} if the @samp{DOCTYPE} declaration has an internal subset,
+@code{#f} otherwise.
+@end table
@end deffn
+@node end-doctype-decl-handler
+@subsection end-doctype-decl-handler
+
+@defvr {Handler Keyword} #:end-doctype-decl-handler
+Set a handler that is called at the end of a @samp{DOCTYPE}
+declaration, after parsing any external subset.
+@end defvr
+
+The handler takes no arguments:
+
@deffn {Handler prototype} end-doctype-decl
-@FIXME
@end deffn
+@node attlist-decl-handler
+@subsection attlist-decl-handler
+
+@defvr {Handler Keyword} #:attlist-decl-handler
+Sets a handler for @samp{attlist} declarations in the
+@acronym{DTD}. This handler is called for each attribute, which
+means, in particular, that a single attlist declaration with multiple
+attributes causes multiple calls to this handler.
+@end defvr
+
+The handler prototype is:
+
@deffn {Handler prototype} attlist-decl el-name att-name att-type detail
-@FIXME
+Argument:
+
+@table @var
+@item el-name
+Name of the element for which the attribute is being declared.
+
+@item att-name
+Attribute name.
+
+@item detail
+Default value, if @var{el-name} is a @samp{#FIXED} attribute,
+@code{#t}, if it is a @samp{#REQUIRED} attribute, and @code{#f}, if it
+is a @samp{#IMPLIED} attribute.
+@end table
@end deffn
+@node entity-decl-handler
+@subsection entity-decl-handler
+
+@defvr {Handler Keyword} #:entity-decl-handler
+Sets a handler that will be called for all entity declarations.
+@end defvr
+
+
@deffn {Handler prototype} entity-decl name param? value base sys-id pub-id @
notation
-@FIXME
+Arguments:
+
+@table @var
+@item name
+Entity name.
+
+@item param?
+For parameter entities, @code{#t}. Otherwise, @code{#f}.
+
+@item value
+For internal entities, entity value. Otherwise, @code{#f}.
+
+@item base
+Base.
+
+@item sys-id
+System @acronym{ID}. For internal entities -- @code{#f}.
+
+@item pub-id
+Public @acronym{ID}. For internal entities -- @code{#f}.
+
+@item notation
+Notation name, for unparsed entity declarations. Otherwise,
+@code{#f}. Unparsed are entity declarations that have a notation
+(@samp{NDATA}) field, such as:
+
+@example
+<!ENTITY logo SYSTEM "images/logo.gif" NDATA gif>
+@end example
+
+@end table
+@end deffn
+
+@node notation-decl-handler
+@subsection notation-decl-handler
+
+@defvr {Handler Keyword} #:notation-decl-handler
+Sets a handler that receives notation declarations.
+@end defvr
+
+Handler prototype is:
+
+@deffn {Handler prototype} notation-decl notation-name base @
+ system-id public-id
+@end deffn
+
+@node not-standalone-handler
+@subsection not-standalone-handler
+
+@defvr {Handler Keyword} #:not-standalone-handler
+Sets a handler that is called if the document is not @dfn{standalone}, i.e.
+when there is an external subset or a reference to a parameter entity,
+but does not have @samp{standalone} set to "yes" in an @acronym{XML}
+declaration.
+@end defvr
+
+The handler takes no arguments:
+
+@deffn {Handler prototype} not-standalone
@end deffn
diff --git a/examples/Makefile.am b/examples/Makefile.am
index dd33878..a2734f9 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -14,4 +14,10 @@
# You should have received a copy of the GNU General Public License
# along with Gamma. If not, see <http://www.gnu.org/licenses/>.
-EXTRA_DIST = whoisd.scm README
+EXTRA_DIST = \
+ whoisd.scm\
+ xmlck.scm\
+ xml-check.scm\
+ xml-struct.scm\
+ expat-version.scm\
+ README
diff --git a/examples/README b/examples/README
index 51e6370..58ac7fc 100644
--- a/examples/README
+++ b/examples/README
@@ -1,6 +1,25 @@
-This directory contains examples of (gamma sql) usage.
+This directory contains examples of Gamma usage.
-whoisd -- a simple whois daemon.
+* expat-info.scm
+
+Shows information about Expat library and its version.
+
+* xmlck.scm
+
+A simple checker for XML documents. Parses its input and verifies
+if it is a valid XML.
+
+* xml-check.scm
+
+A more advanced XML checker. Displays an extended diagnostics,
+including the context in which error appears.
+
+* xml-struct.scm
+
+Reads an XML document from standard input and prints an outline
+of its structure on standard output.
+
+* whoisd.scm -- a simple whois daemon.
Here is the database structure for use with this daemon:
@@ -30,3 +49,11 @@ CREATE TABLE domain (
remark text,
source char(24) default 'UNKNOWN' not null
);
+
+
+
+Local Variables:
+mode: outline
+paragraph-separate: "[ ]*$"
+version-control: never
+End:
diff --git a/examples/expat-info.scm b/examples/expat-info.scm
new file mode 100644
index 0000000..3dc310f
--- a/dev/null
+++ b/examples/expat-info.scm
@@ -0,0 +1,12 @@
+(use-modules (gamma expat))
+
+(display (xml-expat-version-string))
+(newline)
+
+(let ((vinfo (xml-expat-version)))
+ (format #t "Major:~A~%Minor:~A~%Micro:~A~%"
+ (list-ref vinfo 0)
+ (list-ref vinfo 1)
+ (list-ref vinfo 2)))
+
+ \ No newline at end of file
diff --git a/examples/xml-check.scm b/examples/xml-check.scm
new file mode 100644
index 0000000..5f9795a
--- a/dev/null
+++ b/examples/xml-check.scm
@@ -0,0 +1,47 @@
+;;;; This file is part of Gamma.
+;;;; Copyright (C) 2010 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify it
+;;;; under the terms of the GNU General Public License as published by the
+;;;; Free Software Foundation; either version 3 of the License, or (at your
+;;;; option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License along
+;;;; with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(use-modules ((gamma expat)))
+
+(catch 'gamma-xml-error
+ (lambda ()
+ (xml-parse (xml-make-parser)))
+ (lambda (key func fmt args ctx)
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (cond
+ ((not ctx)
+ (apply format #t fmt args)
+ (newline))
+ (else
+ (format #t
+ "~A:~A: ~A~%"
+ (xml-error-descr ctx #:line)
+ (xml-error-descr ctx #:column)
+ (xml-error-string (xml-error-descr ctx #:error-code)))
+ (if (xml-error-descr ctx #:has-context?)
+ (let ((ctx-text (xml-error-descr ctx #:context))
+ (ctx-pos (xml-error-descr ctx #:error-offset)))
+ (format #t
+ "Context (^ marks the point): ~A^~A~%"
+ (substring ctx-text 0 ctx-pos)
+ (substring ctx-text ctx-pos))))
+ (exit 1)))))))
+
+
+
+
diff --git a/examples/xml-struct.scm b/examples/xml-struct.scm
new file mode 100644
index 0000000..dd2b662
--- a/dev/null
+++ b/examples/xml-struct.scm
@@ -0,0 +1,63 @@
+;;;; This file is part of Gamma.
+;;;; Copyright (C) 2010 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify it
+;;;; under the terms of the GNU General Public License as published by the
+;;;; Free Software Foundation; either version 3 of the License, or (at your
+;;;; option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License along
+;;;; with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+(use-modules (gamma expat))
+
+(let ((level 0))
+ (letrec ((elt-start (lambda (name attrs)
+ (display (make-string (* 2 level) #\space))
+ (display name)
+ (for-each
+ (lambda (x)
+ (display " ")
+ (display (car x))
+ (display "=")
+ (display (cdr x)))
+ attrs)
+ (newline)
+ (set! level (1+ level))))
+ (elt-end (lambda (name)
+ (set! level (1- level)))))
+ (catch 'gamma-xml-error
+ (lambda ()
+ (xml-parse (xml-make-parser
+ #:start-element-handler elt-start
+ #:end-element-handler elt-end)))
+ (lambda (key func fmt args ctx)
+ (with-output-to-port
+ (current-error-port)
+ (lambda ()
+ (cond
+ ((not ctx)
+ (apply format #t fmt args)
+ (newline))
+ (else
+ (format #t
+ "~A:~A: ~A~%"
+ (xml-error-descr ctx #:line)
+ (xml-error-descr ctx #:column)
+ (xml-error-string (xml-error-descr ctx #:error-code)))
+ (if (xml-error-descr ctx #:has-context?)
+ (let ((ctx-text (xml-error-descr ctx #:context))
+ (ctx-pos (xml-error-descr ctx #:error-offset)))
+ (format #t
+ "Context (^ marks the point): ~A^~A~%"
+ (substring ctx-text 0 ctx-pos)
+ (substring ctx-text ctx-pos))))
+ (exit 1)))))))))
+
+
+
diff --git a/examples/xmlck.scm b/examples/xmlck.scm
new file mode 100644
index 0000000..103ab63
--- a/dev/null
+++ b/examples/xmlck.scm
@@ -0,0 +1,20 @@
+;;;; This file is part of Gamma.
+;;;; Copyright (C) 2010 Sergey Poznyakoff
+;;;;
+;;;; This program is free software; you can redistribute it and/or modify it
+;;;; under the terms of the GNU General Public License as published by the
+;;;; Free Software Foundation; either version 3 of the License, or (at your
+;;;; option) any later version.
+;;;;
+;;;; This program is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;;; GNU General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU General Public License along
+;;;; with this program. If not, see <http://www.gnu.org/licenses/>.
+;;;;
+
+(use-modules ((gamma expat)))
+
+(xml-parse (xml-make-parser))
diff --git a/src/expat.sci b/src/expat.sci
index 258e387..1dcabc6 100644
--- a/src/expat.sci
+++ b/src/expat.sci
@@ -18,7 +18,14 @@ changequote([,])dnl
(define-module (gamma expat)
:use-module (ice-9 rdelim)
- :use-module (srfi srfi-1))
+ :use-module (srfi srfi-1)
+ :export (xml-make-parser
+ xml-parse-more
+ xml-parse
+ xml-set-handler)
+ :export-syntax (xml-error-descr))
+
+(use-syntax (ice-9 syncase))
(let ((lib-path "LIBDIR/"))
(load-extension (string-append
@@ -26,7 +33,7 @@ changequote([,])dnl
include(BUILDDIR/gamma-expat.inc)
-(define-public (xml-make-parser . rest)
+(define (xml-make-parser . rest)
(if (null? rest)
(xml-primitive-make-parser)
(letrec ((parser-setup (lambda (setup handler-args)
@@ -38,26 +45,28 @@ include(BUILDDIR/gamma-expat.inc)
(if (string? (car rest))
(let ((encoding (car rest))
(rest (cdr rest)))
- (if (char? (car rest))
- (parser-setup (list encoding (car rest))
- (cdr rest))
- (parser-setup (list encoding) rest)))
+ (cond
+ ((null? rest)
+ (parser-setup (list encoding) rest))
+ ((char? (car rest))
+ (parser-setup (list encoding (car rest))
+ (cdr rest)))))
(parser-setup '() rest)))))
-(define-public (xml-parse-more parser input)
+(define (xml-parse-more parser input)
(cond
((eof-object? input)
(xml-primitive-parse parser "" #t))
(else
(xml-primitive-parse parser input #f))))
-(define-public (xml-parse parser)
+(define (xml-parse parser)
(let loop ((line (read-line)))
(xml-parse-more parser line)
(if (not (eof-object? line))
(loop (read-line)))))
-(define-public (xml-set-handler parser . rest)
+(define (xml-set-handler parser . rest)
(if (odd? (length rest))
(scm-error 'wrong-number-of-args
"xml-set-handler"
@@ -82,6 +91,20 @@ include(BUILDDIR/gamma-expat.inc)
prev)))
'()
rest))))
-
+
+(define-syntax xml-error-descr
+ (syntax-rules ()
+ ((xml-error-descr ctx #:error-code)
+ (list-ref ctx 0))
+ ((xml-error-descr ctx #:line)
+ (list-ref ctx 1))
+ ((xml-error-descr ctx #:column)
+ (list-ref ctx 2))
+ ((xml-error-descr ctx #:context)
+ (list-ref ctx 3))
+ ((xml-error-descr ctx #:error-offset)
+ (list-ref ctx 4))
+ ((xml-error-descr ctx #:has-context?)
+ (= (length ctx) 5))))
;;;; End of expat.scm
diff --git a/src/gamma-expat.c b/src/gamma-expat.c
index c606273..84a2a9d 100644
--- a/src/gamma-expat.c
+++ b/src/gamma-expat.c
@@ -110,6 +110,23 @@ make_user_data ()
SCM_GLOBAL_SYMBOL(gamma_xml_error, "gamma-xml-error");
+SCM_DEFINE(scm_xml_expat_version_string, "xml-expat-version-string", 0, 0, 0,
+ (),
+ "Return the version of the expat library as a string.")
+{
+ return scm_from_locale_string(XML_ExpatVersion());
+}
+
+SCM_DEFINE(scm_xml_expat_version, "xml-expat-version", 0, 0, 0,
+ (),
+ "Return expat library version information.")
+{
+ XML_Expat_Version vinfo = XML_ExpatVersionInfo();
+ return scm_list_3(scm_from_int(vinfo.major),
+ scm_from_int(vinfo.minor),
+ scm_from_int(vinfo.micro));
+}
+
SCM_DEFINE(scm_xml_primitive_make_parser, "xml-primitive-make-parser", 0, 2, 0,
(SCM enc, SCM sep),
"Return a new parser. If @var{enc} is given, it must be one of:"
@@ -191,16 +208,107 @@ SCM_DEFINE(scm_xml_primitive_parse, "xml-primitive-parse", 3, 0, 0,
free(str);
if (status == XML_STATUS_ERROR) {
enum XML_Error error = XML_GetErrorCode(gp->parser);
+ int line = XML_GetCurrentLineNumber(gp->parser);
+ int column = XML_GetCurrentColumnNumber(gp->parser);
+ int offset, size;
+ const char *bufptr;
+
+ bufptr = XML_GetInputContext(gp->parser, &offset, &size);
scm_error(gamma_xml_error, FUNC_NAME,
- "~A",
- scm_list_1(scm_from_locale_string(XML_ErrorString(error))),
- scm_list_2(scm_from_int(error), input));
+ "~A near line ~A",
+ scm_list_2(scm_from_locale_string(XML_ErrorString(error)),
+ scm_from_int(line)),
+ bufptr ?
+ scm_list_5(scm_from_int(error),
+ scm_from_int(line),
+ scm_from_int(column),
+ scm_from_locale_stringn(bufptr, size),
+ scm_from_int(offset)) :
+ scm_list_3(scm_from_int(error),
+ scm_from_int(line),
+ scm_from_int(column)));
}
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
+SCM_DEFINE(scm_xml_default_current, "xml-default-current", 1, 0, 0,
+ (SCM parser),
+ "Pass current markup to the default handler.")
+#define FUNC_NAME s_scm_xml_default_current
+{
+ struct gamma_xml_parser *gp;
+
+ SCM_ASSERT(gamma_scm_is_xml_parser(parser), parser,
+ SCM_ARG1, FUNC_NAME);
+ gp = GAMMA_XML_PARSER_PTR(parser);
+ XML_DefaultCurrent(gp->parser);
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE(scm_xml_error_string, "xml-error-string", 1, 0, 0,
+ (SCM code),
+ "Return a textual description corresponding to the error @code{code}.")
+#define FUNC_NAME s_scm_xml_error_string
+{
+ const char *p;
+ SCM_ASSERT(scm_is_integer(code), code, SCM_ARG1, FUNC_NAME);
+ p = XML_ErrorString(scm_to_int(code));
+ return scm_from_locale_string(p);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE(scm_xml_current_line_number, "xml-current-line-number", 1, 0, 0,
+ (SCM parser),
+ "Return the current line number in the input stream. "
+ "The first line is reported as 1.")
+#define FUNC_NAME s_scm_xml_current_line_number
+{
+ struct gamma_xml_parser *gp;
+
+ SCM_ASSERT(gamma_scm_is_xml_parser(parser), parser,
+ SCM_ARG1, FUNC_NAME);
+ gp = GAMMA_XML_PARSER_PTR(parser);
+ return scm_from_int(XML_GetCurrentLineNumber(gp->parser));
+}
+#undef FUNC_NAME
+
+SCM_DEFINE(scm_xml_current_column_number, "xml-current-column-number", 1, 0, 0,
+ (SCM parser),
+ "Return the current column number in the input stream. "
+ "The first column is reported as 0.")
+#define FUNC_NAME s_scm_xml_current_column_number
+{
+ struct gamma_xml_parser *gp;
+
+ SCM_ASSERT(gamma_scm_is_xml_parser(parser), parser,
+ SCM_ARG1, FUNC_NAME);
+ gp = GAMMA_XML_PARSER_PTR(parser);
+ return scm_from_int(XML_GetCurrentColumnNumber(gp->parser));
+}
+#undef FUNC_NAME
+
+SCM_DEFINE(scm_xml_current_byte_count, "xml-current-byte-count", 1, 0, 0,
+ (SCM parser),
+ "Return the number of bytes in the current event. "
+ "Returns 0 if the event is inside a reference to an internal "
+ "entity and for the end-tag event for empty element tags "
+ "(the later can be used to distinguish empty-element tags "
+ "from empty elements using separate start and end tags).")
+#define FUNC_NAME s_scm_xml_current_byte_count
+{
+ struct gamma_xml_parser *gp;
+
+ SCM_ASSERT(gamma_scm_is_xml_parser(parser), parser,
+ SCM_ARG1, FUNC_NAME);
+ gp = GAMMA_XML_PARSER_PTR(parser);
+ return scm_from_int(XML_GetCurrentByteCount(gp->parser));
+}
+#undef FUNC_NAME
+
+
static SCM
attrs_to_scm(const XML_Char **attr)
@@ -543,11 +651,14 @@ generic_xml_decl_handler(void *user_data,
struct gamma_expat_user_data *udata = user_data;
xdata.proc = udata->handler[xml_decl_handler];
- xdata.arg = scm_list_3(scm_from_locale_string(version),
- scm_from_locale_string(encoding),
+ xdata.arg = scm_list_3(version ?
+ scm_from_locale_string(version) : SCM_BOOL_F,
+ encoding ?
+ scm_from_locale_string(encoding) : SCM_BOOL_F,
standalone == -1 ?
SCM_UNSPECIFIED :
- (standalone ? SCM_BOOL_T : SCM_BOOL_F));
+ scm_list_1 (standalone ?
+ SCM_BOOL_T : SCM_BOOL_F));
gamma_safe_exec(gamma_apply_catch_body, &xdata, NULL);
}
diff --git a/src/gamma-expat.h b/src/gamma-expat.h
index 621eadf..2376e4a 100644
--- a/src/gamma-expat.h
+++ b/src/gamma-expat.h
@@ -47,7 +47,7 @@ enum gamma_expat_handler {
element_decl_handler,
attlist_decl_handler,
entity_decl_handler,
- unparsed_entity_decl_handler,
+ unparsed_entity_decl_handler,/* Deprecated */
notation_decl_handler,
not_standalone_handler,

Return to:

Send suggestions and report system problems to the System administrator.