diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2019-08-20 16:02:08 +0300 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2019-08-20 16:02:08 +0300 |
commit | b3d1cf2fb648f77a6ea432486ba2a8f026068f11 (patch) | |
tree | 97e6450c863b478ec62bb25134e86e6434705e1a | |
parent | e9f110af920cf4e6642bc3301340b23a7a2a6589 (diff) | |
download | config-parser-b3d1cf2fb648f77a6ea432486ba2a8f026068f11.tar.gz config-parser-b3d1cf2fb648f77a6ea432486ba2a8f026068f11.tar.bz2 |
Provide a way for supplying options to section definition. Improve the docs
* lib/Config/Parser.pm (loadsynt): Return a reference to the reference
hash. Process eventual __options__ keywords in sections.
Finish documentation.
* lib/Config/Parser/Ini.pm: Add documentation
* t/ConfigSpec2.pm: Use the __options__ keyword.
-rw-r--r-- | lib/Config/Parser.pm | 163 | ||||
-rw-r--r-- | lib/Config/Parser/Ini.pm | 174 | ||||
-rw-r--r-- | t/ConfigSpec2.pm | 3 |
3 files changed, 320 insertions, 20 deletions
diff --git a/lib/Config/Parser.pm b/lib/Config/Parser.pm index 7d20aa2..17a3cd0 100644 --- a/lib/Config/Parser.pm +++ b/lib/Config/Parser.pm @@ -28,20 +28,25 @@ sub new { if (my $lex = delete $_{lexicon}) { $self->lexicon($lex); } else { $self->lexicon({ '*' => '*' }); my $subs = Class::Inspector->subclasses(__PACKAGE__); + my $dict; if ($subs) { foreach my $c (@$subs) { if (my ($file, $line, $data) = $c->findsynt) { - $self->loadsynt($file, $line, $data); + my $d = $self->loadsynt($file, $line, $data); + if ($d) { + $dict = { %{$dict // {}}, %$d } + } } last if $c eq $class; } } + $self->lexicon($dict) if $dict; } $self->init; if (@parseargs) { $self->parse(@parseargs); $self->commit or croak "configuration failed"; @@ -82,36 +87,34 @@ sub loadsynt { or croak "can't open filehandle for data string"; $self->parse($file, fh => $fh, line => $line) or croak "Failed to parse template at $file:$line"; close $fh; - $self->lexicon({ - %{$self->lexicon // {}}, - %{$self->as_hash(sub { + + my @sections; + my $lex = $self->as_hash(sub { my ($what, $name, $val) = @_; $name = '*' if $name eq 'ANY'; if ($what eq 'section') { $val->{section} = {}; - if ($name =~ s/:mandatory$//) { - $val->{mandatory} = 1; - } + push @sections, $val; ($name, $val->{section}); } else { my @words = parse_line('\s+', 0, $val); my $ret = {}; $val = shift @words; if ($val eq 'STRING') { # nothing - } elsif ($val eq 'NUMBER') { - $ret->{re} = '\d+'; + } elsif ($val eq 'NUMBER' || $val eq 'DECIMAL') { + $ret->{re} = '^\d+$'; } elsif ($val eq 'OCTAL') { - $ret->{re} = '[0-7]+'; + $ret->{re} = '^[0-7]+$'; } elsif ($val eq 'HEX') { - $ret->{re} = '([0-9][A-Fa-f])+'; + $ret->{re} = '^([0-9][A-Fa-f])+$'; } else { unshift @words, $val; } while (($val = $words[0]) && $val =~ /^:(?<kw>.+?)(?:\s*=\s*(?<val>.*))?$/) { @@ -124,13 +127,22 @@ sub loadsynt { } else { $ret->{default} = join(' ', @words); } } ($name, $ret); } - })->{section}}}); + })->{section}; + # Process eventual __options__ keywords + foreach my $s 
(@sections) { + if (exists($s->{section}{__options__})) { + @{$s}{keys %{$s->{section}{__options__}}} + = values %{$s->{section}{__options__}}; + delete $s->{section}{__options__}; + } + } + return $lex; } 1; =head1 NAME @@ -140,13 +152,13 @@ Config::Parser - base class for configuration file parsers B<Config::Parser> provides a framework for writing configuration file parsers. It is an intermediate layer between the abstract syntax tree (B<Config::AST>) and implementation of a parser for a particular configuration file format. -It takes a I<define by example> approach. That means that the implementor +It takes a I<define by example> approach. That means that the implementer creates a derived class that implements a parser on top of B<Config::Parser>. Application writers write an example of configuration file in the B<__DATA__> section of their application, which defines the statements that are allowed in a valid configuration. This example is then processed by the parser implementation to create an instance of the parser, which is then used to process the actual configuration file. @@ -155,17 +167,18 @@ Let's illustrate this on a practical example. Suppose you need a parser for a simple configuration file, which consists of keyword/value pairs. In each pair, the keyword is separated from the value by an equals sign. Pairs are delimited by newlines. Leading and trailing whitespace characters on a line are ignored as well as are empty lines. Comments begin with a hash sign and end with a newline. -You create the class B<Config::Parser::KV> based on B<Config::Parser>. The -method B<parser> in this class implements the actual parser. +You create the class, say B<Config::Parser::KV>, inherited from +B<Config::Parser>. The method B<parser> in this class implements the actual +parser. 
Application writer decides what keywords are allowed in a valid configuration -file and what are their values and puts them forth in the B<__DATA__> section +file and what are their values and describes them in the B<__DATA__> section of his program (normally in a class derived from B<Config::Parser::KV>, in the same format as the actual configuration file. For example: __DATA__ basedir = STRING :mandatory mode = OCTAL @@ -213,13 +226,15 @@ Apache-like format would look like Creates a new parser object. Keyword arguments are: =over 4 =item B<filename> -Name of the file to parse. This keyword must be present. +Name of the file to parse. If not supplied, you will have to +call the B<$cfg-E<gt>parse> method explicitly after you are returned a +valid B<$cfg>. =item B<line> Optional line where the configuration starts in B<filename>. It is used to keep track of statement location in the file for correct diagnostics. If not supplied, B<1> is assumed. @@ -236,19 +251,19 @@ need this parameter. It is listed here for completeness sake. Refer to the B<Config::AST> constructor for details. =back =head1 USER HOOKS -These are the methods provided for implementors to do any implementation- +These are the methods provided for implementers to do any implementation- specific tasks. Default implementations are empty placeholders. =head2 $cfg->init Called after creation of the base object, when parsing of the syntax -definition has finished. Implementors can use it to do any +definition has finished. Implementers can use it to do any implementation-specific initialization. =head2 $cfg->mangle Called after successful parsing. It can be used to modify the created source tree. @@ -280,9 +295,119 @@ B<1> is assumed. =head2 $cfg->commit Finalizes the syntax tree. Returns true on success, and false on errors. =head1 SYNTAX DEFINITION +Syntax definition is a textual description of statements allowed in +a configuration file. 
It is written in the format of the configuration +file itself and is parsed using the same object (derivative of +B<Config::Parser>) that will be used later to parse the actual configuration. + +Syntax definitions are gathered from the B<__DATA__> blocks of +subclasses of B<Config::Parser>. + +In a syntax definition the value of each statement consists of optional +data type followed by zero or more options delimited with whitespace. + +Valid data types are: + +=over 4 + +=item B<STRING> + +String value. + +=item B<NUMBER> or B<DECIMAL> + +Decimal number. + +=item B<OCTAL> + +Octal number. + +=item B<HEX> + +Hex number. + +=back + +If the data type is omitted, no checking is performed unless specified +otherwise by other options (see the B<:re> and B<:check> options below). + +Options are special names prefixed with a colon. Option names follow +the keywords from the B<Config::AST> keyword lexicon value. An option +can be followed by an equals sign and its value. If an option is used +without arguments, the value B<1> is implied. + +Any word not recognized as an option or its value starts the I<default +value>. + +Available options are described below: + +=over 4 + +=item B<:mandatory> + +Marks the statement as a mandatory one. If such a statement is missing from +the configuration file, the parser action depends on whether the default value +is supplied. If it is, the statement will be inserted in the parse tree with +the default value. Otherwise, a diagnostic message will be printed and the +constructor will return B<undef>. + +=item B<:default> + +Argument supplies the default value for this setting. + +=item B<:array> + +If the value is 1, declares that the statement is an array. Multiple +occurrences of the statement will be accumulated. They can be retrieved as +a reference to an array when the parsing is finished. + +=item B<:re = >I<string> + +Defines a regular expression which the value must match in order to be +accepted. 
This provides a more elaborate mechanism of checking than the +data types. In fact, data types are converted to the appropriate B<:re> +options internally, for example B<OCTAL> becomes B<:re = "^[0-7]+$">. +If data type and B<:re> are used together, B<:re> takes precedence. + +=item B<:select = >I<method> + +Argument is the name of a method to call in order to decide +whether to apply this definition. The method will be called as + + $cfg->{ \$method }($node, @path) + +where $node is the B<Config::AST::Node::Value> object (use +B<$node-E<gt>value> to obtain the actual value), and B<@path> is its pathname. + +=item B<:check = >I<method> + +Argument is the name of a method which will be invoked after parsing the +statement in order to verify its value. This provides the most flexible +way of verification (the other two being the B<:re> option and data type +declaration). The method will be invoked as follows: + + $cfg->{ \$method }($valref, $prev_value, $locus) + +where B<$valref> is a reference to the value, and B<$prev_value> is the +value of the previous instance of this setting. The method must return +B<true>, if the value is OK for that setting. In that case, it is allowed +to modify the value referenced by B<$valref>. If the value is erroneous, +the method must issue an appropriate error message using B<$cfg-E<gt>error>, +and return 0. + +=back + +To specify options for a section, use the reserved keyword B<__options__>. +Its value is the list of options as described above. After processing, the +keyword itself is removed from the lexicon. + +=head1 SEE ALSO + +B<Config::AST>(3). 
+ =cut diff --git a/lib/Config/Parser/Ini.pm b/lib/Config/Parser/Ini.pm index 0ea632c..79d231e 100644 --- a/lib/Config/Parser/Ini.pm +++ b/lib/Config/Parser/Ini.pm @@ -91,6 +91,180 @@ sub _readconfig { } close $fh if $need_close; } 1; +=head1 NAME + +Config::Parser::Ini - configuration file parser for ini-style files + +=head1 SYNOPSIS + +$cfg = new Config::Parser::Ini($filename); + +$val = $cfg->get('dir', 'tmp'); + +print $val->value; +print $val->locus; + +$val = $cfg->tree->Dir->Tmp; + +=head1 DESCRIPTION + +An I<ini-style configuration file> is a textual file consisting of settings +grouped into one or more sections. A I<setting> has the form + + KEYWORD = VALUE + +where I<KEYWORD> is the setting name and I<VALUE> is its value. +Syntactically, I<VALUE> is anything to the right of the equals sign and up +to the linefeed character terminating the line (ASCII 10), not including +the leading and trailing whitespace characters. + +Each setting occupies one line. Very long lines can be split over several +physical lines by ending each line fragment except the last with a backslash +character appearing right before the linefeed character. + +A I<section> begins with a section declaration in the following form: + + [NAME NAME...] + +Here, square brackets form part of the syntax. Any number of I<NAME>s +can be present inside the square brackets. The first I<NAME> must follow the +usual rules for a valid identifier name. Rest of I<NAME>s can contain any +characters, provided that any I<NAME> that includes non-alphanumeric characters +is enclosed in a pair of double-quotes. Any double-quotes and backslash +characters appearing within the quoted string must be escaped by prefixing +them with a single backslash. + +The B<Config::Parser::Ini> module is a framework for parsing such files. 
+ +In the simplest case, the usage of this module is as simple as in the following +fragment: + + use Config::Parser::Ini; + my $cf = new Config::Parser::Ini(filename => "config.ini"); + +On success, this returns a valid B<Config::Parser::Ini> object. On error, +the diagnostic message is issued using the B<error> method (see the description +of the method in B<Config::AST>(3)) and the module croaks. + +This usage, although simple, has one major drawback - no checking is performed +on the input file, except for the syntax check. To fix this, you can supply +a dictionary (or I<lexicon>) of allowed keywords along with their values. +Such a dictionary is itself a valid ini file, where the value of each +keyword describes its properties. The dictionary is placed in the B<__DATA__> +section of the source file which invokes the B<Config::Parser::Ini> constructor. + +Expanding the example above: + + use Config::Parser::Ini; + my $cf = new Config::Parser::Ini(filename => "config.ini"); + + __DATA__ + [core] + root = STRING :default / + umask = OCTAL + [user] + uid = NUMBER + gid = NUMBER + +This code specifies that the configuration file can contain at most two +sections: C<[core]> and C<[user]>. Two keywords are defined within each +section. Data types are specified for each keyword, so the parser will +bail out in case of type mismatches. If the B<core.root> setting is not +present in the configuration, the default one will be created with the +value C</>. + +It is often advisable to create a subclass of B<Config::Parser::Ini> and +use it for parsing. 
For instance: + + package App::MyConf; + use Config::Parser::Ini; + 1; + __DATA__ + [core] + root = STRING :default / + umask = OCTAL + [user] + uid = NUMBER + gid = NUMBER + +Then, to parse the configuration file, it will suffice to do: + + $cf = new App::MyConf(filename => "config.ini"); + +One advantage of this approach is that it will allow you to install +additional validation for the configuration statements using the +B<:check> option. The argument to this option is the name of a +method which will be invoked after parsing the statement in order +to verify its value. It is described in detail elsewhere (see the section +B<SYNTAX DEFINITION> in the documentation of B<Config::Parser>). +For example, if you wish to ensure that the value of the C<root> setting +in C<core> section points to an existing directory, you would do: + + package App::MyConf; + use Config::Parser::Ini; + + sub dir_exists { + my ($self, $valref, $prev_value, $locus) = @_; + + unless (-d $$valref) { + $self->error("$$valref: directory does not exist", + locus => $locus); + return 0; + } + return 1; + } + 1; + __DATA__ + [core] + root = STRING :default / :check=dir_exists + umask = OCTAL + [user] + uid = NUMBER + gid = NUMBER + +=head1 CONSTRUCTOR + + $cfg = new Config::Parser::Ini(%opts) + +Creates a new parser object. Keyword arguments are: + +=over 4 + +=item B<filename> + +Name of the file to parse. If not supplied, you will have to +call the B<$cfg-E<gt>parse> method explicitly after you are returned a +valid B<$cfg>. + +=item B<line> + +Optional line where the configuration starts in B<filename>. It is used to +keep track of statement location in the file for correct diagnostics. If +not supplied, B<1> is assumed. + +=item B<fh> + +File handle to read from. If it is not supplied, a new handle will be +created by using B<open> on the supplied filename. + +=item B<lexicon> + +Dictionary of allowed configuration statements in the file. You will not +need this parameter. 
It is listed here for completeness sake. Refer to +the B<Config::AST> constructor for details. + +=back + +=head1 METHODS + +All methods are inherited from B<Config::Parser>. Please see its +documentation for details. + +=head1 SEE ALSO + +B<Config::Parser>(3), B<Config::AST>(3). + +=cut diff --git a/t/ConfigSpec2.pm b/t/ConfigSpec2.pm index 0a56cd6..b37cd4a 100644 --- a/t/ConfigSpec2.pm +++ b/t/ConfigSpec2.pm @@ -2,9 +2,10 @@ package ConfigSpec2; use parent 'TestConfig'; 1; __DATA__ [core] base = STRING :mandatory null -[load ANY param:mandatory] +[load ANY param] + __options__ = :mandatory mode = OCTAL owner = STRING |