diff options
Diffstat (limited to 'lib/Config/Tree.pm')
-rw-r--r-- | lib/Config/Tree.pm | 893 |
1 files changed, 893 insertions, 0 deletions
diff --git a/lib/Config/Tree.pm b/lib/Config/Tree.pm new file mode 100644 index 0000000..39e0de9 --- /dev/null +++ b/lib/Config/Tree.pm @@ -0,0 +1,893 @@ +# Configuration parser for Sourceyard -*- perl -*- +# Copyright (C) 2017 Sergey Poznyakoff <gray@gnu.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +package Config::Tree; + +use strict; +use warnings; +use Carp; +use Config::Tree::Locus; +use Config::Tree::Node::Section; +use Config::Tree::Node::Value; +use Data::Dumper; + +require Exporter; +our @ISA = qw(Exporter); +our %EXPORT_TAGS = ( 'sort' => [ qw(NO_SORT SORT_NATURAL SORT_PATH) ] ); +our @EXPORT_OK = qw(NO_SORT SORT_NATURAL SORT_PATH); + +our $VERSION = "1.00"; + +=head1 NAME + +Config::Tree - generalized configuration file parser + +=head1 SYNOPSIS + + my $cfg = new Config::Tree($filename, %opts); + $cfg->parse() or die; + + if ($cfg->is_set('core', 'variable')) { + ... + } + + my $x = $cfg->get('file', 'locking'); + + $cfg->set('file', 'locking', 'true'); + + $cfg->unset('file', 'locking'); + +=head1 DESCRIPTION + +Configuration file handling. Features: + +=over 4 + +=item 1 + +Handles I<git>-format configuration files. + +=item 2 + +Table-driven syntax checking and validation. + +=item 3 + +Optional caching facility allows for faster loading. This is especially +useful for big configurations. + +=item 4 + +Built-in B<lint> facility. + +=item 5 + +Location tracking. + +=item 6 + +Dump facility. The parsed configuration can be output to the given file +handler in a standardized form. + +=item 7 + +Both random access and iteration over all settings is possible. + +=back + +=head1 METHODS + +=head2 $cfg = new Config::Tree($filename, %opts); + +Creates new configuration object for file B<$filename>. Valid +options are: + +=over 4 + +=item B<debug> => I<NUM> + +Sets debug verbosity level. + +=item B<ci> => B<0> | B<1> + +If B<1>, enables case-insensitive keyword matching. Default is B<0>, +i.e. the keywords are case-sensitive. + +=item B<parameters> => \%hash + +Defines the syntax table. See below for a description of B<%hash>. + +=back + +=head3 Syntax hash + +The hash passed via the B<parameters> keyword defines the keywords and +sections allowed within a configuration file. In a simplest case, a +keyword is described as + + name => 1 + +This means that B<name> is a valid keyword, but does not imply anything +more about it or its value. A more complex declaration is possible, in +which the value is a hash reference, containing one or more of the following +keywords: + +=over 4 + +=item mandatory => 0 | 1 + +Whether or not this setting is mandatory. + +=item default => I<VALUE> + +Default value for the setting. It is assigned when entire configuration file +has been parsed, if that particular setting did not occur in it. If I<VALUE> +is a code, it will be invoked as a method each time the value is accessed. + +Default values must be pure Perl values (not the values that should appear +in the configuration file). They are not processed using the B<check> +callbacks. + +=item array => 0 | 1 + +If B<1>, the value of the setting is an array. Each subsequent occurrence +of the statement appends its value to the end of the array. + +=item re => I<regexp> + +Defines a regular expression to which must be matched by the value of the +setting, otherwise a syntax error will be reported. + +=item select => I<coderef> + +Points to a function to be called to decide whether to apply this hash to +a particular configuration setting. The function is called as + + $self->$coderef($node, @path) + +where $node is the B<Config::Tree::Node::Value> object (use +B<$vref-E<gt>value>, to obtain the actual value), and B<@path> is its patname. + +=item check => I<coderef> + +Defines a code which will be called after parsing the statement in order to +verify its value. The I<coderef> is called as + + $self->$coderef($valref, $prev_value, $locus) + +where B<$valref> is a reference to its value, and B<$prev_value> is the +value of the previous instance of this setting. The function must return non-0 +if the value is OK for that setting. In that case, it is allowed to modify +the value, referenced by B<$valref>. If the value is erroneous, the function +must issue an appropriate error message using B<$cfg-E<gt>error>, and return 0. + +=back + +To define a section, use the B<section> keyword, e.g.: + + core => { + section => { + pidfile => { + mandatory => 1 + }, + verbose => { + re => qr/^(?:on|off)/i + } + } + } + +This says that a section B<[core]> can have two variables: B<pidfile>, which +is mandatory, and B<verbose>, whose value must be B<on>, or B<off> (case- +insensitive). + +To allow for arbitrary keywords, use B<*>. For example, the following +declares the B<[code]> section, which must have the B<pidfile> setting +and is allowed to have any other settings as well. + + code => { + section => { + pidfile => { mandatory => 1 }, + '*' => 1 + } + } + +Everything said above applies to the B<'*'> as well. E.g. the following +example declares the B<[code]> section, which must have the B<pidfile> +setting and is allowed to have I<subsections> with arbitrary settings. + + code => { + section => { + pidfile = { mandatory => 1 }, + '*' => { + section => { + '*' => 1 + } + } + } + } + +The special entry + + '*' => '*' + +means "any settings and any subsections". + +=cut + +sub new { + my $class = shift; + local %_ = @_; + my $self = bless { _order => 0 }, $class; + my $v; + my $err; + + $self->{_debug} = delete $_{debug} || 0; + $self->{_ci} = delete $_{ci} || 0; + + if (defined($v = delete $_{parameters})) { + if (ref($v) eq 'HASH') { + $self->{_parameters} = $v; + } else { + carp "parameters must refer to a HASH"; + ++$err; + } + } + + if (keys(%_)) { + foreach my $k (keys %_) { + carp "unknown parameter $k" + } + ++$err; + } + croak "can't create configuration instance" if $err; + $self->reset; + return $self; +} + +sub reset { + my $self = shift; + $self->{_error_count} = 0; + delete $self->{_conf}; +} + +=head2 $cfg->error($message) + +=head2 $cfg->error($message, locus => $loc) + +Prints the B<$message> on STDERR. If B<locus> is given, its value must +be a reference to a valid B<Config::Tree::Locus>(3) object. In that +case, the object will be formatted first, then followed by a ": " and the +B<$message>. + +=cut + +sub error { + my $self = shift; + my $err = shift; + local %_ = @_; + print STDERR "$_{locus}: " if exists $_{locus}; + print STDERR "$err\n"; +} + +=head2 $cfg->debug($lev, @msg) + +If B<$lev> is greater than or equal to the B<debug> value used when +creating B<$cfg>, outputs on standard error the strings from @msg, +separating them with a single space character. + +Otherwise, does nothing. + +=cut + +sub debug { + my $self = shift; + my $lev = shift; + return unless $self->{_debug} >= $lev; + $self->error("DEBUG: " . join(' ', @_)); +} + +sub _fixup { + my ($self, $section, $params, @path) = @_; + + while (my ($k, $d) = each %{$params}) { + next unless ref($d) eq 'HASH'; + + if (exists($d->{default}) && !$section->has_key($k)) { + my $n; + my $dfl = ref($d->{default}) eq 'CODE' + ? sub { $self->${ \ $d->{default} } } + : $d->{default}; + if (exists($d->{section})) { + $n = new Config::Tree::Node::Section( + default => 1, + subtree => $dfl + ); + } else { + $n = new Config::Tree::Node::Value( + default => 1, + value => $dfl + ); + } + $section->subtree($k => $n); + } + + if (exists($d->{section})) { + if ($k eq '*') { + while (my ($name, $vref) = each %{$section->subtree}) { + if (my $sel = $d->{select}) { + if ($self->$sel($vref, @path, $name)) { + next; + } + } elsif ($vref->is_section) { + $self->_fixup($vref, $d->{section}, @path, $name); + } + } + } else { + my $node; + + unless ($node = $section->subtree($k)) { + $node = new Config::Tree::Node::Section; + } + if ((!exists($d->{select}) + || $self->${ \ $d->{select} }($node, @path, $k))) { + $self->_fixup($node, $d->{section}, @path, $k); + } + if ($node->keys > 0) { + $section->subtree($k => $node); + } + } + } + + if ($d->{mandatory} && !$section->has_key($k)) { + $self->error(exists($d->{section}) + ? "mandatory section [" + . join(' ', @path, $k) + . "] not present" + : "mandatory variable \"" + . join('.', @path, $k) + . "\" not set", + locus => $section->locus); + $self->{_error_count}++; + } + } +} + +=head2 $cfg->parse() + +Parses the configuration file and stores the data in the object. Returns +true on success and false on failure. Eventual errors in the configuration +are reported using B<error>. + +=cut + +sub parse { + my ($self) = @_; + croak "call to abstract method" +} + +sub getnode { + my $self = shift; + + my $node = $self->{_conf} or return undef; + for (@_) { + $node = $node->subtree($self->{_ci} ? lc($_) : $_) + or return undef; + } + return $node; +} + +=head2 $var = $cfg->get(@path); + +Returns the B<Config::Tree::Node::Value>(3) corresponding to the +configuration variable represented by its I<path>, or B<undef> if the +variable is not set. The path is a list of configuration variables leading +to the value in question. For example, the following statement: + + pidfile = /var/run/x.pid + +has the path + + ( 'pidfile' ) + +The path of the B<pidfile> statement in section B<core>, e.g.: + + [core] + pidfile = /var/run/x.pid + +is + + ( 'core', 'pidfile' ) + +Similarly, the path of the B<file> setting in the following configuration +file: + + [item foo] + file = bar + +is + ( 'item', 'foo', 'bar' ) + +=cut + +sub get { + my $self = shift; + croak "no variable to get" unless @_; + my $node = $self->getnode(@_) or return undef; + my $value = $node->value; + if (ref($value) eq 'ARRAY') { + return wantarray ? @$value : $value; + } elsif (ref($value) eq 'HASH') { + return wantarray ? %$value : $value; + } + return $value; +} + +=head2 $cfg->is_set(@path) + +Returns true if the configuration variable addressed by B<@path> is +set. + +=cut + +sub is_set { + my $self = shift; + return defined $self->getnode(@_); +} + +=head2 $cfg->is_section(@path) + +Returns true if the configuration section addressed by B<@path> is +set. + +=cut + +sub is_section { + my $self = shift; + my $node = $self->getnode(@_); + return defined($node) && $node->is_section; +} + +=head2 $cfg->is_variable(@path) + +Returns true if the configuration setting addressed by B<@path> +is set and is a variable. + +=cut + +sub is_variable { + my $self = shift; + my $node = $self->getnode(@_); + return defined($node) && $node->is_value; +} + +sub tree { + my $self = shift; + return $self->{_conf} //= new Config::Tree::Node::Section(locus => new Config::Tree::Locus); +} + +sub _get_section_synt { + my ($self, $kw, $name) = @_; + + if (defined($kw)) { + if (ref($kw) eq 'HASH') { + my $synt; + if (exists($kw->{$name})) { + $synt = $kw->{$name}; + } elsif (exists($kw->{'*'})) { + $synt = $kw->{'*'}; + if ($synt eq '*') { + return { '*' => '*' }; + } + } + if (defined($synt) + && ref($synt) eq 'HASH' + && exists($synt->{section})) { + return $synt->{section}; + } + } + } + return +} + +=head2 add_node($node, $path) + +=cut + +sub add_node { + my ($self, $path, $v, $locus) = @_; + + unless (ref($path) eq 'ARRAY') { + $path = [ split(/\./, $path) ] + } + + my $kw = $self->{_parameters} // { '*' => '*' }; + my $node = $self->tree; + my $pn = $#{$path}; + my $name; + for (my $i = 0; $i < $pn; $i++) { + $name = ${$path}[$i]; + + unless ($node->is_section) { + $self->error(join('.', @{$path}[0..$i]) . ": not a section"); + $self->{_error_count}++; + return; + } + + $kw = $self->_get_section_synt($kw, $name); + unless ($kw) { + $self->error(join('.', @{$path}[0..$i]) . ": unknown section"); + $self->{_error_count}++; + return; + } + + if (my $subtree = $node->subtree($name)) { + $node = $subtree; + } else { + $node = $node->subtree( + $name => new Config::Tree::Node::Section( + order => $self->{_order}++, + locus => $locus->clone) + ); + } + } + + $name = ${$path}[-1]; + + my $x = $kw->{$name} // $kw->{'*'}; + if (!defined($x)) { + $self->error("keyword \"$name\" is unknown", locus => $locus); + $self->{_error_count}++; + return; + } + + if (ref($x) eq 'HASH') { + my $errstr; + my $prev_val; + if ($node->has_key($name)) { + # FIXME: is_value? + $prev_val = $node->subtree($name)->value; + } + if (exists($x->{re})) { + if ($v !~ /$x->{re}/) { + $self->error("invalid value for $name", + locus => $locus); + $self->{_error_count}++; + return; + } + } + + if (my $ck = $x->{check}) { + unless ($self->$ck(\$v, $prev_val, $locus)) { + $self->{_error_count}++; + return; + } + } + + if ($x->{array}) { + if (!defined($prev_val)) { + $v = [ $v ]; + } else { + $v = [ @{$prev_val}, $v ]; + } + } + } + + $node->locus->add($locus->clone); + + my $newnode; + if ($newnode = $node->subtree($name)) { + $newnode->locus->add($locus); + } else { + $newnode = $node->subtree( + $name => new Config::Tree::Node::Value(locus => $locus) + ); + } + $newnode->order($self->{order}++); + $newnode->value($v); + return $newnode; +} + +sub commit { + my ($self) = @_; + # FIXME + $self->_fixup($self->tree, $self->{_parameters}) + if exists $self->{_parameters}; + return $self->{_error_count} == 0; +} + +=head2 $cfg->set(@path, $value) + +Sets the configuration variable B<@path> to B<$value>. + +=cut + +sub set { + my $self = shift; + my $node = $self->tree; + + while ($#_ > 1) { + croak "not a section" unless $node->is_section; + my $arg = shift; + if (my $n = $node->subtree($arg)) { + $node = $n; + } else { + $node = $node->subtree( + $arg => new Config::Tree::Node::Section + ); + } + } + + my $v = $node->subtree($_[0]) || + $node->subtree($_[0] => new Config::Tree::Node::Value( + order => $self->{_order}++ + )); + + $v->value($_[1]); + $v->default(0); + return $v; +} + +=head2 cfg->unset(@path) + +Unsets the configuration variable. + +=cut + +sub unset { + my $self = shift; + + my $node = $self->{_conf} or return; + my @path; + + for (@_) { + return unless $node->is_section && $node->has_key($_); + push @path, [ $node, $_ ]; + $node = $node->subtree($_); + } + + while (1) { + my $loc = pop @path; + $loc->[0]->delete($loc->[1]); + last unless ($loc->[0]->keys == 0); + } +} + +=head2 @array = $cfg->names_of(@path) + +If B<@path> refers to an existing configuration section, returns a list +of names of variables and subsections defined within that section. E.g., +if you have + + [item foo] + x = 1 + [item bar] + x = 1 + [item baz] + y = 2 + +the call + + $cfg->names_of('item') + +will return + + ( 'foo', 'bar', 'baz' ) + +=cut + +sub names_of { + my $self = shift; + my $node = $self->getnode(@_); + return () unless defined($node) && $node->is_section; + return $node->keys; +} + +=head2 @array = $cfg->flatten() + +=head2 @array = $cfg->flatten(sort => $sort) + +Returns a I<flattened> representation of the configuration, as a +list of pairs B<[ $path, $value ]>, where B<$path> is a reference +to the variable pathname, and B<$value> is a +B<Config::Tree::Node::Value> object. + +=cut + +use constant { + NO_SORT => 0, + SORT_NATURAL => 1, + SORT_PATH => 2 +}; + +=pod + +The I<$sort> argument controls the ordering of the entries in the returned +B<@array>. It is either a code reference suitable to pass to the Perl B<sort> +function, or one of the following constants: + +=over 4 + +=item NO_SORT + +Don't sort the array. Statements will be placed in an apparently random +order. + +=item SORT_NATURAL + +Preserve relative positions of the statements. Entries in the array will +be in the same order as they appeared in the configuration file. This is +the default. + +=item SORT_PATH + +Sort by pathname. + +=back + +These constants are not exported by default. You can either import the +ones you need, or use the B<:sort> keyword to import them all, e.g.: + + use Sourceyard::Config qw(:sort); + @array = $cfg->flatten(sort => SORT_PATH); + +=cut + +sub flatten { + my $self = shift; + local %_ = @_; + my $sort = delete($_{sort}) || SORT_NATURAL; + my @ar; + my $i; + + croak "unrecognized keyword arguments: ". join(',', keys %_) + if keys %_; + + push @ar, [ [], $self->{_conf} ]; + foreach my $elt (@ar) { + next if $elt->[1]->is_value; + while (my ($kw, $val) = each %{$elt->[1]->subtree}) { + push @ar, [ [@{$elt->[0]}, $kw], $val ]; + } + } + + if (ref($sort) eq 'CODE') { + $sort = sub { sort $sort @_ }; + } elsif ($sort == SORT_PATH) { + $sort = sub { + sort { join('.',@{$a->[0]}) cmp join('.', @{$b->[0]}) } @_ + }; + } elsif ($sort == SORT_NATURAL) { + $sort = sub { + sort { $a->[1]->order <=> $b->[1]->order } @_ + }; + } elsif ($sort == NO_SORT) { + $sort = sub { @_ }; + } else { + croak "unsupported sort value"; + } + shift @ar; # toss off first entry + return &{$sort}(grep { $_->[1]->is_value } @ar); +} + +sub __lint { + my ($self, $syntax, $node, @path) = @_; + + $syntax = {} unless ref($syntax) eq 'HASH'; + if (exists($syntax->{section})) { + return unless $node->is_section; + } else { + return if $node->is_section; + } + + if (exists($syntax->{select}) && + !$self->${ \ $syntax->{select} }($node, @path)) { + return; + } + + if ($node->is_section) { + $self->_lint($syntax->{section}, $node, @path); + } else { + my $val = $node->value; + my %opts = ( locus => $node->locus ); + + if (ref($val) eq 'ARRAY') { + if ($syntax->{array}) { + my @ar; + foreach my $v (@$val) { + if (exists($syntax->{re})) { + if ($v !~ /$syntax->{re}/) { + $self->error("invalid value for $path[-1]", %opts); + $self->{_error_count}++; + next; + } + } + if (my $ck = $syntax->{check}) { + unless ($self->$ck(\$v, @ar ? $ar[-1] : undef, + $node->locus)) { + $self->{_error_count}++; + next; + } + } + push @ar, $v; + } + $node->value(\@ar); + return; + } else { + $val = pop(@$val); + } + } + + if (exists($syntax->{re})) { + if ($val !~ /$syntax->{re}/) { + $self->error("invalid value for $path[-1]", %opts); + $self->{_error_count}++; + return; + } + } + + if (my $ck = $syntax->{check}) { + unless ($self->$ck(\$val, undef, $node->locus)) { + $self->{_error_count}++; + return; + } + } + + $node->value($val); + } +} + +sub _lint { + my ($self, $syntab, $node, @path) = @_; + + while (my ($var, $value) = each %{$node->subtree}) { + if (exists($syntab->{$var})) { + $self->__lint($syntab->{$var}, $value, @path, $var); + } elsif (exists($syntab->{'*'})) { + $self->__lint($syntab->{'*'}, $value, @path, $var); + } elsif ($value->is_section) { + next; + } else { + $self->error("keyword \"$var\" is unknown", + locus => $value->locus); + $self->{_error_count}++; + } + } +} + +=head2 $cfg->lint(\%synt) + +Checks the syntax according to the syntax table B<%synt>. On success, +applies eventual default values and returns true. On errors, reports +them using B<error> and returns false. + +This method provides a way to delay syntax checking for a later time, +which is useful, e.g. if some parts of the parser are loaded as modules +after calling B<parse>. + +=cut + +sub lint { + my ($self, $synt) = @_; + +# $synt->{'*'} = { section => { '*' => 1 }} ; + $self->{_conf} = new Config::Tree::Node::Section(default => 1) + unless exists $self->{_conf}; + $self->_lint($synt, $self->{_conf}); + $self->_fixup($self->{_conf}, $synt); + return $self->{_error_count} == 0; +} + +1; |