summaryrefslogtreecommitdiff
path: root/lib/Config/Tree.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Config/Tree.pm')
-rw-r--r--lib/Config/Tree.pm893
1 files changed, 893 insertions, 0 deletions
diff --git a/lib/Config/Tree.pm b/lib/Config/Tree.pm
new file mode 100644
index 0000000..39e0de9
--- /dev/null
+++ b/lib/Config/Tree.pm
@@ -0,0 +1,893 @@
+# Configuration parser for Sourceyard -*- perl -*-
+# Copyright (C) 2017 Sergey Poznyakoff <gray@gnu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package Config::Tree;
+
+use strict;
+use warnings;
+use Carp;
+use Config::Tree::Locus;
+use Config::Tree::Node::Section;
+use Config::Tree::Node::Value;
+use Data::Dumper;
+
+require Exporter;
+our @ISA = qw(Exporter);
+our %EXPORT_TAGS = ( 'sort' => [ qw(NO_SORT SORT_NATURAL SORT_PATH) ] );
+our @EXPORT_OK = qw(NO_SORT SORT_NATURAL SORT_PATH);
+
+our $VERSION = "1.00";
+
+=head1 NAME
+
+Config::Tree - generalized configuration file parser
+
+=head1 SYNOPSIS
+
+ my $cfg = new Config::Tree($filename, %opts);
+ $cfg->parse() or die;
+
+ if ($cfg->is_set('core', 'variable')) {
+ ...
+ }
+
+ my $x = $cfg->get('file', 'locking');
+
+ $cfg->set('file', 'locking', 'true');
+
+ $cfg->unset('file', 'locking');
+
+=head1 DESCRIPTION
+
+Configuration file handling. Features:
+
+=over 4
+
+=item 1
+
+Handles I<git>-format configuration files.
+
+=item 2
+
+Table-driven syntax checking and validation.
+
+=item 3
+
+Optional caching facility allows for faster loading. This is especially
+useful for big configurations.
+
+=item 4
+
+Built-in B<lint> facility.
+
+=item 5
+
+Location tracking.
+
+=item 6
+
+Dump facility. The parsed configuration can be output to the given file
+handler in a standardized form.
+
+=item 7
+
+Both random access and iteration over all settings is possible.
+
+=back
+
+=head1 METHODS
+
+=head2 $cfg = new Config::Tree($filename, %opts);
+
+Creates new configuration object for file B<$filename>. Valid
+options are:
+
+=over 4
+
+=item B<debug> => I<NUM>
+
+Sets debug verbosity level.
+
+=item B<ci> => B<0> | B<1>
+
+If B<1>, enables case-insensitive keyword matching. Default is B<0>,
+i.e. the keywords are case-sensitive.
+
+=item B<parameters> => \%hash
+
+Defines the syntax table. See below for a description of B<%hash>.
+
+=back
+
+=head3 Syntax hash
+
+The hash passed via the B<parameters> keyword defines the keywords and
+sections allowed within a configuration file. In a simplest case, a
+keyword is described as
+
+ name => 1
+
+This means that B<name> is a valid keyword, but does not imply anything
+more about it or its value. A more complex declaration is possible, in
+which the value is a hash reference, containing one or more of the following
+keywords:
+
+=over 4
+
+=item mandatory => 0 | 1
+
+Whether or not this setting is mandatory.
+
+=item default => I<VALUE>
+
+Default value for the setting. It is assigned when entire configuration file
+has been parsed, if that particular setting did not occur in it. If I<VALUE>
+is a code, it will be invoked as a method each time the value is accessed.
+
+Default values must be pure Perl values (not the values that should appear
+in the configuration file). They are not processed using the B<check>
+callbacks.
+
+=item array => 0 | 1
+
+If B<1>, the value of the setting is an array. Each subsequent occurrence
+of the statement appends its value to the end of the array.
+
+=item re => I<regexp>
+
+Defines a regular expression to which must be matched by the value of the
+setting, otherwise a syntax error will be reported.
+
+=item select => I<coderef>
+
+Points to a function to be called to decide whether to apply this hash to
+a particular configuration setting. The function is called as
+
+ $self->$coderef($node, @path)
+
+where $node is the B<Config::Tree::Node::Value> object (use
+B<$vref-E<gt>value>, to obtain the actual value), and B<@path> is its patname.
+
+=item check => I<coderef>
+
+Defines a code which will be called after parsing the statement in order to
+verify its value. The I<coderef> is called as
+
+ $self->$coderef($valref, $prev_value, $locus)
+
+where B<$valref> is a reference to its value, and B<$prev_value> is the
+value of the previous instance of this setting. The function must return non-0
+if the value is OK for that setting. In that case, it is allowed to modify
+the value, referenced by B<$valref>. If the value is erroneous, the function
+must issue an appropriate error message using B<$cfg-E<gt>error>, and return 0.
+
+=back
+
+To define a section, use the B<section> keyword, e.g.:
+
+ core => {
+ section => {
+ pidfile => {
+ mandatory => 1
+ },
+ verbose => {
+ re => qr/^(?:on|off)/i
+ }
+ }
+ }
+
+This says that a section B<[core]> can have two variables: B<pidfile>, which
+is mandatory, and B<verbose>, whose value must be B<on>, or B<off> (case-
+insensitive).
+
+To allow for arbitrary keywords, use B<*>. For example, the following
+declares the B<[code]> section, which must have the B<pidfile> setting
+and is allowed to have any other settings as well.
+
+ code => {
+ section => {
+ pidfile => { mandatory => 1 },
+ '*' => 1
+ }
+ }
+
+Everything said above applies to the B<'*'> as well. E.g. the following
+example declares the B<[code]> section, which must have the B<pidfile>
+setting and is allowed to have I<subsections> with arbitrary settings.
+
+ code => {
+ section => {
+ pidfile = { mandatory => 1 },
+ '*' => {
+ section => {
+ '*' => 1
+ }
+ }
+ }
+ }
+
+The special entry
+
+ '*' => '*'
+
+means "any settings and any subsections".
+
+=cut
+
+sub new {
+ my $class = shift;
+ local %_ = @_;
+ my $self = bless { _order => 0 }, $class;
+ my $v;
+ my $err;
+
+ $self->{_debug} = delete $_{debug} || 0;
+ $self->{_ci} = delete $_{ci} || 0;
+
+ if (defined($v = delete $_{parameters})) {
+ if (ref($v) eq 'HASH') {
+ $self->{_parameters} = $v;
+ } else {
+ carp "parameters must refer to a HASH";
+ ++$err;
+ }
+ }
+
+ if (keys(%_)) {
+ foreach my $k (keys %_) {
+ carp "unknown parameter $k"
+ }
+ ++$err;
+ }
+ croak "can't create configuration instance" if $err;
+ $self->reset;
+ return $self;
+}
+
+sub reset {
+ my $self = shift;
+ $self->{_error_count} = 0;
+ delete $self->{_conf};
+}
+
+=head2 $cfg->error($message)
+
+=head2 $cfg->error($message, locus => $loc)
+
+Prints the B<$message> on STDERR. If B<locus> is given, its value must
+be a reference to a valid B<Config::Tree::Locus>(3) object. In that
+case, the object will be formatted first, then followed by a ": " and the
+B<$message>.
+
+=cut
+
+sub error {
+ my $self = shift;
+ my $err = shift;
+ local %_ = @_;
+ print STDERR "$_{locus}: " if exists $_{locus};
+ print STDERR "$err\n";
+}
+
+=head2 $cfg->debug($lev, @msg)
+
+If B<$lev> is greater than or equal to the B<debug> value used when
+creating B<$cfg>, outputs on standard error the strings from @msg,
+separating them with a single space character.
+
+Otherwise, does nothing.
+
+=cut
+
+sub debug {
+ my $self = shift;
+ my $lev = shift;
+ return unless $self->{_debug} >= $lev;
+ $self->error("DEBUG: " . join(' ', @_));
+}
+
+sub _fixup {
+ my ($self, $section, $params, @path) = @_;
+
+ while (my ($k, $d) = each %{$params}) {
+ next unless ref($d) eq 'HASH';
+
+ if (exists($d->{default}) && !$section->has_key($k)) {
+ my $n;
+ my $dfl = ref($d->{default}) eq 'CODE'
+ ? sub { $self->${ \ $d->{default} } }
+ : $d->{default};
+ if (exists($d->{section})) {
+ $n = new Config::Tree::Node::Section(
+ default => 1,
+ subtree => $dfl
+ );
+ } else {
+ $n = new Config::Tree::Node::Value(
+ default => 1,
+ value => $dfl
+ );
+ }
+ $section->subtree($k => $n);
+ }
+
+ if (exists($d->{section})) {
+ if ($k eq '*') {
+ while (my ($name, $vref) = each %{$section->subtree}) {
+ if (my $sel = $d->{select}) {
+ if ($self->$sel($vref, @path, $name)) {
+ next;
+ }
+ } elsif ($vref->is_section) {
+ $self->_fixup($vref, $d->{section}, @path, $name);
+ }
+ }
+ } else {
+ my $node;
+
+ unless ($node = $section->subtree($k)) {
+ $node = new Config::Tree::Node::Section;
+ }
+ if ((!exists($d->{select})
+ || $self->${ \ $d->{select} }($node, @path, $k))) {
+ $self->_fixup($node, $d->{section}, @path, $k);
+ }
+ if ($node->keys > 0) {
+ $section->subtree($k => $node);
+ }
+ }
+ }
+
+ if ($d->{mandatory} && !$section->has_key($k)) {
+ $self->error(exists($d->{section})
+ ? "mandatory section ["
+ . join(' ', @path, $k)
+ . "] not present"
+ : "mandatory variable \""
+ . join('.', @path, $k)
+ . "\" not set",
+ locus => $section->locus);
+ $self->{_error_count}++;
+ }
+ }
+}
+
+=head2 $cfg->parse()
+
+Parses the configuration file and stores the data in the object. Returns
+true on success and false on failure. Eventual errors in the configuration
+are reported using B<error>.
+
+=cut
+
+sub parse {
+ my ($self) = @_;
+ croak "call to abstract method"
+}
+
+sub getnode {
+ my $self = shift;
+
+ my $node = $self->{_conf} or return undef;
+ for (@_) {
+ $node = $node->subtree($self->{_ci} ? lc($_) : $_)
+ or return undef;
+ }
+ return $node;
+}
+
+=head2 $var = $cfg->get(@path);
+
+Returns the B<Config::Tree::Node::Value>(3) corresponding to the
+configuration variable represented by its I<path>, or B<undef> if the
+variable is not set. The path is a list of configuration variables leading
+to the value in question. For example, the following statement:
+
+ pidfile = /var/run/x.pid
+
+has the path
+
+ ( 'pidfile' )
+
+The path of the B<pidfile> statement in section B<core>, e.g.:
+
+ [core]
+ pidfile = /var/run/x.pid
+
+is
+
+ ( 'core', 'pidfile' )
+
+Similarly, the path of the B<file> setting in the following configuration
+file:
+
+ [item foo]
+ file = bar
+
+is
+ ( 'item', 'foo', 'bar' )
+
+=cut
+
+sub get {
+ my $self = shift;
+ croak "no variable to get" unless @_;
+ my $node = $self->getnode(@_) or return undef;
+ my $value = $node->value;
+ if (ref($value) eq 'ARRAY') {
+ return wantarray ? @$value : $value;
+ } elsif (ref($value) eq 'HASH') {
+ return wantarray ? %$value : $value;
+ }
+ return $value;
+}
+
+=head2 $cfg->is_set(@path)
+
+Returns true if the configuration variable addressed by B<@path> is
+set.
+
+=cut
+
+sub is_set {
+ my $self = shift;
+ return defined $self->getnode(@_);
+}
+
+=head2 $cfg->is_section(@path)
+
+Returns true if the configuration section addressed by B<@path> is
+set.
+
+=cut
+
+sub is_section {
+ my $self = shift;
+ my $node = $self->getnode(@_);
+ return defined($node) && $node->is_section;
+}
+
+=head2 $cfg->is_variable(@path)
+
+Returns true if the configuration setting addressed by B<@path>
+is set and is a variable.
+
+=cut
+
+sub is_variable {
+ my $self = shift;
+ my $node = $self->getnode(@_);
+ return defined($node) && $node->is_value;
+}
+
+sub tree {
+ my $self = shift;
+ return $self->{_conf} //= new Config::Tree::Node::Section(locus => new Config::Tree::Locus);
+}
+
+sub _get_section_synt {
+ my ($self, $kw, $name) = @_;
+
+ if (defined($kw)) {
+ if (ref($kw) eq 'HASH') {
+ my $synt;
+ if (exists($kw->{$name})) {
+ $synt = $kw->{$name};
+ } elsif (exists($kw->{'*'})) {
+ $synt = $kw->{'*'};
+ if ($synt eq '*') {
+ return { '*' => '*' };
+ }
+ }
+ if (defined($synt)
+ && ref($synt) eq 'HASH'
+ && exists($synt->{section})) {
+ return $synt->{section};
+ }
+ }
+ }
+ return
+}
+
+=head2 add_node($node, $path)
+
+=cut
+
+sub add_node {
+ my ($self, $path, $v, $locus) = @_;
+
+ unless (ref($path) eq 'ARRAY') {
+ $path = [ split(/\./, $path) ]
+ }
+
+ my $kw = $self->{_parameters} // { '*' => '*' };
+ my $node = $self->tree;
+ my $pn = $#{$path};
+ my $name;
+ for (my $i = 0; $i < $pn; $i++) {
+ $name = ${$path}[$i];
+
+ unless ($node->is_section) {
+ $self->error(join('.', @{$path}[0..$i]) . ": not a section");
+ $self->{_error_count}++;
+ return;
+ }
+
+ $kw = $self->_get_section_synt($kw, $name);
+ unless ($kw) {
+ $self->error(join('.', @{$path}[0..$i]) . ": unknown section");
+ $self->{_error_count}++;
+ return;
+ }
+
+ if (my $subtree = $node->subtree($name)) {
+ $node = $subtree;
+ } else {
+ $node = $node->subtree(
+ $name => new Config::Tree::Node::Section(
+ order => $self->{_order}++,
+ locus => $locus->clone)
+ );
+ }
+ }
+
+ $name = ${$path}[-1];
+
+ my $x = $kw->{$name} // $kw->{'*'};
+ if (!defined($x)) {
+ $self->error("keyword \"$name\" is unknown", locus => $locus);
+ $self->{_error_count}++;
+ return;
+ }
+
+ if (ref($x) eq 'HASH') {
+ my $errstr;
+ my $prev_val;
+ if ($node->has_key($name)) {
+ # FIXME: is_value?
+ $prev_val = $node->subtree($name)->value;
+ }
+ if (exists($x->{re})) {
+ if ($v !~ /$x->{re}/) {
+ $self->error("invalid value for $name",
+ locus => $locus);
+ $self->{_error_count}++;
+ return;
+ }
+ }
+
+ if (my $ck = $x->{check}) {
+ unless ($self->$ck(\$v, $prev_val, $locus)) {
+ $self->{_error_count}++;
+ return;
+ }
+ }
+
+ if ($x->{array}) {
+ if (!defined($prev_val)) {
+ $v = [ $v ];
+ } else {
+ $v = [ @{$prev_val}, $v ];
+ }
+ }
+ }
+
+ $node->locus->add($locus->clone);
+
+ my $newnode;
+ if ($newnode = $node->subtree($name)) {
+ $newnode->locus->add($locus);
+ } else {
+ $newnode = $node->subtree(
+ $name => new Config::Tree::Node::Value(locus => $locus)
+ );
+ }
+ $newnode->order($self->{order}++);
+ $newnode->value($v);
+ return $newnode;
+}
+
+sub commit {
+ my ($self) = @_;
+ # FIXME
+ $self->_fixup($self->tree, $self->{_parameters})
+ if exists $self->{_parameters};
+ return $self->{_error_count} == 0;
+}
+
+=head2 $cfg->set(@path, $value)
+
+Sets the configuration variable B<@path> to B<$value>.
+
+=cut
+
+sub set {
+ my $self = shift;
+ my $node = $self->tree;
+
+ while ($#_ > 1) {
+ croak "not a section" unless $node->is_section;
+ my $arg = shift;
+ if (my $n = $node->subtree($arg)) {
+ $node = $n;
+ } else {
+ $node = $node->subtree(
+ $arg => new Config::Tree::Node::Section
+ );
+ }
+ }
+
+ my $v = $node->subtree($_[0]) ||
+ $node->subtree($_[0] => new Config::Tree::Node::Value(
+ order => $self->{_order}++
+ ));
+
+ $v->value($_[1]);
+ $v->default(0);
+ return $v;
+}
+
+=head2 cfg->unset(@path)
+
+Unsets the configuration variable.
+
+=cut
+
+sub unset {
+ my $self = shift;
+
+ my $node = $self->{_conf} or return;
+ my @path;
+
+ for (@_) {
+ return unless $node->is_section && $node->has_key($_);
+ push @path, [ $node, $_ ];
+ $node = $node->subtree($_);
+ }
+
+ while (1) {
+ my $loc = pop @path;
+ $loc->[0]->delete($loc->[1]);
+ last unless ($loc->[0]->keys == 0);
+ }
+}
+
+=head2 @array = $cfg->names_of(@path)
+
+If B<@path> refers to an existing configuration section, returns a list
+of names of variables and subsections defined within that section. E.g.,
+if you have
+
+ [item foo]
+ x = 1
+ [item bar]
+ x = 1
+ [item baz]
+ y = 2
+
+the call
+
+ $cfg->names_of('item')
+
+will return
+
+ ( 'foo', 'bar', 'baz' )
+
+=cut
+
+sub names_of {
+ my $self = shift;
+ my $node = $self->getnode(@_);
+ return () unless defined($node) && $node->is_section;
+ return $node->keys;
+}
+
+=head2 @array = $cfg->flatten()
+
+=head2 @array = $cfg->flatten(sort => $sort)
+
+Returns a I<flattened> representation of the configuration, as a
+list of pairs B<[ $path, $value ]>, where B<$path> is a reference
+to the variable pathname, and B<$value> is a
+B<Config::Tree::Node::Value> object.
+
+=cut
+
+use constant {
+ NO_SORT => 0,
+ SORT_NATURAL => 1,
+ SORT_PATH => 2
+};
+
+=pod
+
+The I<$sort> argument controls the ordering of the entries in the returned
+B<@array>. It is either a code reference suitable to pass to the Perl B<sort>
+function, or one of the following constants:
+
+=over 4
+
+=item NO_SORT
+
+Don't sort the array. Statements will be placed in an apparently random
+order.
+
+=item SORT_NATURAL
+
+Preserve relative positions of the statements. Entries in the array will
+be in the same order as they appeared in the configuration file. This is
+the default.
+
+=item SORT_PATH
+
+Sort by pathname.
+
+=back
+
+These constants are not exported by default. You can either import the
+ones you need, or use the B<:sort> keyword to import them all, e.g.:
+
+ use Sourceyard::Config qw(:sort);
+ @array = $cfg->flatten(sort => SORT_PATH);
+
+=cut
+
+sub flatten {
+ my $self = shift;
+ local %_ = @_;
+ my $sort = delete($_{sort}) || SORT_NATURAL;
+ my @ar;
+ my $i;
+
+ croak "unrecognized keyword arguments: ". join(',', keys %_)
+ if keys %_;
+
+ push @ar, [ [], $self->{_conf} ];
+ foreach my $elt (@ar) {
+ next if $elt->[1]->is_value;
+ while (my ($kw, $val) = each %{$elt->[1]->subtree}) {
+ push @ar, [ [@{$elt->[0]}, $kw], $val ];
+ }
+ }
+
+ if (ref($sort) eq 'CODE') {
+ $sort = sub { sort $sort @_ };
+ } elsif ($sort == SORT_PATH) {
+ $sort = sub {
+ sort { join('.',@{$a->[0]}) cmp join('.', @{$b->[0]}) } @_
+ };
+ } elsif ($sort == SORT_NATURAL) {
+ $sort = sub {
+ sort { $a->[1]->order <=> $b->[1]->order } @_
+ };
+ } elsif ($sort == NO_SORT) {
+ $sort = sub { @_ };
+ } else {
+ croak "unsupported sort value";
+ }
+ shift @ar; # toss off first entry
+ return &{$sort}(grep { $_->[1]->is_value } @ar);
+}
+
+sub __lint {
+ my ($self, $syntax, $node, @path) = @_;
+
+ $syntax = {} unless ref($syntax) eq 'HASH';
+ if (exists($syntax->{section})) {
+ return unless $node->is_section;
+ } else {
+ return if $node->is_section;
+ }
+
+ if (exists($syntax->{select}) &&
+ !$self->${ \ $syntax->{select} }($node, @path)) {
+ return;
+ }
+
+ if ($node->is_section) {
+ $self->_lint($syntax->{section}, $node, @path);
+ } else {
+ my $val = $node->value;
+ my %opts = ( locus => $node->locus );
+
+ if (ref($val) eq 'ARRAY') {
+ if ($syntax->{array}) {
+ my @ar;
+ foreach my $v (@$val) {
+ if (exists($syntax->{re})) {
+ if ($v !~ /$syntax->{re}/) {
+ $self->error("invalid value for $path[-1]", %opts);
+ $self->{_error_count}++;
+ next;
+ }
+ }
+ if (my $ck = $syntax->{check}) {
+ unless ($self->$ck(\$v, @ar ? $ar[-1] : undef,
+ $node->locus)) {
+ $self->{_error_count}++;
+ next;
+ }
+ }
+ push @ar, $v;
+ }
+ $node->value(\@ar);
+ return;
+ } else {
+ $val = pop(@$val);
+ }
+ }
+
+ if (exists($syntax->{re})) {
+ if ($val !~ /$syntax->{re}/) {
+ $self->error("invalid value for $path[-1]", %opts);
+ $self->{_error_count}++;
+ return;
+ }
+ }
+
+ if (my $ck = $syntax->{check}) {
+ unless ($self->$ck(\$val, undef, $node->locus)) {
+ $self->{_error_count}++;
+ return;
+ }
+ }
+
+ $node->value($val);
+ }
+}
+
+sub _lint {
+ my ($self, $syntab, $node, @path) = @_;
+
+ while (my ($var, $value) = each %{$node->subtree}) {
+ if (exists($syntab->{$var})) {
+ $self->__lint($syntab->{$var}, $value, @path, $var);
+ } elsif (exists($syntab->{'*'})) {
+ $self->__lint($syntab->{'*'}, $value, @path, $var);
+ } elsif ($value->is_section) {
+ next;
+ } else {
+ $self->error("keyword \"$var\" is unknown",
+ locus => $value->locus);
+ $self->{_error_count}++;
+ }
+ }
+}
+
+=head2 $cfg->lint(\%synt)
+
+Checks the syntax according to the syntax table B<%synt>. On success,
+applies eventual default values and returns true. On errors, reports
+them using B<error> and returns false.
+
+This method provides a way to delay syntax checking for a later time,
+which is useful, e.g. if some parts of the parser are loaded as modules
+after calling B<parse>.
+
+=cut
+
+sub lint {
+ my ($self, $synt) = @_;
+
+# $synt->{'*'} = { section => { '*' => 1 }} ;
+ $self->{_conf} = new Config::Tree::Node::Section(default => 1)
+ unless exists $self->{_conf};
+ $self->_lint($synt, $self->{_conf});
+ $self->_fixup($self->{_conf}, $synt);
+ return $self->{_error_count} == 0;
+}
+
+1;

Return to:

Send suggestions and report system problems to the System administrator.