diff options
author | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-03-10 22:52:40 +0200 |
---|---|---|
committer | Sergey Poznyakoff <gray@gnu.org.ua> | 2015-03-10 22:52:40 +0200 |
commit | 28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74 (patch) | |
tree | 6198b570a5e6820da9b2477e6a0fcd952e60730e | |
parent | cea8ac06c4a33e7f2e3009589b27c43fce346e74 (diff) | |
download | regexp-opt-28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74.tar.gz regexp-opt-28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74.tar.bz2 |
Move into a module
-rw-r--r-- | String/Regexp.pm | 162 | ||||
-rw-r--r-- | regopt.pl | 142 |
2 files changed, 162 insertions, 142 deletions
diff --git a/String/Regexp.pm b/String/Regexp.pm new file mode 100644 index 0000000..88f0b11 --- /dev/null +++ b/String/Regexp.pm @@ -0,0 +1,162 @@ +package String::Regexp; + +use strict; +use Carp; +use Data::Dumper; + +require Exporter; +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw(array_to_regexp) ] ); +our @EXPORT_OK = ( qw(array_to_regexp) ); +our @EXPORT = qw(array_to_regexp); +our $VERSION = "1.00"; + +# split_prefix(ARRAY) +sub split_prefix { + my $aref = shift; + + my $n = $#{$aref}; + my $j = 0; + my $k = -1; + while ($n > 0 and + $j <= $#{$aref->[0]} and + $j <= $#{$aref->[1]} and + $aref->[1][$j] eq $aref->[0][$j]) { + for (my $i = 0; $i < $n; $i++) { + if ($j <= $#{$aref->[$i]}) { + unless ($j <= $#{$aref->[$i+1]} and + $aref->[$i+1][$j] eq $aref->[$i][$j]) { + $n = $i; + last; + } + } + } + $k = $j++; + } + $n = 0 if ($k == -1); + return ($n,$k); +} + +use constant T_ALT => 0x0; +use constant T_PFX => 0x1; +use constant T_SFX => 0x2; +use constant T_MASK => 0xf; + +use constant T_OPT => 0x10; + +sub regexp_opt { + my @t = @_; + my @output; + return [] if $#t == -1; + push @output; + while (1) { +# print "NEXT ".Dumper(\@t); + my @res = split_prefix \@t; +# print "RES @res\n"; + if ($res[1] <= 0) { + # my $rev = [ map { [ reverse @{$_} ] } @t ]; + # @res = split_prefix [ map { [ reverse @{$_} ] } @t ]; + # if ($res[1] > 0) { + # } + push @output, map { join('', @{$_}) } @t[0..$res[0]]; + } elsif ($res[0] == 0) { + push @output, join('', @{$t[0]}); + } else { + my @x = @{$t[0]}[0..$res[1]]; + my $pfxlen = $#x; + my $pfx = join('', @x); + my $type = T_PFX; + if ($pfxlen == $#{$t[0]}) { + $type |= T_OPT; + shift @t; + --$res[0]; + } + push @output, [ $type, + $pfx, + regexp_opt(map { my @r = @{$_}; + my @a = @r[$pfxlen+1..$#r]; + \@a + } @t[0..$res[0]]) ]; + } + last if $res[0] == $#t; + @t = @t[($res[0]+1)..$#t]; + } + return \@output; +} + +sub posix_build_opt { + my $s = shift; + my $delim; + + $$s .= '(' if ($#_ > 0); + foreach my $elt (@_) { + $$s .= $delim if defined $delim; + if (ref($elt) eq 'ARRAY') { + trans_posix_recursive($elt, $s); + } else { + $$s .= "($elt)"; + } + $delim = '|'; + } + $$s .= ')' if ($#_ > 0); +} + +sub trans_posix_recursive { + my ($treeref, $s) = @_; + my @tree = @{$treeref}; + my $delim; + + my $mode = shift @tree; + my $type = $mode & T_MASK; + if ($type == T_ALT) { + posix_build_opt($s, @tree); + $$s .= '?' if ($mode & T_OPT); # FIXME + } elsif ($type == T_PFX) { + $$s .= '('.shift(@tree); + posix_build_opt($s, @{$tree[0]}); + $$s .= '?' if ($mode & T_OPT); + $$s .= ')'; + } else { + croak "unrecognized element type"; + } + return $$s; +} + +sub trans_posix { + my ($tree, $opts) = @_; + my $s = ''; + trans_posix_recursive($tree, \$s); + $s = "\\<$s\\>" if $opts->{word}; + return $s; +} + +my %transtab = ( + posix => \&trans_posix +); + +sub array_to_regexp { + my $trans = \&trans_posix; + my $opts; + + if (ref($_[0]) eq 'HASH') { + $opts = shift; + } + + if (defined($opts->{type})) { + $trans = $transtab{$opts->{type}}; + croak "unsupported type: $opts->{type}" + unless defined $trans; + } + + my @t = map { my @x = split //, $_; \@x } sort @_; + my $res = regexp_opt(@t); + unshift @{$res}, T_ALT; + print Dumper($res) if ($opts->{debug}); + return &{$trans}($res, $opts); +} + +1; + + + diff --git a/regopt.pl b/regopt.pl deleted file mode 100644 index 59cbde4..0000000 --- a/regopt.pl +++ /dev/null @@ -1,142 +0,0 @@ -use strict; -use Carp; -use Data::Dumper; - -# split_prefix(ARRAY) -sub split_prefix { - my $aref = shift; - - my $n = $#{$aref}; - my $j = 0; - my $k = -1; - while ($n > 0 and - $j <= $#{$aref->[0]} and - $j <= $#{$aref->[1]} and - $aref->[1][$j] eq $aref->[0][$j]) { -# print "Start iteration\n"; - for (my $i = 0; $i < $n; $i++) { -# print "($i,$j) $aref->[$i+1][$j] <=> $aref->[$i][$j]\n"; - if ($j <= $#{$aref->[$i]}) { -# print "A:".($j <= $#{$aref->[$i+1]})."\n"; -# print "B:".($aref->[$i+1][$j] eq $aref->[$i][$j])."\n"; - unless ($j <= $#{$aref->[$i+1]} and - $aref->[$i+1][$j] eq $aref->[$i][$j]) { -# print "($i,$j): ". Dumper($aref->[$i]) . " stop\n"; - $n = $i; - last; - } - } - } - $k = $j++; -# print "End iteration: $n, $j\n"; - } -# print "res $n,$k\n"; - $n = 0 if ($k == -1); - return ($n,$k); -} - - -########## -my @input = ( - 'abab', - 'ac', - 'abba', - 'abbaab', - 'abbaabab', - 'ba', - 'bb', - 'babab', - ); -# my @ainput = ('abcd','e'); -print join("\n", sort @input)."\n\n"; -# my @t = map { my @x = split //, $_; \@x } sort @input; -# print Dumper(\@t); -# my @x = split_prefix \@t; -# print "@x\n"; -# exit; - -sub regexp_opt { - my @t = @_; - my @output; - return [] if $#t == -1; - while (1) { - my @res = split_prefix \@t; - if ($res[0] == 0) { - push @output, join('', @{$t[0]}) ; - } elsif ($res[1] <= 0) { - push @output, @t[0..$res[0]]; - } else { - my @x = @{$t[0]}[0..$res[1]]; - push @output, [ join('', @x), - regexp_opt(map { my @r = @{$_}; - my @a = @r[$#x+1..$#r]; - \@a - } @t[0..$res[0]]) ]; - } - last if $res[0] == $#t; - @t = @t[($res[0]+1)..$#t]; - } - return \@output; -} - -sub trans_posix_recursive { - my ($treeref, $s) = @_; - my @tree = @{$treeref}; - my $delim; - while ($#tree >= 0) { - my $node = shift @tree; - $$s .= $delim if defined($delim); - $$s .= '('; - if (ref($node) eq 'ARRAY') { - if ($node->[0] eq '') { - trans_posix_recursive([@$node[1..$#$node]], $s); - $$s .= '?'; - } elsif ($#$node == 1 and ref($node->[1]) eq 'ARRAY') { - $$s .= "($node->[0])"; - trans_posix_recursive([@$node[1..$#$node]], $s); - } else { - trans_posix_recursive($node, $s); - } - } else { - ${$s} .= "$node"; - } - $$s .= ')'; - $delim = '|'; - } - return $$s; -} - -sub trans_posix { - my ($tree, $opts) = @_; - return trans_posix_recursive($tree); -} - -my %transtab = ( - posix => \&trans_posix -); - -sub array_to_regexp { - my $trans = \&trans_posix; - my $opts; - - if (ref($_[0]) eq 'HASH') { - $opts = shift; - } - - if (defined($opts->{type})) { - $trans = $transtab{$opts->{type}}; - croak "unsupported type: $opts->{type}" - unless defined $trans; - } - - my @t = map { my @x = split //, $_; \@x } sort @_; - my $res = regexp_opt(@t); - print Dumper($res) if ($opts->{debug}); - return &{$trans}($res, $opts); -} - -my $s = array_to_regexp({ debug => 1 }, @input); -print "$s\n"; - - - |