aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Poznyakoff <gray@gnu.org.ua>2015-03-10 22:52:40 +0200
committerSergey Poznyakoff <gray@gnu.org.ua>2015-03-10 22:52:40 +0200
commit28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74 (patch)
tree6198b570a5e6820da9b2477e6a0fcd952e60730e
parentcea8ac06c4a33e7f2e3009589b27c43fce346e74 (diff)
downloadregexp-opt-28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74.tar.gz
regexp-opt-28ff1ca74c3fa2c38eb6b5d7c18acc70b4f70e74.tar.bz2
Move into a module
-rw-r--r--String/Regexp.pm162
-rw-r--r--regopt.pl142
2 files changed, 162 insertions, 142 deletions
diff --git a/String/Regexp.pm b/String/Regexp.pm
new file mode 100644
index 0000000..88f0b11
--- /dev/null
+++ b/String/Regexp.pm
@@ -0,0 +1,162 @@
+package String::Regexp;
+
+use strict;
+use Carp;
+use Data::Dumper;
+
+require Exporter;
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(array_to_regexp) ] );
+our @EXPORT_OK = ( qw(array_to_regexp) );
+our @EXPORT = qw(array_to_regexp);
+our $VERSION = "1.00";
+
+# split_prefix(ARRAY)
+sub split_prefix {
+ my $aref = shift;
+
+ my $n = $#{$aref};
+ my $j = 0;
+ my $k = -1;
+ while ($n > 0 and
+ $j <= $#{$aref->[0]} and
+ $j <= $#{$aref->[1]} and
+ $aref->[1][$j] eq $aref->[0][$j]) {
+ for (my $i = 0; $i < $n; $i++) {
+ if ($j <= $#{$aref->[$i]}) {
+ unless ($j <= $#{$aref->[$i+1]} and
+ $aref->[$i+1][$j] eq $aref->[$i][$j]) {
+ $n = $i;
+ last;
+ }
+ }
+ }
+ $k = $j++;
+ }
+ $n = 0 if ($k == -1);
+ return ($n,$k);
+}
+
+use constant T_ALT => 0x0;
+use constant T_PFX => 0x1;
+use constant T_SFX => 0x2;
+use constant T_MASK => 0xf;
+
+use constant T_OPT => 0x10;
+
+sub regexp_opt {
+ my @t = @_;
+ my @output;
+ return [] if $#t == -1;
+ push @output;
+ while (1) {
+# print "NEXT ".Dumper(\@t);
+ my @res = split_prefix \@t;
+# print "RES @res\n";
+ if ($res[1] <= 0) {
+ # my $rev = [ map { [ reverse @{$_} ] } @t ];
+ # @res = split_prefix [ map { [ reverse @{$_} ] } @t ];
+ # if ($res[1] > 0) {
+ # }
+ push @output, map { join('', @{$_}) } @t[0..$res[0]];
+ } elsif ($res[0] == 0) {
+ push @output, join('', @{$t[0]});
+ } else {
+ my @x = @{$t[0]}[0..$res[1]];
+ my $pfxlen = $#x;
+ my $pfx = join('', @x);
+ my $type = T_PFX;
+ if ($pfxlen == $#{$t[0]}) {
+ $type |= T_OPT;
+ shift @t;
+ --$res[0];
+ }
+ push @output, [ $type,
+ $pfx,
+ regexp_opt(map { my @r = @{$_};
+ my @a = @r[$pfxlen+1..$#r];
+ \@a
+ } @t[0..$res[0]]) ];
+ }
+ last if $res[0] == $#t;
+ @t = @t[($res[0]+1)..$#t];
+ }
+ return \@output;
+}
+
+sub posix_build_opt {
+ my $s = shift;
+ my $delim;
+
+ $$s .= '(' if ($#_ > 0);
+ foreach my $elt (@_) {
+ $$s .= $delim if defined $delim;
+ if (ref($elt) eq 'ARRAY') {
+ trans_posix_recursive($elt, $s);
+ } else {
+ $$s .= "($elt)";
+ }
+ $delim = '|';
+ }
+ $$s .= ')' if ($#_ > 0);
+}
+
+sub trans_posix_recursive {
+ my ($treeref, $s) = @_;
+ my @tree = @{$treeref};
+ my $delim;
+
+ my $mode = shift @tree;
+ my $type = $mode & T_MASK;
+ if ($type == T_ALT) {
+ posix_build_opt($s, @tree);
+ $$s .= '?' if ($mode & T_OPT); # FIXME
+ } elsif ($type == T_PFX) {
+ $$s .= '('.shift(@tree);
+ posix_build_opt($s, @{$tree[0]});
+ $$s .= '?' if ($mode & T_OPT);
+ $$s .= ')';
+ } else {
+ croak "unrecognized element type";
+ }
+ return $$s;
+}
+
+sub trans_posix {
+ my ($tree, $opts) = @_;
+ my $s = '';
+ trans_posix_recursive($tree, \$s);
+ $s = "\\<$s\\>" if $opts->{word};
+ return $s;
+}
+
+my %transtab = (
+ posix => \&trans_posix
+);
+
+sub array_to_regexp {
+ my $trans = \&trans_posix;
+ my $opts;
+
+ if (ref($_[0]) eq 'HASH') {
+ $opts = shift;
+ }
+
+ if (defined($opts->{type})) {
+ $trans = $transtab{$opts->{type}};
+ croak "unsupported type: $opts->{type}"
+ unless defined $trans;
+ }
+
+ my @t = map { my @x = split //, $_; \@x } sort @_;
+ my $res = regexp_opt(@t);
+ unshift @{$res}, T_ALT;
+ print Dumper($res) if ($opts->{debug});
+ return &{$trans}($res, $opts);
+}
+
+1;
+
+
+
diff --git a/regopt.pl b/regopt.pl
deleted file mode 100644
index 59cbde4..0000000
--- a/regopt.pl
+++ /dev/null
@@ -1,142 +0,0 @@
-use strict;
-use Carp;
-use Data::Dumper;
-
-# split_prefix(ARRAY)
-sub split_prefix {
- my $aref = shift;
-
- my $n = $#{$aref};
- my $j = 0;
- my $k = -1;
- while ($n > 0 and
- $j <= $#{$aref->[0]} and
- $j <= $#{$aref->[1]} and
- $aref->[1][$j] eq $aref->[0][$j]) {
-# print "Start iteration\n";
- for (my $i = 0; $i < $n; $i++) {
-# print "($i,$j) $aref->[$i+1][$j] <=> $aref->[$i][$j]\n";
- if ($j <= $#{$aref->[$i]}) {
-# print "A:".($j <= $#{$aref->[$i+1]})."\n";
-# print "B:".($aref->[$i+1][$j] eq $aref->[$i][$j])."\n";
- unless ($j <= $#{$aref->[$i+1]} and
- $aref->[$i+1][$j] eq $aref->[$i][$j]) {
-# print "($i,$j): ". Dumper($aref->[$i]) . " stop\n";
- $n = $i;
- last;
- }
- }
- }
- $k = $j++;
-# print "End iteration: $n, $j\n";
- }
-# print "res $n,$k\n";
- $n = 0 if ($k == -1);
- return ($n,$k);
-}
-
-
-##########
-my @input = (
- 'abab',
- 'ac',
- 'abba',
- 'abbaab',
- 'abbaabab',
- 'ba',
- 'bb',
- 'babab',
- );
-# my @ainput = ('abcd','e');
-print join("\n", sort @input)."\n\n";
-# my @t = map { my @x = split //, $_; \@x } sort @input;
-# print Dumper(\@t);
-# my @x = split_prefix \@t;
-# print "@x\n";
-# exit;
-
-sub regexp_opt {
- my @t = @_;
- my @output;
- return [] if $#t == -1;
- while (1) {
- my @res = split_prefix \@t;
- if ($res[0] == 0) {
- push @output, join('', @{$t[0]}) ;
- } elsif ($res[1] <= 0) {
- push @output, @t[0..$res[0]];
- } else {
- my @x = @{$t[0]}[0..$res[1]];
- push @output, [ join('', @x),
- regexp_opt(map { my @r = @{$_};
- my @a = @r[$#x+1..$#r];
- \@a
- } @t[0..$res[0]]) ];
- }
- last if $res[0] == $#t;
- @t = @t[($res[0]+1)..$#t];
- }
- return \@output;
-}
-
-sub trans_posix_recursive {
- my ($treeref, $s) = @_;
- my @tree = @{$treeref};
- my $delim;
- while ($#tree >= 0) {
- my $node = shift @tree;
- $$s .= $delim if defined($delim);
- $$s .= '(';
- if (ref($node) eq 'ARRAY') {
- if ($node->[0] eq '') {
- trans_posix_recursive([@$node[1..$#$node]], $s);
- $$s .= '?';
- } elsif ($#$node == 1 and ref($node->[1]) eq 'ARRAY') {
- $$s .= "($node->[0])";
- trans_posix_recursive([@$node[1..$#$node]], $s);
- } else {
- trans_posix_recursive($node, $s);
- }
- } else {
- ${$s} .= "$node";
- }
- $$s .= ')';
- $delim = '|';
- }
- return $$s;
-}
-
-sub trans_posix {
- my ($tree, $opts) = @_;
- return trans_posix_recursive($tree);
-}
-
-my %transtab = (
- posix => \&trans_posix
-);
-
-sub array_to_regexp {
- my $trans = \&trans_posix;
- my $opts;
-
- if (ref($_[0]) eq 'HASH') {
- $opts = shift;
- }
-
- if (defined($opts->{type})) {
- $trans = $transtab{$opts->{type}};
- croak "unsupported type: $opts->{type}"
- unless defined $trans;
- }
-
- my @t = map { my @x = split //, $_; \@x } sort @_;
- my $res = regexp_opt(@t);
- print Dumper($res) if ($opts->{debug});
- return &{$trans}($res, $opts);
-}
-
-my $s = array_to_regexp({ debug => 1 }, @input);
-print "$s\n";
-
-
-

Return to:

Send suggestions and report system problems to the System administrator.