diff options
Diffstat (limited to 'lib/VarnishMib/HashTable.pm')
-rw-r--r-- | lib/VarnishMib/HashTable.pm | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/lib/VarnishMib/HashTable.pm b/lib/VarnishMib/HashTable.pm new file mode 100644 index 0000000..0f3b47e --- /dev/null +++ b/lib/VarnishMib/HashTable.pm @@ -0,0 +1,242 @@ +package VarnishMib::HashTable; +use strict; +use warnings; +use Carp; +use Inline 'C'; +use Pod::Usage; +use Pod::Man; + +=head1 NAME + +VarnishMib::HashTable - Create a hash table implementation in C + +=head1 DESCRIPTION + +Given a list of unique strings, creates a C code for fast look ups of +data associated with them. + +=head1 CONSTRUCTOR + + $ht = new VarnishMib::HashTable([KW => VAL,...]); + +Returns a new instance of the hash table generator. Allowed arguments are: + +=over 4 + +=item B<max_collisions> + +Maximum number of collisions allowed for the resulting hash table. Default is +unlimited. + +=item B<max_hash_size> + +Maximum size of the resulting hash table (in items). + +=item B<indent> + +Basic indent value for the generated C text. Default is 4. + +=item B<verbose> + +Produce verbose statistics about the created hash table. + +=item B<prefix> + +Prefix all C identifiers with this string. Default is C<ht_>. + +=back + +=cut + +sub new { + my $class = shift; + my $self = bless {}, $class; + my $v; + local %_ = @_; + $self->{max_collisions} = delete $_{max_collisions}; + $self->{max_hash_size} = delete $_{max_hash_size}; + $self->{indent} = ' ' x (delete $_{indent} || 4); + $self->{verbose} = delete $_{verbose}; + $self->{prefix} = delete $_{prefix} || 'ht_'; + croak "extra arguments" if keys %_; + return $self; +} + +=head1 METHODS + +=head2 prefix + + $s = $ht->prefix; + +Returns current prefix value. + +=cut + +sub prefix { shift->{prefix} } + +=head2 indent + + $s = $ht->indent; + +Returns the indent prefix string. I<Note>, that it is not the same as the +B<ident> parameter passed to the constructor. This method returs a string +filled with appropriate number of whitespace characters, such that it can +be used to produce the requested indentation. + +=cut + +sub indent { shift->{indent} } + +sub hash_string { + my ($self, $string, $hash_size) = @_; + string_hash($string, $hash_size); +} + +sub _mktab { + my ($self, $hash_size) = @_; + my @ht = (-1) x $hash_size; + my $cmax = 0; + for (my $i = 0; $i < @{$self->{input}}; $i++) { + my $h = $self->hash_string($self->{input}[$i], $hash_size); + my $cn = 0; + while ($ht[$h] != -1) { + ++$cn; + return if (++$h >= $hash_size); + } + $ht[$h] = $i; +# print STDERR $self->{input}[$i] . ' => ' . $h ." $i\n"; + $cmax = $cn if $cn > $cmax; + } +# print STDERR "$hash_size $cmax\n"; + $self->{hash_table} = \@ht; + $self->{collisions} = $cmax; + return $self->{hash_table}; +} + +=head2 create + + $success = $ht->create(LISTREF) + +B<LISTREF> must be a reference to a list of unique string values. This method +creates a hash table. Returns true on success and undef on failure. + +=cut + +sub create { + my ($self, $names) = @_; + my $htab; + my $hsize; + + $self->{input} = $names; + delete $self->{hash_table}; + + for ($hsize = (2 * @$names + 1);; $hsize++) { + last if $self->{max_hash_size} && $hsize < $self->{max_hash_size}; + $self->_mktab($hsize) or next; + last unless (defined($self->{max_collisions}) + && $self->{collisions} > $self->{max_collisions}); + } + if ($self->{verbose}) { +# print STDERR "Input: " . @$names . "\n"; + if ($self->{hash_table}) { + print STDERR "Table size: " . @{$self->{hash_table}} . "\n"; + print STDERR "Collisions: " . $self->{collisions} . "\n"; + } else { + print STDERR "FAILED\n"; + } + } + return $self->{hash_table}; +} + +sub format_input_table { + my ($self, $fh) = @_; + $fh ||= \*STDOUT; + croak "no input data to format" unless $self->{input}; + print $fh 'char const *' . $self->{prefix} . "name_table[] = {\n"; + foreach my $name (@{$self->{input}}) { + printf $fh $self->{indent} . '"' . $name . "\",\n"; + } + print $fh "};\n"; + +} + +sub format_data_table { + my ($self, $type, $fh) = @_; + $fh ||= \*STDOUT; + croak "no data to format" unless $self->{input}; + my $n = @{$self->{input}}; + print $fh $type . ' ' . $self->{prefix} . "data_table[$n];\n"; +} + +sub format_hash_table { + my ($self, $fh) = @_; + $fh ||= \*STDOUT; + croak "no hash table to format" unless $self->{hash_table}; + print $fh "static int ".$self->{prefix}."hash_table[] = {\n"; + my $col = 0; + print $fh $self->{indent}; + foreach my $p (@{$self->{hash_table}}) { + printf $fh "%3d,", defined($p) ? $p : -1; + $col++; + print $fh ($col % 10 == 0) ? "\n".$self->{indent} : ' '; + } + print $fh "\n" if ($col % 10); + print $fh "};\n"; + my $pfx = $self->{prefix} . 'hash_table'; + print $fh "unsigned ${pfx}_size = sizeof($pfx) / sizeof(${pfx}[0]);\n"; +} + +sub format_code { + my ($self, $fh) = @_; + $fh ||= \*STDOUT; + seek DATA, 0, 0; + my $visible = 0; + while (<DATA>) { + if (/^__C__$/) { + $visible = 1; + } elsif ($visible) { + s{/\*\s*STATIC\s*\*/}{static}; + print $fh "$_"; + } + } +} + +sub format_program { + my ($self, $type, $fh) = @_; + $fh ||= \*STDOUT; + $self->format_input_table($fh); + print $fh "\n"; + $self->format_data_table($type, $fh); + print $fh "\n"; + $self->format_hash_table($fh); + print $fh "\n"; + $self->format_code($fh); +} + +Inline->init(); +1; +__DATA__ +__C__ +#ifndef CHAR_BIT +# define CHAR_BIT 8 +#endif +#ifndef UINT_MAX +# define UINT_MAX ((unsigned)-1) +#endif + +static inline unsigned +rotl_sz(unsigned x, int n) +{ + return ((x << n) | (x >> ((CHAR_BIT * sizeof x) - n))) & UINT_MAX; +} + +/*STATIC*/ unsigned +string_hash(const char *str, unsigned size) +{ + unsigned value = 0; + unsigned char ch; + + for (; (ch = *str); str++) + value = ch + rotl_sz(value, 7); + return value % size; +} |