package VarnishMib::HashTable; use strict; use warnings; use Carp; use Inline 'C'; use Pod::Usage; use Pod::Man; =head1 NAME VarnishMib::HashTable - Create a hash table implementation in C =head1 DESCRIPTION Given a list of unique strings, creates a C code for fast look ups of data associated with them. =head1 CONSTRUCTOR $ht = new VarnishMib::HashTable([KW => VAL,...]); Returns a new instance of the hash table generator. Allowed arguments are: =over 4 =item B Maximum number of collisions allowed for the resulting hash table. Default is unlimited. =item B Maximum size of the resulting hash table (in items). =item B Basic indent value for the generated C text. Default is 4. =item B Produce verbose statistics about the created hash table. =item B Prefix all C identifiers with this string. Default is C. =back =cut sub new { my $class = shift; my $self = bless {}, $class; my $v; local %_ = @_; $self->{max_collisions} = delete $_{max_collisions}; $self->{max_hash_size} = delete $_{max_hash_size}; $self->{indent} = ' ' x (delete $_{indent} || 4); $self->{verbose} = delete $_{verbose}; $self->{prefix} = delete $_{prefix} || 'ht_'; croak "extra arguments" if keys %_; return $self; } =head1 METHODS =head2 prefix $s = $ht->prefix; Returns current prefix value. =cut sub prefix { shift->{prefix} } =head2 indent $s = $ht->indent; Returns the indent prefix string. I, that it is not the same as the B parameter passed to the constructor. This method returs a string filled with appropriate number of whitespace characters, such that it can be used to produce the requested indentation. =cut sub indent { shift->{indent} } sub hash_string { my ($self, $string, $hash_size) = @_; string_hash($string, $hash_size); } sub _mktab { my ($self, $hash_size) = @_; my @ht = (-1) x $hash_size; my $cmax = 0; for (my $i = 0; $i < @{$self->{input}}; $i++) { my $h = $self->hash_string($self->{input}[$i], $hash_size); my $cn = 0; while ($ht[$h] != -1) { ++$cn; return if (++$h >= $hash_size); } $ht[$h] = $i; # print STDERR $self->{input}[$i] . ' => ' . $h ." $i\n"; $cmax = $cn if $cn > $cmax; } # print STDERR "$hash_size $cmax\n"; $self->{hash_table} = \@ht; $self->{collisions} = $cmax; return $self->{hash_table}; } =head2 create $success = $ht->create(LISTREF) B must be a reference to a list of unique string values. This method creates a hash table. Returns true on success and undef on failure. =cut sub create { my ($self, $names) = @_; my $htab; my $hsize; $self->{input} = $names; delete $self->{hash_table}; for ($hsize = (2 * @$names + 1);; $hsize++) { last if $self->{max_hash_size} && $hsize < $self->{max_hash_size}; $self->_mktab($hsize) or next; last unless (defined($self->{max_collisions}) && $self->{collisions} > $self->{max_collisions}); } if ($self->{verbose}) { # print STDERR "Input: " . @$names . "\n"; if ($self->{hash_table}) { print STDERR "Table size: " . @{$self->{hash_table}} . "\n"; print STDERR "Collisions: " . $self->{collisions} . "\n"; } else { print STDERR "FAILED\n"; } } return $self->{hash_table}; } =head2 format_input_table $ht->format_input_table([FILEHANDLE]); Outputs to I (default B) a C array of input names. The array is declared as char const *PFXname_table[] where I is replaced by the prefix given when creating the HashTable object. =cut sub format_input_table { my ($self, $fh) = @_; $fh ||= \*STDOUT; croak "no input data to format" unless $self->{input}; print $fh 'static char const *' . $self->{prefix} . "name_table[] = {\n"; foreach my $name (@{$self->{input}}) { printf $fh $self->{indent} . '"' . $name . "\",\n"; } print $fh "};\n"; } =head2 format_data_table $ht->format_data_table(CTYPE [, FILEHANDLE]) Outputs a C array of data associated with input strings. The array is declared as CTYPE PFXdata_table[N]; where I is the first parameter to the method, I is the prefix and I is the dimension (number of strings for which the hash table is built). Both B and B have the same dimension. =cut sub format_data_table { my ($self, $type, $fh) = @_; $fh ||= \*STDOUT; croak "no data to format" unless $self->{input}; my $n = @{$self->{input}}; print $fh 'static '. $type . ' ' . $self->{prefix} . "data_table[$n];\n"; } =head2 format_hash_table $ht->format_hash_table([FILEHANDLE]); Outputs the hash table to I (B by default). The table is declared as int PFXhash_table[] where I is the prefix. =cut sub format_hash_table { my ($self, $fh) = @_; $fh ||= \*STDOUT; croak "no hash table to format" unless $self->{hash_table}; print $fh "static int ".$self->{prefix}."hash_table[] = {\n"; my $col = 0; print $fh $self->{indent}; foreach my $p (@{$self->{hash_table}}) { printf $fh "%3d,", defined($p) ? $p : -1; $col++; print $fh ($col % 10 == 0) ? "\n".$self->{indent} : ' '; } print $fh "\n" if ($col % 10); print $fh "};\n"; my $pfx = $self->{prefix} . 'hash_table'; print $fh "unsigned ${pfx}_size = sizeof($pfx) / sizeof(${pfx}[0]);\n"; } =head2 format_code $ht->format_code([FILEHANDLE]) Formats the supporting C code to the I (B, if not given). The code contains at least the following function: unsigned string_hash(const char *str, unsigned size) which, given the string I and the size of the hash table (I) returns the index in the table starting from which the pointer to that string and associated data can be located. =cut sub format_code { my ($self, $fh) = @_; $fh ||= \*STDOUT; seek DATA, 0, 0; my $visible = 0; while () { if (/^__C__$/) { $visible = 1; } elsif ($visible) { s{/\*\s*STATIC\s*\*/}{static}; print $fh "$_"; } } } sub format_program { my ($self, $type, $fh) = @_; $fh ||= \*STDOUT; $self->format_input_table($fh); print $fh "\n"; $self->format_data_table($type, $fh); print $fh "\n"; $self->format_hash_table($fh); print $fh "\n"; $self->format_code($fh); } Inline->init(); 1; __DATA__ __C__ #ifndef CHAR_BIT # define CHAR_BIT 8 #endif #ifndef UINT_MAX # define UINT_MAX ((unsigned)-1) #endif static inline unsigned rotl_sz(unsigned x, int n) { return ((x << n) | (x >> ((CHAR_BIT * sizeof x) - n))) & UINT_MAX; } /*STATIC*/ unsigned string_hash(const char *str, unsigned size) { unsigned value = 0; unsigned char ch; for (; (ch = *str); str++) value = ch + rotl_sz(value, 7); return value % size; }