devdaily home | apple | java | perl | unix | directory | blog

What this is

This file is included in the DevDaily.com "Perl Source Code Warehouse" project. The intent of this project is to help you "Learn Perl by Example" TM.

Other links

The source code

#!./perl -w

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
}

use strict;

plan tests => 5;

my %h;

ok (!Internals::HvREHASH(%h), "hash doesn't start with rehash flag on");

foreach (1..10) {
  $h{"\0"x$_}++;
}

ok (!Internals::HvREHASH(%h), "10 entries doesn't trigger rehash");

foreach (11..20) {
  $h{"\0"x$_}++;
}

ok (Internals::HvREHASH(%h), "20 entries triggers rehash");




# second part using an emulation of the PERL_HASH in perl, mounting an
# attack on a prepopulated hash. This is also useful if you need normal
# keys which don't contain \0 -- suitable for stashes

use constant MASK_U32  => 2**32;
use constant HASH_SEED => 0;
use constant THRESHOLD => 14;
use constant START     => "a";

# some initial hash data
my %h2 = map {$_ => 1} 'a'..'cc';

ok (!Internals::HvREHASH(%h2), 
    "starting with pre-populated non-pathalogical hash (rehash flag if off)");

my @keys = get_keys(\%h2);
$h2{$_}++ for @keys;
ok (Internals::HvREHASH(%h2), 
    scalar(@keys) . " colliding into the same bucket keys are triggerring rehash");

sub get_keys {
    my $hr = shift;

    # the minimum of bits required to mount the attack on a hash
    my $min_bits = log(THRESHOLD)/log(2);

    # if the hash has already been populated with a significant amount
    # of entries the number of mask bits can be higher
    my $keys = scalar keys %$hr;
    my $bits = $keys ? log($keys)/log(2) : 0;
    $bits = $min_bits if $min_bits > $bits;

    $bits = int($bits) < $bits ? int($bits) + 1 : int($bits);
    # need to add 2 bits to cover the internal split cases
    $bits += 2;
    my $mask = 2**$bits-1;
    print "# using mask: $mask ($bits)\n";

    my @keys;
    my $s = START;
    my $c = 0;
    # get 2 keys on top of the THRESHOLD
    my $hash;
    while (@keys < THRESHOLD+2) {
        # next if exists $hash->{$s};
        $hash = hash($s);
        next unless ($hash & $mask) == 0;
        $c++;
        printf "# %2d: %5s, %10s\n", $c, $s, $hash;
        push @keys, $s;
    } continue {
        $s++;
    }

    return @keys;
}


# trying to provide the fastest equivalent of C macro's PERL_HASH in
# Perl - the main complication is that it uses U32 integer, which we
# can't do it perl, without doing some tricks
sub hash {
    my $s = shift;
    my @c = split //, $s;
    my $u = HASH_SEED;
    for (@c) {
        # (A % M) + (B % M) == (A + B) % M
        # This works because '+' produces a NV, which is big enough to hold
        # the intermidiate result. We only need the % before any "^" and "&"
        # to get the result in the range for an I32.
        # and << doesn't work on NV, so using 1 << 10
        $u += ord;
        $u += $u * (1 << 10); $u %= MASK_U32;
        $u ^= $u >> 6;
    }
    $u += $u << 3;  $u %= MASK_U32;
    $u ^= $u >> 11; $u %= MASK_U32;
    $u += $u << 15; $u %= MASK_U32;
    $u;
}




Copyright 1998-2008 Alvin Alexander
All Rights Reserved.
 
devdaily.com is based in louisville, kentucky, and this web site is hosted by godaddy.com