%%% File : bloom_filter2.erl
%%% Author : Jani Launonen
%%% Description : Implementation of bloom filter as described in
%%% http://en.wikipedia.org/wiki/Bloom_filter --- could be used
%%% as a building block for bloomier filter.
%%% Shortly: space efficient probabilistic hash table that can
%%% aswer to question "is key in the set?" Can return false
%%% positives (i.e. can return true even if key is not
%%% inserted in set) but never false negatives (i.e. never return
%%% false if key is inserted in set). Basic version of bloom
%%% filter cannot remove inserted keys but it have to be
%%% regenerated. Counting bloom filter can remove inserted
%%% keys to a certain degree. See above wikipedia entry.
%%%
%%% False positive rate depends on number of bits in array,
%%% number of bits set for each key and how well hash function
%%% works for given keys. See
%%% http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
%%% and http://www.cc.gatech.edu/fac/Pete.Manolios/bloom-filters/calculator.html
%%% Created : 14 Jan 2008 by Jani Launonen
-module(bloom_filter2).
-export([new/2,
insert/2,
lookup/2
]).
-record(bloom_filter2, {bit_array = <<>>,
k = 0}). %% number of hash functions
%%
%% Returns a new initialized bloom_filter2 record given number of bits and
%% hashes.
%%
new(Number_of_bits, Number_of_hashes) when is_integer(Number_of_bits) andalso
is_integer(Number_of_hashes) ->
#bloom_filter2{bit_array = <<0:Number_of_bits>>,
k = Number_of_hashes}.
%%
%% Inserts Key into bloom_filter and returns updated bloom_filter.
%%
insert(Bloom_filter, Key) when is_record(Bloom_filter, bloom_filter2) ->
calculate_hash_and_set_bit(Bloom_filter, Key, 0).
%%
%% Looks up if Key is in the set. Can return true even if it's not inserted
%% (see description).
%%
lookup(Bloom_filter, Key) when is_record(Bloom_filter, bloom_filter2) ->
calculate_hash_and_check_bit(Bloom_filter, Key, 0).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% internal functions
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Calculates hashes and checks if corresponding bits are set in the bit array.
%% Returns true if all bits pointed by hash values are set --- otherwise false.
%%
calculate_hash_and_check_bit(#bloom_filter2{bit_array = Bit_array,
k = Hashes} = Bloom_filter,
Key,
Hashes_done) when Hashes > Hashes_done ->
Hash = erlang:phash2(Key),
Offset = Hash rem bit_size(Bit_array),
<<_Head:Offset/bits, X:1, _Tail/bits>> = Bit_array,
case X of
1 -> calculate_hash_and_check_bit(Bloom_filter,
Hash,
Hashes_done + 1);
0 ->
false
end;
calculate_hash_and_check_bit(#bloom_filter2{k = Hashes_done},
_Key,
Hashes_done) ->
true.
%%
%% Calculates hashes and sets corresponding bits in the bit array.
%% Returns updated bloom_filter.
%%
calculate_hash_and_set_bit(#bloom_filter2{bit_array = Bit_array,
k = Hashes} = Bloom_filter,
Key,
Hashes_done) when Hashes > Hashes_done ->
Hash = erlang:phash2(Key),
Offset = Hash rem bit_size(Bit_array),
calculate_hash_and_set_bit(
Bloom_filter#bloom_filter2{bit_array = set_bit_at(Offset, Bit_array)},
Hash,
Hashes_done + 1);
calculate_hash_and_set_bit(Bloom_filter, _Key, _Hashes_done) ->
Bloom_filter.
%%
%% Sets bit at the Offset in the given Bin (bit array)
%%
set_bit_at(Offset, Bin) ->
<> = Bin,
<>.