12 namespace bloom_filter
17 extern const szt seedCount;
18 extern const szt seeds[];
37 return make_tuple(bitCount, hashCount);
41 template<
class T,
class Alloc = std::allocator<szt>>
46 Key(
szt elemCount,
Double errorProb = 0.01,
const Alloc& a = Alloc()) :
59 template<
class T,
class Alloc>
64 {
assert(hashIndex < val.
hashes.size(),
"Key does not have enough hashes for bloom filter");
return val.
hashes[hashIndex]; }
75 template<
class T,
class Block = u
int64,
class Alloc = std::allocator<Block>>
92 assert(_hashCount <= bloom_filter::priv::seedCount,
"Not enough seeds, either try a higher error prob or add more seeds");
94 _bits.
resize(BitOp::pow2Ceil(bitCount));
95 _bitIndexMask = _bits.
size()-1;
101 for (
szt i = 0; i < _hashCount; ++i)
102 _bits.
set(bitIndex(_hasher(obj, i)));
108 for (
szt i = 0; i < _hashCount; ++i)
109 if (!_bits.
test(bitIndex(_hasher(obj, i))))
return false;
119 const BitSet&
bits()
const {
return _bits; }
123 szt bitIndex(
szt hash)
const {
return hash & _bitIndexMask; }
129 bloom_filter::hash<T> _hasher;
137 template<
class T,
class Alloc>
138 struct hash<
honey::bloom_filter::Key<T,Alloc>>
142 using namespace honey;
szt fast(ByteBufConst bs, szt seed)
Quickly generate a small hash value. Each seed value produces a unique hash from the same data...
Definition: Hash.cpp:99
Double_::Real Double
double type
Definition: Double.h:60
Key()
Definition: BloomFilter.h:44
A space-efficient probabilistic data structure that is used to test set membership. Can be tuned to use less space at the expense of increased false positive probabilty.
Definition: BloomFilter.h:76
Buffer< const byte > ByteBufConst
Definition: Bytes.h:21
szt size() const
Number of bits in the bit array.
Definition: BitSet.h:103
void reset(szt index)
Set bit to false.
Definition: BitSet.h:76
BitSet_< Block, Alloc > BitSet
Definition: BloomFilter.h:79
static Real ceil(Real x)
Round up to the nearest whole number towards +inf.
Definition: Alge.h:33
Functor used to generate hash. Each hashIndex for the same object must produce a unique hash...
Definition: BloomFilter.h:24
Caches multiple hashes of an object. The key can be inserted into a bloom filter and tested very quic...
Definition: BloomFilter.h:42
void clear()
Remove all elements from the set.
Definition: BloomFilter.h:114
void set(szt index)
Set bit to true.
Definition: BitSet.h:70
BloomFilter(szt elemCount, Double errorProb=0.01, const Alloc &a=Alloc())
Definition: BloomFilter.h:86
tuple< szt, szt > calcParams(szt elemCount, Double errorProb)
Calculate optimal bloom parameters – bit and hash count.
Definition: BloomFilter.h:30
vector< szt, Alloc > hashes
Definition: BloomFilter.h:54
#define assert(...)
Forwards to assert_#args. See assert_1(), assert_2().
Definition: Debug.h:24
bool contains(const T &obj) const
Check if element is in the set, may return false positive.
Definition: BloomFilter.h:106
void resize(szt size, bool val=false)
Resize array to contain size number of bits, with new bits initialized to val
Definition: BitSet.h:38
bloom_filter::hash< T > hasher
Definition: BloomFilter.h:55
void hash(const T &obj)
Generate and cache all the hashes for the object.
Definition: BloomFilter.h:52
size_t szt
Size type, shorthand for size_t.
Definition: Core.h:90
Key(szt elemCount, Double errorProb=0.01, const Alloc &a=Alloc())
Same params as bloom filter, key will cache the required number of hashes.
Definition: BloomFilter.h:46
bool operator==(const Key &rhs) const
Definition: BloomFilter.h:49
static Num sqr(Num x)
Square.
Definition: Alge.h:60
void insert(const T &obj)
Insert element into set.
Definition: BloomFilter.h:99
size_t operator()(const honey::bloom_filter::Key< T, Alloc > &val) const
Definition: BloomFilter.h:140
Double errorProb() const
Get false positive probability.
Definition: BloomFilter.h:117
bool test(szt index) const
Get bit value.
Definition: BitSet.h:84
Global Honeycomb namespace.
szt operator()(const T &val, szt hashIndex) const
Definition: BloomFilter.h:26
static Real log(Real x)
Natural logarithm. ie. ln(x)
Definition: Alge.h:80
const BitSet & bits() const
Get underlying bit array.
Definition: BloomFilter.h:119