diff options
author | Sage Weil <sage@inktank.com> | 2013-09-19 20:02:50 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-10-02 14:09:13 -0700 |
commit | a36cf13c772621a2f974c8c0251e8f3b49da7555 (patch) | |
tree | 48b657479b246d7f892820375bd1dd64039c7045 | |
parent | a2e175bf100d5318045b8bdbe94f43063ef2a638 (diff) | |
download | ceph-a36cf13c772621a2f974c8c0251e8f3b49da7555.tar.gz |
common/bloom_filter: insert/contains methods for uint32_t
This will let us pass in an hobject_t::hash directly (for example) without
rehashing a string.
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/common/bloom_filter.hpp | 35 | ||||
-rw-r--r-- | src/test/common/test_bloom_filter.cc | 38 |
2 files changed, 73 insertions, 0 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 15400b14b9e..6d5f645d8c9 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -131,6 +131,17 @@ public: inserted_element_count_ = 0; } + inline void insert(uint32_t val) { + std::size_t bit_index = 0; + std::size_t bit = 0; + for (std::size_t i = 0; i < salt_.size(); ++i) + { + compute_indices(hash_ap(val,salt_[i]),bit_index,bit); + bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + } + ++inserted_element_count_; + } + inline void insert(const unsigned char* key_begin, const std::size_t& length) { std::size_t bit_index = 0; @@ -170,6 +181,21 @@ public: } } + inline virtual bool contains(uint32_t val) const + { + std::size_t bit_index = 0; + std::size_t bit = 0; + for (std::size_t i = 0; i < salt_.size(); ++i) + { + compute_indices(hash_ap(val,salt_[i]),bit_index,bit); + if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + { + return false; + } + } + return true; + } + inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const { std::size_t bit_index = 0; @@ -425,6 +451,15 @@ protected: *table_size = t; } + inline bloom_type hash_ap(uint32_t val, bloom_type hash) const + { + hash ^= (hash << 7) ^ ((val & 0xff000000) >> 24) * (hash >> 3); + hash ^= (~((hash << 11) + (((val & 0xff0000) >> 16) ^ (hash >> 5)))); + hash ^= (hash << 7) ^ ((val & 0xff00) >> 8) * (hash >> 3); + hash ^= (~((hash << 11) + (((val & 0xff)) ^ (hash >> 5)))); + return hash; + } + inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const { const unsigned char* itr = begin; diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc index 66bda6bcd33..84e2a67a62a 100644 --- a/src/test/common/test_bloom_filter.cc +++ b/src/test/common/test_bloom_filter.cc @@ -61,6 +61,44 @@ TEST(BloomFilter, Sweep) { } } +TEST(BloomFilter, SweepInt) { + std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl; + for (int ex = 3; ex < 12; ex++) { + for (float fpp = .001; fpp < .5; fpp *= 2.0) { + int max = 2 << ex; + bloom_filter bf(max, fpp, 1); + bf.insert("foo"); + bf.insert("bar"); + + ASSERT_TRUE(123); + ASSERT_TRUE(456); + + for (int n = 0; n < max; n++) + bf.insert(n); + + int test = max * 100; + int hit = 0; + for (int n = 0; n < test; n++) + if (bf.contains(100000 + n)) + hit++; + + ASSERT_TRUE(123); + ASSERT_TRUE(456); + + double actual = (double)hit / (double)test; + + bufferlist bl; + ::encode(bf, bl); + + double byte_per_insert = (double)bl.length() / (double)max; + + std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl; + ASSERT_TRUE(actual < fpp * 10); + + } + } +} + // test the fpp over a sequence of bloom filters, each with unique // items inserted into it. // |