diff options
-rw-r--r-- | src/common/bloom_filter.cc | 32 | ||||
-rw-r--r-- | src/common/bloom_filter.hpp | 36 |
2 files changed, 42 insertions, 26 deletions
diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc index a7a4c1d0796..41cad174909 100644 --- a/src/common/bloom_filter.cc +++ b/src/common/bloom_filter.cc @@ -8,7 +8,8 @@ void bloom_filter::encode(bufferlist& bl) const { ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); - ::encode((uint64_t)inserted_element_count_, bl); + ::encode((uint64_t)insert_count_, bl); + ::encode((uint64_t)target_element_count_, bl); ::encode((uint64_t)random_seed_, bl); bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); @@ -22,7 +23,9 @@ void bloom_filter::decode(bufferlist::iterator& p) ::decode(v, p); salt_count_ = v; ::decode(v, p); - inserted_element_count_ = v; + insert_count_ = v; + ::decode(v, p); + target_element_count_ = v; ::decode(v, p); random_seed_ = v; bufferlist t; @@ -46,7 +49,8 @@ void bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); f->dump_unsigned("table_size", table_size_); - f->dump_unsigned("insert_count", inserted_element_count_); + f->dump_unsigned("insert_count", insert_count_); + f->dump_unsigned("target_element_count", target_element_count_); f->dump_unsigned("random_seed", random_seed_); f->open_array_section("salt_table"); @@ -62,11 +66,11 @@ void bloom_filter::dump(Formatter *f) const void bloom_filter::generate_test_instances(list<bloom_filter*>& ls) { - ls.push_back(new bloom_filter(10, .5, 1)); - ls.push_back(new bloom_filter(10, .5, 1)); + ls.push_back(new bloom_filter(10, .5, 1, 5)); + ls.push_back(new bloom_filter(10, .5, 1, 5)); ls.back()->insert("foo"); ls.back()->insert("bar"); - ls.push_back(new bloom_filter(50, .5, 1)); + ls.push_back(new bloom_filter(50, .5, 1, 5)); ls.back()->insert("foo"); ls.back()->insert("bar"); ls.back()->insert("baz"); @@ -84,7 +88,8 @@ void compressible_bloom_filter::encode(bufferlist& bl) const for (vector<size_t>::const_iterator p = size_list.begin(); p != size_list.end(); ++p) ::encode((uint64_t)*p, bl); - ::encode((uint64_t)inserted_element_count_, bl); + ::encode((uint64_t)insert_count_, bl); + ::encode((uint64_t)target_element_count_, bl); ::encode((uint64_t)random_seed_, bl); bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); @@ -107,7 +112,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) } ::decode(v, p); - inserted_element_count_ = v; + insert_count_ = v; + ::decode(v, p); + target_element_count_ = v; ::decode(v, p); random_seed_ = v; bufferlist t; @@ -132,7 +139,8 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) void compressible_bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); - f->dump_unsigned("insert_count", inserted_element_count_); + f->dump_unsigned("insert_count", insert_count_); + f->dump_unsigned("target_element_count", target_element_count_); f->dump_unsigned("random_seed", random_seed_); f->dump_unsigned("table_size", table_size_); f->open_array_section("table_sizes"); @@ -154,11 +162,11 @@ void compressible_bloom_filter::dump(Formatter *f) const void compressible_bloom_filter::generate_test_instances(list<compressible_bloom_filter*>& ls) { - ls.push_back(new compressible_bloom_filter(10, .5, 1)); - ls.push_back(new compressible_bloom_filter(10, .5, 1)); + ls.push_back(new compressible_bloom_filter(10, .5, 1, 4)); + ls.push_back(new compressible_bloom_filter(10, .5, 1, 4)); ls.back()->insert("foo"); ls.back()->insert("bar"); - ls.push_back(new compressible_bloom_filter(50, .5, 1)); + ls.push_back(new compressible_bloom_filter(50, .5, 1, 4)); ls.back()->insert("foo"); ls.back()->insert("bar"); ls.back()->insert("baz"); diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 50349560feb..c336eef8b6a 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -57,7 +57,8 @@ protected: std::vector<bloom_type> salt_; ///< vector of salts std::size_t salt_count_; ///< number of salts std::size_t table_size_; ///< bit table size in bytes - std::size_t inserted_element_count_; ///< insertion count + std::size_t insert_count_; ///< insertion count + std::size_t target_element_count_; ///< target number of unique insertions std::size_t random_seed_; ///< random seed public: @@ -66,7 +67,8 @@ public: : bit_table_(0), salt_count_(0), table_size_(0), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(0), random_seed_(0) {} @@ -74,7 +76,8 @@ public: const double& false_positive_probability, const std::size_t& random_seed) : bit_table_(0), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(predicted_inserted_element_count), random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) { find_optimal_parameters(predicted_inserted_element_count, false_positive_probability, @@ -84,11 +87,13 @@ public: bloom_filter(const std::size_t& salt_count, std::size_t table_size, - const std::size_t& random_seed) + const std::size_t& random_seed, + std::size_t target_element_count) : bit_table_(0), salt_count_(salt_count), table_size_(table_size), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(target_element_count), random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) { init(); @@ -110,7 +115,7 @@ public: if (this != &filter) { salt_count_ = filter.salt_count_; table_size_ = filter.table_size_; - inserted_element_count_ = filter.inserted_element_count_; + insert_count_ = filter.insert_count_; random_seed_ = filter.random_seed_; delete[] bit_table_; bit_table_ = new cell_type[table_size_]; @@ -133,7 +138,7 @@ public: inline void clear() { std::fill_n(bit_table_, table_size_, 0x00); - inserted_element_count_ = 0; + insert_count_ = 0; } /** @@ -153,7 +158,7 @@ public: compute_indices(hash_ap(val,salt_[i]),bit_index,bit); bit_table_[bit_index >> 3] |= bit_mask[bit]; } - ++inserted_element_count_; + ++insert_count_; } inline void insert(const unsigned char* key_begin, const std::size_t& length) @@ -165,7 +170,7 @@ public: compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); bit_table_[bit_index >> 3] |= bit_mask[bit]; } - ++inserted_element_count_; + ++insert_count_; } template<typename T> @@ -288,7 +293,9 @@ public: inline std::size_t element_count() const { - return inserted_element_count_; + return insert_count_; + } + } inline double effective_fpp() const @@ -300,7 +307,7 @@ public: the current number of inserted elements - not the user defined predicated/expected number of inserted elements. */ - return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size()); + return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size()); } inline bloom_filter& operator &= (const bloom_filter& filter) @@ -552,15 +559,16 @@ public: compressible_bloom_filter(const std::size_t& predicted_element_count, const double& false_positive_probability, const std::size_t& random_seed) - : bloom_filter(predicted_element_count,false_positive_probability,random_seed) + : bloom_filter(predicted_element_count, false_positive_probability, random_seed) { size_list.push_back(table_size_); } compressible_bloom_filter(const std::size_t& salt_count, std::size_t table_size, - const std::size_t& random_seed) - : bloom_filter(salt_count, table_size, random_seed) + const std::size_t& random_seed, + std::size_t target_count) + : bloom_filter(salt_count, table_size, random_seed, target_count) { size_list.push_back(table_size_); } |