diff options
-rw-r--r-- | src/common/bloom_filter.cc | 51 | ||||
-rw-r--r-- | src/common/bloom_filter.hpp | 76 |
2 files changed, 61 insertions, 66 deletions
diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc index a14434076aa..a7a4c1d0796 100644 --- a/src/common/bloom_filter.cc +++ b/src/common/bloom_filter.cc @@ -6,25 +6,22 @@ void bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); - ::encode((uint64_t)table_size_, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; ::decode(v, p); - table_size_ = v; - ::decode(v, p); inserted_element_count_ = v; ::decode(v, p); random_seed_ = v; @@ -33,11 +30,14 @@ void bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -46,7 +46,6 @@ void bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); f->dump_unsigned("table_size", table_size_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); @@ -56,7 +55,7 @@ void bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } @@ -78,7 +77,7 @@ void bloom_filter::generate_test_instances(list<bloom_filter*>& ls) void compressible_bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); uint32_t s = size_list.size(); ::encode(s, bl); @@ -87,14 +86,14 @@ void compressible_bloom_filter::encode(bufferlist& bl) const ::encode((uint64_t)*p, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void compressible_bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; @@ -106,10 +105,6 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) ::decode(v, p); size_list[i] = v; } - if (size_list.size()) - table_size_ = size_list.back(); - else - table_size_ = 0; ::decode(v, p); inserted_element_count_ = v; @@ -120,11 +115,16 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); + assert((!table_size_ && size_list.empty()) || + table_size_ == size_list.back()); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -132,10 +132,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) void compressible_bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); - + f->dump_unsigned("table_size", table_size_); f->open_array_section("table_sizes"); for (vector<size_t>::const_iterator p = size_list.begin(); p != size_list.end(); ++p) @@ -148,7 +147,7 @@ void compressible_bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 1c988fadbd1..50349560feb 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -53,13 +53,19 @@ protected: typedef unsigned int bloom_type; typedef unsigned char cell_type; + unsigned char* bit_table_; ///< pointer to bit map + std::vector<bloom_type> salt_; ///< vector of salts + std::size_t salt_count_; ///< number of salts + std::size_t table_size_; ///< bit table size in bytes + std::size_t inserted_element_count_; ///< insertion count + std::size_t random_seed_; ///< random seed + public: bloom_filter() : bit_table_(0), salt_count_(0), table_size_(0), - raw_table_size_(0), inserted_element_count_(0), random_seed_(0) {} @@ -76,7 +82,8 @@ public: init(); } - bloom_filter(const std::size_t& salt_count, std::size_t table_size, + bloom_filter(const std::size_t& salt_count, + std::size_t table_size, const std::size_t& random_seed) : bit_table_(0), salt_count_(salt_count), @@ -89,9 +96,8 @@ public: void init() { generate_unique_salt(); - raw_table_size_ = table_size_ / bits_per_char; - bit_table_ = new cell_type[raw_table_size_]; - std::fill_n(bit_table_,raw_table_size_,0x00); + bit_table_ = new cell_type[table_size_]; + std::fill_n(bit_table_, table_size_, 0x00); } bloom_filter(const bloom_filter& filter) @@ -104,12 +110,11 @@ public: if (this != &filter) { salt_count_ = filter.salt_count_; table_size_ = filter.table_size_; - raw_table_size_ = filter.raw_table_size_; inserted_element_count_ = filter.inserted_element_count_; random_seed_ = filter.random_seed_; delete[] bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - std::copy(filter.bit_table_,filter.bit_table_ + raw_table_size_,bit_table_); + bit_table_ = new cell_type[table_size_]; + std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_); salt_ = filter.salt_; } return *this; @@ -127,7 +132,7 @@ public: inline void clear() { - std::fill_n(bit_table_,raw_table_size_,0x00); + std::fill_n(bit_table_, table_size_, 0x00); inserted_element_count_ = 0; } @@ -146,7 +151,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -158,7 +163,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -207,7 +212,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -222,7 +227,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -278,7 +283,7 @@ public: inline virtual std::size_t size() const { - return table_size_; + return table_size_ * bits_per_char; } inline std::size_t element_count() const @@ -306,7 +311,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] &= filter.bit_table_[i]; } } @@ -321,7 +326,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] |= filter.bit_table_[i]; } } @@ -336,7 +341,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] ^= filter.bit_table_[i]; } } @@ -352,8 +357,8 @@ protected: inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const { - bit_index = hash % table_size_; - bit = bit_index % bits_per_char; + bit_index = hash % (table_size_ << 3); + bit = bit_index & 7; } void generate_unique_salt() @@ -418,7 +423,8 @@ protected: } else { - std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_)); + std::copy(predef_salt,predef_salt + predef_salt_count, + std::back_inserter(salt_)); srand(static_cast<unsigned int>(random_seed_)); while (salt_.size() < salt_count_) { @@ -466,8 +472,8 @@ protected: *salt_count = static_cast<std::size_t>(min_k); size_t t = static_cast<std::size_t>(min_m); - t += (((t % bits_per_char) != 0) ? (bits_per_char - (t % bits_per_char)) : 0); - *table_size = t; + t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0); + *table_size = t >> 3; } inline bloom_type hash_ap(uint32_t val, bloom_type hash) const @@ -507,14 +513,6 @@ protected: return hash; } - std::vector<bloom_type> salt_; - unsigned char* bit_table_; - std::size_t salt_count_; - std::size_t table_size_; - std::size_t raw_table_size_; - std::size_t inserted_element_count_; - std::size_t random_seed_; - public: void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -569,7 +567,7 @@ public: inline virtual std::size_t size() const { - return size_list.back(); + return size_list.back() * bits_per_char; } inline bool compress(const double& percentage) @@ -581,17 +579,16 @@ public: std::size_t original_table_size = size_list.back(); std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0)))); - new_table_size -= (((new_table_size % bits_per_char) != 0) ? (new_table_size % bits_per_char) : 0); - if ((bits_per_char > new_table_size) || (new_table_size >= original_table_size)) + if ((!new_table_size) || (new_table_size >= original_table_size)) { return false; } - cell_type* tmp = new cell_type[new_table_size / bits_per_char]; - std::copy(bit_table_, bit_table_ + (new_table_size / bits_per_char), tmp); - cell_type* itr = bit_table_ + (new_table_size / bits_per_char); - cell_type* end = bit_table_ + (original_table_size / bits_per_char); + cell_type* tmp = new cell_type[new_table_size]; + std::copy(bit_table_, bit_table_ + (new_table_size), tmp); + cell_type* itr = bit_table_ + (new_table_size); + cell_type* end = bit_table_ + (original_table_size); cell_type* itr_tmp = tmp; while (end != itr) @@ -603,7 +600,6 @@ public: bit_table_ = tmp; size_list.push_back(new_table_size); table_size_ = new_table_size; - raw_table_size_ = table_size_ / bits_per_char; return true; } @@ -615,9 +611,9 @@ private: bit_index = hash; for (std::size_t i = 0; i < size_list.size(); ++i) { - bit_index %= size_list[i]; + bit_index %= size_list[i] << 3; } - bit = bit_index % bits_per_char; + bit = bit_index & 7; } std::vector<std::size_t> size_list; |