diff options
author | Sage Weil <sage@inktank.com> | 2013-10-03 16:30:29 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-10-04 22:57:18 -0700 |
commit | d07f5f358841546391c4df54ae83742533aadfd8 (patch) | |
tree | 9e82bfb49c1b4ea391e9f03d2d344307a7df8bea | |
parent | ea2378e539a2edc4bee0c4749d8d77f4f859b621 (diff) | |
download | ceph-d07f5f358841546391c4df54ae83742533aadfd8.tar.gz |
common/bloom_filter: drop raw_table_size_ member
We were storing table_size_ and raw_table_size_, where one is the size in
bits and the other is the size in bytes. This is silly. Store only the
size in bytes.
Also, bytes are always 8 bits, so use bit shifts and drop some of that
silliness too.
Move the member declarations to the top of the class so you read them
before the methods.
Fix some annoying whitespace.
Avoid allocating a 0-length array.
Mark the encoding incompatible with v1.
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/common/bloom_filter.cc | 51 | ||||
-rw-r--r-- | src/common/bloom_filter.hpp | 76 |
2 files changed, 61 insertions, 66 deletions
diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc index a14434076aa..a7a4c1d0796 100644 --- a/src/common/bloom_filter.cc +++ b/src/common/bloom_filter.cc @@ -6,25 +6,22 @@ void bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); - ::encode((uint64_t)table_size_, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; ::decode(v, p); - table_size_ = v; - ::decode(v, p); inserted_element_count_ = v; ::decode(v, p); random_seed_ = v; @@ -33,11 +30,14 @@ void bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -46,7 +46,6 @@ void bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); f->dump_unsigned("table_size", table_size_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); @@ -56,7 +55,7 @@ void bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } @@ -78,7 +77,7 @@ void bloom_filter::generate_test_instances(list<bloom_filter*>& ls) void compressible_bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); uint32_t s = size_list.size(); ::encode(s, bl); @@ -87,14 +86,14 @@ void compressible_bloom_filter::encode(bufferlist& bl) const ::encode((uint64_t)*p, bl); ::encode((uint64_t)inserted_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void compressible_bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; @@ -106,10 +105,6 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) ::decode(v, p); size_list[i] = v; } - if (size_list.size()) - table_size_ = size_list.back(); - else - table_size_ = 0; ::decode(v, p); inserted_element_count_ = v; @@ -120,11 +115,16 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); + assert((!table_size_ && size_list.empty()) || + table_size_ == size_list.back()); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -132,10 +132,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p) void compressible_bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); - f->dump_unsigned("raw_table_size", raw_table_size_); f->dump_unsigned("insert_count", inserted_element_count_); f->dump_unsigned("random_seed", random_seed_); - + f->dump_unsigned("table_size", table_size_); f->open_array_section("table_sizes"); for (vector<size_t>::const_iterator p = size_list.begin(); p != size_list.end(); ++p) @@ -148,7 +147,7 @@ void compressible_bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 1c988fadbd1..50349560feb 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -53,13 +53,19 @@ protected: typedef unsigned int bloom_type; typedef unsigned char cell_type; + unsigned char* bit_table_; ///< pointer to bit map + std::vector<bloom_type> salt_; ///< vector of salts + std::size_t salt_count_; ///< number of salts + std::size_t table_size_; ///< bit table size in bytes + std::size_t inserted_element_count_; ///< insertion count + std::size_t random_seed_; ///< random seed + public: bloom_filter() : bit_table_(0), salt_count_(0), table_size_(0), - raw_table_size_(0), inserted_element_count_(0), random_seed_(0) {} @@ -76,7 +82,8 @@ public: init(); } - bloom_filter(const std::size_t& salt_count, std::size_t table_size, + bloom_filter(const std::size_t& salt_count, + std::size_t table_size, const std::size_t& random_seed) : bit_table_(0), salt_count_(salt_count), @@ -89,9 +96,8 @@ public: void init() { generate_unique_salt(); - raw_table_size_ = table_size_ / bits_per_char; - bit_table_ = new cell_type[raw_table_size_]; - std::fill_n(bit_table_,raw_table_size_,0x00); + bit_table_ = new cell_type[table_size_]; + std::fill_n(bit_table_, table_size_, 0x00); } bloom_filter(const bloom_filter& filter) @@ -104,12 +110,11 @@ public: if (this != &filter) { salt_count_ = filter.salt_count_; table_size_ = filter.table_size_; - raw_table_size_ = filter.raw_table_size_; inserted_element_count_ = filter.inserted_element_count_; random_seed_ = filter.random_seed_; delete[] bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - std::copy(filter.bit_table_,filter.bit_table_ + raw_table_size_,bit_table_); + bit_table_ = new cell_type[table_size_]; + std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_); salt_ = filter.salt_; } return *this; @@ -127,7 +132,7 @@ public: inline void clear() { - std::fill_n(bit_table_,raw_table_size_,0x00); + std::fill_n(bit_table_, table_size_, 0x00); inserted_element_count_ = 0; } @@ -146,7 +151,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -158,7 +163,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } ++inserted_element_count_; } @@ -207,7 +212,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -222,7 +227,7 @@ public: for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -278,7 +283,7 @@ public: inline virtual std::size_t size() const { - return table_size_; + return table_size_ * bits_per_char; } inline std::size_t element_count() const @@ -306,7 +311,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] &= filter.bit_table_[i]; } } @@ -321,7 +326,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] |= filter.bit_table_[i]; } } @@ -336,7 +341,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] ^= filter.bit_table_[i]; } } @@ -352,8 +357,8 @@ protected: inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const { - bit_index = hash % table_size_; - bit = bit_index % bits_per_char; + bit_index = hash % (table_size_ << 3); + bit = bit_index & 7; } void generate_unique_salt() @@ -418,7 +423,8 @@ protected: } else { - std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_)); + std::copy(predef_salt,predef_salt + predef_salt_count, + std::back_inserter(salt_)); srand(static_cast<unsigned int>(random_seed_)); while (salt_.size() < salt_count_) { @@ -466,8 +472,8 @@ protected: *salt_count = static_cast<std::size_t>(min_k); size_t t = static_cast<std::size_t>(min_m); - t += (((t % bits_per_char) != 0) ? (bits_per_char - (t % bits_per_char)) : 0); - *table_size = t; + t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0); + *table_size = t >> 3; } inline bloom_type hash_ap(uint32_t val, bloom_type hash) const @@ -507,14 +513,6 @@ protected: return hash; } - std::vector<bloom_type> salt_; - unsigned char* bit_table_; - std::size_t salt_count_; - std::size_t table_size_; - std::size_t raw_table_size_; - std::size_t inserted_element_count_; - std::size_t random_seed_; - public: void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -569,7 +567,7 @@ public: inline virtual std::size_t size() const { - return size_list.back(); + return size_list.back() * bits_per_char; } inline bool compress(const double& percentage) @@ -581,17 +579,16 @@ public: std::size_t original_table_size = size_list.back(); std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0)))); - new_table_size -= (((new_table_size % bits_per_char) != 0) ? (new_table_size % bits_per_char) : 0); - if ((bits_per_char > new_table_size) || (new_table_size >= original_table_size)) + if ((!new_table_size) || (new_table_size >= original_table_size)) { return false; } - cell_type* tmp = new cell_type[new_table_size / bits_per_char]; - std::copy(bit_table_, bit_table_ + (new_table_size / bits_per_char), tmp); - cell_type* itr = bit_table_ + (new_table_size / bits_per_char); - cell_type* end = bit_table_ + (original_table_size / bits_per_char); + cell_type* tmp = new cell_type[new_table_size]; + std::copy(bit_table_, bit_table_ + (new_table_size), tmp); + cell_type* itr = bit_table_ + (new_table_size); + cell_type* end = bit_table_ + (original_table_size); cell_type* itr_tmp = tmp; while (end != itr) @@ -603,7 +600,6 @@ public: bit_table_ = tmp; size_list.push_back(new_table_size); table_size_ = new_table_size; - raw_table_size_ = table_size_ / bits_per_char; return true; } @@ -615,9 +611,9 @@ private: bit_index = hash; for (std::size_t i = 0; i < size_list.size(); ++i) { - bit_index %= size_list[i]; + bit_index %= size_list[i] << 3; } - bit = bit_index % bits_per_char; + bit = bit_index & 7; } std::vector<std::size_t> size_list; |