summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2013-10-03 16:30:29 -0700
committerSage Weil <sage@inktank.com>2013-10-04 22:57:18 -0700
commitd07f5f358841546391c4df54ae83742533aadfd8 (patch)
tree9e82bfb49c1b4ea391e9f03d2d344307a7df8bea
parentea2378e539a2edc4bee0c4749d8d77f4f859b621 (diff)
downloadceph-d07f5f358841546391c4df54ae83742533aadfd8.tar.gz
common/bloom_filter: drop raw_table_size_ member
We were storing table_size_ and raw_table_size_, where one is the size in bits and the other is the size in bytes. This is silly. Store only the size in bytes. Also, bytes are always 8 bits, so use bit shifts and drop some of that silliness too. Move the member declarations to the top of the class so you read them before the methods. Fix some annoying whitespace. Avoid allocating a 0-length array. Mark the encoding incompatible with v1. Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r--src/common/bloom_filter.cc51
-rw-r--r--src/common/bloom_filter.hpp76
2 files changed, 61 insertions, 66 deletions
diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc
index a14434076aa..a7a4c1d0796 100644
--- a/src/common/bloom_filter.cc
+++ b/src/common/bloom_filter.cc
@@ -6,25 +6,22 @@
void bloom_filter::encode(bufferlist& bl) const
{
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 2, bl);
::encode((uint64_t)salt_count_, bl);
- ::encode((uint64_t)table_size_, bl);
::encode((uint64_t)inserted_element_count_, bl);
::encode((uint64_t)random_seed_, bl);
- bufferptr bp((const char*)bit_table_, raw_table_size_);
+ bufferptr bp((const char*)bit_table_, table_size_);
::encode(bp, bl);
ENCODE_FINISH(bl);
}
void bloom_filter::decode(bufferlist::iterator& p)
{
- DECODE_START(1, p);
+ DECODE_START(2, p);
uint64_t v;
::decode(v, p);
salt_count_ = v;
::decode(v, p);
- table_size_ = v;
- ::decode(v, p);
inserted_element_count_ = v;
::decode(v, p);
random_seed_ = v;
@@ -33,11 +30,14 @@ void bloom_filter::decode(bufferlist::iterator& p)
salt_.clear();
generate_unique_salt();
- raw_table_size_ = t.length();
- assert(raw_table_size_ == table_size_ / bits_per_char);
+ table_size_ = t.length();
delete bit_table_;
- bit_table_ = new cell_type[raw_table_size_];
- t.copy(0, raw_table_size_, (char *)bit_table_);
+ if (table_size_) {
+ bit_table_ = new cell_type[table_size_];
+ t.copy(0, table_size_, (char *)bit_table_);
+ } else {
+ bit_table_ = NULL;
+ }
DECODE_FINISH(p);
}
@@ -46,7 +46,6 @@ void bloom_filter::dump(Formatter *f) const
{
f->dump_unsigned("salt_count", salt_count_);
f->dump_unsigned("table_size", table_size_);
- f->dump_unsigned("raw_table_size", raw_table_size_);
f->dump_unsigned("insert_count", inserted_element_count_);
f->dump_unsigned("random_seed", random_seed_);
@@ -56,7 +55,7 @@ void bloom_filter::dump(Formatter *f) const
f->close_section();
f->open_array_section("bit_table");
- for (unsigned i = 0; i < raw_table_size_; ++i)
+ for (unsigned i = 0; i < table_size_; ++i)
f->dump_unsigned("byte", (unsigned)bit_table_[i]);
f->close_section();
}
@@ -78,7 +77,7 @@ void bloom_filter::generate_test_instances(list<bloom_filter*>& ls)
void compressible_bloom_filter::encode(bufferlist& bl) const
{
- ENCODE_START(1, 1, bl);
+ ENCODE_START(2, 2, bl);
::encode((uint64_t)salt_count_, bl);
uint32_t s = size_list.size();
::encode(s, bl);
@@ -87,14 +86,14 @@ void compressible_bloom_filter::encode(bufferlist& bl) const
::encode((uint64_t)*p, bl);
::encode((uint64_t)inserted_element_count_, bl);
::encode((uint64_t)random_seed_, bl);
- bufferptr bp((const char*)bit_table_, raw_table_size_);
+ bufferptr bp((const char*)bit_table_, table_size_);
::encode(bp, bl);
ENCODE_FINISH(bl);
}
void compressible_bloom_filter::decode(bufferlist::iterator& p)
{
- DECODE_START(1, p);
+ DECODE_START(2, p);
uint64_t v;
::decode(v, p);
salt_count_ = v;
@@ -106,10 +105,6 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
::decode(v, p);
size_list[i] = v;
}
- if (size_list.size())
- table_size_ = size_list.back();
- else
- table_size_ = 0;
::decode(v, p);
inserted_element_count_ = v;
@@ -120,11 +115,16 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
salt_.clear();
generate_unique_salt();
- raw_table_size_ = t.length();
- assert(raw_table_size_ == table_size_ / bits_per_char);
+ table_size_ = t.length();
+ assert((!table_size_ && size_list.empty()) ||
+ table_size_ == size_list.back());
delete bit_table_;
- bit_table_ = new cell_type[raw_table_size_];
- t.copy(0, raw_table_size_, (char *)bit_table_);
+ if (table_size_) {
+ bit_table_ = new cell_type[table_size_];
+ t.copy(0, table_size_, (char *)bit_table_);
+ } else {
+ bit_table_ = NULL;
+ }
DECODE_FINISH(p);
}
@@ -132,10 +132,9 @@ void compressible_bloom_filter::decode(bufferlist::iterator& p)
void compressible_bloom_filter::dump(Formatter *f) const
{
f->dump_unsigned("salt_count", salt_count_);
- f->dump_unsigned("raw_table_size", raw_table_size_);
f->dump_unsigned("insert_count", inserted_element_count_);
f->dump_unsigned("random_seed", random_seed_);
-
+ f->dump_unsigned("table_size", table_size_);
f->open_array_section("table_sizes");
for (vector<size_t>::const_iterator p = size_list.begin();
p != size_list.end(); ++p)
@@ -148,7 +147,7 @@ void compressible_bloom_filter::dump(Formatter *f) const
f->close_section();
f->open_array_section("bit_table");
- for (unsigned i = 0; i < raw_table_size_; ++i)
+ for (unsigned i = 0; i < table_size_; ++i)
f->dump_unsigned("byte", (unsigned)bit_table_[i]);
f->close_section();
}
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp
index 1c988fadbd1..50349560feb 100644
--- a/src/common/bloom_filter.hpp
+++ b/src/common/bloom_filter.hpp
@@ -53,13 +53,19 @@ protected:
typedef unsigned int bloom_type;
typedef unsigned char cell_type;
+ unsigned char* bit_table_; ///< pointer to bit map
+ std::vector<bloom_type> salt_; ///< vector of salts
+ std::size_t salt_count_; ///< number of salts
+ std::size_t table_size_; ///< bit table size in bytes
+ std::size_t inserted_element_count_; ///< insertion count
+ std::size_t random_seed_; ///< random seed
+
public:
bloom_filter()
: bit_table_(0),
salt_count_(0),
table_size_(0),
- raw_table_size_(0),
inserted_element_count_(0),
random_seed_(0)
{}
@@ -76,7 +82,8 @@ public:
init();
}
- bloom_filter(const std::size_t& salt_count, std::size_t table_size,
+ bloom_filter(const std::size_t& salt_count,
+ std::size_t table_size,
const std::size_t& random_seed)
: bit_table_(0),
salt_count_(salt_count),
@@ -89,9 +96,8 @@ public:
void init() {
generate_unique_salt();
- raw_table_size_ = table_size_ / bits_per_char;
- bit_table_ = new cell_type[raw_table_size_];
- std::fill_n(bit_table_,raw_table_size_,0x00);
+ bit_table_ = new cell_type[table_size_];
+ std::fill_n(bit_table_, table_size_, 0x00);
}
bloom_filter(const bloom_filter& filter)
@@ -104,12 +110,11 @@ public:
if (this != &filter) {
salt_count_ = filter.salt_count_;
table_size_ = filter.table_size_;
- raw_table_size_ = filter.raw_table_size_;
inserted_element_count_ = filter.inserted_element_count_;
random_seed_ = filter.random_seed_;
delete[] bit_table_;
- bit_table_ = new cell_type[raw_table_size_];
- std::copy(filter.bit_table_,filter.bit_table_ + raw_table_size_,bit_table_);
+ bit_table_ = new cell_type[table_size_];
+ std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_);
salt_ = filter.salt_;
}
return *this;
@@ -127,7 +132,7 @@ public:
inline void clear()
{
- std::fill_n(bit_table_,raw_table_size_,0x00);
+ std::fill_n(bit_table_, table_size_, 0x00);
inserted_element_count_ = 0;
}
@@ -146,7 +151,7 @@ public:
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
- bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
+ bit_table_[bit_index >> 3] |= bit_mask[bit];
}
++inserted_element_count_;
}
@@ -158,7 +163,7 @@ public:
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
- bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
+ bit_table_[bit_index >> 3] |= bit_mask[bit];
}
++inserted_element_count_;
}
@@ -207,7 +212,7 @@ public:
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(val,salt_[i]),bit_index,bit);
- if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
+ if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit])
{
return false;
}
@@ -222,7 +227,7 @@ public:
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit);
- if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
+ if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit])
{
return false;
}
@@ -278,7 +283,7 @@ public:
inline virtual std::size_t size() const
{
- return table_size_;
+ return table_size_ * bits_per_char;
}
inline std::size_t element_count() const
@@ -306,7 +311,7 @@ public:
(table_size_ == filter.table_size_) &&
(random_seed_ == filter.random_seed_)
) {
- for (std::size_t i = 0; i < raw_table_size_; ++i) {
+ for (std::size_t i = 0; i < table_size_; ++i) {
bit_table_[i] &= filter.bit_table_[i];
}
}
@@ -321,7 +326,7 @@ public:
(table_size_ == filter.table_size_) &&
(random_seed_ == filter.random_seed_)
) {
- for (std::size_t i = 0; i < raw_table_size_; ++i) {
+ for (std::size_t i = 0; i < table_size_; ++i) {
bit_table_[i] |= filter.bit_table_[i];
}
}
@@ -336,7 +341,7 @@ public:
(table_size_ == filter.table_size_) &&
(random_seed_ == filter.random_seed_)
) {
- for (std::size_t i = 0; i < raw_table_size_; ++i) {
+ for (std::size_t i = 0; i < table_size_; ++i) {
bit_table_[i] ^= filter.bit_table_[i];
}
}
@@ -352,8 +357,8 @@ protected:
inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
{
- bit_index = hash % table_size_;
- bit = bit_index % bits_per_char;
+ bit_index = hash % (table_size_ << 3);
+ bit = bit_index & 7;
}
void generate_unique_salt()
@@ -418,7 +423,8 @@ protected:
}
else
{
- std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_));
+ std::copy(predef_salt,predef_salt + predef_salt_count,
+ std::back_inserter(salt_));
srand(static_cast<unsigned int>(random_seed_));
while (salt_.size() < salt_count_)
{
@@ -466,8 +472,8 @@ protected:
*salt_count = static_cast<std::size_t>(min_k);
size_t t = static_cast<std::size_t>(min_m);
- t += (((t % bits_per_char) != 0) ? (bits_per_char - (t % bits_per_char)) : 0);
- *table_size = t;
+ t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0);
+ *table_size = t >> 3;
}
inline bloom_type hash_ap(uint32_t val, bloom_type hash) const
@@ -507,14 +513,6 @@ protected:
return hash;
}
- std::vector<bloom_type> salt_;
- unsigned char* bit_table_;
- std::size_t salt_count_;
- std::size_t table_size_;
- std::size_t raw_table_size_;
- std::size_t inserted_element_count_;
- std::size_t random_seed_;
-
public:
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bl);
@@ -569,7 +567,7 @@ public:
inline virtual std::size_t size() const
{
- return size_list.back();
+ return size_list.back() * bits_per_char;
}
inline bool compress(const double& percentage)
@@ -581,17 +579,16 @@ public:
std::size_t original_table_size = size_list.back();
std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0))));
- new_table_size -= (((new_table_size % bits_per_char) != 0) ? (new_table_size % bits_per_char) : 0);
- if ((bits_per_char > new_table_size) || (new_table_size >= original_table_size))
+ if ((!new_table_size) || (new_table_size >= original_table_size))
{
return false;
}
- cell_type* tmp = new cell_type[new_table_size / bits_per_char];
- std::copy(bit_table_, bit_table_ + (new_table_size / bits_per_char), tmp);
- cell_type* itr = bit_table_ + (new_table_size / bits_per_char);
- cell_type* end = bit_table_ + (original_table_size / bits_per_char);
+ cell_type* tmp = new cell_type[new_table_size];
+ std::copy(bit_table_, bit_table_ + (new_table_size), tmp);
+ cell_type* itr = bit_table_ + (new_table_size);
+ cell_type* end = bit_table_ + (original_table_size);
cell_type* itr_tmp = tmp;
while (end != itr)
@@ -603,7 +600,6 @@ public:
bit_table_ = tmp;
size_list.push_back(new_table_size);
table_size_ = new_table_size;
- raw_table_size_ = table_size_ / bits_per_char;
return true;
}
@@ -615,9 +611,9 @@ private:
bit_index = hash;
for (std::size_t i = 0; i < size_list.size(); ++i)
{
- bit_index %= size_list[i];
+ bit_index %= size_list[i] << 3;
}
- bit = bit_index % bits_per_char;
+ bit = bit_index & 7;
}
std::vector<std::size_t> size_list;