diff options
-rw-r--r-- | PendingReleaseNotes | 34 | ||||
-rw-r--r-- | doc/release-notes.rst | 53 | ||||
-rw-r--r-- | src/common/bloom_filter.cc | 95 | ||||
-rw-r--r-- | src/common/bloom_filter.hpp | 181 | ||||
-rw-r--r-- | src/test/common/test_bloom_filter.cc | 71 | ||||
-rw-r--r-- | src/test/encoding/types.h | 1 |
6 files changed, 328 insertions, 107 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 779a081480f..a3ec73290f3 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,37 +1,3 @@ -v0.70 -~~~~~ - -* librados::Rados::pool_create_async() and librados::Rados::pool_delete_async() - don't drop a reference to the completion object on error, caller needs to take - care of that. This has never really worked correctly and we were leaking an - object - -* 'ceph osd crush set <id> <weight> <loc..>' no longer adds the osd to the - specified location, as that's a job for 'ceph osd crush add'. It will - however continue to work just the same as long as the osd already exists - in the crush map. - -* The OSD now enforces that class write methods cannot both mutate an - object and return data. The rbd.assign_bid method, the lone - offender, has been removed. This breaks compatibility with - pre-bobtail librbd clients by preventing them from creating new - images. - -* librados now returns on commit instead of ack for synchronous calls. - This is a bit safer in the case where both OSDs and the client crash, and - is probably how it should have been acting from the beginning. Users are - unlikely to notice but it could result in lower performance in some - circumstances. Those who care should switch to using the async interfaces, - which let you specify safety semantics precisely. - -* The C++ librados AioComplete::get_version() method was incorrectly - returning an int (usually 32-bits). To avoid breaking library - compatibility, a get_version64() method is added that returns the - full-width value. The old method is deprecated and will be removed - in a future release. Users of the C++ librados API that make use of - the get_version() method should modify their code to avoid getting a - value that is truncated from 64 to to 32 bits. - v0.71 ~~~~~ diff --git a/doc/release-notes.rst b/doc/release-notes.rst index bb1dfe4bfec..2b566baa0ea 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -2,6 +2,37 @@ Release Notes =============== +v0.70 +----- + +Upgrading +~~~~~~~~~ + +* librados::Rados::pool_create_async() and librados::Rados::pool_delete_async() + don't drop a reference to the completion object on error, caller needs to take + care of that. This has never really worked correctly and we were leaking an + object + +* 'ceph osd crush set <id> <weight> <loc..>' no longer adds the osd to the + specified location, as that's a job for 'ceph osd crush add'. It will + however continue to work just the same as long as the osd already exists + in the crush map. + +Notable Changes +~~~~~~~~~~~~~~~ + +* mon: a few 'ceph mon add' races fixed (command is now idempotent) (Joao Luis) +* crush: fix name caching +* rgw: fix a few minor memory leaks (Yehuda Sadeh) +* ceph: improve parsing of CEPH_ARGS (Benoit Knecht) +* mon: avoid rewriting full osdmaps on restart (Joao Luis) +* crc32c: fix optimized crc32c code (it now detects arch support properly) +* mon: fix 'ceph osd crush reweight ...' (Joao Luis) +* osd: revert xattr size limit (fixes large rgw uploads) +* mds: fix heap profiler commands (Joao Luis) +* rgw: fix inefficient use of std::list::size() (Yehuda Sadeh) + + v0.69 ----- @@ -19,6 +50,28 @@ Upgrading the because the server-side behavior has changed it is possible that an application misusing the interface may now get errors. +* The OSD now enforces that class write methods cannot both mutate an + object and return data. The rbd.assign_bid method, the lone + offender, has been removed. This breaks compatibility with + pre-bobtail librbd clients by preventing them from creating new + images. + +* librados now returns on commit instead of ack for synchronous calls. + This is a bit safer in the case where both OSDs and the client crash, and + is probably how it should have been acting from the beginning. Users are + unlikely to notice but it could result in lower performance in some + circumstances. Those who care should switch to using the async interfaces, + which let you specify safety semantics precisely. + +* The C++ librados AioComplete::get_version() method was incorrectly + returning an int (usually 32-bits). To avoid breaking library + compatibility, a get_version64() method is added that returns the + full-width value. The old method is deprecated and will be removed + in a future release. Users of the C++ librados API that make use of + the get_version() method should modify their code to avoid getting a + value that is truncated from 64 to to 32 bits. + + Notable Changes ~~~~~~~~~~~~~~~ diff --git a/src/common/bloom_filter.cc b/src/common/bloom_filter.cc index f602b80149e..a1c20bf0c12 100644 --- a/src/common/bloom_filter.cc +++ b/src/common/bloom_filter.cc @@ -6,26 +6,26 @@ void bloom_filter::encode(bufferlist& bl) const { - ENCODE_START(1, 1, bl); + ENCODE_START(2, 2, bl); ::encode((uint64_t)salt_count_, bl); - ::encode((uint64_t)table_size_, bl); - ::encode((uint64_t)inserted_element_count_, bl); + ::encode((uint64_t)insert_count_, bl); + ::encode((uint64_t)target_element_count_, bl); ::encode((uint64_t)random_seed_, bl); - bufferptr bp((const char*)bit_table_, raw_table_size_); + bufferptr bp((const char*)bit_table_, table_size_); ::encode(bp, bl); ENCODE_FINISH(bl); } void bloom_filter::decode(bufferlist::iterator& p) { - DECODE_START(1, p); + DECODE_START(2, p); uint64_t v; ::decode(v, p); salt_count_ = v; ::decode(v, p); - table_size_ = v; + insert_count_ = v; ::decode(v, p); - inserted_element_count_ = v; + target_element_count_ = v; ::decode(v, p); random_seed_ = v; bufferlist t; @@ -33,11 +33,14 @@ void bloom_filter::decode(bufferlist::iterator& p) salt_.clear(); generate_unique_salt(); - raw_table_size_ = t.length(); - assert(raw_table_size_ == table_size_ / bits_per_char); + table_size_ = t.length(); delete bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - t.copy(0, raw_table_size_, (char *)bit_table_); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + t.copy(0, table_size_, (char *)bit_table_); + } else { + bit_table_ = NULL; + } DECODE_FINISH(p); } @@ -46,8 +49,8 @@ void bloom_filter::dump(Formatter *f) const { f->dump_unsigned("salt_count", salt_count_); f->dump_unsigned("table_size", table_size_); - f->dump_unsigned("raw_table_size", raw_table_size_); - f->dump_unsigned("insert_count", inserted_element_count_); + f->dump_unsigned("insert_count", insert_count_); + f->dump_unsigned("target_element_count", target_element_count_); f->dump_unsigned("random_seed", random_seed_); f->open_array_section("salt_table"); @@ -56,21 +59,79 @@ void bloom_filter::dump(Formatter *f) const f->close_section(); f->open_array_section("bit_table"); - for (unsigned i = 0; i < raw_table_size_; ++i) + for (unsigned i = 0; i < table_size_; ++i) f->dump_unsigned("byte", (unsigned)bit_table_[i]); f->close_section(); } void bloom_filter::generate_test_instances(list<bloom_filter*>& ls) { - ls.push_back(new bloom_filter(10, .5, 1)); - ls.push_back(new bloom_filter(10, .5, 1)); + ls.push_back(new bloom_filter(10, .5, 1, 5)); + ls.push_back(new bloom_filter(10, .5, 1, 5)); ls.back()->insert("foo"); ls.back()->insert("bar"); - ls.push_back(new bloom_filter(50, .5, 1)); + ls.push_back(new bloom_filter(50, .5, 1, 5)); ls.back()->insert("foo"); ls.back()->insert("bar"); ls.back()->insert("baz"); ls.back()->insert("boof"); ls.back()->insert("boogggg"); } + + +void compressible_bloom_filter::encode(bufferlist& bl) const +{ + ENCODE_START(2, 2, bl); + bloom_filter::encode(bl); + + uint32_t s = size_list.size(); + ::encode(s, bl); + for (vector<size_t>::const_iterator p = size_list.begin(); + p != size_list.end(); ++p) + ::encode((uint64_t)*p, bl); + + ENCODE_FINISH(bl); +} + +void compressible_bloom_filter::decode(bufferlist::iterator& p) +{ + DECODE_START(2, p); + bloom_filter::decode(p); + + uint32_t s; + ::decode(s, p); + size_list.resize(s); + for (unsigned i = 0; i < s; i++) { + uint64_t v; + ::decode(v, p); + size_list[i] = v; + } + + DECODE_FINISH(p); +} + +void compressible_bloom_filter::dump(Formatter *f) const +{ + bloom_filter::dump(f); + + f->open_array_section("table_sizes"); + for (vector<size_t>::const_iterator p = size_list.begin(); + p != size_list.end(); ++p) + f->dump_unsigned("size", (uint64_t)*p); + f->close_section(); +} + +void compressible_bloom_filter::generate_test_instances(list<compressible_bloom_filter*>& ls) +{ + ls.push_back(new compressible_bloom_filter(10, .5, 1, 4)); + ls.push_back(new compressible_bloom_filter(10, .5, 1, 4)); + ls.back()->insert("foo"); + ls.back()->insert("bar"); + ls.push_back(new compressible_bloom_filter(50, .5, 1, 4)); + ls.back()->insert("foo"); + ls.back()->insert("bar"); + ls.back()->insert("baz"); + ls.back()->insert("boof"); + ls.back()->compress(20); + ls.back()->insert("boogggg"); +} diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 6216c7fb34d..93787a89a60 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -53,14 +53,22 @@ protected: typedef unsigned int bloom_type; typedef unsigned char cell_type; + unsigned char* bit_table_; ///< pointer to bit map + std::vector<bloom_type> salt_; ///< vector of salts + std::size_t salt_count_; ///< number of salts + std::size_t table_size_; ///< bit table size in bytes + std::size_t insert_count_; ///< insertion count + std::size_t target_element_count_; ///< target number of unique insertions + std::size_t random_seed_; ///< random seed + public: bloom_filter() : bit_table_(0), salt_count_(0), table_size_(0), - raw_table_size_(0), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(0), random_seed_(0) {} @@ -68,7 +76,8 @@ public: const double& false_positive_probability, const std::size_t& random_seed) : bit_table_(0), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(predicted_inserted_element_count), random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) { find_optimal_parameters(predicted_inserted_element_count, false_positive_probability, @@ -76,12 +85,15 @@ public: init(); } - bloom_filter(const std::size_t& salt_count, std::size_t table_size, - const std::size_t& random_seed) + bloom_filter(const std::size_t& salt_count, + std::size_t table_size, + const std::size_t& random_seed, + std::size_t target_element_count) : bit_table_(0), salt_count_(salt_count), table_size_(table_size), - inserted_element_count_(0), + insert_count_(0), + target_element_count_(target_element_count), random_seed_((random_seed) ? random_seed : 0xA5A5A5A5) { init(); @@ -89,9 +101,12 @@ public: void init() { generate_unique_salt(); - raw_table_size_ = table_size_ / bits_per_char; - bit_table_ = new cell_type[raw_table_size_]; - std::fill_n(bit_table_,raw_table_size_,0x00); + if (table_size_) { + bit_table_ = new cell_type[table_size_]; + std::fill_n(bit_table_, table_size_, 0x00); + } else { + bit_table_ = NULL; + } } bloom_filter(const bloom_filter& filter) @@ -104,12 +119,11 @@ public: if (this != &filter) { salt_count_ = filter.salt_count_; table_size_ = filter.table_size_; - raw_table_size_ = filter.raw_table_size_; - inserted_element_count_ = filter.inserted_element_count_; + insert_count_ = filter.insert_count_; random_seed_ = filter.random_seed_; delete[] bit_table_; - bit_table_ = new cell_type[raw_table_size_]; - std::copy(filter.bit_table_,filter.bit_table_ + raw_table_size_,bit_table_); + bit_table_ = new cell_type[table_size_]; + std::copy(filter.bit_table_, filter.bit_table_ + table_size_, bit_table_); salt_ = filter.salt_; } return *this; @@ -127,8 +141,9 @@ public: inline void clear() { - std::fill_n(bit_table_,raw_table_size_,0x00); - inserted_element_count_ = 0; + if (bit_table_) + std::fill_n(bit_table_, table_size_, 0x00); + insert_count_ = 0; } /** @@ -141,26 +156,28 @@ public: * @param val integer value to insert */ inline void insert(uint32_t val) { + assert(bit_table_); std::size_t bit_index = 0; std::size_t bit = 0; for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } - ++inserted_element_count_; + ++insert_count_; } inline void insert(const unsigned char* key_begin, const std::size_t& length) { + assert(bit_table_); std::size_t bit_index = 0; std::size_t bit = 0; for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; + bit_table_[bit_index >> 3] |= bit_mask[bit]; } - ++inserted_element_count_; + ++insert_count_; } template<typename T> @@ -202,12 +219,14 @@ public: */ inline virtual bool contains(uint32_t val) const { + if (!bit_table_) + return false; std::size_t bit_index = 0; std::size_t bit = 0; for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(val,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -217,12 +236,14 @@ public: inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const { + if (!bit_table_) + return false; std::size_t bit_index = 0; std::size_t bit = 0; for (std::size_t i = 0; i < salt_.size(); ++i) { compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); - if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) + if ((bit_table_[bit_index >> 3] & bit_mask[bit]) != bit_mask[bit]) { return false; } @@ -278,12 +299,41 @@ public: inline virtual std::size_t size() const { - return table_size_; + return table_size_ * bits_per_char; } inline std::size_t element_count() const { - return inserted_element_count_; + return insert_count_; + } + + /* + * density of bits set. inconvenient units, but: + * .3 = ~50% target insertions + * .5 = 100% target insertions, "perfectly full" + * .75 = 200% target insertions + * 1.0 = all bits set... infinite insertions + */ + inline double density() const + { + if (!bit_table_) + return 0.0; + size_t set = 0; + uint8_t *p = bit_table_; + size_t left = table_size_; + while (left-- > 0) { + uint8_t c = *p; + for (; c; ++set) + c &= c - 1; + ++p; + } + return (double)set / (double)(table_size_ << 3); + } + + virtual inline double approx_unique_element_count() const { + // this is not a very good estimate; a better solution should have + // some asymptotic behavior as density() approaches 1.0. + return (double)target_element_count_ * 2.0 * density(); } inline double effective_fpp() const @@ -295,7 +345,7 @@ public: the current number of inserted elements - not the user defined predicated/expected number of inserted elements. */ - return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size()); + return std::pow(1.0 - std::exp(-1.0 * salt_.size() * insert_count_ / size()), 1.0 * salt_.size()); } inline bloom_filter& operator &= (const bloom_filter& filter) @@ -306,7 +356,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] &= filter.bit_table_[i]; } } @@ -321,7 +371,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] |= filter.bit_table_[i]; } } @@ -336,7 +386,7 @@ public: (table_size_ == filter.table_size_) && (random_seed_ == filter.random_seed_) ) { - for (std::size_t i = 0; i < raw_table_size_; ++i) { + for (std::size_t i = 0; i < table_size_; ++i) { bit_table_[i] ^= filter.bit_table_[i]; } } @@ -352,8 +402,8 @@ protected: inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const { - bit_index = hash % table_size_; - bit = bit_index % bits_per_char; + bit_index = hash % (table_size_ << 3); + bit = bit_index & 7; } void generate_unique_salt() @@ -418,7 +468,8 @@ protected: } else { - std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_)); + std::copy(predef_salt,predef_salt + predef_salt_count, + std::back_inserter(salt_)); srand(static_cast<unsigned int>(random_seed_)); while (salt_.size() < salt_count_) { @@ -466,8 +517,8 @@ protected: *salt_count = static_cast<std::size_t>(min_k); size_t t = static_cast<std::size_t>(min_m); - t += (((t % bits_per_char) != 0) ? (bits_per_char - (t % bits_per_char)) : 0); - *table_size = t; + t += (((t & 7) != 0) ? (bits_per_char - (t & 7)) : 0); + *table_size = t >> 3; } inline bloom_type hash_ap(uint32_t val, bloom_type hash) const @@ -507,14 +558,6 @@ protected: return hash; } - std::vector<bloom_type> salt_; - unsigned char* bit_table_; - std::size_t salt_count_; - std::size_t table_size_; - std::size_t raw_table_size_; - std::size_t inserted_element_count_; - std::size_t random_seed_; - public: void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); @@ -549,53 +592,77 @@ class compressible_bloom_filter : public bloom_filter { public: + compressible_bloom_filter() : bloom_filter() {} + compressible_bloom_filter(const std::size_t& predicted_element_count, const double& false_positive_probability, const std::size_t& random_seed) - : bloom_filter(predicted_element_count,false_positive_probability,random_seed) + : bloom_filter(predicted_element_count, false_positive_probability, random_seed) + { + size_list.push_back(table_size_); + } + + compressible_bloom_filter(const std::size_t& salt_count, + std::size_t table_size, + const std::size_t& random_seed, + std::size_t target_count) + : bloom_filter(salt_count, table_size, random_seed, target_count) { size_list.push_back(table_size_); } inline virtual std::size_t size() const { - return size_list.back(); + return size_list.back() * bits_per_char; } - inline bool compress(const double& percentage) + inline bool compress(const double& target_ratio) { - if ((0.0 >= percentage) || (percentage >= 100.0)) + if (!bit_table_) + return false; + + if ((0.0 >= target_ratio) || (target_ratio >= 1.0)) { return false; } std::size_t original_table_size = size_list.back(); - std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0)))); - new_table_size -= (((new_table_size % bits_per_char) != 0) ? (new_table_size % bits_per_char) : 0); + std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio); - if ((bits_per_char > new_table_size) || (new_table_size >= original_table_size)) + if ((!new_table_size) || (new_table_size >= original_table_size)) { return false; } - cell_type* tmp = new cell_type[new_table_size / bits_per_char]; - std::copy(bit_table_, bit_table_ + (new_table_size / bits_per_char), tmp); - cell_type* itr = bit_table_ + (new_table_size / bits_per_char); - cell_type* end = bit_table_ + (original_table_size / bits_per_char); + cell_type* tmp = new cell_type[new_table_size]; + std::copy(bit_table_, bit_table_ + (new_table_size), tmp); + cell_type* itr = bit_table_ + (new_table_size); + cell_type* end = bit_table_ + (original_table_size); cell_type* itr_tmp = tmp; - + cell_type* itr_end = tmp + (new_table_size); while (end != itr) { *(itr_tmp++) |= (*itr++); + if (itr_tmp == itr_end) + itr_tmp = tmp; } delete[] bit_table_; bit_table_ = tmp; size_list.push_back(new_table_size); + table_size_ = new_table_size; return true; } + virtual inline double approx_unique_element_count() const { + // this is not a very good estimate; a better solution should have + // some asymptotic behavior as density() approaches 1.0. + // + // the compress() correction is also bad; it tends to under-estimate. + return (double)target_element_count_ * 2.0 * density() * (double)size_list.back() / (double)size_list.front(); + } + private: inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const @@ -603,13 +670,19 @@ private: bit_index = hash; for (std::size_t i = 0; i < size_list.size(); ++i) { - bit_index %= size_list[i]; + bit_index %= size_list[i] << 3; } - bit = bit_index % bits_per_char; + bit = bit_index & 7; } std::vector<std::size_t> size_list; +public: + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& bl); + void dump(Formatter *f) const; + static void generate_test_instances(std::list<compressible_bloom_filter*>& ls); }; +WRITE_CLASS_ENCODER(compressible_bloom_filter) #endif diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc index 8e3661b2cc1..cfd41305caa 100644 --- a/src/test/common/test_bloom_filter.cc +++ b/src/test/common/test_bloom_filter.cc @@ -23,7 +23,17 @@ TEST(BloomFilter, Basic) { ASSERT_TRUE(bf.contains("bar")); } +TEST(BloomFilter, Empty) { + bloom_filter bf; + for (int i=0; i<100; ++i) { + ASSERT_FALSE(bf.contains(i)); + ASSERT_FALSE(bf.contains(stringify(i))); + } +} + TEST(BloomFilter, Sweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl; for (int ex = 3; ex < 12; ex += 2) { for (float fpp = .001; fpp < .5; fpp *= 4.0) { @@ -62,7 +72,9 @@ TEST(BloomFilter, Sweep) { } TEST(BloomFilter, SweepInt) { - std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl; + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); + std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl; for (int ex = 3; ex < 12; ex += 2) { for (float fpp = .001; fpp < .5; fpp *= 4.0) { int max = 2 << ex; @@ -92,15 +104,70 @@ TEST(BloomFilter, SweepInt) { double byte_per_insert = (double)bl.length() / (double)max; - std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl; + std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert + << "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl; ASSERT_TRUE(actual < fpp * 10); ASSERT_TRUE(actual > fpp / 10); + ASSERT_TRUE(bf.density() > 0.40); + ASSERT_TRUE(bf.density() < 0.60); } } } +TEST(BloomFilter, CompressibleSweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); + std::cout << "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n"; + float fpp = .01; + int max = 1024; + for (int div = 1; div < 10; div++) { + compressible_bloom_filter bf(max, fpp, 1); + int t = max/div; + for (int n = 0; n < t; n++) + bf.insert(n); + + unsigned est = bf.approx_unique_element_count(); + if (div > 1) + bf.compress(1.0 / div); + + for (int n = 0; n < t; n++) + ASSERT_TRUE(bf.contains(n)); + + int test = max * 100; + int hit = 0; + for (int n = 0; n < test; n++) + if (bf.contains(100000 + n)) + hit++; + + double actual = (double)hit / (double)test; + + bufferlist bl; + ::encode(bf, bl); + + double byte_per_insert = (double)bl.length() / (double)max; + unsigned est_after = bf.approx_unique_element_count(); + std::cout << max + << "\t" << t + << "\t" << est + << "\t" << est_after + << "\t" << fpp + << "\t" << actual + << "\t" << bl.length() << "\t" << byte_per_insert + << std::endl; + + ASSERT_TRUE(actual < fpp * 2.0); + ASSERT_TRUE(actual > fpp / 2.0); + ASSERT_TRUE(est_after < est * 2); + ASSERT_TRUE(est_after > est / 2); + } +} + + + TEST(BloomFilter, BinSweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); int total_max = 16384; float total_fpp = .01; std::cout << "total_inserts " << total_max << " target-fpp " << total_fpp << std::endl; diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index 59e55a11b23..77c8729e986 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -6,6 +6,7 @@ TYPE(filepath) #include "common/bloom_filter.hpp" TYPE(bloom_filter) +TYPE(compressible_bloom_filter) #include "common/snap_types.h" TYPE(SnapContext) |