diff options
-rw-r--r-- | src/common/bloom_filter.hpp | 10 | ||||
-rw-r--r-- | src/test/common/test_bloom_filter.cc | 58 |
2 files changed, 64 insertions, 4 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 8587caa2d8d..3e55687774b 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -603,15 +603,15 @@ public: return size_list.back() * bits_per_char; } - inline bool compress(const double& percentage) + inline bool compress(const double& target_ratio) { - if ((0.0 >= percentage) || (percentage >= 100.0)) + if ((0.0 >= target_ratio) || (target_ratio >= 1.0)) { return false; } std::size_t original_table_size = size_list.back(); - std::size_t new_table_size = static_cast<std::size_t>((size_list.back() * (1.0 - (percentage / 100.0)))); + std::size_t new_table_size = static_cast<std::size_t>(size_list.back() * target_ratio); if ((!new_table_size) || (new_table_size >= original_table_size)) { @@ -623,10 +623,12 @@ public: cell_type* itr = bit_table_ + (new_table_size); cell_type* end = bit_table_ + (original_table_size); cell_type* itr_tmp = tmp; - + cell_type* itr_end = tmp + (new_table_size); while (end != itr) { *(itr_tmp++) |= (*itr++); + if (itr_tmp == itr_end) + itr_tmp = tmp; } delete[] bit_table_; diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc index 5edf3f0f9fa..71d3de97d2a 100644 --- a/src/test/common/test_bloom_filter.cc +++ b/src/test/common/test_bloom_filter.cc @@ -24,6 +24,8 @@ TEST(BloomFilter, Basic) { } TEST(BloomFilter, Sweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl; for (int ex = 3; ex < 12; ex += 2) { for (float fpp = .001; fpp < .5; fpp *= 4.0) { @@ -62,6 +64,8 @@ TEST(BloomFilter, Sweep) { } TEST(BloomFilter, SweepInt) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); std::cout << "# max\tfpp\tactual\tsize\tB/insert\tdensity\tapprox_element_count" << std::endl; for (int ex = 3; ex < 12; ex += 2) { for (float fpp = .001; fpp < .5; fpp *= 4.0) { @@ -96,12 +100,66 @@ TEST(BloomFilter, SweepInt) { << "\t" << bf.density() << "\t" << bf.approx_unique_element_count() << std::endl; ASSERT_TRUE(actual < fpp * 10); ASSERT_TRUE(actual > fpp / 10); + ASSERT_TRUE(bf.density() > 0.40); + ASSERT_TRUE(bf.density() < 0.60); } } } +TEST(BloomFilter, CompressibleSweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); + std::cout << "# max\tins\test ins\tafter\ttgtfpp\tactual\tsize\tb/elem\n"; + float fpp = .01; + int max = 1024; + for (int div = 1; div < 10; div++) { + compressible_bloom_filter bf(max, fpp, 1); + int t = max/div; + for (int n = 0; n < t; n++) + bf.insert(n); + + unsigned est = bf.approx_unique_element_count(); + if (div > 1) + bf.compress(1.0 / div); + + for (int n = 0; n < t; n++) + ASSERT_TRUE(bf.contains(n)); + + int test = max * 100; + int hit = 0; + for (int n = 0; n < test; n++) + if (bf.contains(100000 + n)) + hit++; + + double actual = (double)hit / (double)test; + + bufferlist bl; + ::encode(bf, bl); + + double byte_per_insert = (double)bl.length() / (double)max; + unsigned est_after = bf.approx_unique_element_count(); + std::cout << max + << "\t" << t + << "\t" << est + << "\t" << est_after + << "\t" << fpp + << "\t" << actual + << "\t" << bl.length() << "\t" << byte_per_insert + << std::endl; + + ASSERT_TRUE(actual < fpp * 2.0); + ASSERT_TRUE(actual > fpp / 2.0); + ASSERT_TRUE(est_after < est * 2); + ASSERT_TRUE(est_after > est / 2); + } +} + + + TEST(BloomFilter, BinSweep) { + std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield); + std::cout.precision(5); int total_max = 16384; float total_fpp = .01; std::cout << "total_inserts " << total_max << " target-fpp " << total_fpp << std::endl; |