diff options
author | Sage Weil <sage@inktank.com> | 2013-09-19 17:57:14 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-09-19 18:27:07 -0700 |
commit | 71c806986aab771eccc0bcfeb9fa9ef2d1f38883 (patch) | |
tree | efc2fcbc5ef392e4db4ad5ce550434479ca8b204 | |
parent | 5df20649236754ac2841c8710522a1701db03091 (diff) | |
download | ceph-71c806986aab771eccc0bcfeb9fa9ef2d1f38883.tar.gz |
common/bloom_filter: unit tests
Fun facts:
- fpp = false positive probability
- fpp is a function of insert count only
- at .1% fpp, we pay about 2 bytes per insert
- at 1-2% fpp, we pay about 1 byte per insert
- at 15% fpp, we pay about .5 bytes per insert
Signed-off-by: Sage Weil <sage@inktank.com>
-rw-r--r-- | src/common/bloom_filter.hpp | 3 | ||||
-rw-r--r-- | src/test/Makefile.am | 5 | ||||
-rw-r--r-- | src/test/common/test_bloom_filter.cc | 62 |
3 files changed, 69 insertions, 1 deletions
diff --git a/src/common/bloom_filter.hpp b/src/common/bloom_filter.hpp index 2a1ee2c4217..cc22136e5ca 100644 --- a/src/common/bloom_filter.hpp +++ b/src/common/bloom_filter.hpp @@ -26,6 +26,7 @@ #include <algorithm> #include <cmath> #include <limits> +#include <list> #include <string> #include <vector> @@ -470,7 +471,7 @@ public: void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); void dump(Formatter *f) const; - static void generate_test_instances(list<bloom_filter*>& ls); + static void generate_test_instances(std::list<bloom_filter*>& ls); }; WRITE_CLASS_ENCODER(bloom_filter) diff --git a/src/test/Makefile.am b/src/test/Makefile.am index 9ce4a246673..0a0f5438f5d 100644 --- a/src/test/Makefile.am +++ b/src/test/Makefile.am @@ -250,6 +250,11 @@ unittest_addrs_CXXFLAGS = $(UNITTEST_CXXFLAGS) unittest_addrs_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) check_PROGRAMS += unittest_addrs +unittest_bloom_filter_SOURCES = test/common/test_bloom_filter.cc +unittest_bloom_filter_CXXFLAGS = $(UNITTEST_CXXFLAGS) +unittest_bloom_filter_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) +check_PROGRAMS += unittest_bloom_filter + unittest_sharedptr_registry_SOURCES = test/common/test_sharedptr_registry.cc unittest_sharedptr_registry_CXXFLAGS = $(UNITTEST_CXXFLAGS) unittest_sharedptr_registry_LDADD = $(UNITTEST_LDADD) $(CEPH_GLOBAL) diff --git a/src/test/common/test_bloom_filter.cc b/src/test/common/test_bloom_filter.cc new file mode 100644 index 00000000000..dac354357e5 --- /dev/null +++ b/src/test/common/test_bloom_filter.cc @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2013 Inktank + * + * LGPL2 + */ + +#include <iostream> +#include <gtest/gtest.h> + +#include "include/stringify.h" +#include "common/bloom_filter.hpp" + +TEST(BloomFilter, Basic) { + bloom_filter bf(10, .1, 1); + bf.insert("foo"); + bf.insert("bar"); + + ASSERT_TRUE(bf.contains("foo")); + ASSERT_TRUE(bf.contains("bar")); +} + +TEST(BloomFilter, Sweep) { + std::cout << "# max\tfpp\tactual\tsize\tB/insert" << std::endl; + for (int ex = 3; ex < 12; ex++) { + for (float fpp = .001; fpp < .5; fpp *= 2.0) { + int max = 2 << ex; + bloom_filter bf(max, fpp, 1); + bf.insert("foo"); + bf.insert("bar"); + + ASSERT_TRUE(bf.contains("foo")); + ASSERT_TRUE(bf.contains("bar")); + + for (int n = 0; n < max; n++) + bf.insert("ok" + stringify(n)); + + int test = max * 100; + int hit = 0; + for (int n = 0; n < test; n++) + if (bf.contains("asdf" + stringify(n))) + hit++; + + ASSERT_TRUE(bf.contains("foo")); + ASSERT_TRUE(bf.contains("bar")); + + double actual = (double)hit / (double)test; + + bufferlist bl; + ::encode(bf, bl); + + double byte_per_insert = (double)bl.length() / (double)max; + + std::cout << max << "\t" << fpp << "\t" << actual << "\t" << bl.length() << "\t" << byte_per_insert << std::endl; + ASSERT_TRUE(actual < fpp * 10); + + } + } +} |