/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/bloom_test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #ifndef GFLAGS |
25 | | #include <cstdio> |
26 | | int main() { |
27 | | fprintf(stderr, "Please install gflags to run this test... Skipping...\n"); |
28 | | return 0; |
29 | | } |
30 | | #else |
31 | | |
32 | | #include <vector> |
33 | | #include <gflags/gflags.h> |
34 | | |
35 | | #include "yb/rocksdb/filter_policy.h" |
36 | | #include "yb/rocksdb/util/logging.h" |
37 | | #include "yb/rocksdb/util/testharness.h" |
38 | | #include "yb/rocksdb/util/testutil.h" |
39 | | #include "yb/rocksdb/util/arena.h" |
40 | | |
41 | | #include "yb/util/enums.h" |
42 | | #include "yb/util/test_util.h" |
43 | | |
44 | | using GFLAGS::ParseCommandLineFlags; |
45 | | |
46 | | DEFINE_int32(bits_per_key, 10, ""); |
47 | | |
48 | | namespace rocksdb { |
49 | | |
50 | | static const int kVerbose = 1; |
51 | | |
52 | 1.35M | static Slice Key(size_t i, char* buffer) { |
53 | 1.35M | memcpy(buffer, &i, sizeof(i)); |
54 | 1.35M | return Slice(buffer, sizeof(i)); |
55 | 1.35M | } |
56 | | |
57 | 107 | static size_t NextLength(size_t length) { |
58 | 107 | if (length < 10) { |
59 | 27 | length += 1; |
60 | 80 | } else if (length < 100) { |
61 | 27 | length += 10; |
62 | 53 | } else if (length < 1000) { |
63 | 27 | length += 100; |
64 | 26 | } else { |
65 | 26 | length += 1000; |
66 | 26 | } |
67 | 107 | return length; |
68 | 107 | } |
69 | | |
70 | | class BloomTest : public RocksDBTest { |
71 | | private: |
72 | | const FilterPolicy* policy_; |
73 | | std::string filter_; |
74 | | std::vector<std::string> keys_; |
75 | | |
76 | | public: |
77 | | BloomTest() : policy_( |
78 | 3 | NewBloomFilterPolicy(FLAGS_bits_per_key)) {} |
79 | | |
80 | 3 | ~BloomTest() { |
81 | 3 | delete policy_; |
82 | 3 | } |
83 | | |
84 | 37 | void Reset() { |
85 | 37 | keys_.clear(); |
86 | 37 | filter_.clear(); |
87 | 37 | } |
88 | | |
89 | 59.9k | void Add(const Slice& s) { |
90 | 59.9k | keys_.push_back(s.ToString()); |
91 | 59.9k | } |
92 | | |
93 | 38 | void Build() { |
94 | 38 | std::vector<Slice> key_slices; |
95 | 60.0k | for (size_t i = 0; i < keys_.size(); i++) { |
96 | 59.9k | key_slices.push_back(Slice(keys_[i])); |
97 | 59.9k | } |
98 | 38 | filter_.clear(); |
99 | 38 | policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()), |
100 | 38 | &filter_); |
101 | 38 | keys_.clear(); |
102 | 38 | if (kVerbose >= 2) DumpFilter(); |
103 | 38 | } |
104 | | |
105 | 74 | size_t FilterSize() const { |
106 | 74 | return filter_.size(); |
107 | 74 | } |
108 | | |
109 | 0 | void DumpFilter() { |
110 | 0 | fprintf(stderr, "F("); |
111 | 0 | for (size_t i = 0; i+1 < filter_.size(); i++) { |
112 | 0 | const unsigned int c = static_cast<unsigned int>(filter_[i]); |
113 | 0 | for (int j = 0; j < 8; j++) { |
114 | 0 | fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.'); |
115 | 0 | } |
116 | 0 | } |
117 | 0 | fprintf(stderr, ")\n"); |
118 | 0 | } |
119 | | |
120 | 430k | bool Matches(const Slice& s) { |
121 | 430k | if (!keys_.empty()) { |
122 | 1 | Build(); |
123 | 1 | } |
124 | 430k | return policy_->KeyMayMatch(s, filter_); |
125 | 430k | } |
126 | | |
127 | 37 | double FalsePositiveRate() { |
128 | 37 | char buffer[sizeof(size_t)]; |
129 | 37 | size_t result = 0; |
130 | 370k | for (size_t i = 0; i < 10000; i++) { |
131 | 370k | if (Matches(Key(i + 1000000000, buffer))) { |
132 | 2.93k | result++; |
133 | 2.93k | } |
134 | 370k | } |
135 | 37 | return result / 10000.0; |
136 | 37 | } |
137 | | }; |
138 | | |
139 | 1 | TEST_F(BloomTest, EmptyFilter) { |
140 | 1 | ASSERT_TRUE(!Matches("hello")); |
141 | 1 | ASSERT_TRUE(!Matches("world")); |
142 | 1 | } |
143 | | |
144 | 1 | TEST_F(BloomTest, Small) { |
145 | 1 | Add("hello"); |
146 | 1 | Add("world"); |
147 | 1 | ASSERT_TRUE(Matches("hello")); |
148 | 1 | ASSERT_TRUE(Matches("world")); |
149 | 1 | ASSERT_TRUE(!Matches("x")); |
150 | 1 | ASSERT_TRUE(!Matches("foo")); |
151 | 1 | } |
152 | | |
153 | 1 | TEST_F(BloomTest, VaryingLengths) { |
154 | 1 | char buffer[sizeof(size_t)]; |
155 | | |
156 | | // Count number of filters that significantly exceed the false positive rate |
157 | 1 | size_t mediocre_filters = 0; |
158 | 1 | size_t good_filters = 0; |
159 | | |
160 | 38 | for (size_t length = 1; length <= 10000; length = NextLength(length)) { |
161 | 37 | Reset(); |
162 | 60.0k | for (size_t i = 0; i < length; i++) { |
163 | 59.9k | Add(Key(i, buffer)); |
164 | 59.9k | } |
165 | 37 | Build(); |
166 | | |
167 | 74 | ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length; |
168 | | |
169 | | // All added keys must match |
170 | 60.0k | for (size_t i = 0; i < length; i++) { |
171 | 119k | ASSERT_TRUE(Matches(Key(i, buffer))) |
172 | 119k | << "Length " << length << "; key " << i; |
173 | 59.9k | } |
174 | | |
175 | | // Check false positive rate |
176 | 37 | double rate = FalsePositiveRate(); |
177 | 37 | if (kVerbose >= 1) { |
178 | 37 | LOG(INFO) << StringPrintf( |
179 | 37 | "False positives: %5.2f%% @ length = %6zu ; bytes = %6zu\n", rate * 100.0, length, |
180 | 37 | FilterSize()); |
181 | 37 | } |
182 | 37 | ASSERT_LE(rate, 0.02); // Must not be over 2% |
183 | 37 | if (rate > 0.0125) |
184 | 1 | mediocre_filters++; // Allowed, but not too often |
185 | 36 | else |
186 | 36 | good_filters++; |
187 | 37 | } |
188 | 1 | if (kVerbose >= 1) { |
189 | 1 | LOG(INFO) << StringPrintf("Filters: %zu good, %zu mediocre\n", good_filters, mediocre_filters); |
190 | 1 | } |
191 | 1 | ASSERT_LE(mediocre_filters, good_filters/5); |
192 | 1 | } |
193 | | |
194 | | // Different bits-per-byte |
195 | | |
196 | | class BloomTestContext { |
197 | | public: |
198 | 6 | virtual ~BloomTestContext() {} |
199 | | |
200 | | virtual const FilterPolicy& filter_policy() const = 0; |
201 | | virtual size_t max_keys() const = 0; |
202 | | virtual void CheckFilterSize(size_t filter_size, size_t num_keys) const = 0; |
203 | | }; |
204 | | |
205 | | class FullFilterBloomTestContext : public BloomTestContext { |
206 | | public: |
207 | 77 | const FilterPolicy& filter_policy() const override { return *filter_policy_.get(); } |
208 | | |
209 | 37 | size_t max_keys() const override { return 10000; } |
210 | | |
211 | 36 | void CheckFilterSize(size_t filter_size, size_t num_keys) const override { |
212 | 72 | ASSERT_LE(filter_size, (size_t)((num_keys * 10 / 8) + 128 + 5)) << num_keys; |
213 | 36 | } |
214 | | |
215 | | private: |
216 | | std::unique_ptr<const FilterPolicy> filter_policy_{ |
217 | | NewBloomFilterPolicy(FLAGS_bits_per_key, false)}; |
218 | | }; |
219 | | |
220 | | class FixedSizeFilterBloomTestContext : public BloomTestContext { |
221 | | public: |
222 | 73 | const FilterPolicy& filter_policy() const override { return *filter_policy_.get(); } |
223 | | |
224 | | // For fixed-size filter we limit maximum number of keys depending on total bits in test itself |
225 | | // by checking ShouldFlush(). |
226 | 34 | size_t max_keys() const override { return std::numeric_limits<size_t>::max(); } |
227 | | |
228 | 34 | void CheckFilterSize(size_t filter_size, size_t num_keys) const override { |
229 | 68 | ASSERT_LE(filter_size, FilterPolicy::kDefaultFixedSizeFilterBits / 8 + 64 + 5) << num_keys; |
230 | 34 | } |
231 | | |
232 | | private: |
233 | | std::unique_ptr<const FilterPolicy> filter_policy_{ |
234 | | NewFixedSizeFilterPolicy( |
235 | | FilterPolicy::kDefaultFixedSizeFilterBits, FilterPolicy::kDefaultFixedSizeFilterErrorRate, |
236 | | nullptr)}; |
237 | | }; |
238 | | |
239 | | YB_DEFINE_ENUM(BuilderReaderBloomTestType, (kFullFilter)(kFixedSizeFilter)); |
240 | | |
241 | | namespace { |
242 | | |
243 | 6 | std::unique_ptr<const BloomTestContext> CreateContext(BuilderReaderBloomTestType type) { |
244 | 6 | switch (type) { |
245 | 3 | case BuilderReaderBloomTestType::kFullFilter: |
246 | 3 | return std::make_unique<FullFilterBloomTestContext>(); |
247 | 3 | case BuilderReaderBloomTestType::kFixedSizeFilter: |
248 | 3 | return std::make_unique<FixedSizeFilterBloomTestContext>(); |
249 | 0 | } |
250 | 0 | FATAL_INVALID_ENUM_VALUE(BuilderReaderBloomTestType, type); |
251 | 0 | } |
252 | | |
253 | | } // namespace |
254 | | |
255 | | class BuilderReaderBloomTest : public RocksDBTest, |
256 | | public ::testing::WithParamInterface<BuilderReaderBloomTestType> { |
257 | | protected: |
258 | | std::unique_ptr<const BloomTestContext> context_; |
259 | | std::unique_ptr<FilterBitsBuilder> bits_builder_; |
260 | | std::unique_ptr<FilterBitsReader> bits_reader_; |
261 | | std::unique_ptr<const char[]> buf_; |
262 | | size_t filter_size_; |
263 | | |
264 | | public: |
265 | | BuilderReaderBloomTest() : |
266 | | context_(CreateContext(GetParam())), |
267 | 6 | filter_size_(0) { |
268 | 6 | Reset(); |
269 | 6 | } |
270 | | |
271 | 76 | void Reset() { |
272 | 76 | bits_builder_.reset(context_->filter_policy().GetFilterBitsBuilder()); |
273 | 76 | bits_reader_.reset(nullptr); |
274 | 76 | buf_.reset(nullptr); |
275 | 76 | filter_size_ = 0; |
276 | 76 | } |
277 | | |
278 | 82.7k | void Add(const Slice& s) { |
279 | 82.7k | bits_builder_->AddKey(s); |
280 | 82.7k | } |
281 | | |
282 | 82.8k | bool ShouldFlush() { |
283 | 82.8k | return bits_builder_->IsFull(); |
284 | 82.8k | } |
285 | | |
286 | 74 | void Build() { |
287 | 74 | Slice filter = bits_builder_->Finish(&buf_); |
288 | 74 | bits_reader_.reset(context_->filter_policy().GetFilterBitsReader(filter)); |
289 | 74 | filter_size_ = filter.size(); |
290 | 74 | } |
291 | | |
292 | 140 | size_t FilterSize() const { |
293 | 140 | return filter_size_; |
294 | 140 | } |
295 | | |
296 | 782k | bool Matches(const Slice& s) { |
297 | 782k | if (bits_reader_ == nullptr) { |
298 | 4 | Build(); |
299 | 4 | } |
300 | 782k | return bits_reader_->MayMatch(s); |
301 | 782k | } |
302 | | |
303 | 70 | double FalsePositiveRate() { |
304 | 70 | char buffer[sizeof(size_t)]; |
305 | 70 | size_t result = 0; |
306 | 700k | for (size_t i = 0; i < 10000; i++) { |
307 | 700k | if (Matches(Key(i + 1000000000, buffer))) { |
308 | 2.30k | result++; |
309 | 2.30k | } |
310 | 700k | } |
311 | 70 | return result / 10000.0; |
312 | 70 | } |
313 | | }; |
314 | | |
315 | 2 | TEST_P(BuilderReaderBloomTest, FullEmptyFilter) { |
316 | | // Empty filter is not match, at this level |
317 | 2 | ASSERT_TRUE(!Matches("hello")); |
318 | 2 | ASSERT_TRUE(!Matches("world")); |
319 | 2 | } |
320 | | |
321 | 2 | TEST_P(BuilderReaderBloomTest, FullSmall) { |
322 | 2 | Add("hello"); |
323 | 2 | Add("world"); |
324 | 2 | ASSERT_TRUE(Matches("hello")); |
325 | 2 | ASSERT_TRUE(Matches("world")); |
326 | 2 | ASSERT_TRUE(!Matches("x")); |
327 | 2 | ASSERT_TRUE(!Matches("foo")); |
328 | 2 | } |
329 | | |
330 | 2 | TEST_P(BuilderReaderBloomTest, FullVaryingLengths) { |
331 | 2 | char buffer[sizeof(size_t)]; |
332 | | |
333 | | // Count number of filters that significantly exceed the false positive rate |
334 | 2 | size_t mediocre_filters = 0; |
335 | 2 | size_t good_filters = 0; |
336 | | |
337 | 72 | for (size_t length = 1; !ShouldFlush() && length < context_->max_keys(); |
338 | 70 | length = NextLength(length)) { |
339 | 70 | Reset(); |
340 | 82.8k | for (size_t i = 0; i < length; i++) { |
341 | 82.7k | Add(Key(i, buffer)); |
342 | 82.7k | if (ShouldFlush()) { |
343 | 1 | length = i + 1; |
344 | 1 | break; |
345 | 1 | } |
346 | 82.7k | } |
347 | 70 | Build(); |
348 | | |
349 | 70 | context_->CheckFilterSize(FilterSize(), length); |
350 | | |
351 | | // All added keys must match |
352 | 82.8k | for (size_t i = 0; i < length; i++) { |
353 | 165k | ASSERT_TRUE(Matches(Key(i, buffer))) << "Length " << length << "; key " << i; |
354 | 82.7k | } |
355 | | |
356 | | // Check false positive rate |
357 | 70 | double rate = FalsePositiveRate(); |
358 | 70 | if (kVerbose >= 1) { |
359 | 70 | LOG(INFO) << StringPrintf( |
360 | 70 | "False positives: %5.2f%% @ length = %6zu ; bytes = %6zu\n", rate * 100.0, length, |
361 | 70 | FilterSize()); |
362 | 70 | } |
363 | 70 | ASSERT_LE(rate, 0.02); // Must not be over 2% |
364 | 70 | if (rate > 0.0125) |
365 | 3 | mediocre_filters++; // Allowed, but not too often |
366 | 67 | else |
367 | 67 | good_filters++; |
368 | 70 | } |
369 | 2 | if (kVerbose >= 1) { |
370 | 2 | LOG(INFO) << StringPrintf("Filters: %zu good, %zu mediocre\n", good_filters, mediocre_filters); |
371 | 2 | } |
372 | 2 | ASSERT_LE(mediocre_filters, good_filters/5); |
373 | 2 | } |
374 | | |
375 | | INSTANTIATE_TEST_CASE_P(, BuilderReaderBloomTest, ::testing::Values( |
376 | | BuilderReaderBloomTestType::kFullFilter, |
377 | | BuilderReaderBloomTestType::kFixedSizeFilter)); |
378 | | |
379 | | } // namespace rocksdb |
380 | | |
381 | 13.2k | int main(int argc, char** argv) { |
382 | 13.2k | ::testing::InitGoogleTest(&argc, argv); |
383 | 13.2k | ParseCommandLineFlags(&argc, &argv, true); |
384 | | |
385 | 13.2k | return RUN_ALL_TESTS(); |
386 | 13.2k | } |
387 | | |
388 | | #endif // GFLAGS |