/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/bloom.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #include <math.h> |
25 | | |
26 | | #include "yb/rocksdb/filter_policy.h" |
27 | | |
28 | | #include "yb/rocksdb/util/hash.h" |
29 | | #include "yb/rocksdb/util/coding.h" |
30 | | #include "yb/util/slice.h" |
31 | | #include "yb/util/math_util.h" |
32 | | |
33 | | namespace rocksdb { |
34 | | |
35 | | class BlockBasedFilterBlockBuilder; |
36 | | class FullFilterBlockBuilder; |
37 | | class FixedSizeFilterBlockBuilder; |
38 | | typedef FilterPolicy::FilterType FilterType; |
39 | | |
40 | | namespace { |
41 | | static const double LOG2 = log(2); |
42 | | |
43 | | inline void AddHash(uint32_t h, char* data, size_t num_lines, size_t total_bits, |
44 | 40.8M | size_t num_probes) { |
45 | 40.8M | DCHECK_GT(num_lines, 0); |
46 | 40.8M | DCHECK_GT(total_bits, 0); |
47 | 40.8M | DCHECK_EQ(num_lines % 2, 1); |
48 | | |
49 | 40.8M | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
50 | 40.8M | size_t b = (h % num_lines) * (CACHE_LINE_SIZE * 8); |
51 | | |
52 | 286M | for (uint32_t i = 0; i < num_probes; ++i245M ) { |
53 | | // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized |
54 | | // to a simple operation by compiler. |
55 | 245M | const size_t bitpos = b + (h % (CACHE_LINE_SIZE * 8)); |
56 | 245M | assert(bitpos < total_bits); |
57 | 0 | data[bitpos / 8] |= (1 << (bitpos % 8)); |
58 | | |
59 | 245M | h += delta; |
60 | 245M | } |
61 | 40.8M | } |
62 | | |
// Builds a "full" bloom filter covering all keys of an SST file at once.
// Keys are hashed as they arrive (AddKey) and the bit array is sized and
// populated lazily in Finish(), once the key count is known.
class FullFilterBitsBuilder : public FilterBitsBuilder {
 public:
  // bits_per_key: filter bits budgeted per key (must be non-zero).
  // num_probes: number of hash probes per key.
  explicit FullFilterBitsBuilder(const size_t bits_per_key,
                                 const size_t num_probes)
      : bits_per_key_(bits_per_key),
        num_probes_(num_probes) {
    assert(bits_per_key_);
  }

  ~FullFilterBitsBuilder() {}

  // Records the hash of the key. Only the hash is kept, not the key bytes.
  // A key whose hash equals the immediately preceding one is skipped —
  // cheap dedup for sorted input with duplicate keys.
  void AddKey(const Slice& key) override {
    uint32_t hash = BloomHash(key);
    if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
      hash_entries_.push_back(hash);
    }
  }

  // Create a filter that for hashes [0, n-1], the filter is allocated here
  // When creating filter, it is ensured that
  // total_bits = num_lines * CACHE_LINE_SIZE * 8
  // dst len is >= kMetaDataSize (5), 1 for num_probes, 4 for num_lines
  // Then total_bits = (len - kMetaDataSize) * 8, and cache_line_size could be calculated
  // +----------------------------------------------------------------+
  // |              filter data with length total_bits / 8            |
  // +----------------------------------------------------------------+
  // |                                                                |
  // |                              ...                               |
  // |                                                                |
  // +----------------------------------------------------------------+
  // | ...                | num_probes : 1 byte | num_lines : 4 bytes |
  // +----------------------------------------------------------------+
  //
  // Transfers ownership of the allocated buffer to *buf and resets the
  // builder's accumulated hashes, so the builder can be reused.
  Slice Finish(std::unique_ptr<const char[]>* buf) override {
    uint32_t total_bits, num_lines;
    char* data = ReserveSpace(static_cast<int>(hash_entries_.size()),
                              &total_bits, &num_lines);
    assert(data);

    // total_bits == 0 / num_lines == 0 means an empty filter; only the
    // metadata trailer is emitted in that case.
    if (total_bits != 0 && num_lines != 0) {
      for (auto h : hash_entries_) {
        AddHash(h, data, num_lines, total_bits, num_probes_);
      }
    }
    // Metadata trailer: 1 byte num_probes, then 4 bytes num_lines.
    data[total_bits / 8] = static_cast<char>(num_probes_);
    EncodeFixed32(data + total_bits / 8 + 1, static_cast<uint32_t>(num_lines));

    const char* const_data = data;
    buf->reset(const_data);
    hash_entries_.clear();

    return Slice(data, total_bits / 8 + kMetaDataSize);
  }

  // A full filter never refuses more keys; it grows the bit array at
  // Finish() time instead.
  virtual bool IsFull() const override { return false; }

  static constexpr size_t kMetaDataSize = 5;  // in bytes

 private:
  size_t bits_per_key_;
  size_t num_probes_;
  // Hashes of all added keys, in arrival order (consecutive dups removed).
  std::vector<uint32_t> hash_entries_;

  // Get totalbits that optimized for cpu cache line
  uint32_t GetTotalBitsForLocality(uint32_t total_bits);

  // Reserve space for new filter
  char* ReserveSpace(const int num_entry, uint32_t* total_bits,
                     uint32_t* num_lines);

  // No Copy allowed
  FullFilterBitsBuilder(const FullFilterBitsBuilder&);
  void operator=(const FullFilterBitsBuilder&);
};
136 | | |
137 | 3.49k | uint32_t FullFilterBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) { |
138 | 3.49k | uint32_t num_lines = yb::ceil_div(total_bits, CACHE_LINE_SIZE * 8); |
139 | | |
140 | | // Make num_lines an odd number to make sure more bits are involved |
141 | | // when determining which block. |
142 | 3.49k | if (num_lines % 2 == 0) { |
143 | 445 | num_lines++; |
144 | 445 | } |
145 | 3.49k | return num_lines * (CACHE_LINE_SIZE * 8); |
146 | 3.49k | } |
147 | | |
148 | | char* FullFilterBitsBuilder::ReserveSpace(const int num_entry, |
149 | 3.49k | uint32_t* total_bits, uint32_t* num_lines) { |
150 | 3.49k | assert(bits_per_key_); |
151 | 0 | char* data = nullptr; |
152 | 3.49k | if (num_entry != 0) { |
153 | 3.49k | uint32_t total_bits_tmp = num_entry * static_cast<uint32_t>(bits_per_key_); |
154 | | |
155 | 3.49k | *total_bits = GetTotalBitsForLocality(total_bits_tmp); |
156 | 3.49k | *num_lines = *total_bits / (CACHE_LINE_SIZE * 8); |
157 | 3.49k | assert(*total_bits > 0 && *total_bits % 8 == 0); |
158 | 3.49k | } else { |
159 | | // filter is empty, just leave space for metadata |
160 | 1 | *total_bits = 0; |
161 | 1 | *num_lines = 0; |
162 | 1 | } |
163 | | |
164 | | // Reserve space for Filter |
165 | 0 | uint32_t sz = *total_bits / 8; |
166 | 3.49k | sz += kMetaDataSize; // 4 bytes for num_lines, 1 byte for num_probes |
167 | | |
168 | 3.49k | data = new char[sz]; |
169 | 3.49k | memset(data, 0, sz); |
170 | 3.49k | return data; |
171 | 3.49k | } |
172 | | |
173 | | |
// Reads a serialized full filter (see FullFilterBitsBuilder::Finish for the
// on-disk layout). Does NOT own the filter bytes — `contents` must outlive
// the reader.
class FullFilterBitsReader : public FilterBitsReader {
 public:
  explicit FullFilterBitsReader(const Slice& contents, Logger* logger)
      : logger_(logger),
        data_(const_cast<char*>(contents.cdata())),
        data_len_(static_cast<uint32_t>(contents.size())),
        num_probes_(0),
        num_lines_(0) {
    assert(data_);
    GetFilterMeta(contents, &num_probes_, &num_lines_);
    // Sanitize broken parameters.
    // NOTE(review): this length check assumes the filter was built with this
    // binary's CACHE_LINE_SIZE, while HashMayMatch derives the line size from
    // the data itself — a filter persisted with a different line size would be
    // rejected here. Confirm that is intended.
    if (num_lines_ != 0 && data_len_ != num_lines_ * CACHE_LINE_SIZE +
        FullFilterBitsBuilder::kMetaDataSize) {
      RLOG(InfoLogLevel::ERROR_LEVEL, logger, "Bloom filter data is broken, won't be used.");
      FAIL_IF_NOT_PRODUCTION();
      // Zeroed parameters make MayMatch treat the filter as broken
      // (always-match), never as a false "no".
      num_lines_ = 0;
      num_probes_ = 0;
    }
  }

  ~FullFilterBitsReader() {}

  // Returns false only when the filter proves the key is absent.
  bool MayMatch(const Slice& entry) override {
    if (data_len_ <= FullFilterBitsBuilder::kMetaDataSize) {  // remain same with original filter
      return false;
    }
    // Other error params, including a broken filter, are regarded as a match
    // — a bloom filter may err on the "maybe present" side but never on "absent".
    if (num_probes_ == 0 || num_lines_ == 0) return true;
    uint32_t hash = BloomHash(entry);
    return HashMayMatch(hash, Slice(data_, data_len_),
                        num_probes_, num_lines_);
  }

 private:
  Logger* logger_;
  // Filter meta data (data_ points into caller-owned memory).
  char* data_;
  uint32_t data_len_;
  size_t num_probes_;
  uint32_t num_lines_;

  // Get num_probes, and num_lines from filter
  // If filter format broken, set both to 0.
  void GetFilterMeta(const Slice& filter, size_t* num_probes,
                     uint32_t* num_lines);

  // "filter" contains the data appended by a preceding call to
  // CreateFilterFromHash() on this class. This method must return true if
  // the key was in the list of keys passed to CreateFilter().
  // This method may return true or false if the key was not on the
  // list, but it should aim to return false with a high probability.
  //
  // hash: target to be checked
  // filter: the whole filter, including meta data bytes
  // num_probes: number of probes, read before hand
  // num_lines: filter metadata, read before hand
  // Before calling this function, need to ensure the input meta data
  // is valid.
  bool HashMayMatch(const uint32_t hash, const Slice& filter,
                    const size_t num_probes, const uint32_t num_lines);

  // No Copy allowed
  FullFilterBitsReader(const FullFilterBitsReader&);
  void operator=(const FullFilterBitsReader&);
};
239 | | |
240 | | void FullFilterBitsReader::GetFilterMeta(const Slice& filter, |
241 | 7.87k | size_t* num_probes, uint32_t* num_lines) { |
242 | 7.87k | uint32_t len = static_cast<uint32_t>(filter.size()); |
243 | 7.87k | if (len <= FullFilterBitsBuilder::kMetaDataSize) { |
244 | | // filter is empty or broken |
245 | 2 | *num_probes = 0; |
246 | 2 | *num_lines = 0; |
247 | 2 | return; |
248 | 2 | } |
249 | | |
250 | 7.87k | *num_probes = filter.data()[len - FullFilterBitsBuilder::kMetaDataSize]; |
251 | 7.87k | *num_lines = DecodeFixed32(filter.data() + len - 4); |
252 | 7.87k | } |
253 | | |
254 | | inline bool FullFilterBitsReader::HashMayMatch(const uint32_t hash, const Slice& filter, |
255 | 14.0M | const size_t num_probes, const uint32_t num_lines) { |
256 | 14.0M | uint32_t len = static_cast<uint32_t>(filter.size()); |
257 | 14.0M | if (len <= FullFilterBitsBuilder::kMetaDataSize) |
258 | 0 | return false; // Remain the same with original filter. |
259 | | |
260 | | // It is ensured the params are valid before calling it |
261 | 14.0M | assert(num_probes != 0); |
262 | 0 | assert(num_lines != 0 && |
263 | 14.0M | (len - FullFilterBitsBuilder::kMetaDataSize) % num_lines == 0); |
264 | | // cache_line_size is calculated here based on filter metadata instead of using CACHE_LINE_SIZE. |
265 | | // The reason may be to support deserialization of filters which are already persisted in case we |
266 | | // change CACHE_LINE_SIZE or if machine architecture is changed. |
267 | 0 | uint32_t cache_line_size = (len - FullFilterBitsBuilder::kMetaDataSize) / num_lines; |
268 | 14.0M | const char* data = filter.cdata(); |
269 | | |
270 | 14.0M | uint32_t h = hash; |
271 | 14.0M | const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits |
272 | 14.0M | uint32_t b = (h % num_lines) * (cache_line_size * 8); |
273 | | |
274 | 57.8M | for (uint32_t i = 0; i < num_probes; ++i43.7M ) { |
275 | | // Since CACHE_LINE_SIZE is defined as 2^n, this line will be optimized |
276 | | // to a simple and operation by compiler. |
277 | 50.6M | const uint32_t bitpos = b + (h % (cache_line_size * 8)); |
278 | 50.6M | if (((data[bitpos / 8]) & (1 << (bitpos % 8))) == 0) { |
279 | 6.89M | return false; |
280 | 6.89M | } |
281 | | |
282 | 43.7M | h += delta; |
283 | 43.7M | } |
284 | | |
285 | 7.13M | return true; |
286 | 14.0M | } |
287 | | |
// An implementation of filter policy: the classic LevelDB/RocksDB bloom
// filter. Supports two modes:
//  - block-based (CreateFilter/KeyMayMatch): one small filter per data block,
//  - full filter (GetFilterBitsBuilder/Reader): one filter per SST file.
class BloomFilterPolicy : public FilterPolicy {
 public:
  explicit BloomFilterPolicy(int bits_per_key, bool use_block_based_builder)
      : bits_per_key_(bits_per_key), hash_func_(BloomHash),
        use_block_based_builder_(use_block_based_builder) {
    initialize();
  }

  ~BloomFilterPolicy() {
  }

  FilterType GetFilterType() const override {
    return use_block_based_builder_ ? FilterType::kBlockBasedFilter : FilterType::kFullFilter;
  }

  // Name is persisted with the filter; changing it invalidates existing
  // filters on disk.
  const char* Name() const override {
    return "rocksdb.BuiltinBloomFilter";
  }

  // Block-based path: appends to *dst a bloom filter over keys[0..n-1]
  // followed by one byte recording num_probes_ (so readers built with
  // different parameters can still interpret it).
  void CreateFilter(const Slice* keys, int n,
                    std::string* dst) const override {
    // Compute bloom filter size (in both bits and bytes)
    size_t bits = n * bits_per_key_;

    // For small n, we can see a very high false positive rate. Fix it
    // by enforcing a minimum bloom filter length.
    bits = std::max<size_t>(bits, 128);

    // Round up to a whole number of bytes.
    size_t bytes = (bits + 7) / 8;
    bits = bytes * 8;

    const size_t init_size = dst->size();
    dst->resize(init_size + bytes, 0);
    dst->push_back(static_cast<char>(num_probes_));  // Remember # of probes
    char* array = &(*dst)[init_size];
    for (size_t i = 0; i < (size_t)n; i++) {
      // Use double-hashing to generate a sequence of hash values.
      // See analysis in [Kirsch,Mitzenmacher 2006].
      uint32_t h = hash_func_(keys[i]);
      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
      for (size_t j = 0; j < num_probes_; j++) {
        const uint32_t bitpos = h % bits;
        array[bitpos / 8] |= (1 << (bitpos % 8));
        h += delta;
      }
    }
  }

  // Block-based path: checks `key` against a filter produced by
  // CreateFilter. Returns false only when the key is definitely absent.
  bool KeyMayMatch(const Slice& key,
                   const Slice& bloom_filter) const override {
    const size_t len = bloom_filter.size();
    if (len < 2) return false;

    const char* array = bloom_filter.cdata();
    const size_t bits = (len - 1) * 8;

    // Use the encoded k so that we can read filters generated by
    // bloom filters created using different parameters.
    // Note: array[len-1] is read through (possibly signed) char; values
    // outside [0, 30] — including any that would sign-extend — fall into the
    // k > 30 branch below and are treated as "match".
    const size_t k = array[len-1];
    if (k > 30) {
      // Reserved for potentially new encodings for short bloom filters.
      // Consider it a match.
      return true;
    }

    uint32_t h = hash_func_(key);
    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    for (size_t j = 0; j < k; j++) {
      const uint32_t bitpos = h % bits;
      if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
      h += delta;
    }
    return true;
  }

  // Full-filter path. Returns nullptr in block-based mode, which signals the
  // caller to use CreateFilter/KeyMayMatch instead.
  FilterBitsBuilder* GetFilterBitsBuilder() const override {
    if (use_block_based_builder_) {
      return nullptr;
    }

    return new FullFilterBitsBuilder(bits_per_key_, num_probes_);
  }

  FilterBitsReader* GetFilterBitsReader(const Slice& contents)
      const override {
    return new FullFilterBitsReader(contents, nullptr);
  }

  // If choose to use block based builder
  bool UseBlockBasedBuilder() { return use_block_based_builder_; }

 private:
  size_t bits_per_key_;
  size_t num_probes_;       // derived from bits_per_key_, clamped to [1, 30]
  uint32_t (*hash_func_)(const Slice& key);

  const bool use_block_based_builder_;

  // Derives the number of probes from bits_per_key_.
  void initialize() {
    // We intentionally round down to reduce probing cost a little bit
    num_probes_ = static_cast<size_t>(bits_per_key_ * 0.69);  // 0.69 =~ ln(2)
    if (num_probes_ < 1) num_probes_ = 1;
    if (num_probes_ > 30) num_probes_ = 30;
  }
};
394 | | |
// A fixed size filter bits builder will build (with memory allocation)
// and return a Bloom filter of given size and expected false positive rate.
//
// The fixed size Bloom filter has the following encoding:
// For a given number of total bits M and the error rate p,
// we will return a block of M + 40 bits, with 40 bits for metadata
// and M bits for filter data.
// For compliance with FullFilter, the metadata will be encoded
// the same way as in FullFilter.
//
// For detailed proofs on the optimal number of keys and hash functions
// please refer to https://en.wikipedia.org/wiki/Bloom_filter.
//
// The number of hash function given error rate p is -ln p / ln 2.
// The maximum number of keys that can be inserted in a Bloom filter of m bits
// so that one maintains the false positive error rate p is -m (ln 2)^2 / ln p.
class FixedSizeFilterBitsBuilder : public FilterBitsBuilder {
 public:
  FixedSizeFilterBitsBuilder(const FixedSizeFilterBitsBuilder&) = delete;
  void operator=(const FixedSizeFilterBitsBuilder&) = delete;

  // total_bits: requested filter size (rounded to whole cache lines below).
  // error_rate: target false-positive probability, in (0, 1).
  FixedSizeFilterBitsBuilder(size_t total_bits, double error_rate)
      : error_rate_(error_rate) {
    DCHECK_GT(error_rate, 0);
    DCHECK_GT(total_bits, 0);
    num_lines_ = yb::ceil_div<size_t>(total_bits, CACHE_LINE_SIZE * 8);
    // AddHash implementation gives much higher false positive rate when num_lines_ is even, so
    // make sure it is odd.
    if (num_lines_ % 2 == 0) {
      // For small filter blocks - add one line, so we can have enough keys in block.
      // For bigger filter block - remove one line, so filter block will fit desired size.
      if (num_lines_ * CACHE_LINE_SIZE < 4096) {
        num_lines_++;
      } else {
        num_lines_--;
      }
    }
    total_bits_ = num_lines_ * CACHE_LINE_SIZE * 8;

    // num_probes = -ln(p) / ln(2), clamped to [1, 255] so it fits the
    // single metadata byte.
    const double minus_log_error_rate = -log(error_rate_);
    DCHECK_GT(minus_log_error_rate, 0);
    num_probes_ = static_cast<size_t> (minus_log_error_rate / LOG2);
    num_probes_ = std::max<size_t>(num_probes_, 1);
    num_probes_ = std::min<size_t>(num_probes_, 255);
    // max_keys = m * (ln 2)^2 / -ln(p): capacity at which the target error
    // rate is still maintained.
    const double max_keys = total_bits_ * LOG2 * LOG2 / minus_log_error_rate;
    DCHECK_LT(max_keys, std::numeric_limits<size_t>::max());
    max_keys_ = static_cast<size_t> (max_keys);
    keys_added_ = 0;

    // TODO - add tests verifying that after inserting max_keys we will have required error rate

    data_.reset(new char[FilterSize()]);
    memset(data_.get(), 0, FilterSize());
  }

  // Immediately sets the key's probe bits (no buffering, unlike
  // FullFilterBitsBuilder).
  virtual void AddKey(const Slice& key) override {
    ++keys_added_;
    uint32_t hash = BloomHash(key);
    AddHash(hash, data_.get(), num_lines_, total_bits_, num_probes_);
  }

  // True once the key budget for the target error rate is exhausted;
  // callers should then start a new filter block.
  virtual bool IsFull() const override { return keys_added_ >= max_keys_; }

  // Appends the metadata trailer and hands the buffer to *buf.
  // The builder must not be used afterwards (data_ is released).
  virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
    data_[total_bits_ / 8] = static_cast<char>(num_probes_);
    EncodeFixed32(data_.get() + total_bits_ / 8 + 1, static_cast<uint32_t>(num_lines_));
    buf->reset(data_.release());
    return Slice(buf->get(), FilterSize());
  }

  // Serialization format is the same as for FullFilter.
  static constexpr size_t kMetaDataSize = FullFilterBitsBuilder::kMetaDataSize;

 private:

  // Full serialized size: filter bits plus metadata trailer, in bytes.
  inline size_t FilterSize() { return total_bits_ / 8 + kMetaDataSize; }

  std::unique_ptr<char[]> data_;
  size_t max_keys_;     // capacity for the target error rate
  size_t keys_added_;
  size_t total_bits_;   // total number of bits used for filter (excluding metadata)
  size_t num_lines_;    // cache lines in the filter; kept odd (see ctor)
  double error_rate_;
  size_t num_probes_;   // number of hash functions
};
480 | | |
// Reader for filters produced by FixedSizeFilterBitsBuilder. The
// serialization format matches FullFilter, so all behavior is inherited from
// FullFilterBitsReader; this type exists only to name the fixed-size variant.
class FixedSizeFilterBitsReader : public FullFilterBitsReader {
 public:
  FixedSizeFilterBitsReader(const FixedSizeFilterBitsReader&) = delete;
  void operator=(const FixedSizeFilterBitsReader&) = delete;

  explicit FixedSizeFilterBitsReader(const Slice& contents, Logger* logger)
      : FullFilterBitsReader(contents, logger) {}
};
489 | | |
// Filter policy producing fixed-size bloom filter blocks
// (FixedSizeFilterBitsBuilder/Reader): the filter size and target error rate
// are chosen up front instead of being derived from the key count.
class FixedSizeFilterPolicy : public FilterPolicy {
 public:
  // logger may be nullptr; it is passed through to readers for reporting
  // broken filter data.
  explicit FixedSizeFilterPolicy(size_t total_bits, double error_rate, Logger* logger)
      : total_bits_(total_bits),
        error_rate_(error_rate),
        logger_(logger) {
    DCHECK_GT(error_rate, 0);
    // Make sure num_probes > 0.
    DCHECK_GT(static_cast<int64_t> (-log(error_rate) / LOG2), 0);
  }

  virtual FilterType GetFilterType() const override { return FilterType::kFixedSizeFilter; }

  virtual const char* Name() const override {
    return "rocksdb.FixedSizeBloomFilter";
  }

  // Not used in FixedSizeFilter. GetFilterBitsBuilder/Reader interface should be used.
  // Asserts in debug builds; a no-op in release builds.
  virtual void CreateFilter(const Slice* keys, int n,
                            std::string* dst) const override {
    assert(!"FixedSizeFilterPolicy::CreateFilter is not supported");
  }

  // Not supported: asserts in debug builds; always "may match" in release
  // builds (safe direction for a bloom filter).
  virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
    assert(!"FixedSizeFilterPolicy::KeyMayMatch is not supported");
    return true;
  }

  virtual FilterBitsBuilder* GetFilterBitsBuilder() const override {
    return new FixedSizeFilterBitsBuilder(total_bits_, error_rate_);
  }

  virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
    return new FixedSizeFilterBitsReader(contents, logger_);
  }


 private:
  size_t total_bits_;   // requested filter size in bits
  double error_rate_;   // target false-positive probability
  Logger* logger_;      // may be nullptr
};
532 | | |
533 | | } // namespace |
534 | | |
535 | | const FilterPolicy* NewBloomFilterPolicy(int bits_per_key, |
536 | 334 | bool use_block_based_builder) { |
537 | 334 | return new BloomFilterPolicy(bits_per_key, use_block_based_builder); |
538 | | // TODO - replace by NewFixedSizeFilterPolicy and check tests. |
539 | 334 | } |
540 | | |
541 | | const FilterPolicy* NewFixedSizeFilterPolicy(size_t total_bits, |
542 | | double error_rate, |
543 | 1.54M | Logger* logger) { |
544 | 1.54M | return new FixedSizeFilterPolicy(total_bits, error_rate, logger); |
545 | 1.54M | } |
546 | | |
547 | | } // namespace rocksdb |