/Users/deen/code/yugabyte-db/src/yb/util/bloom_filter.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #include <math.h> |
33 | | |
34 | | #include <glog/logging.h> |
35 | | |
36 | | #include "yb/util/bloom_filter.h" |
37 | | #include "yb/util/bitmap.h" |
38 | | |
39 | | namespace yb { |
40 | | |
41 | | static double kNaturalLog2 = 0.69314; |
42 | | |
43 | 1 | static int ComputeOptimalHashCount(size_t n_bits, size_t elems) { |
44 | 1 | int n_hashes = n_bits * kNaturalLog2 / elems; |
45 | 1 | if (n_hashes < 1) n_hashes = 10 ; |
46 | 1 | return n_hashes; |
47 | 1 | } |
48 | | |
49 | | BloomFilterSizing BloomFilterSizing::ByCountAndFPRate( |
50 | 1 | size_t expected_count, double fp_rate) { |
51 | 1 | CHECK_GT(fp_rate, 0); |
52 | 1 | CHECK_LT(fp_rate, 1); |
53 | | |
54 | 1 | double n_bits = -static_cast<double>(expected_count) * log(fp_rate) |
55 | 1 | / kNaturalLog2 / kNaturalLog2; |
56 | 1 | int n_bytes = static_cast<int>(ceil(n_bits / 8)); |
57 | 1 | CHECK_GT(n_bytes, 0) |
58 | 0 | << "expected_count: " << expected_count |
59 | 0 | << " fp_rate: " << fp_rate; |
60 | 1 | return BloomFilterSizing(n_bytes, expected_count); |
61 | 1 | } |
62 | | |
63 | 0 | BloomFilterSizing BloomFilterSizing::BySizeAndFPRate(size_t n_bytes, double fp_rate) { |
64 | 0 | size_t n_bits = n_bytes * 8; |
65 | 0 | double expected_elems = -static_cast<double>(n_bits) * kNaturalLog2 * kNaturalLog2 / |
66 | 0 | log(fp_rate); |
67 | 0 | DCHECK_GT(expected_elems, 1); |
68 | 0 | return BloomFilterSizing(n_bytes, (size_t)ceil(expected_elems)); |
69 | 0 | } |
70 | | |
71 | | |
72 | | BloomFilterBuilder::BloomFilterBuilder(const BloomFilterSizing &sizing) |
73 | | : n_bits_(sizing.n_bytes() * 8), |
74 | | bitmap_(new uint8_t[sizing.n_bytes()]), |
75 | | n_hashes_(ComputeOptimalHashCount(n_bits_, sizing.expected_count())), |
76 | | expected_count_(sizing.expected_count()), |
77 | 1 | n_inserted_(0) { |
78 | 1 | Clear(); |
79 | 1 | } |
80 | | |
81 | 1 | void BloomFilterBuilder::Clear() { |
82 | 1 | memset(&bitmap_[0], 0, n_bytes()); |
83 | 1 | n_inserted_ = 0; |
84 | 1 | } |
85 | | |
86 | 1 | double BloomFilterBuilder::false_positive_rate() const { |
87 | 1 | CHECK_NE(expected_count_, 0) |
88 | 0 | << "expected_count_ not initialized: can't call this function on " |
89 | 0 | << "a BloomFilter initialized from external data"; |
90 | | |
91 | 1 | return pow(1 - exp(-static_cast<double>(n_hashes_) * expected_count_ / n_bits_), n_hashes_); |
92 | 1 | } |
93 | | |
94 | | BloomFilter::BloomFilter(const Slice &data, size_t n_hashes) |
95 | | : n_bits_(data.size() * 8), |
96 | | bitmap_(reinterpret_cast<const uint8_t *>(data.data())), |
97 | | n_hashes_(n_hashes) |
98 | 1 | {} |
99 | | |
100 | | |
101 | | |
102 | | } // namespace yb |