/Users/deen/code/yugabyte-db/src/yb/rocksdb/filter_policy.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. |
6 | | // Use of this source code is governed by a BSD-style license that can be |
7 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
8 | | // |
9 | | // The following only applies to changes made to this file as part of YugaByte development. |
10 | | // |
11 | | // Portions Copyright (c) YugaByte, Inc. |
12 | | // |
13 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
14 | | // in compliance with the License. You may obtain a copy of the License at |
15 | | // |
16 | | // http://www.apache.org/licenses/LICENSE-2.0 |
17 | | // |
18 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
19 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
20 | | // or implied. See the License for the specific language governing permissions and limitations |
21 | | // under the License. |
22 | | // |
23 | | // A database can be configured with a custom FilterPolicy object. |
24 | | // This object is responsible for creating a small filter from a set |
25 | | // of keys. These filters are stored in rocksdb and are consulted |
26 | | // automatically by rocksdb to decide whether or not to read some |
27 | | // information from disk. In many cases, a filter can cut down the |
28 | | // number of disk seeks form a handful to a single disk seek per |
29 | | // DB::Get() call. |
30 | | // |
31 | | // Most people will want to use the builtin bloom filter support (see |
32 | | // NewBloomFilterPolicy() below). |
33 | | |
34 | | #ifndef YB_ROCKSDB_FILTER_POLICY_H |
35 | | #define YB_ROCKSDB_FILTER_POLICY_H |
36 | | |
37 | | #include <string> |
38 | | #include <memory> |
39 | | |
40 | | #include "yb/rocksdb/env.h" |
41 | | #include "yb/util/slice.h" |
42 | | |
43 | | namespace rocksdb { |
44 | | |
45 | | // A class that takes a bunch of keys, then generates filter |
46 | | class FilterBitsBuilder { |
47 | | public: |
48 | 6.71k | virtual ~FilterBitsBuilder() {} |
49 | | |
50 | | // Add Key to filter, you could use any way to store the key. |
51 | | // Such as: storing hashes or original keys |
52 | | // Keys are in sorted order and duplicated keys are possible. |
53 | | virtual void AddKey(const Slice& key) = 0; |
54 | | |
55 | | // Generate the filter using the keys that are added |
56 | | // The return value of this function would be the filter bits, |
57 | | // The ownership of actual data is set to buf |
58 | | virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0; |
59 | | |
60 | | // Checks whether bits builder is full and we should start a new bloom filter block. |
61 | | virtual bool IsFull() const = 0; |
62 | | }; |
63 | | |
64 | | // A class that checks if a key can be in filter |
65 | | // It should be initialized by Slice generated by BitsBuilder |
66 | | class FilterBitsReader { |
67 | | public: |
68 | 4.21k | virtual ~FilterBitsReader() {} |
69 | | |
70 | | // Check if the entry match the bits in filter |
71 | | virtual bool MayMatch(const Slice& entry) = 0; |
72 | | }; |
73 | | |
74 | | // We add a new format of filter block called full filter block |
75 | | // This new interface gives you more space of customization |
76 | | // |
77 | | // For the full filter block, you can plug in your version by implement |
78 | | // the FilterBitsBuilder and FilterBitsReader |
79 | | // |
80 | | // There are two sets of interface in FilterPolicy |
81 | | // Set 1: CreateFilter, KeyMayMatch: used for blockbased filter |
82 | | // Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for |
83 | | // full filter and fixed-size filter. |
84 | | // Either Set 1 or Set 2 MUST be implemented correctly. |
85 | | // RocksDB would first try using functions in Set 2. if they return nullptr, |
86 | | // it would use Set 1 instead. |
87 | | // You can choose filter type in NewBloomFilterPolicy. |
88 | | class FilterPolicy { |
89 | | public: |
90 | | virtual ~FilterPolicy(); |
91 | | |
92 | | // Return the name of this policy. Note that if the filter encoding |
93 | | // changes in an incompatible way, the name returned by this method |
94 | | // must be changed. Otherwise, old incompatible filters may be |
95 | | // passed to methods of this type. |
96 | | virtual const char* Name() const = 0; |
97 | | |
98 | | // keys[0,n-1] contains a list of keys (potentially with duplicates) |
99 | | // that are ordered according to the user supplied comparator. |
100 | | // Append a filter that summarizes keys[0,n-1] to *dst. |
101 | | // |
102 | | // Warning: do not change the initial contents of *dst. Instead, |
103 | | // append the newly constructed filter to *dst. |
104 | | virtual void CreateFilter(const Slice* keys, int n, std::string* dst) |
105 | | const = 0; |
106 | | |
107 | | // "filter" contains the data appended by a preceding call to |
108 | | // CreateFilter() on this class. This method must return true if |
109 | | // the key was in the list of keys passed to CreateFilter(). |
110 | | // This method may return true or false if the key was not on the |
111 | | // list, but it should aim to return false with a high probability. |
112 | | virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; |
113 | | |
114 | | // Get the FilterBitsBuilder, which is used for full filter block and fixed size filter block. |
115 | | // It contains interface to take individual key, then generate filter. |
116 | 0 | virtual FilterBitsBuilder* GetFilterBitsBuilder() const { |
117 | 0 | return nullptr; |
118 | 0 | } |
119 | | |
120 | | // Get the FilterBitsReader, which is used for full filter block and fixed size filter block. |
121 | | // It contains interface to tell if key can be in filter. |
122 | | // The input slice should NOT be deleted by FilterPolicy. |
123 | 0 | virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) const { |
124 | 0 | return nullptr; |
125 | 0 | } |
126 | | |
127 | | // Filter type that will be used for this table. |
128 | | enum FilterType { |
129 | | // No filter is used. |
130 | | kNoFilter, |
131 | | |
132 | | // One monolithic full filter per SSTable, with keys buffering while building. |
133 | | kFullFilter, |
134 | | |
135 | | // Block based filter, with one filter block corresponding to each data block. |
136 | | kBlockBasedFilter, |
137 | | |
138 | | // Fixed size filter without key buffering. |
139 | | kFixedSizeFilter |
140 | | }; |
141 | | |
142 | | // Returns filter type to be used based on this policy. |
143 | | virtual FilterType GetFilterType() const = 0; |
144 | | |
145 | | class KeyTransformer { |
146 | | public: |
147 | 160 | virtual ~KeyTransformer() {} |
148 | | |
149 | | // Transform a key. |
150 | | virtual Slice Transform(Slice key) const = 0; |
151 | | }; |
152 | | |
153 | | // Filter policy can optionally return key transformer to be used before writing key to filter or |
154 | | // testing key against filter and building/reading filter index based on keys (used for fixed-size |
155 | | // bloom filter). This method is used by BlockBasedTable(Reader)/BlockBasedTableBuilder. |
156 | | // If KeyTransformer returns empty filter key - this is treated as matching the filter by |
157 | | // BlockBasedTableReader. This allows us to support disabling bloom filters for old versions |
158 | | // of filter policies if they have bugs. |
159 | | // Because of this when BlockBasedTableBuilder is getting empty filter key - there is no need to |
160 | | // add it into the filter block. |
161 | | // |
162 | | // Requires: order of keys defined by BytewiseComparator shouldn't be broken by key transformer, |
163 | | // which means if `BytewiseComparator::Compare(a, b) <= 0` then for non-empty a_transformed and |
164 | | // b_transformed `BytewiseComparator::Compare(a_tranformed, b_transformed) <= 0` must hold. |
165 | | // Returning empty keys affecting the order is acceptable, for example: "AAA123" <= "BBB456", |
166 | | // but it is acceptable to transform it to: "AAA" > "", because we treat empty filter keys |
167 | | // specifically (see previous section). |
168 | | // |
169 | | // Actually we can use ColumnFamilyOptions::prefix_extractor instead and switch off whole key |
170 | | // filtering by setting BlockBasedTableOptions::whole_key_filtering to false. But we want |
171 | | // key transformation algorithm to be part of filter policy, so we can: |
172 | | // 1) detect which key transformation to use based on policy serialized into filter meta block. |
173 | | // 2) support multiple filter policies with their own key transformations. |
174 | | // |
175 | | // Note: in case prefix_extractor is also set, it is applied after key transformer. But for |
176 | | // block-based filter prefix_extractor should also be applicable to user key, because |
177 | | // block filter containing required key prefix is retrieved from data index based on user key. |
178 | 9.93k | virtual const KeyTransformer* GetKeyTransformer() const { return nullptr; } |
179 | | |
180 | | static constexpr size_t kDefaultFixedSizeFilterBits = 65536; |
181 | | static constexpr double kDefaultFixedSizeFilterErrorRate = 0.01; |
182 | | }; |
183 | | |
184 | | // Return a new filter policy that uses a bloom filter with approximately |
185 | | // the specified number of bits per key. |
186 | | // |
187 | | // bits_per_key: bits per key in bloom filter. A good value for bits_per_key |
188 | | // is 10, which yields a filter with ~ 1% false positive rate. |
189 | | // use_block_based_builder: use block based filter rather than full fiter. |
190 | | // If you want to builder full filter, it needs to be set to false. |
191 | | // |
192 | | // Callers must delete the result after any database that is using the |
193 | | // result has been closed. |
194 | | // |
195 | | // Note: if you are using a custom comparator that ignores some parts |
196 | | // of the keys being compared, you must not use NewBloomFilterPolicy() |
197 | | // and must provide your own FilterPolicy that also ignores the |
198 | | // corresponding parts of the keys. For example, if the comparator |
199 | | // ignores trailing spaces, it would be incorrect to use a |
200 | | // FilterPolicy (like NewBloomFilterPolicy) that does not ignore |
201 | | // trailing spaces in keys. |
202 | | extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key, |
203 | | bool use_block_based_builder = true); |
204 | | |
205 | | // Return a new filter policy that uses a bloom filter divided into fixed-size blocks with |
206 | | // specified parameters: |
207 | | // |
208 | | // total_bits: purely size of bloom filter itself in each filter block, also each filter block has |
209 | | // some metadata added. |
210 | | // error_rate: expected false positive error rate to calculate maximum number of keys to store in |
211 | | // each filter block. This is used to determine whether a filter block is full. |
212 | | // |
213 | | // Callers must delete the result after any database that is using the filter policy has been |
214 | | // closed. |
215 | | extern const FilterPolicy* NewFixedSizeFilterPolicy(size_t total_bits, |
216 | | double error_rate, |
217 | | Logger* logger); |
218 | | } // namespace rocksdb |
219 | | |
220 | | #endif // YB_ROCKSDB_FILTER_POLICY_H |