YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/filter_policy.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
// This source code is licensed under the BSD-style license found in the
3
// LICENSE file in the root directory of this source tree. An additional grant
4
// of patent rights can be found in the PATENTS file in the same directory.
5
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
6
// Use of this source code is governed by a BSD-style license that can be
7
// found in the LICENSE file. See the AUTHORS file for names of contributors.
8
//
9
// The following only applies to changes made to this file as part of YugaByte development.
10
//
11
// Portions Copyright (c) YugaByte, Inc.
12
//
13
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
14
// in compliance with the License.  You may obtain a copy of the License at
15
//
16
// http://www.apache.org/licenses/LICENSE-2.0
17
//
18
// Unless required by applicable law or agreed to in writing, software distributed under the License
19
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
20
// or implied.  See the License for the specific language governing permissions and limitations
21
// under the License.
22
//
23
// A database can be configured with a custom FilterPolicy object.
24
// This object is responsible for creating a small filter from a set
25
// of keys.  These filters are stored in rocksdb and are consulted
26
// automatically by rocksdb to decide whether or not to read some
27
// information from disk. In many cases, a filter can cut down the
28
// number of disk seeks form a handful to a single disk seek per
29
// DB::Get() call.
30
//
31
// Most people will want to use the builtin bloom filter support (see
32
// NewBloomFilterPolicy() below).
33
34
#ifndef YB_ROCKSDB_FILTER_POLICY_H
35
#define YB_ROCKSDB_FILTER_POLICY_H
36
37
#include <string>
38
#include <memory>
39
40
#include "yb/rocksdb/env.h"
41
#include "yb/util/slice.h"
42
43
namespace rocksdb {
44
45
// A class that takes a bunch of keys, then generates filter
46
class FilterBitsBuilder {
47
 public:
48
6.71k
  virtual ~FilterBitsBuilder() {}
49
50
  // Add Key to filter, you could use any way to store the key.
51
  // Such as: storing hashes or original keys
52
  // Keys are in sorted order and duplicated keys are possible.
53
  virtual void AddKey(const Slice& key) = 0;
54
55
  // Generate the filter using the keys that are added
56
  // The return value of this function would be the filter bits,
57
  // The ownership of actual data is set to buf
58
  virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
59
60
  // Checks whether bits builder is full and we should start a new bloom filter block.
61
  virtual bool IsFull() const = 0;
62
};
63
64
// A class that checks if a key can be in filter
65
// It should be initialized by Slice generated by BitsBuilder
66
class FilterBitsReader {
67
 public:
68
4.21k
  virtual ~FilterBitsReader() {}
69
70
  // Check if the entry match the bits in filter
71
  virtual bool MayMatch(const Slice& entry) = 0;
72
};
73
74
// We add a new format of filter block called full filter block
75
// This new interface gives you more space of customization
76
//
77
// For the full filter block, you can plug in your version by implement
78
// the FilterBitsBuilder and FilterBitsReader
79
//
80
// There are two sets of interface in FilterPolicy
81
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
82
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
83
// full filter and fixed-size filter.
84
// Either Set 1 or Set 2 MUST be implemented correctly.
85
// RocksDB would first try using functions in Set 2. if they return nullptr,
86
// it would use Set 1 instead.
87
// You can choose filter type in NewBloomFilterPolicy.
88
class FilterPolicy {
89
 public:
90
  virtual ~FilterPolicy();
91
92
  // Return the name of this policy.  Note that if the filter encoding
93
  // changes in an incompatible way, the name returned by this method
94
  // must be changed.  Otherwise, old incompatible filters may be
95
  // passed to methods of this type.
96
  virtual const char* Name() const = 0;
97
98
  // keys[0,n-1] contains a list of keys (potentially with duplicates)
99
  // that are ordered according to the user supplied comparator.
100
  // Append a filter that summarizes keys[0,n-1] to *dst.
101
  //
102
  // Warning: do not change the initial contents of *dst.  Instead,
103
  // append the newly constructed filter to *dst.
104
  virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
105
      const = 0;
106
107
  // "filter" contains the data appended by a preceding call to
108
  // CreateFilter() on this class.  This method must return true if
109
  // the key was in the list of keys passed to CreateFilter().
110
  // This method may return true or false if the key was not on the
111
  // list, but it should aim to return false with a high probability.
112
  virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
113
114
  // Get the FilterBitsBuilder, which is used for full filter block and fixed size filter block.
115
  // It contains interface to take individual key, then generate filter.
116
0
  virtual FilterBitsBuilder* GetFilterBitsBuilder() const {
117
0
    return nullptr;
118
0
  }
119
120
  // Get the FilterBitsReader, which is used for full filter block and fixed size filter block.
121
  // It contains interface to tell if key can be in filter.
122
  // The input slice should NOT be deleted by FilterPolicy.
123
0
  virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) const {
124
0
    return nullptr;
125
0
  }
126
127
  // Filter type that will be used for this table.
128
  enum FilterType {
129
    // No filter is used.
130
    kNoFilter,
131
132
    // One monolithic full filter per SSTable, with keys buffering while building.
133
    kFullFilter,
134
135
    // Block based filter, with one filter block corresponding to each data block.
136
    kBlockBasedFilter,
137
138
    // Fixed size filter without key buffering.
139
    kFixedSizeFilter
140
  };
141
142
  // Returns filter type to be used based on this policy.
143
  virtual FilterType GetFilterType() const = 0;
144
145
  class KeyTransformer {
146
   public:
147
160
    virtual ~KeyTransformer() {}
148
149
    // Transform a key.
150
    virtual Slice Transform(Slice key) const = 0;
151
  };
152
153
  // Filter policy can optionally return key transformer to be used before writing key to filter or
154
  // testing key against filter and building/reading filter index based on keys (used for fixed-size
155
  // bloom filter). This method is used by BlockBasedTable(Reader)/BlockBasedTableBuilder.
156
  // If KeyTransformer returns empty filter key - this is treated as matching the filter by
157
  // BlockBasedTableReader. This allows us to support disabling bloom filters for old versions
158
  // of filter policies if they have bugs.
159
  // Because of this when BlockBasedTableBuilder is getting empty filter key - there is no need to
160
  // add it into the filter block.
161
  //
162
  // Requires: order of keys defined by BytewiseComparator shouldn't be broken by key transformer,
163
  // which means if `BytewiseComparator::Compare(a, b) <= 0` then for non-empty a_transformed and
164
  // b_transformed `BytewiseComparator::Compare(a_tranformed, b_transformed) <= 0` must hold.
165
  // Returning empty keys affecting the order is acceptable, for example: "AAA123" <= "BBB456",
166
  // but it is acceptable to transform it to: "AAA" > "", because we treat empty filter keys
167
  // specifically (see previous section).
168
  //
169
  // Actually we can use ColumnFamilyOptions::prefix_extractor instead and switch off whole key
170
  // filtering by setting BlockBasedTableOptions::whole_key_filtering to false. But we want
171
  // key transformation algorithm to be part of filter policy, so we can:
172
  // 1) detect which key transformation to use based on policy serialized into filter meta block.
173
  // 2) support multiple filter policies with their own key transformations.
174
  //
175
  // Note: in case prefix_extractor is also set, it is applied after key transformer. But for
176
  // block-based filter prefix_extractor should also be applicable to user key, because
177
  // block filter containing required key prefix is retrieved from data index based on user key.
178
9.93k
  virtual const KeyTransformer* GetKeyTransformer() const { return nullptr; }
179
180
  static constexpr size_t kDefaultFixedSizeFilterBits = 65536;
181
  static constexpr double kDefaultFixedSizeFilterErrorRate = 0.01;
182
};
183
184
// Return a new filter policy that uses a bloom filter with approximately
185
// the specified number of bits per key.
186
//
187
// bits_per_key: bits per key in bloom filter. A good value for bits_per_key
188
// is 10, which yields a filter with ~ 1% false positive rate.
189
// use_block_based_builder: use block based filter rather than full fiter.
190
// If you want to builder full filter, it needs to be set to false.
191
//
192
// Callers must delete the result after any database that is using the
193
// result has been closed.
194
//
195
// Note: if you are using a custom comparator that ignores some parts
196
// of the keys being compared, you must not use NewBloomFilterPolicy()
197
// and must provide your own FilterPolicy that also ignores the
198
// corresponding parts of the keys.  For example, if the comparator
199
// ignores trailing spaces, it would be incorrect to use a
200
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
201
// trailing spaces in keys.
202
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
203
    bool use_block_based_builder = true);
204
205
// Return a new filter policy that uses a bloom filter divided into fixed-size blocks with
206
// specified parameters:
207
//
208
// total_bits: purely size of bloom filter itself in each filter block, also each filter block has
209
// some metadata added.
210
// error_rate: expected false positive error rate to calculate maximum number of keys to store in
211
// each filter block. This is used to determine whether a filter block is full.
212
//
213
// Callers must delete the result after any database that is using the filter policy has been
214
// closed.
215
extern const FilterPolicy* NewFixedSizeFilterPolicy(size_t total_bits,
216
                                                    double error_rate,
217
                                                    Logger* logger);
218
}  // namespace rocksdb
219
220
#endif  // YB_ROCKSDB_FILTER_POLICY_H