/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_based_filter_block.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "yb/rocksdb/table/block_based_filter_block.h"

#include <algorithm>

#include "yb/rocksdb/filter_policy.h"
#include "yb/rocksdb/util/coding.h"
#include "yb/rocksdb/util/perf_context_imp.h"

#include "yb/util/string_util.h"

namespace rocksdb {

namespace {

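// Helpers used by BlockBasedFilterBlockReader::ToString() below: they format a
// "key: value" line with the key right-aligned in a 14-character label column
// and the value wrapped every 64 characters, continuation lines indented so
// they line up under the value.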
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  char cspace = ' ';
  std::string value_str("");
  size_t i = 0;
  const size_t dataLength = 64;
  const size_t tabLength = 2;
  const size_t offLength = 16;

  value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
  i += dataLength;
  while (i < value.size()) {
    value_str.append("\n");
    value_str.append(offLength, cspace);
    value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
    i += dataLength;
  }

  std::string result("");
  if (key.size() < (offLength - tabLength))
    result.append(size_t((offLength - tabLength)) - key.size(), cspace);
  result.append(key);

  props->append(result + ": " + value_str + "\n");
}

template <class TKey>
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
  std::string key_str = rocksdb::ToString(key);
  AppendItem(props, key_str, value);
}
}  // namespace


// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
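
// The serialized filter block that Finish() produces (and that
// BlockBasedFilterBlockReader parses) is laid out as:
//
//   [filter 0]
//   [filter 1]
//   ...
//   [filter N-1]
//
//   [offset of filter 0]                  : 4 bytes
//   ...
//   [offset of filter N-1]                : 4 bytes
//
//   [offset of beginning of offset array] : 4 bytes
//   lg(base)                              : 1 byte
//
// Filter i holds the keys of the data blocks whose file offsets start in
// [i * kFilterBase, (i + 1) * kFilterBase).
//
// A rough usage sketch (illustrative only, not compiled here; it assumes
// table_opt.filter_policy has been set, e.g. via NewBloomFilterPolicy(10)):
//
//   BlockBasedFilterBlockBuilder builder(nullptr /* prefix_extractor */, table_opt);
//   builder.StartBlock(0 /* file offset of the data block */);
//   builder.Add(key);
//   Slice contents = builder.Finish();
//   // ... write `contents` into the table; the reader later wraps the same
//   // bytes in a BlockContents and calls KeyMayMatch(key, block_offset).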

BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0) {
  assert(policy_);
}

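// Called with the file offset at which each new data block starts. One filter
// slot exists per kFilterBase (2KB) window of file offsets, so the loop below
// emits one filter per window boundary crossed since the previous call: the
// first GenerateFilter() consumes the keys accumulated so far, and any further
// calls emit empty filters that reuse the current offset in result_.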
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
  uint64_t filter_index = (block_offset / kFilterBase);
  assert(filter_index >= filter_offsets_.size());
  while (filter_index > filter_offsets_.size()) {
    GenerateFilter();
  }
}

void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
  if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
    AddPrefix(key);
  }

  if (whole_key_filtering_) {
    AddKey(key);
  }
}

// Add key to filter if needed
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
  start_.push_back(entries_.size());
  entries_.append(key.cdata(), key.size());
}

// Add prefix to filter if needed
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
  // get slice for most recently added entry
  Slice prev;
  if (prev_prefix_size_ > 0) {
    prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
  }

  Slice prefix = prefix_extractor_->Transform(key);
  // insert prefix only when it's different from the previous prefix.
  if (prev.size() == 0 || prefix != prev) {
    start_.push_back(entries_.size());
    prev_prefix_start_ = entries_.size();
    prev_prefix_size_ = prefix.size();
    entries_.append(prefix.cdata(), prefix.size());
  }
}

Slice BlockBasedFilterBlockBuilder::Finish() {
  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = static_cast<uint32_t>(result_.size());
  for (size_t i = 0; i < filter_offsets_.size(); i++) {
    PutFixed32(&result_, filter_offsets_[i]);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}

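// Keys and prefixes are accumulated in flattened form: entries_ holds the
// concatenated bytes and start_ holds each entry's offset into entries_.
// GenerateFilter() rebuilds per-entry Slices from that, asks the policy to
// append a filter to result_, and records where that filter begins.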
void BlockBasedFilterBlockBuilder::GenerateFilter() {
  const size_t num_entries = start_.size();
  if (num_entries == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(entries_.size());  // Simplify length computation
  tmp_entries_.resize(num_entries);
  for (size_t i = 0; i < num_entries; i++) {
    const char* base = entries_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_entries_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
  policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
                        &result_);

  tmp_entries_.clear();
  entries_.clear();
  start_.clear();
  prev_prefix_start_ = 0;
  prev_prefix_size_ = 0;
}

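// The reader parses the 5-byte trailer written by Finish(): the last byte is
// base_lg_ and the 4 bytes before it give the start of the offset array. If
// the block is too short or that offset is out of range, num_ stays 0 and
// every subsequent lookup conservatively reports a potential match.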
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt, bool whole_key_filtering,
    BlockContents&& contents)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(whole_key_filtering),
      data_(nullptr),
      offset_(nullptr),
      num_(0),
      base_lg_(0),
      contents_(std::move(contents)) {
  assert(policy_);
  size_t n = contents_.data.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents_.data[n - 1];
  uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
  if (last_word > n - 5) return;
  data_ = contents_.data.cdata();
  offset_ = data_ + last_word;
  num_ = (n - 5 - last_word) / 4;
}

bool BlockBasedFilterBlockReader::KeyMayMatch(const Slice& key,
                                              uint64_t block_offset) {
  assert(block_offset != kNotValid);
  if (!whole_key_filtering_) {
    return true;
  }
  return MayMatch(key, block_offset);
}

bool BlockBasedFilterBlockReader::PrefixMayMatch(const Slice& prefix,
                                                 uint64_t block_offset) {
  assert(block_offset != kNotValid);
  if (!prefix_extractor_) {
    return true;
  }
  return MayMatch(prefix, block_offset);
}

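// Maps the data block's file offset to a filter index (block_offset >>
// base_lg_), reads that filter's [start, limit) range from the offset array,
// and consults the filter policy. Out-of-range indexes and corrupt offsets are
// treated as potential matches, so a damaged filter block cannot introduce
// false negatives.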
bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
                                           uint64_t block_offset) {
  uint64_t index = block_offset >> base_lg_;
  if (index < num_) {
    uint32_t start = DecodeFixed32(offset_ + index * 4);
    uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
    if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
      Slice filter = Slice(data_ + start, limit - start);
      bool const may_match = policy_->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}

size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
  return num_ * 4 + 5 + (offset_ - data_);
}

std::string BlockBasedFilterBlockReader::ToString() const {
  std::string result, filter_meta;
  result.reserve(1024);

  std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
  AppendItem(&result, s_fb, rocksdb::ToString(num_));
  AppendItem(&result, s_bo, s_hd);

  for (size_t index = 0; index < num_; index++) {
    uint32_t start = DecodeFixed32(offset_ + index * 4);
    uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);

    if (start != limit) {
      result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n");
      Slice filter = Slice(data_ + start, limit - start);
      AppendItem(&result, start, filter.ToString(true));
    }
  }
  return result;
}
}  // namespace rocksdb