/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_based_filter_block.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "yb/rocksdb/table/block_based_filter_block.h"

#include <algorithm>

#include "yb/rocksdb/filter_policy.h"
#include "yb/rocksdb/util/coding.h"
#include "yb/rocksdb/util/perf_context_imp.h"

#include "yb/util/string_util.h"

namespace rocksdb {

namespace {

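// Helpers used by BlockBasedFilterBlockReader::ToString() below: they format a
// "key: value" line with the key right-aligned in a 14-character label column
// and the value wrapped every 64 characters, continuation lines indented so
// they line up under the value.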
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  char cspace = ' ';
  std::string value_str("");
  size_t i = 0;
  const size_t dataLength = 64;
  const size_t tabLength = 2;
  const size_t offLength = 16;

  value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
  i += dataLength;
  while (i < value.size()) {
    value_str.append("\n");
    value_str.append(offLength, cspace);
    value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
    i += dataLength;
  }

  std::string result("");
  if (key.size() < (offLength - tabLength))
    result.append(size_t((offLength - tabLength)) - key.size(), cspace);
  result.append(key);

  props->append(result + ": " + value_str + "\n");
}

template <class TKey>
void AppendItem(std::string* props, const TKey& key, const std::string& value) {
  std::string key_str = rocksdb::ToString(key);
  AppendItem(props, key_str, value);
}
}  // namespace


// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
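
// The serialized filter block that Finish() produces (and that
// BlockBasedFilterBlockReader parses) is laid out as:
//
//   [filter 0]
//   [filter 1]
//   ...
//   [filter N-1]
//
//   [offset of filter 0]                  : 4 bytes
//   ...
//   [offset of filter N-1]                : 4 bytes
//
//   [offset of beginning of offset array] : 4 bytes
//   lg(base)                              : 1 byte
//
// Filter i holds the keys of the data blocks whose file offsets start in
// [i * kFilterBase, (i + 1) * kFilterBase).
//
// A rough usage sketch (illustrative only, not compiled here; it assumes
// table_opt.filter_policy has been set, e.g. via NewBloomFilterPolicy(10)):
//
//   BlockBasedFilterBlockBuilder builder(nullptr /* prefix_extractor */, table_opt);
//   builder.StartBlock(0 /* file offset of the data block */);
//   builder.Add(key);
//   Slice contents = builder.Finish();
//   // ... write `contents` into the table; the reader later wraps the same
//   // bytes in a BlockContents and calls KeyMayMatch(key, block_offset).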

BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0) {
  assert(policy_);
}

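// Called with the file offset at which each new data block starts. One filter
// slot exists per kFilterBase (2KB) window of file offsets, so the loop below
// emits one filter per window boundary crossed since the previous call: the
// first GenerateFilter() consumes the keys accumulated so far, and any further
// calls emit empty filters that reuse the current offset in result_.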
void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
  uint64_t filter_index = (block_offset / kFilterBase);
  assert(filter_index >= filter_offsets_.size());
  while (filter_index > filter_offsets_.size()) {
    GenerateFilter();
  }
}

void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
  if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
    AddPrefix(key);
  }

  if (whole_key_filtering_) {
    AddKey(key);
  }
}

// Add key to filter if needed
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
  start_.push_back(entries_.size());
  entries_.append(key.cdata(), key.size());
}

// Add prefix to filter if needed
inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
  // get slice for most recently added entry
  Slice prev;
  if (prev_prefix_size_ > 0) {
    prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
  }

  Slice prefix = prefix_extractor_->Transform(key);
  // insert prefix only when it's different from the previous prefix.
  if (prev.size() == 0 || prefix != prev) {
    start_.push_back(entries_.size());
    prev_prefix_start_ = entries_.size();
    prev_prefix_size_ = prefix.size();
    entries_.append(prefix.cdata(), prefix.size());
  }
}

Slice BlockBasedFilterBlockBuilder::Finish() {
  if (!start_.empty()) {
    GenerateFilter();
  }

  // Append array of per-filter offsets
  const uint32_t array_offset = static_cast<uint32_t>(result_.size());
  for (size_t i = 0; i < filter_offsets_.size(); i++) {
    PutFixed32(&result_, filter_offsets_[i]);
  }

  PutFixed32(&result_, array_offset);
  result_.push_back(kFilterBaseLg);  // Save encoding parameter in result
  return Slice(result_);
}

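// Keys and prefixes are accumulated in flattened form: entries_ holds the
// concatenated bytes and start_ holds each entry's offset into entries_.
// GenerateFilter() rebuilds per-entry Slices from that, asks the policy to
// append a filter to result_, and records where that filter begins.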
void BlockBasedFilterBlockBuilder::GenerateFilter() {
  const size_t num_entries = start_.size();
  if (num_entries == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(entries_.size());  // Simplify length computation
  tmp_entries_.resize(num_entries);
  for (size_t i = 0; i < num_entries; i++) {
    const char* base = entries_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_entries_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
  policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
                        &result_);

  tmp_entries_.clear();
  entries_.clear();
  start_.clear();
  prev_prefix_start_ = 0;
  prev_prefix_size_ = 0;
}

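// The reader parses the 5-byte trailer written by Finish(): the last byte is
// base_lg_ and the 4 bytes before it give the start of the offset array. If
// the block is too short or that offset is out of range, num_ stays 0 and
// every subsequent lookup conservatively reports a potential match.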
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt, bool whole_key_filtering,
    BlockContents&& contents)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(whole_key_filtering),
      data_(nullptr),
      offset_(nullptr),
      num_(0),
      base_lg_(0),
      contents_(std::move(contents)) {
  assert(policy_);
  size_t n = contents_.data.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents_.data[n - 1];
  uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
  if (last_word > n - 5) return;
  data_ = contents_.data.cdata();
  offset_ = data_ + last_word;
  num_ = (n - 5 - last_word) / 4;
}

bool BlockBasedFilterBlockReader::KeyMayMatch(const Slice& key,
                                              uint64_t block_offset) {
  assert(block_offset != kNotValid);
  if (!whole_key_filtering_) {
    return true;
  }
  return MayMatch(key, block_offset);
}

bool BlockBasedFilterBlockReader::PrefixMayMatch(const Slice& prefix,
                                                 uint64_t block_offset) {
  assert(block_offset != kNotValid);
  if (!prefix_extractor_) {
    return true;
  }
  return MayMatch(prefix, block_offset);
}

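// Maps the data block's file offset to a filter index (block_offset >>
// base_lg_), reads that filter's [start, limit) range from the offset array,
// and consults the filter policy. Out-of-range indexes and corrupt offsets are
// treated as potential matches, so a damaged filter block cannot introduce
// false negatives.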
bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
                                           uint64_t block_offset) {
  uint64_t index = block_offset >> base_lg_;
  if (index < num_) {
    uint32_t start = DecodeFixed32(offset_ + index * 4);
    uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
    if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
      Slice filter = Slice(data_ + start, limit - start);
      bool const may_match = policy_->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}

size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
  return num_ * 4 + 5 + (offset_ - data_);
}

std::string BlockBasedFilterBlockReader::ToString() const {
  std::string result, filter_meta;
  result.reserve(1024);

  std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
  AppendItem(&result, s_fb, rocksdb::ToString(num_));
  AppendItem(&result, s_bo, s_hd);

  for (size_t index = 0; index < num_; index++) {
    uint32_t start = DecodeFixed32(offset_ + index * 4);
    uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);

    if (start != limit) {
      result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n");
      Slice filter = Slice(data_ + start, limit - start);
      AppendItem(&result, start, filter.ToString(true));
    }
  }
  return result;
}
}  // namespace rocksdb