YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/bloom_test.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file. See the AUTHORS file for names of contributors.
23
24
#ifndef GFLAGS
25
#include <cstdio>
26
int main() {
27
  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
28
  return 0;
29
}
30
#else
31
32
#include <vector>
33
#include <gflags/gflags.h>
34
35
#include "yb/rocksdb/filter_policy.h"
36
#include "yb/rocksdb/util/logging.h"
37
#include "yb/rocksdb/util/testharness.h"
38
#include "yb/rocksdb/util/testutil.h"
39
#include "yb/rocksdb/util/arena.h"
40
41
#include "yb/util/enums.h"
42
#include "yb/util/test_util.h"
43
44
using GFLAGS::ParseCommandLineFlags;
45
46
DEFINE_int32(bits_per_key, 10, "");
47
48
namespace rocksdb {
49
50
static const int kVerbose = 1;
51
52
1.35M
static Slice Key(size_t i, char* buffer) {
53
1.35M
  memcpy(buffer, &i, sizeof(i));
54
1.35M
  return Slice(buffer, sizeof(i));
55
1.35M
}
56
57
107
static size_t NextLength(size_t length) {
58
107
  if (length < 10) {
59
27
    length += 1;
60
80
  } else if (length < 100) {
61
27
    length += 10;
62
53
  } else if (length < 1000) {
63
27
    length += 100;
64
26
  } else {
65
26
    length += 1000;
66
26
  }
67
107
  return length;
68
107
}
69
70
class BloomTest : public RocksDBTest {
71
 private:
72
  const FilterPolicy* policy_;
73
  std::string filter_;
74
  std::vector<std::string> keys_;
75
76
 public:
77
  BloomTest() : policy_(
78
3
      NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
79
80
3
  ~BloomTest() {
81
3
    delete policy_;
82
3
  }
83
84
37
  void Reset() {
85
37
    keys_.clear();
86
37
    filter_.clear();
87
37
  }
88
89
59.9k
  void Add(const Slice& s) {
90
59.9k
    keys_.push_back(s.ToString());
91
59.9k
  }
92
93
38
  void Build() {
94
38
    std::vector<Slice> key_slices;
95
60.0k
    for (size_t i = 0; i < keys_.size(); i++) {
96
59.9k
      key_slices.push_back(Slice(keys_[i]));
97
59.9k
    }
98
38
    filter_.clear();
99
38
    policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
100
38
                          &filter_);
101
38
    keys_.clear();
102
38
    if (kVerbose >= 2) DumpFilter();
103
38
  }
104
105
74
  size_t FilterSize() const {
106
74
    return filter_.size();
107
74
  }
108
109
0
  void DumpFilter() {
110
0
    fprintf(stderr, "F(");
111
0
    for (size_t i = 0; i+1 < filter_.size(); i++) {
112
0
      const unsigned int c = static_cast<unsigned int>(filter_[i]);
113
0
      for (int j = 0; j < 8; j++) {
114
0
        fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
115
0
      }
116
0
    }
117
0
    fprintf(stderr, ")\n");
118
0
  }
119
120
430k
  bool Matches(const Slice& s) {
121
430k
    if (!keys_.empty()) {
122
1
      Build();
123
1
    }
124
430k
    return policy_->KeyMayMatch(s, filter_);
125
430k
  }
126
127
37
  double FalsePositiveRate() {
128
37
    char buffer[sizeof(size_t)];
129
37
    size_t result = 0;
130
370k
    for (size_t i = 0; i < 10000; i++) {
131
370k
      if (Matches(Key(i + 1000000000, buffer))) {
132
2.93k
        result++;
133
2.93k
      }
134
370k
    }
135
37
    return result / 10000.0;
136
37
  }
137
};
138
139
1
TEST_F(BloomTest, EmptyFilter) {
140
1
  ASSERT_TRUE(!Matches("hello"));
141
1
  ASSERT_TRUE(!Matches("world"));
142
1
}
143
144
1
TEST_F(BloomTest, Small) {
145
1
  Add("hello");
146
1
  Add("world");
147
1
  ASSERT_TRUE(Matches("hello"));
148
1
  ASSERT_TRUE(Matches("world"));
149
1
  ASSERT_TRUE(!Matches("x"));
150
1
  ASSERT_TRUE(!Matches("foo"));
151
1
}
152
153
1
TEST_F(BloomTest, VaryingLengths) {
154
1
  char buffer[sizeof(size_t)];
155
156
  // Count number of filters that significantly exceed the false positive rate
157
1
  size_t mediocre_filters = 0;
158
1
  size_t good_filters = 0;
159
160
38
  for (size_t length = 1; length <= 10000; length = NextLength(length)) {
161
37
    Reset();
162
60.0k
    for (size_t i = 0; i < length; i++) {
163
59.9k
      Add(Key(i, buffer));
164
59.9k
    }
165
37
    Build();
166
167
74
    ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
168
169
    // All added keys must match
170
60.0k
    for (size_t i = 0; i < length; i++) {
171
119k
      ASSERT_TRUE(Matches(Key(i, buffer)))
172
119k
          << "Length " << length << "; key " << i;
173
59.9k
    }
174
175
    // Check false positive rate
176
37
    double rate = FalsePositiveRate();
177
37
    if (kVerbose >= 1) {
178
37
      LOG(INFO) << StringPrintf(
179
37
          "False positives: %5.2f%% @ length = %6zu ; bytes = %6zu\n", rate * 100.0, length,
180
37
          FilterSize());
181
37
    }
182
37
    ASSERT_LE(rate, 0.02);   // Must not be over 2%
183
37
    if (rate > 0.0125)
184
1
      mediocre_filters++;  // Allowed, but not too often
185
36
    else
186
36
      good_filters++;
187
37
  }
188
1
  if (kVerbose >= 1) {
189
1
    LOG(INFO) << StringPrintf("Filters: %zu good, %zu mediocre\n", good_filters, mediocre_filters);
190
1
  }
191
1
  ASSERT_LE(mediocre_filters, good_filters/5);
192
1
}
193
194
// Different bits-per-byte
195
196
class BloomTestContext {
197
 public:
198
6
  virtual ~BloomTestContext() {}
199
200
  virtual const FilterPolicy& filter_policy() const = 0;
201
  virtual size_t max_keys() const = 0;
202
  virtual void CheckFilterSize(size_t filter_size, size_t num_keys) const = 0;
203
};
204
205
class FullFilterBloomTestContext : public BloomTestContext {
206
 public:
207
77
  const FilterPolicy& filter_policy() const override { return *filter_policy_.get(); }
208
209
37
  size_t max_keys() const override { return 10000; }
210
211
36
  void CheckFilterSize(size_t filter_size, size_t num_keys) const override {
212
72
    ASSERT_LE(filter_size, (size_t)((num_keys * 10 / 8) + 128 + 5)) << num_keys;
213
36
  }
214
215
 private:
216
  std::unique_ptr<const FilterPolicy> filter_policy_{
217
      NewBloomFilterPolicy(FLAGS_bits_per_key, false)};
218
};
219
220
class FixedSizeFilterBloomTestContext : public BloomTestContext {
221
 public:
222
73
  const FilterPolicy& filter_policy() const override { return *filter_policy_.get(); }
223
224
  // For fixed-size filter we limit maximum number of keys depending on total bits in test itself
225
  // by checking ShouldFlush().
226
34
  size_t max_keys() const override { return std::numeric_limits<size_t>::max(); }
227
228
34
  void CheckFilterSize(size_t filter_size, size_t num_keys) const override {
229
68
    ASSERT_LE(filter_size, FilterPolicy::kDefaultFixedSizeFilterBits / 8 + 64 + 5) << num_keys;
230
34
  }
231
232
 private:
233
  std::unique_ptr<const FilterPolicy> filter_policy_{
234
      NewFixedSizeFilterPolicy(
235
          FilterPolicy::kDefaultFixedSizeFilterBits, FilterPolicy::kDefaultFixedSizeFilterErrorRate,
236
          nullptr)};
237
};
238
239
YB_DEFINE_ENUM(BuilderReaderBloomTestType, (kFullFilter)(kFixedSizeFilter));
240
241
namespace {
242
243
6
std::unique_ptr<const BloomTestContext> CreateContext(BuilderReaderBloomTestType type) {
244
6
  switch (type) {
245
3
    case BuilderReaderBloomTestType::kFullFilter:
246
3
      return std::make_unique<FullFilterBloomTestContext>();
247
3
    case BuilderReaderBloomTestType::kFixedSizeFilter:
248
3
      return std::make_unique<FixedSizeFilterBloomTestContext>();
249
0
  }
250
0
  FATAL_INVALID_ENUM_VALUE(BuilderReaderBloomTestType, type);
251
0
}
252
253
} // namespace
254
255
class BuilderReaderBloomTest : public RocksDBTest,
256
    public ::testing::WithParamInterface<BuilderReaderBloomTestType> {
257
 protected:
258
  std::unique_ptr<const BloomTestContext> context_;
259
  std::unique_ptr<FilterBitsBuilder> bits_builder_;
260
  std::unique_ptr<FilterBitsReader> bits_reader_;
261
  std::unique_ptr<const char[]> buf_;
262
  size_t filter_size_;
263
264
 public:
265
  BuilderReaderBloomTest() :
266
      context_(CreateContext(GetParam())),
267
6
      filter_size_(0) {
268
6
    Reset();
269
6
  }
270
271
76
  void Reset() {
272
76
    bits_builder_.reset(context_->filter_policy().GetFilterBitsBuilder());
273
76
    bits_reader_.reset(nullptr);
274
76
    buf_.reset(nullptr);
275
76
    filter_size_ = 0;
276
76
  }
277
278
82.7k
  void Add(const Slice& s) {
279
82.7k
    bits_builder_->AddKey(s);
280
82.7k
  }
281
282
82.8k
  bool ShouldFlush() {
283
82.8k
    return bits_builder_->IsFull();
284
82.8k
  }
285
286
74
  void Build() {
287
74
    Slice filter = bits_builder_->Finish(&buf_);
288
74
    bits_reader_.reset(context_->filter_policy().GetFilterBitsReader(filter));
289
74
    filter_size_ = filter.size();
290
74
  }
291
292
140
  size_t FilterSize() const {
293
140
    return filter_size_;
294
140
  }
295
296
782k
  bool Matches(const Slice& s) {
297
782k
    if (bits_reader_ == nullptr) {
298
4
      Build();
299
4
    }
300
782k
    return bits_reader_->MayMatch(s);
301
782k
  }
302
303
70
  double FalsePositiveRate() {
304
70
    char buffer[sizeof(size_t)];
305
70
    size_t result = 0;
306
700k
    for (size_t i = 0; i < 10000; i++) {
307
700k
      if (Matches(Key(i + 1000000000, buffer))) {
308
2.30k
        result++;
309
2.30k
      }
310
700k
    }
311
70
    return result / 10000.0;
312
70
  }
313
};
314
315
2
TEST_P(BuilderReaderBloomTest, FullEmptyFilter) {
316
  // Empty filter is not match, at this level
317
2
  ASSERT_TRUE(!Matches("hello"));
318
2
  ASSERT_TRUE(!Matches("world"));
319
2
}
320
321
2
TEST_P(BuilderReaderBloomTest, FullSmall) {
322
2
  Add("hello");
323
2
  Add("world");
324
2
  ASSERT_TRUE(Matches("hello"));
325
2
  ASSERT_TRUE(Matches("world"));
326
2
  ASSERT_TRUE(!Matches("x"));
327
2
  ASSERT_TRUE(!Matches("foo"));
328
2
}
329
330
2
TEST_P(BuilderReaderBloomTest, FullVaryingLengths) {
331
2
  char buffer[sizeof(size_t)];
332
333
  // Count number of filters that significantly exceed the false positive rate
334
2
  size_t mediocre_filters = 0;
335
2
  size_t good_filters = 0;
336
337
72
  for (size_t length = 1; !ShouldFlush() && length < context_->max_keys();
338
70
      length = NextLength(length)) {
339
70
    Reset();
340
82.8k
    for (size_t i = 0; i < length; i++) {
341
82.7k
      Add(Key(i, buffer));
342
82.7k
      if (ShouldFlush()) {
343
1
        length = i + 1;
344
1
        break;
345
1
      }
346
82.7k
    }
347
70
    Build();
348
349
70
    context_->CheckFilterSize(FilterSize(), length);
350
351
    // All added keys must match
352
82.8k
    for (size_t i = 0; i < length; i++) {
353
165k
      ASSERT_TRUE(Matches(Key(i, buffer))) << "Length " << length << "; key " << i;
354
82.7k
    }
355
356
    // Check false positive rate
357
70
    double rate = FalsePositiveRate();
358
70
    if (kVerbose >= 1) {
359
70
      LOG(INFO) << StringPrintf(
360
70
          "False positives: %5.2f%% @ length = %6zu ; bytes = %6zu\n", rate * 100.0, length,
361
70
          FilterSize());
362
70
    }
363
70
    ASSERT_LE(rate, 0.02);   // Must not be over 2%
364
70
    if (rate > 0.0125)
365
3
      mediocre_filters++;  // Allowed, but not too often
366
67
    else
367
67
      good_filters++;
368
70
  }
369
2
  if (kVerbose >= 1) {
370
2
    LOG(INFO) << StringPrintf("Filters: %zu good, %zu mediocre\n", good_filters, mediocre_filters);
371
2
  }
372
2
  ASSERT_LE(mediocre_filters, good_filters/5);
373
2
}
374
375
INSTANTIATE_TEST_CASE_P(, BuilderReaderBloomTest, ::testing::Values(
376
    BuilderReaderBloomTestType::kFullFilter,
377
    BuilderReaderBloomTestType::kFixedSizeFilter));
378
379
}  // namespace rocksdb
380
381
13.2k
int main(int argc, char** argv) {
382
13.2k
  ::testing::InitGoogleTest(&argc, argv);
383
13.2k
  ParseCommandLineFlags(&argc, &argv, true);
384
385
13.2k
  return RUN_ALL_TESTS();
386
13.2k
}
387
388
#endif  // GFLAGS