YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/dynamic_bloom_test.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
21
#ifndef GFLAGS
22
#include <cstdio>
23
int main() {
24
  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
25
  return 0;
26
}
27
#else
28
29
#ifndef __STDC_FORMAT_MACROS
30
#define __STDC_FORMAT_MACROS
31
#endif
32
33
#include <inttypes.h>
34
#include <algorithm>
35
#include <atomic>
36
#include <memory>
37
#include <thread>
38
#include <vector>
39
#include <gflags/gflags.h>
40
41
#include "yb/rocksdb/port/port.h"
42
#include "yb/rocksdb/util/arena.h"
43
#include "yb/rocksdb/util/dynamic_bloom.h"
44
#include "yb/rocksdb/util/logging.h"
45
#include "yb/rocksdb/util/testharness.h"
46
#include "yb/rocksdb/util/testutil.h"
47
#include "yb/rocksdb/util/stop_watch.h"
48
49
#include "yb/util/test_util.h"
50
51
using GFLAGS::ParseCommandLineFlags;
52
53
DEFINE_int32(bits_per_key, 10, "");
54
DEFINE_int32(num_probes, 6, "");
55
DEFINE_bool(enable_perf, false, "");
56
57
namespace rocksdb {
58
59
1.09M
static Slice Key(uint64_t i, char* buffer) {
60
1.09M
  memcpy(buffer, &i, sizeof(i));
61
1.09M
  return Slice(buffer, sizeof(i));
62
1.09M
}
63
64
class DynamicBloomTest : public RocksDBTest {};
65
66
1
TEST_F(DynamicBloomTest, EmptyFilter) {
67
1
  Arena arena;
68
1
  DynamicBloom bloom1(&arena, 100, 0, 2);
69
1
  ASSERT_TRUE(!bloom1.MayContain("hello"));
70
1
  ASSERT_TRUE(!bloom1.MayContain("world"));
71
72
1
  DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
73
1
  ASSERT_TRUE(!bloom2.MayContain("hello"));
74
1
  ASSERT_TRUE(!bloom2.MayContain("world"));
75
1
}
76
77
1
TEST_F(DynamicBloomTest, Small) {
78
1
  Arena arena;
79
1
  DynamicBloom bloom1(&arena, 100, 0, 2);
80
1
  bloom1.Add("hello");
81
1
  bloom1.Add("world");
82
1
  ASSERT_TRUE(bloom1.MayContain("hello"));
83
1
  ASSERT_TRUE(bloom1.MayContain("world"));
84
1
  ASSERT_TRUE(!bloom1.MayContain("x"));
85
1
  ASSERT_TRUE(!bloom1.MayContain("foo"));
86
87
1
  DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
88
1
  bloom2.Add("hello");
89
1
  bloom2.Add("world");
90
1
  ASSERT_TRUE(bloom2.MayContain("hello"));
91
1
  ASSERT_TRUE(bloom2.MayContain("world"));
92
1
  ASSERT_TRUE(!bloom2.MayContain("x"));
93
1
  ASSERT_TRUE(!bloom2.MayContain("foo"));
94
1
}
95
96
1
TEST_F(DynamicBloomTest, SmallConcurrentAdd) {
97
1
  Arena arena;
98
1
  DynamicBloom bloom1(&arena, 100, 0, 2);
99
1
  bloom1.AddConcurrently("hello");
100
1
  bloom1.AddConcurrently("world");
101
1
  ASSERT_TRUE(bloom1.MayContain("hello"));
102
1
  ASSERT_TRUE(bloom1.MayContain("world"));
103
1
  ASSERT_TRUE(!bloom1.MayContain("x"));
104
1
  ASSERT_TRUE(!bloom1.MayContain("foo"));
105
106
1
  DynamicBloom bloom2(&arena, CACHE_LINE_SIZE * 8 * 2 - 1, 1, 2);
107
1
  bloom2.AddConcurrently("hello");
108
1
  bloom2.AddConcurrently("world");
109
1
  ASSERT_TRUE(bloom2.MayContain("hello"));
110
1
  ASSERT_TRUE(bloom2.MayContain("world"));
111
1
  ASSERT_TRUE(!bloom2.MayContain("x"));
112
1
  ASSERT_TRUE(!bloom2.MayContain("foo"));
113
1
}
114
115
74
static uint32_t NextNum(uint32_t num) {
116
74
  if (num < 10) {
117
18
    num += 1;
118
56
  } else if (num < 100) {
119
18
    num += 10;
120
38
  } else if (num < 1000) {
121
18
    num += 100;
122
20
  } else {
123
20
    num += 1000;
124
20
  }
125
74
  return num;
126
74
}
127
128
1
TEST_F(DynamicBloomTest, VaryingLengths) {
129
1
  char buffer[sizeof(uint64_t)];
130
131
  // Count number of filters that significantly exceed the false positive rate
132
1
  int mediocre_filters = 0;
133
1
  int good_filters = 0;
134
1
  uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
135
136
1
  fprintf(stderr, "bits_per_key: %d  num_probes: %d\n", FLAGS_bits_per_key,
137
1
          num_probes);
138
139
3
  for (uint32_t enable_locality = 0; enable_locality < 2; ++enable_locality) {
140
76
    for (uint32_t num = 1; num <= 10000; num = NextNum(num)) {
141
74
      uint32_t bloom_bits = 0;
142
74
      Arena arena;
143
74
      if (enable_locality == 0) {
144
37
        bloom_bits = std::max(num * FLAGS_bits_per_key, 64U);
145
37
      } else {
146
37
        bloom_bits = std::max(num * FLAGS_bits_per_key,
147
37
                              enable_locality * CACHE_LINE_SIZE * 8);
148
37
      }
149
74
      DynamicBloom bloom(&arena, bloom_bits, enable_locality, num_probes);
150
120k
      for (uint64_t i = 0; i < num; i++) {
151
119k
        bloom.Add(Key(i, buffer));
152
119k
        ASSERT_TRUE(bloom.MayContain(Key(i, buffer)));
153
119k
      }
154
155
      // All added keys must match
156
120k
      for (uint64_t i = 0; i < num; i++) {
157
239k
        ASSERT_TRUE(bloom.MayContain(Key(i, buffer))) << "Num " << num
158
239k
                                                      << "; key " << i;
159
119k
      }
160
161
      // Check false positive rate
162
163
74
      int result = 0;
164
740k
      for (uint64_t i = 0; i < 10000; i++) {
165
740k
        if (bloom.MayContain(Key(i + 1000000000, buffer))) {
166
5.06k
          result++;
167
5.06k
        }
168
740k
      }
169
74
      double rate = result / 10000.0;
170
171
74
      fprintf(stderr,
172
74
              "False positives: %5.2f%% @ num = %6u, bloom_bits = %6u, "
173
74
              "enable locality?%u\n",
174
74
              rate * 100.0, num, bloom_bits, enable_locality);
175
176
74
      if (rate > 0.0125)
177
4
        mediocre_filters++;  // Allowed, but not too often
178
70
      else
179
70
        good_filters++;
180
74
    }
181
182
2
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
183
2
            mediocre_filters);
184
2
    ASSERT_LE(mediocre_filters, good_filters / 5);
185
2
  }
186
1
}
187
188
1
TEST_F(DynamicBloomTest, perf) {
189
1
  StopWatchNano timer(Env::Default());
190
1
  uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
191
192
1
  if (!FLAGS_enable_perf) {
193
1
    return;
194
1
  }
195
196
0
  for (uint32_t m = 1; m <= 8; ++m) {
197
0
    Arena arena;
198
0
    const uint32_t num_keys = m * 8 * 1024 * 1024;
199
0
    fprintf(stderr, "testing %" PRIu32 "M keys\n", m * 8);
200
201
0
    DynamicBloom std_bloom(&arena, num_keys * 10, 0, num_probes);
202
203
0
    timer.Start();
204
0
    for (uint64_t i = 1; i <= num_keys; ++i) {
205
0
      std_bloom.Add(Slice(reinterpret_cast<const char*>(&i), 8));
206
0
    }
207
208
0
    uint64_t elapsed = timer.ElapsedNanos();
209
0
    fprintf(stderr, "standard bloom, avg add latency %" PRIu64 "\n",
210
0
            elapsed / num_keys);
211
212
0
    uint32_t count = 0;
213
0
    timer.Start();
214
0
    for (uint64_t i = 1; i <= num_keys; ++i) {
215
0
      if (std_bloom.MayContain(Slice(reinterpret_cast<const char*>(&i), 8))) {
216
0
        ++count;
217
0
      }
218
0
    }
219
0
    ASSERT_EQ(count, num_keys);
220
0
    elapsed = timer.ElapsedNanos();
221
0
    fprintf(stderr, "standard bloom, avg query latency %" PRIu64 "\n",
222
0
            elapsed / count);
223
224
    // Locality enabled version
225
0
    DynamicBloom blocked_bloom(&arena, num_keys * 10, 1, num_probes);
226
227
0
    timer.Start();
228
0
    for (uint64_t i = 1; i <= num_keys; ++i) {
229
0
      blocked_bloom.Add(Slice(reinterpret_cast<const char*>(&i), 8));
230
0
    }
231
232
0
    elapsed = timer.ElapsedNanos();
233
0
    fprintf(stderr,
234
0
            "blocked bloom(enable locality), avg add latency %" PRIu64 "\n",
235
0
            elapsed / num_keys);
236
237
0
    count = 0;
238
0
    timer.Start();
239
0
    for (uint64_t i = 1; i <= num_keys; ++i) {
240
0
      if (blocked_bloom.MayContain(
241
0
              Slice(reinterpret_cast<const char*>(&i), 8))) {
242
0
        ++count;
243
0
      }
244
0
    }
245
246
0
    elapsed = timer.ElapsedNanos();
247
0
    fprintf(stderr,
248
0
            "blocked bloom(enable locality), avg query latency %" PRIu64 "\n",
249
0
            elapsed / count);
250
0
    ASSERT_TRUE(count == num_keys);
251
0
  }
252
0
}
253
254
1
TEST_F(DynamicBloomTest, concurrent_with_perf) {
255
1
  StopWatchNano timer(Env::Default());
256
1
  uint32_t num_probes = static_cast<uint32_t>(FLAGS_num_probes);
257
258
1
  uint32_t m_limit = FLAGS_enable_perf ? 8 : 1;
259
1
  uint32_t locality_limit = FLAGS_enable_perf ? 1 : 0;
260
261
1
  uint32_t num_threads = 4;
262
1
  std::vector<std::thread> threads;
263
264
2
  for (uint32_t m = 1; m <= m_limit; ++m) {
265
2
    for (uint32_t locality = 0; locality <= locality_limit; ++locality) {
266
1
      Arena arena;
267
1
      const uint32_t num_keys = m * 8 * 1024 * 1024;
268
1
      fprintf(stderr, "testing %" PRIu32 "M keys with %" PRIu32 " locality\n",
269
1
              m * 8, locality);
270
271
1
      DynamicBloom std_bloom(&arena, num_keys * 10, locality, num_probes);
272
273
1
      timer.Start();
274
275
4
      std::function<void(size_t)> adder = [&](size_t t) {
276
4.89M
        for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) {
277
4.89M
          std_bloom.AddConcurrently(
278
4.89M
              Slice(reinterpret_cast<const char*>(&i), 8));
279
4.89M
        }
280
4
      };
281
5
      for (size_t t = 0; t < num_threads; ++t) {
282
4
        threads.emplace_back(adder, t);
283
4
      }
284
5
      while (threads.size() > 0) {
285
4
        threads.back().join();
286
4
        threads.pop_back();
287
4
      }
288
289
1
      uint64_t elapsed = timer.ElapsedNanos();
290
1
      fprintf(stderr, "standard bloom, avg parallel add latency %" PRIu64
291
1
                      " nanos/key\n",
292
1
              elapsed / num_keys);
293
294
1
      timer.Start();
295
296
4
      std::function<void(size_t)> hitter = [&](size_t t) {
297
6.31M
        for (uint64_t i = 1 + t; i <= num_keys; i += num_threads) {
298
6.31M
          bool f =
299
6.31M
              std_bloom.MayContain(Slice(reinterpret_cast<const char*>(&i), 8));
300
6.31M
          ASSERT_TRUE(f);
301
6.31M
        }
302
4
      };
303
5
      for (size_t t = 0; t < num_threads; ++t) {
304
4
        threads.emplace_back(hitter, t);
305
4
      }
306
5
      while (threads.size() > 0) {
307
4
        threads.back().join();
308
4
        threads.pop_back();
309
4
      }
310
311
1
      elapsed = timer.ElapsedNanos();
312
1
      fprintf(stderr, "standard bloom, avg parallel hit latency %" PRIu64
313
1
                      " nanos/key\n",
314
1
              elapsed / num_keys);
315
316
1
      timer.Start();
317
318
1
      std::atomic<uint32_t> false_positives(0);
319
4
      std::function<void(size_t)> misser = [&](size_t t) {
320
3.32M
        for (uint64_t i = num_keys + 1 + t; i <= 2 * num_keys;
321
3.32M
             i += num_threads) {
322
3.32M
          bool f =
323
3.32M
              std_bloom.MayContain(Slice(reinterpret_cast<const char*>(&i), 8));
324
3.32M
          if (f) {
325
83.8k
            ++false_positives;
326
83.8k
          }
327
3.32M
        }
328
4
      };
329
5
      for (size_t t = 0; t < num_threads; ++t) {
330
4
        threads.emplace_back(misser, t);
331
4
      }
332
5
      while (threads.size() > 0) {
333
4
        threads.back().join();
334
4
        threads.pop_back();
335
4
      }
336
337
1
      elapsed = timer.ElapsedNanos();
338
1
      fprintf(stderr, "standard bloom, avg parallel miss latency %" PRIu64
339
1
                      " nanos/key, %f%% false positive rate\n",
340
1
              elapsed / num_keys, false_positives.load() * 100.0 / num_keys);
341
1
    }
342
1
  }
343
1
}
344
345
}  // namespace rocksdb
346
347
13.2k
int main(int argc, char** argv) {
348
13.2k
  ::testing::InitGoogleTest(&argc, argv);
349
13.2k
  ParseCommandLineFlags(&argc, &argv, true);
350
351
13.2k
  return RUN_ALL_TESTS();
352
13.2k
}
353
354
#endif  // GFLAGS