/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/db_bloom_filter_test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/rocksdb/db/db_test_util.h" |
15 | | #include "yb/rocksdb/perf_context.h" |
16 | | #include "yb/rocksdb/port/stack_trace.h" |
17 | | |
18 | | namespace rocksdb { |
19 | | |
20 | | // DB tests related to bloom filter. |
21 | | |
22 | | class DBBloomFilterTest : public DBTestBase { |
23 | | public: |
24 | 22 | DBBloomFilterTest() : DBTestBase("/db_bloom_filter_test") {} |
25 | | |
26 | | typedef std::function<const FilterPolicy*()> FilterPolicyCreator; |
27 | | // Creates SST files with num_unique_keys using filter policy provided by write_policy_creator. |
28 | | // Then tries to read created files using filter policy provided by new_policy_creator as main |
29 | | // filter policy while having filter policy provided by write_policy_creator inside |
30 | | // BlockBasedTableOptions::supported_filter_policies. |
31 | | // Checks that keys could be read and bloom filter is checked during reads. |
32 | | // Uses options for RocksDB. |
33 | | // should_be_useful specifies whether bloom filter is expected to be useful. |
34 | | void CheckOtherFilterPoliciesSupport( |
35 | | Options* options, const int num_unique_keys, FilterPolicyCreator write_policy_creator, |
36 | | FilterPolicyCreator new_policy_creator, bool should_be_useful); |
37 | | }; |
38 | | |
39 | | // KeyMayExist can lead to a few false positives, but not false negatives. |
40 | | // To make test deterministic, use a much larger number of bits per key-20 than |
41 | | // bits in the key, so that false positives are eliminated |
42 | 1 | TEST_F(DBBloomFilterTest, KeyMayExist) { |
43 | 23 | do { |
44 | 23 | ReadOptions ropts; |
45 | 23 | std::string value; |
46 | 23 | anon::OptionsOverride options_override; |
47 | 23 | options_override.filter_policy.reset(NewBloomFilterPolicy(20)); |
48 | 23 | Options options = CurrentOptions(options_override); |
49 | 23 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
50 | 23 | CreateAndReopenWithCF({"pikachu"}, options); |
51 | | |
52 | 23 | ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); |
53 | | |
54 | 23 | ASSERT_OK(Put(1, "a", "b")); |
55 | 23 | bool value_found = false; |
56 | 23 | ASSERT_TRUE( |
57 | 23 | db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); |
58 | 23 | ASSERT_TRUE(value_found); |
59 | 23 | ASSERT_EQ("b", value); |
60 | | |
61 | 23 | ASSERT_OK(Flush(1)); |
62 | 23 | value.clear(); |
63 | | |
64 | 23 | uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS); |
65 | 23 | uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); |
66 | 23 | ASSERT_TRUE( |
67 | 23 | db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); |
68 | 23 | ASSERT_TRUE(!value_found); |
69 | | // assert that no new files were opened and no new blocks were |
70 | | // read into block cache. |
71 | 23 | ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); |
72 | 23 | ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
73 | 23 | ASSERT_EQ(cache_added, |
74 | 23 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
75 | 23 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
76 | | |
77 | 23 | ASSERT_OK(Delete(1, "a")); |
78 | | |
79 | 23 | numopen = TestGetTickerCount(options, NO_FILE_OPENS); |
80 | 23 | cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); |
81 | 23 | ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); |
82 | 23 | ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); |
83 | 23 | ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
84 | 23 | ASSERT_EQ(cache_added, |
85 | 23 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
86 | 23 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
87 | | |
88 | 23 | ASSERT_OK(Flush(1)); |
89 | 23 | ASSERT_OK(dbfull()->TEST_CompactRange( |
90 | 23 | 0, nullptr, nullptr, handles_[1], true /* disallow trivial move */)); |
91 | | |
92 | 23 | numopen = TestGetTickerCount(options, NO_FILE_OPENS); |
93 | 23 | cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); |
94 | 23 | ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); |
95 | 23 | ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); |
96 | 23 | ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
97 | 23 | ASSERT_EQ(cache_added, |
98 | 23 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
99 | 23 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
100 | | |
101 | 23 | ASSERT_OK(Delete(1, "c")); |
102 | | |
103 | 23 | numopen = TestGetTickerCount(options, NO_FILE_OPENS); |
104 | 23 | cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); |
105 | 23 | ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value)); |
106 | 23 | ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); |
107 | 23 | ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
108 | 23 | ASSERT_EQ(cache_added, |
109 | 23 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
110 | 23 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
111 | | |
112 | | // KeyMayExist function only checks data in block caches, which is not used |
113 | | // by plain table format. |
114 | 23 | } while ( |
115 | 23 | ChangeOptions(kSkipPlainTable | kSkipHashIndex | kSkipFIFOCompaction)); |
116 | 1 | } |
117 | | |
118 | | // A delete is skipped for key if KeyMayExist(key) returns False |
119 | | // Tests Writebatch consistency and proper delete behaviour |
120 | 1 | TEST_F(DBBloomFilterTest, FilterDeletes) { |
121 | 5 | do { |
122 | 5 | anon::OptionsOverride options_override; |
123 | 5 | options_override.filter_policy.reset(NewBloomFilterPolicy(20)); |
124 | 5 | Options options = CurrentOptions(options_override); |
125 | 5 | options.filter_deletes = true; |
126 | 5 | CreateAndReopenWithCF({"pikachu"}, options); |
127 | 5 | WriteBatch batch; |
128 | | |
129 | 5 | batch.Delete(handles_[1], "a"); |
130 | 5 | ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); |
131 | 5 | ASSERT_EQ(AllEntriesFor("a", 1), "[ ]"); // Delete skipped |
132 | 5 | batch.Clear(); |
133 | | |
134 | 5 | batch.Put(handles_[1], "a", "b"); |
135 | 5 | batch.Delete(handles_[1], "a"); |
136 | 5 | ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); |
137 | 5 | ASSERT_EQ(Get(1, "a"), "NOT_FOUND"); |
138 | 5 | ASSERT_EQ(AllEntriesFor("a", 1), "[ DEL, b ]"); // Delete issued |
139 | 5 | batch.Clear(); |
140 | | |
141 | 5 | batch.Delete(handles_[1], "c"); |
142 | 5 | batch.Put(handles_[1], "c", "d"); |
143 | 5 | ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); |
144 | 5 | ASSERT_EQ(Get(1, "c"), "d"); |
145 | 5 | ASSERT_EQ(AllEntriesFor("c", 1), "[ d ]"); // Delete skipped |
146 | 5 | batch.Clear(); |
147 | | |
148 | 5 | ASSERT_OK(Flush(1)); // A stray Flush |
149 | | |
150 | 5 | batch.Delete(handles_[1], "c"); |
151 | 5 | ASSERT_OK(dbfull()->Write(WriteOptions(), &batch)); |
152 | 5 | ASSERT_EQ(AllEntriesFor("c", 1), "[ DEL, d ]"); // Delete issued |
153 | 5 | batch.Clear(); |
154 | 5 | } while (ChangeCompactOptions()); |
155 | 1 | } |
156 | | |
157 | 1 | TEST_F(DBBloomFilterTest, GetFilterByPrefixBloom) { |
158 | 1 | Options options = last_options_; |
159 | 1 | options.prefix_extractor.reset(NewFixedPrefixTransform(8)); |
160 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
161 | 1 | BlockBasedTableOptions bbto; |
162 | 1 | bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
163 | 1 | bbto.whole_key_filtering = false; |
164 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
165 | 1 | DestroyAndReopen(options); |
166 | | |
167 | 1 | WriteOptions wo; |
168 | 1 | ReadOptions ro; |
169 | 1 | FlushOptions fo; |
170 | 1 | fo.wait = true; |
171 | 1 | std::string value; |
172 | | |
173 | 1 | ASSERT_OK(dbfull()->Put(wo, "barbarbar", "foo")); |
174 | 1 | ASSERT_OK(dbfull()->Put(wo, "barbarbar2", "foo2")); |
175 | 1 | ASSERT_OK(dbfull()->Put(wo, "foofoofoo", "bar")); |
176 | | |
177 | 1 | ASSERT_OK(dbfull()->Flush(fo)); |
178 | | |
179 | 1 | ASSERT_EQ("foo", Get("barbarbar")); |
180 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
181 | 1 | ASSERT_EQ("foo2", Get("barbarbar2")); |
182 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
183 | 1 | ASSERT_EQ("NOT_FOUND", Get("barbarbar3")); |
184 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
185 | | |
186 | 1 | ASSERT_EQ("NOT_FOUND", Get("barfoofoo")); |
187 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
188 | | |
189 | 1 | ASSERT_EQ("NOT_FOUND", Get("foobarbar")); |
190 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); |
191 | 1 | } |
192 | | |
193 | 1 | TEST_F(DBBloomFilterTest, WholeKeyFilterProp) { |
194 | 1 | Options options = last_options_; |
195 | 1 | options.prefix_extractor.reset(NewFixedPrefixTransform(3)); |
196 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
197 | | |
198 | 1 | BlockBasedTableOptions bbto; |
199 | 1 | bbto.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
200 | 1 | bbto.whole_key_filtering = false; |
201 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
202 | 1 | DestroyAndReopen(options); |
203 | | |
204 | 1 | WriteOptions wo; |
205 | 1 | ReadOptions ro; |
206 | 1 | FlushOptions fo; |
207 | 1 | fo.wait = true; |
208 | 1 | std::string value; |
209 | | |
210 | 1 | ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); |
211 | | // Needs insert some keys to make sure files are not filtered out by key |
212 | | // ranges. |
213 | 1 | ASSERT_OK(dbfull()->Put(wo, "aaa", "")); |
214 | 1 | ASSERT_OK(dbfull()->Put(wo, "zzz", "")); |
215 | 1 | ASSERT_OK(dbfull()->Flush(fo)); |
216 | | |
217 | 1 | Reopen(options); |
218 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
219 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
220 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
221 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
222 | 1 | ASSERT_EQ("foo", Get("foobar")); |
223 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
224 | | |
225 | | // Reopen with whole key filtering enabled and prefix extractor |
226 | | // NULL. Bloom filter should be off for both of whole key and |
227 | | // prefix bloom. |
228 | 1 | bbto.whole_key_filtering = true; |
229 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
230 | 1 | options.prefix_extractor.reset(); |
231 | 1 | Reopen(options); |
232 | | |
233 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
234 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
235 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
236 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
237 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
238 | 1 | ASSERT_EQ("foo", Get("foobar")); |
239 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
240 | | // Write DB with only full key filtering. |
241 | 1 | ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); |
242 | | // Needs insert some keys to make sure files are not filtered out by key |
243 | | // ranges. |
244 | 1 | ASSERT_OK(dbfull()->Put(wo, "aaa", "")); |
245 | 1 | ASSERT_OK(dbfull()->Put(wo, "zzz", "")); |
246 | 1 | ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); |
247 | | |
248 | | // Reopen with both of whole key off and prefix extractor enabled. |
249 | | // Still no bloom filter should be used. |
250 | 1 | options.prefix_extractor.reset(NewFixedPrefixTransform(3)); |
251 | 1 | bbto.whole_key_filtering = false; |
252 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
253 | 1 | Reopen(options); |
254 | | |
255 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
256 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
257 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
258 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
259 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
260 | 1 | ASSERT_EQ("foo", Get("foobar")); |
261 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
262 | | |
263 | | // Try to create a DB with mixed files: |
264 | 1 | ASSERT_OK(dbfull()->Put(wo, "foobar", "foo")); |
265 | | // Needs insert some keys to make sure files are not filtered out by key |
266 | | // ranges. |
267 | 1 | ASSERT_OK(dbfull()->Put(wo, "aaa", "")); |
268 | 1 | ASSERT_OK(dbfull()->Put(wo, "zzz", "")); |
269 | 1 | ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); |
270 | | |
271 | 1 | options.prefix_extractor.reset(); |
272 | 1 | bbto.whole_key_filtering = true; |
273 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
274 | 1 | Reopen(options); |
275 | | |
276 | | // Try to create a DB with mixed files. |
277 | 1 | ASSERT_OK(dbfull()->Put(wo, "barfoo", "bar")); |
278 | | // In this case needs insert some keys to make sure files are |
279 | | // not filtered out by key ranges. |
280 | 1 | ASSERT_OK(dbfull()->Put(wo, "aaa", "")); |
281 | 1 | ASSERT_OK(dbfull()->Put(wo, "zzz", "")); |
282 | 1 | ASSERT_OK(Flush()); |
283 | | |
284 | | // Now we have two files: |
285 | | // File 1: An older file with prefix bloom. |
286 | | // File 2: A newer file with whole bloom filter. |
287 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 1); |
288 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
289 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 2); |
290 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
291 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 3); |
292 | 1 | ASSERT_EQ("foo", Get("foobar")); |
293 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); |
294 | 1 | ASSERT_EQ("bar", Get("barfoo")); |
295 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); |
296 | | |
297 | | // Reopen with the same setting: only whole key is used |
298 | 1 | Reopen(options); |
299 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 4); |
300 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
301 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 5); |
302 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
303 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 6); |
304 | 1 | ASSERT_EQ("foo", Get("foobar")); |
305 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); |
306 | 1 | ASSERT_EQ("bar", Get("barfoo")); |
307 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); |
308 | | |
309 | | // Restart with both filters are allowed |
310 | 1 | options.prefix_extractor.reset(NewFixedPrefixTransform(3)); |
311 | 1 | bbto.whole_key_filtering = true; |
312 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
313 | 1 | Reopen(options); |
314 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 7); |
315 | | // File 1 will has it filtered out. |
316 | | // File 2 will not, as prefix `foo` exists in the file. |
317 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
318 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 8); |
319 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
320 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 10); |
321 | 1 | ASSERT_EQ("foo", Get("foobar")); |
322 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); |
323 | 1 | ASSERT_EQ("bar", Get("barfoo")); |
324 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); |
325 | | |
326 | | // Restart with only prefix bloom is allowed. |
327 | 1 | options.prefix_extractor.reset(NewFixedPrefixTransform(3)); |
328 | 1 | bbto.whole_key_filtering = false; |
329 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
330 | 1 | Reopen(options); |
331 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); |
332 | 1 | ASSERT_EQ("NOT_FOUND", Get("foo")); |
333 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 11); |
334 | 1 | ASSERT_EQ("NOT_FOUND", Get("bar")); |
335 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); |
336 | 1 | ASSERT_EQ("foo", Get("foobar")); |
337 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); |
338 | 1 | ASSERT_EQ("bar", Get("barfoo")); |
339 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 12); |
340 | 1 | } |
341 | | |
342 | 1 | TEST_F(DBBloomFilterTest, BloomFilter) { |
343 | 5 | do { |
344 | 5 | Options options = CurrentOptions(); |
345 | 5 | env_->count_random_reads_ = true; |
346 | 5 | options.env = env_; |
347 | | // ChangeCompactOptions() only changes compaction style, which does not |
348 | | // trigger reset of table_factory |
349 | 5 | BlockBasedTableOptions table_options; |
350 | 5 | table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
351 | | // Fixed-size bloom filter is not used without block cache. |
352 | 5 | table_options.no_block_cache = |
353 | 5 | table_options.filter_policy->GetFilterType() != FilterPolicy::kFixedSizeFilter; |
354 | 5 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
355 | | |
356 | 5 | CreateAndReopenWithCF({"pikachu"}, options); |
357 | | |
358 | | // Populate multiple layers |
359 | 5 | const int N = 10000; |
360 | 50.0k | for (int i = 0; i < N; i++) { |
361 | 50.0k | ASSERT_OK(Put(1, Key(i), Key(i))); |
362 | 50.0k | } |
363 | 5 | Compact(1, "a", "z"); |
364 | 505 | for (int i = 0; i < N; i += 100) { |
365 | 500 | ASSERT_OK(Put(1, Key(i), Key(i))); |
366 | 500 | } |
367 | 5 | ASSERT_OK(Flush(1)); |
368 | | |
369 | | // Prevent auto compactions triggered by seeks |
370 | 5 | env_->delay_sstable_sync_.store(true, std::memory_order_release); |
371 | | |
372 | | // Lookup present keys. Should rarely read from small sstable. |
373 | 5 | env_->random_read_counter_.Reset(); |
374 | 50.0k | for (int i = 0; i < N; i++) { |
375 | 50.0k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
376 | 50.0k | } |
377 | 5 | int reads = env_->random_read_counter_.Read(); |
378 | 5 | fprintf(stderr, "%d present => %d reads\n", N, reads); |
379 | 5 | ASSERT_GE(reads, N); |
380 | 5 | ASSERT_LE(reads, N + 2 * N / 100); |
381 | | |
382 | | // Lookup present keys. Should rarely read from either sstable. |
383 | 5 | env_->random_read_counter_.Reset(); |
384 | 50.0k | for (int i = 0; i < N; i++) { |
385 | 50.0k | ASSERT_EQ("NOT_FOUND", Get(1, Key(i) + ".missing")); |
386 | 50.0k | } |
387 | 5 | reads = env_->random_read_counter_.Read(); |
388 | 5 | fprintf(stderr, "%d missing => %d reads\n", N, reads); |
389 | 5 | ASSERT_LE(reads, 3 * N / 100); |
390 | | |
391 | 5 | env_->delay_sstable_sync_.store(false, std::memory_order_release); |
392 | 5 | Close(); |
393 | 5 | } while (ChangeCompactOptions()); |
394 | 1 | } |
395 | | |
396 | | namespace { |
397 | | |
398 | | // Returns lower bound on expected BLOOM_FILTER_USEFUL given that num_total_keys_missed keys are |
399 | | // missed (sum over all SSTs). |
400 | 13 | size_t BloomFilterUsefulLowerBound(const size_t num_total_keys_missed) { |
401 | 13 | return num_total_keys_missed * |
402 | 13 | (1 - FilterPolicy::kDefaultFixedSizeFilterErrorRate); |
403 | 13 | } |
404 | | |
405 | | // Following functions calculate metrics expectations for the case when we read all keys from DB |
406 | | // with 2 SST, first SST has all num_unique_keys which 2nd has num_2nd_file_keys of these unique |
407 | | // keys. |
408 | | |
409 | | // Returns expected BLOOM_FILTER_CHECKED value. |
410 | 9 | size_t BloomFilterCheckedCount(const size_t num_unique_keys, const size_t num_2nd_file_keys) { |
411 | 9 | return 2 * num_unique_keys - num_2nd_file_keys; |
412 | 9 | } |
413 | | |
414 | | // Returns lower bound on expected BLOOM_FILTER_USEFUL. |
415 | 8 | size_t BloomFilterUsefulLowerBound(const size_t num_unique_keys, const size_t num_2nd_file_keys) { |
416 | 8 | return BloomFilterUsefulLowerBound(num_unique_keys - num_2nd_file_keys); |
417 | 8 | } |
418 | | |
419 | | } // namespace |
420 | | |
421 | 1 | TEST_F(DBBloomFilterTest, BloomFilterIndex) { |
422 | 5 | do { |
423 | 5 | Options options = CurrentOptions(); |
424 | 5 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
425 | 5 | options.env = env_; |
426 | | // ChangeCompactOptions() only changes compaction style, which does not |
427 | | // trigger reset of table_factory |
428 | 5 | BlockBasedTableOptions table_options; |
429 | 5 | table_options.filter_policy.reset(NewFixedSizeFilterPolicy( |
430 | 5 | FilterPolicy::kDefaultFixedSizeFilterBits, FilterPolicy::kDefaultFixedSizeFilterErrorRate, |
431 | 5 | nullptr)); |
432 | | // Fixed-size bloom filter is not used without block cache. |
433 | 5 | table_options.no_block_cache = |
434 | 5 | table_options.filter_policy->GetFilterType() != FilterPolicy::kFixedSizeFilter; |
435 | 5 | table_options.cache_index_and_filter_blocks = true; |
436 | 5 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
437 | | |
438 | 5 | CreateAndReopenWithCF({"pikachu"}, options); |
439 | | |
440 | | // Populate DB with two SST. |
441 | 5 | const int key_begin = 100; |
442 | 5 | const int key_end = 1000; |
443 | 5 | const auto num_keys = key_end - key_begin; |
444 | 4.50k | for (int i = key_begin; i < key_end; i++) { |
445 | 4.50k | ASSERT_OK(Put(1, Key(i), Key(i))); |
446 | 4.50k | } |
447 | 5 | ASSERT_OK(Flush(1)); |
448 | 5 | size_t num_keys_in_small_sst = 0; |
449 | 50 | for (int i = key_begin; i < key_end; i += 100) { |
450 | 45 | ASSERT_OK(Put(1, Key(i), Key(i))); |
451 | 45 | num_keys_in_small_sst++; |
452 | 45 | } |
453 | 5 | ASSERT_OK(Flush(1)); |
454 | | |
455 | | // Prevent auto compactions triggered by seeks |
456 | 5 | env_->delay_sstable_sync_.store(true, std::memory_order_release); |
457 | | |
458 | | // Lookup present keys. Should rarely read from small sstable. |
459 | 4.50k | for (int i = key_begin; i < key_end; i++) { |
460 | 4.50k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
461 | 4.50k | } |
462 | 5 | ASSERT_EQ( |
463 | 5 | TestGetTickerCount(options, BLOOM_FILTER_CHECKED), |
464 | 5 | BloomFilterCheckedCount(num_keys, num_keys_in_small_sst)); |
465 | 5 | ASSERT_GE( |
466 | 5 | TestGetTickerCount(options, BLOOM_FILTER_USEFUL), |
467 | 5 | BloomFilterUsefulLowerBound(num_keys, num_keys_in_small_sst)); |
468 | | |
469 | 5 | TestResetTickerCount(options, BLOOM_FILTER_CHECKED); |
470 | 5 | TestResetTickerCount(options, BLOOM_FILTER_USEFUL); |
471 | | // Lookup keys out of bloom filter index range. Should rarely read from both sstables. |
472 | 505 | for (int i = 0; i < key_begin; i++) { |
473 | 500 | ASSERT_EQ("NOT_FOUND", Get(1, Key(i))); |
474 | 500 | } |
475 | 5 | ASSERT_EQ("NOT_FOUND", Get(1, "zzz")); |
476 | 5 | ASSERT_EQ( |
477 | 5 | TestGetTickerCount(options, BLOOM_FILTER_CHECKED), 2 * (key_begin + 1)); |
478 | 5 | ASSERT_GE( |
479 | 5 | TestGetTickerCount(options, BLOOM_FILTER_USEFUL), |
480 | 5 | BloomFilterUsefulLowerBound(2 * (key_begin + 1))); |
481 | | |
482 | 5 | env_->delay_sstable_sync_.store(false, std::memory_order_release); |
483 | 5 | Close(); |
484 | 5 | } while (ChangeCompactOptions()); |
485 | 1 | } |
486 | | |
487 | 1 | TEST_F(DBBloomFilterTest, BloomFilterRate) { |
488 | 3 | while (ChangeFilterOptions()) { |
489 | 2 | Options options = CurrentOptions(); |
490 | 2 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
491 | 2 | CreateAndReopenWithCF({"pikachu"}, options); |
492 | | |
493 | 2 | const int maxKey = 10000; |
494 | 20.0k | for (int i = 0; i < maxKey; i++) { |
495 | 20.0k | ASSERT_OK(Put(1, Key(i), Key(i))); |
496 | 20.0k | } |
497 | | // Add a large key to make the file contain wide range |
498 | 2 | ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); |
499 | 2 | ASSERT_OK(Flush(1)); |
500 | | |
501 | | // Check if they can be found |
502 | 20.0k | for (int i = 0; i < maxKey; i++) { |
503 | 20.0k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
504 | 20.0k | } |
505 | 2 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
506 | | |
507 | | // Check if filter is useful |
508 | 20.0k | for (int i = 0; i < maxKey; i++) { |
509 | 20.0k | ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); |
510 | 20.0k | } |
511 | 2 | ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); |
512 | 2 | } |
513 | 1 | } |
514 | | |
515 | 1 | TEST_F(DBBloomFilterTest, BloomFilterCompatibility) { |
516 | 1 | Options options = CurrentOptions(); |
517 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
518 | 1 | BlockBasedTableOptions table_options; |
519 | 1 | table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); |
520 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
521 | | |
522 | | // Create with block based filter |
523 | 1 | CreateAndReopenWithCF({"pikachu"}, options); |
524 | | |
525 | 1 | const int maxKey = 10000; |
526 | 10.0k | for (int i = 0; i < maxKey; i++) { |
527 | 10.0k | ASSERT_OK(Put(1, Key(i), Key(i))); |
528 | 10.0k | } |
529 | 1 | ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); |
530 | 1 | ASSERT_OK(Flush(1)); |
531 | | |
532 | | // Check db with full filter |
533 | 1 | table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
534 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
535 | 1 | ReopenWithColumnFamilies({"default", "pikachu"}, options); |
536 | | |
537 | | // Check if they can be found |
538 | 10.0k | for (int i = 0; i < maxKey; i++) { |
539 | 10.0k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
540 | 10.0k | } |
541 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
542 | 1 | } |
543 | | |
544 | 1 | TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) { |
545 | 1 | Options options = CurrentOptions(); |
546 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
547 | 1 | BlockBasedTableOptions table_options; |
548 | 1 | table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); |
549 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
550 | | |
551 | | // Create with full filter |
552 | 1 | CreateAndReopenWithCF({"pikachu"}, options); |
553 | | |
554 | 1 | const int maxKey = 10000; |
555 | 10.0k | for (int i = 0; i < maxKey; i++) { |
556 | 10.0k | ASSERT_OK(Put(1, Key(i), Key(i))); |
557 | 10.0k | } |
558 | 1 | ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); |
559 | 1 | ASSERT_OK(Flush(1)); |
560 | | |
561 | | // Check db with block_based filter |
562 | 1 | table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); |
563 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
564 | 1 | ReopenWithColumnFamilies({"default", "pikachu"}, options); |
565 | | |
566 | | // Check if they can be found |
567 | 10.0k | for (int i = 0; i < maxKey; i++) { |
568 | 10.0k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
569 | 10.0k | } |
570 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
571 | 1 | } |
572 | | |
573 | | namespace { |
574 | | |
575 | | class TestFilterPolicy : public FilterPolicy { |
576 | | public: |
577 | | explicit TestFilterPolicy( |
578 | | const std::string& name, std::unique_ptr<KeyTransformer> key_transformer) |
579 | 8 | : name_(name), key_transformer_(std::move(key_transformer)) { |
580 | 8 | wrapped_policy_.reset(rocksdb::NewFixedSizeFilterPolicy( |
581 | 8 | FilterPolicy::kDefaultFixedSizeFilterBits, |
582 | 8 | rocksdb::FilterPolicy::kDefaultFixedSizeFilterErrorRate, nullptr)); |
583 | 8 | } |
584 | | |
585 | 0 | void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst) const override { |
586 | 0 | wrapped_policy_->CreateFilter(keys, n, dst); |
587 | 0 | } |
588 | | |
589 | 0 | bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter) const override { |
590 | 0 | return wrapped_policy_->KeyMayMatch(key, filter); |
591 | 0 | } |
592 | | |
593 | 22 | rocksdb::FilterBitsBuilder* GetFilterBitsBuilder() const override { |
594 | 22 | return wrapped_policy_->GetFilterBitsBuilder(); |
595 | 22 | } |
596 | | |
597 | 22 | rocksdb::FilterBitsReader* GetFilterBitsReader(const rocksdb::Slice& contents) const override { |
598 | 22 | return wrapped_policy_->GetFilterBitsReader(contents); |
599 | 22 | } |
600 | | |
601 | 8 | FilterType GetFilterType() const override { return wrapped_policy_->GetFilterType(); } |
602 | | |
603 | 127 | const char* Name() const override { return name_.c_str(); } |
604 | | |
605 | 24 | const KeyTransformer* GetKeyTransformer() const override { return key_transformer_.get(); } |
606 | | |
607 | | private: |
608 | | std::string name_; |
609 | | std::unique_ptr<const rocksdb::FilterPolicy> wrapped_policy_; |
610 | | std::unique_ptr<KeyTransformer> key_transformer_; |
611 | | }; |
612 | | |
613 | | class KeyToConstTransformer : public FilterPolicy::KeyTransformer { |
614 | | public: |
615 | 3.00k | Slice Transform(Slice key) const override { |
616 | 3.00k | static const std::string kFilterKey("fixed-filter-key"); |
617 | 3.00k | return kFilterKey; |
618 | 3.00k | } |
619 | | }; |
620 | | |
621 | | class KeyIdentityTransformer : public FilterPolicy::KeyTransformer { |
622 | | public: |
623 | 3.00k | Slice Transform(Slice key) const override { return key; } |
624 | | }; |
625 | | |
626 | | class KeyIdentityAltEmptyTransformer : public FilterPolicy::KeyTransformer { |
627 | | public: |
628 | 2 | explicit KeyIdentityAltEmptyTransformer(int parity) : parity_(parity) {} |
629 | | |
630 | 600k | Slice Transform(Slice key) const override { |
631 | 600k | if (!key.empty() && (*(key.end() - 1) & 1) == parity_) { |
632 | | // Return empty filter key for key that ends with byte having parity of kParity. |
633 | 300k | return Slice(); |
634 | 300k | } |
635 | 300k | return key; |
636 | 300k | } |
637 | | |
638 | | private: |
639 | | int parity_; |
640 | | }; |
641 | | |
642 | | } // namespace |
643 | | |
644 | | void DBBloomFilterTest::CheckOtherFilterPoliciesSupport( |
645 | | Options* options, const int num_unique_keys, FilterPolicyCreator write_policy_creator, |
646 | 2 | FilterPolicyCreator new_policy_creator, const bool should_be_useful) { |
647 | 2 | options->statistics = rocksdb::CreateDBStatisticsForTests(); |
648 | 2 | BlockBasedTableOptions table_options; |
649 | 2 | table_options.filter_policy.reset(write_policy_creator()); |
650 | 2 | options->table_factory.reset(NewBlockBasedTableFactory(table_options)); |
651 | | |
652 | | // Create with policy provided by write_policy_creator. |
653 | 2 | CreateAndReopenWithCF({"test"}, *options); |
654 | | |
655 | 2.00k | for (int i = 0; i < num_unique_keys; i++) { |
656 | 2.00k | ASSERT_OK(Put(1, Key(i), Key(i))); |
657 | 2.00k | } |
658 | 2 | ASSERT_OK(Flush(1)); |
659 | 2 | size_t num_2nd_file_keys = 0; |
660 | 22 | for (int i = 0; i < num_unique_keys; i += 100) { |
661 | 20 | ASSERT_OK(Put(1, Key(i), Key(i))); |
662 | 20 | num_2nd_file_keys++; |
663 | 20 | } |
664 | 2 | ASSERT_OK(Flush(1)); |
665 | | |
666 | | // Check with filter policy provided by new_policy_creator as main filter policy. |
667 | 2 | table_options.filter_policy.reset(new_policy_creator()); |
668 | 2 | table_options.supported_filter_policies = |
669 | 2 | std::make_shared<BlockBasedTableOptions::FilterPoliciesMap>(); |
670 | 2 | const auto supported_policy = BlockBasedTableOptions::FilterPolicyPtr(write_policy_creator()); |
671 | 2 | table_options.supported_filter_policies->emplace(supported_policy->Name(), supported_policy); |
672 | 2 | options->table_factory.reset(NewBlockBasedTableFactory(table_options)); |
673 | 2 | ReopenWithColumnFamilies({"default", "test"}, *options); |
674 | | |
675 | | // Check if keys can be found. |
676 | 2.00k | for (int i = 0; i < num_unique_keys; i++) { |
677 | 2.00k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
678 | 2.00k | } |
679 | | |
680 | 2 | ASSERT_EQ( |
681 | 2 | TestGetTickerCount(*options, BLOOM_FILTER_CHECKED), |
682 | 2 | BloomFilterCheckedCount(num_unique_keys, num_2nd_file_keys)); |
683 | | |
684 | 2 | if (should_be_useful) { |
685 | 1 | ASSERT_GT( |
686 | 1 | TestGetTickerCount(*options, BLOOM_FILTER_USEFUL), |
687 | 1 | BloomFilterUsefulLowerBound(num_unique_keys, num_2nd_file_keys)); |
688 | 1 | } else { |
689 | 1 | ASSERT_EQ(TestGetTickerCount(*options, BLOOM_FILTER_USEFUL), 0); |
690 | 1 | } |
691 | 2 | } |
692 | | |
693 | 1 | TEST_F(DBBloomFilterTest, OtherFilterPoliciesSupport) { |
694 | 3 | FilterPolicyCreator constPolicyCreator = [] { |
695 | 3 | return new TestFilterPolicy("ConstFilterPolicy", std::make_unique<KeyToConstTransformer>()); |
696 | 3 | }; |
697 | 3 | FilterPolicyCreator identityPolicyCreator = [] { |
698 | 3 | return new TestFilterPolicy("IdentityFilterPolicy", std::make_unique<KeyIdentityTransformer>()); |
699 | 3 | }; |
700 | | |
701 | 1 | constexpr auto kNumKeys = 1000; |
702 | 1 | Options options = CurrentOptions(); |
703 | | |
704 | 1 | ASSERT_NO_FATALS(CheckOtherFilterPoliciesSupport( |
705 | 1 | &options, kNumKeys, constPolicyCreator, identityPolicyCreator, /* should_be_useful =*/ false)); |
706 | | // Bloom filter with const key transformer is not useful. |
707 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
708 | | |
709 | 1 | DestroyAndReopen(options); |
710 | | |
711 | 1 | ASSERT_NO_FATALS(CheckOtherFilterPoliciesSupport( |
712 | 1 | &options, kNumKeys, identityPolicyCreator, constPolicyCreator, /* should_be_useful =*/ true)); |
713 | 1 | ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
714 | 1 | } |
715 | | |
716 | 1 | TEST_F(DBBloomFilterTest, EmptyFilterKeys) { |
717 | 1 | constexpr auto kNumKeys = 100000; |
718 | | |
719 | | // We alternate parity to check empty filter key both as last and first in the filter block. |
720 | 3 | for (int parity = 0; parity < 2; ++parity) { |
721 | 2 | LOG(INFO) << "KeyIdentityAltEmptyTransformer parity: " << parity; |
722 | 2 | Options options = CurrentOptions(); |
723 | 2 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
724 | 2 | BlockBasedTableOptions table_options; |
725 | 2 | table_options.filter_policy = std::make_unique<TestFilterPolicy>( |
726 | 2 | "KeyIdentityAltEmpty", std::make_unique<KeyIdentityAltEmptyTransformer>(parity)); |
727 | 2 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
728 | | |
729 | 2 | CreateAndReopenWithCF({"test"}, options); |
730 | 2 | const auto kColumnFamilyIndex = 1; |
731 | | |
732 | 200k | for (int i = 0; i < kNumKeys; i++) { |
733 | 200k | ASSERT_OK(Put(kColumnFamilyIndex, Key(i), Key(i))); |
734 | 200k | } |
735 | 2 | ASSERT_OK(Flush(kColumnFamilyIndex)); |
736 | 2 | size_t num_2nd_file_keys = 0; |
737 | | // Independently of parity we want 2nd SST file to have non-empty filter keys, so we |
738 | | // compensate parity that all these keys are transformed to non-empty filter keys. |
739 | 2.00k | for (int i = parity ^ 1; i < kNumKeys; i += 100) { |
740 | 2.00k | ASSERT_OK(Put(1, Key(i), Key(i))); |
741 | 2.00k | num_2nd_file_keys++; |
742 | 2.00k | } |
743 | 2 | ASSERT_OK(Flush(kColumnFamilyIndex)); |
744 | | |
745 | 2 | TablePropertiesCollection props_collection; |
746 | 2 | ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[kColumnFamilyIndex], &props_collection)); |
747 | 4 | ASSERT_EQ(props_collection.size(), 2) << "We should have properties for 2 SST files"; |
748 | 2 | const auto props = *props_collection.begin(); |
749 | 4 | ASSERT_GE(props.second->num_filter_blocks, 2) << yb::Format( |
750 | 4 | "To test rolling over filter block we need at least 2 filter blocks, but got $0 for $1. " |
751 | 4 | "Increase kNumKeys in this test.", |
752 | 4 | props.second->num_filter_blocks, props.first); |
753 | | |
754 | 200k | for (int i = 0; i < kNumKeys; i++) { |
755 | 200k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
756 | 200k | } |
757 | | |
758 | | // We divide number of keys by 2 here, because half of keys in 1st file are transformed to empty |
759 | | // by KeyIdentityAltEmpty and bloom filter is not checked for them. But for the 2nd file all |
760 | | // keys are transformed into non-empty (because of their parity), so we don't need to divide |
761 | | // num_2nd_file_keys. |
762 | 2 | ASSERT_EQ( |
763 | 2 | TestGetTickerCount(options, BLOOM_FILTER_CHECKED), |
764 | 2 | BloomFilterCheckedCount(kNumKeys / 2, num_2nd_file_keys)); |
765 | | |
766 | 2 | ASSERT_GT( |
767 | 2 | TestGetTickerCount(options, BLOOM_FILTER_USEFUL), |
768 | 2 | BloomFilterUsefulLowerBound(kNumKeys / 2, num_2nd_file_keys)); |
769 | | |
770 | 2 | DestroyAndReopen(options); |
771 | 2 | } |
772 | 1 | } |
773 | | |
774 | | namespace { |
775 | | // A wrapped bloom over default FilterPolicy |
776 | | class WrappedBloom : public FilterPolicy { |
777 | | public: |
778 | | explicit WrappedBloom(int bits_per_key) : |
779 | | filter_(NewBloomFilterPolicy(bits_per_key)), |
780 | 1 | counter_(0) {} |
781 | | |
782 | 1 | ~WrappedBloom() { delete filter_; } |
783 | | |
784 | 14 | const char* Name() const override { return "WrappedRocksDbFilterPolicy"; } |
785 | | |
786 | | void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst) |
787 | 41 | const override { |
788 | 41 | std::unique_ptr<rocksdb::Slice[]> user_keys(new rocksdb::Slice[n]); |
789 | 10.0k | for (int i = 0; i < n; ++i) { |
790 | 10.0k | user_keys[i] = convertKey(keys[i]); |
791 | 10.0k | } |
792 | 41 | return filter_->CreateFilter(user_keys.get(), n, dst); |
793 | 41 | } |
794 | | |
795 | | bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter) |
796 | 20.0k | const override { |
797 | 20.0k | counter_++; |
798 | 20.0k | return filter_->KeyMayMatch(convertKey(key), filter); |
799 | 20.0k | } |
800 | | |
801 | 3 | uint32_t GetCounter() { return counter_; } |
802 | | |
803 | 1 | FilterType GetFilterType() const override { return filter_->GetFilterType(); } |
804 | | |
805 | | private: |
806 | | const FilterPolicy* filter_; |
807 | | mutable uint32_t counter_; |
808 | | |
809 | 30.0k | rocksdb::Slice convertKey(const rocksdb::Slice& key) const { |
810 | 30.0k | return key; |
811 | 30.0k | } |
812 | | }; |
813 | | } // namespace |
814 | | |
815 | 1 | TEST_F(DBBloomFilterTest, BloomFilterWrapper) { |
816 | 1 | Options options = CurrentOptions(); |
817 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
818 | | |
819 | 1 | BlockBasedTableOptions table_options; |
820 | 1 | WrappedBloom* policy = new WrappedBloom(10); |
821 | 1 | table_options.filter_policy.reset(policy); |
822 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
823 | | |
824 | 1 | CreateAndReopenWithCF({"pikachu"}, options); |
825 | | |
826 | 1 | const int maxKey = 10000; |
827 | 10.0k | for (int i = 0; i < maxKey; i++) { |
828 | 10.0k | ASSERT_OK(Put(1, Key(i), Key(i))); |
829 | 10.0k | } |
830 | | // Add a large key to make the file contain wide range |
831 | 1 | ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); |
832 | 1 | ASSERT_EQ(0U, policy->GetCounter()); |
833 | 1 | ASSERT_OK(Flush(1)); |
834 | | |
835 | | // Check if they can be found |
836 | 10.0k | for (int i = 0; i < maxKey; i++) { |
837 | 10.0k | ASSERT_EQ(Key(i), Get(1, Key(i))); |
838 | 10.0k | } |
839 | 1 | ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); |
840 | 1 | ASSERT_EQ(1U * maxKey, policy->GetCounter()); |
841 | | |
842 | | // Check if filter is useful |
843 | 10.0k | for (int i = 0; i < maxKey; i++) { |
844 | 10.0k | ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); |
845 | 10.0k | } |
846 | 1 | ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); |
847 | 1 | ASSERT_EQ(2U * maxKey, policy->GetCounter()); |
848 | 1 | } |
849 | | |
850 | | class SliceTransformLimitedDomain : public SliceTransform { |
851 | 6 | const char* Name() const override { return "SliceTransformLimitedDomain"; } |
852 | | |
853 | 10 | Slice Transform(const Slice& src) const override { |
854 | 10 | return Slice(src.data(), 5); |
855 | 10 | } |
856 | | |
857 | 17 | bool InDomain(const Slice& src) const override { |
858 | | // prefix will be x???? |
859 | 17 | return src.size() >= 5 && src[0] == 'x'; |
860 | 17 | } |
861 | | |
862 | 0 | bool InRange(const Slice& dst) const override { |
863 | | // prefix will be x???? |
864 | 0 | return dst.size() == 5 && dst[0] == 'x'; |
865 | 0 | } |
866 | | }; |
867 | | |
868 | 1 | TEST_F(DBBloomFilterTest, PrefixExtractorFullFilter) { |
869 | 1 | BlockBasedTableOptions bbto; |
870 | | // Full Filter Block |
871 | 1 | bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false)); |
872 | 1 | bbto.whole_key_filtering = false; |
873 | | |
874 | 1 | Options options = CurrentOptions(); |
875 | 1 | options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>(); |
876 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
877 | | |
878 | 1 | DestroyAndReopen(options); |
879 | | |
880 | 1 | ASSERT_OK(Put("x1111_AAAA", "val1")); |
881 | 1 | ASSERT_OK(Put("x1112_AAAA", "val2")); |
882 | 1 | ASSERT_OK(Put("x1113_AAAA", "val3")); |
883 | 1 | ASSERT_OK(Put("x1114_AAAA", "val4")); |
884 | | // Not in domain, wont be added to filter |
885 | 1 | ASSERT_OK(Put("zzzzz_AAAA", "val5")); |
886 | | |
887 | 1 | ASSERT_OK(Flush()); |
888 | | |
889 | 1 | ASSERT_EQ(Get("x1111_AAAA"), "val1"); |
890 | 1 | ASSERT_EQ(Get("x1112_AAAA"), "val2"); |
891 | 1 | ASSERT_EQ(Get("x1113_AAAA"), "val3"); |
892 | 1 | ASSERT_EQ(Get("x1114_AAAA"), "val4"); |
893 | | // Was not added to filter but rocksdb will try to read it from the filter |
894 | 1 | ASSERT_EQ(Get("zzzzz_AAAA"), "val5"); |
895 | 1 | } |
896 | | |
897 | 1 | TEST_F(DBBloomFilterTest, PrefixExtractorBlockFilter) { |
898 | 1 | BlockBasedTableOptions bbto; |
899 | | // Block Filter Block |
900 | 1 | bbto.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); |
901 | | |
902 | 1 | Options options = CurrentOptions(); |
903 | 1 | options.prefix_extractor = std::make_shared<SliceTransformLimitedDomain>(); |
904 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
905 | | |
906 | 1 | DestroyAndReopen(options); |
907 | | |
908 | 1 | ASSERT_OK(Put("x1113_AAAA", "val3")); |
909 | 1 | ASSERT_OK(Put("x1114_AAAA", "val4")); |
910 | | // Not in domain, wont be added to filter |
911 | 1 | ASSERT_OK(Put("zzzzz_AAAA", "val1")); |
912 | 1 | ASSERT_OK(Put("zzzzz_AAAB", "val2")); |
913 | 1 | ASSERT_OK(Put("zzzzz_AAAC", "val3")); |
914 | 1 | ASSERT_OK(Put("zzzzz_AAAD", "val4")); |
915 | | |
916 | 1 | ASSERT_OK(Flush()); |
917 | | |
918 | 1 | std::vector<std::string> iter_res; |
919 | 1 | auto iter = db_->NewIterator(ReadOptions()); |
920 | | // Seek to a key that was not in Domain |
921 | 5 | for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) { |
922 | 4 | iter_res.emplace_back(iter->value().ToString()); |
923 | 4 | } |
924 | | |
925 | 1 | std::vector<std::string> expected_res = {"val1", "val2", "val3", "val4"}; |
926 | 1 | ASSERT_EQ(iter_res, expected_res); |
927 | 1 | delete iter; |
928 | 1 | } |
929 | | |
930 | | #ifndef ROCKSDB_LITE |
931 | | class BloomStatsTestWithParam |
932 | | : public DBBloomFilterTest, |
933 | | public testing::WithParamInterface<std::tuple<bool, bool>> { |
934 | | public: |
935 | 6 | BloomStatsTestWithParam() { |
936 | 6 | use_block_table_ = std::get<0>(GetParam()); |
937 | 6 | use_block_based_builder_ = std::get<1>(GetParam()); |
938 | | |
939 | 6 | options_.create_if_missing = true; |
940 | 6 | options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4)); |
941 | 6 | options_.memtable_prefix_bloom_bits = 8 * 1024; |
942 | 6 | if (use_block_table_) { |
943 | 4 | BlockBasedTableOptions table_options; |
944 | 4 | table_options.hash_index_allow_collision = false; |
945 | 4 | table_options.filter_policy.reset( |
946 | 4 | NewBloomFilterPolicy(10, use_block_based_builder_)); |
947 | 4 | options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
948 | 2 | } else { |
949 | 2 | PlainTableOptions table_options; |
950 | 2 | options_.table_factory.reset(NewPlainTableFactory(table_options)); |
951 | 2 | } |
952 | | |
953 | 6 | perf_context.Reset(); |
954 | 6 | DestroyAndReopen(options_); |
955 | 6 | } |
956 | | |
957 | 6 | ~BloomStatsTestWithParam() { |
958 | 6 | perf_context.Reset(); |
959 | 6 | Destroy(options_); |
960 | 6 | } |
961 | | |
962 | | // Required if inheriting from testing::WithParamInterface<> |
963 | 6 | static void SetUpTestCase() {} |
964 | 6 | static void TearDownTestCase() {} |
965 | | |
966 | | bool use_block_table_; |
967 | | bool use_block_based_builder_; |
968 | | Options options_; |
969 | | }; |
970 | | |
971 | | // 1 Insert 2 K-V pairs into DB |
972 | | // 2 Call Get() for both keys - expext memtable bloom hit stat to be 2 |
973 | | // 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1 |
974 | | // 4 Call Flush() to create SST |
975 | | // 5 Call Get() for both keys - expext SST bloom hit stat to be 2 |
976 | | // 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1 |
977 | | // Test both: block and plain SST |
978 | 3 | TEST_P(BloomStatsTestWithParam, BloomStatsTest) { |
979 | 3 | std::string key1("AAAA"); |
980 | 3 | std::string key2("RXDB"); // not in DB |
981 | 3 | std::string key3("ZBRA"); |
982 | 3 | std::string value1("Value1"); |
983 | 3 | std::string value3("Value3"); |
984 | | |
985 | 3 | ASSERT_OK(Put(key1, value1, WriteOptions())); |
986 | 3 | ASSERT_OK(Put(key3, value3, WriteOptions())); |
987 | | |
988 | | // check memtable bloom stats |
989 | 3 | ASSERT_EQ(value1, Get(key1)); |
990 | 3 | ASSERT_EQ(1, perf_context.bloom_memtable_hit_count); |
991 | 3 | ASSERT_EQ(value3, Get(key3)); |
992 | 3 | ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); |
993 | 3 | ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); |
994 | | |
995 | 3 | ASSERT_EQ("NOT_FOUND", Get(key2)); |
996 | 3 | ASSERT_EQ(1, perf_context.bloom_memtable_miss_count); |
997 | 3 | ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); |
998 | | |
999 | | // sanity checks |
1000 | 3 | ASSERT_EQ(0, perf_context.bloom_sst_hit_count); |
1001 | 3 | ASSERT_EQ(0, perf_context.bloom_sst_miss_count); |
1002 | | |
1003 | 3 | ASSERT_OK(Flush()); |
1004 | | |
1005 | | // sanity checks |
1006 | 3 | ASSERT_EQ(0, perf_context.bloom_sst_hit_count); |
1007 | 3 | ASSERT_EQ(0, perf_context.bloom_sst_miss_count); |
1008 | | |
1009 | | // check SST bloom stats |
1010 | | // NOTE: hits per get differs because of code paths differences |
1011 | | // in BlockBasedTable::Get() |
1012 | 3 | int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1; |
1013 | 3 | ASSERT_EQ(value1, Get(key1)); |
1014 | 3 | ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count); |
1015 | 3 | ASSERT_EQ(value3, Get(key3)); |
1016 | 3 | ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count); |
1017 | | |
1018 | 3 | ASSERT_EQ("NOT_FOUND", Get(key2)); |
1019 | 3 | ASSERT_EQ(1, perf_context.bloom_sst_miss_count); |
1020 | 3 | } |
1021 | | |
1022 | | // Same scenario as in BloomStatsTest but using an iterator |
1023 | 3 | TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { |
1024 | 3 | std::string key1("AAAA"); |
1025 | 3 | std::string key2("RXDB"); // not in DB |
1026 | 3 | std::string key3("ZBRA"); |
1027 | 3 | std::string value1("Value1"); |
1028 | 3 | std::string value3("Value3"); |
1029 | | |
1030 | 3 | ASSERT_OK(Put(key1, value1, WriteOptions())); |
1031 | 3 | ASSERT_OK(Put(key3, value3, WriteOptions())); |
1032 | | |
1033 | 3 | unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions())); |
1034 | | |
1035 | | // check memtable bloom stats |
1036 | 3 | iter->Seek(key1); |
1037 | 3 | ASSERT_OK(iter->status()); |
1038 | 3 | ASSERT_TRUE(iter->Valid()); |
1039 | 3 | ASSERT_EQ(value1, iter->value().ToString()); |
1040 | 3 | ASSERT_EQ(1, perf_context.bloom_memtable_hit_count); |
1041 | 3 | ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); |
1042 | | |
1043 | 3 | iter->Seek(key3); |
1044 | 3 | ASSERT_OK(iter->status()); |
1045 | 3 | ASSERT_TRUE(iter->Valid()); |
1046 | 3 | ASSERT_EQ(value3, iter->value().ToString()); |
1047 | 3 | ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); |
1048 | 3 | ASSERT_EQ(0, perf_context.bloom_memtable_miss_count); |
1049 | | |
1050 | 3 | iter->Seek(key2); |
1051 | 3 | ASSERT_OK(iter->status()); |
1052 | 3 | ASSERT_TRUE(!iter->Valid()); |
1053 | 3 | ASSERT_EQ(1, perf_context.bloom_memtable_miss_count); |
1054 | 3 | ASSERT_EQ(2, perf_context.bloom_memtable_hit_count); |
1055 | | |
1056 | 3 | ASSERT_OK(Flush()); |
1057 | | |
1058 | 3 | iter.reset(dbfull()->NewIterator(ReadOptions())); |
1059 | | |
1060 | | // Check SST bloom stats |
1061 | 3 | iter->Seek(key1); |
1062 | 3 | ASSERT_OK(iter->status()); |
1063 | 3 | ASSERT_TRUE(iter->Valid()); |
1064 | 3 | ASSERT_EQ(value1, iter->value().ToString()); |
1065 | 3 | ASSERT_EQ(1, perf_context.bloom_sst_hit_count); |
1066 | | |
1067 | 3 | iter->Seek(key3); |
1068 | 3 | ASSERT_OK(iter->status()); |
1069 | 3 | ASSERT_TRUE(iter->Valid()); |
1070 | 3 | ASSERT_EQ(value3, iter->value().ToString()); |
1071 | 3 | ASSERT_EQ(2, perf_context.bloom_sst_hit_count); |
1072 | | |
1073 | 3 | iter->Seek(key2); |
1074 | 3 | ASSERT_OK(iter->status()); |
1075 | 3 | ASSERT_TRUE(!iter->Valid()); |
1076 | 3 | ASSERT_EQ(1, perf_context.bloom_sst_miss_count); |
1077 | 3 | ASSERT_EQ(2, perf_context.bloom_sst_hit_count); |
1078 | 3 | } |
1079 | | |
1080 | | INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam, |
1081 | | ::testing::Values(std::make_tuple(true, true), |
1082 | | std::make_tuple(true, false), |
1083 | | std::make_tuple(false, false))); |
1084 | | |
1085 | | namespace { |
1086 | 2 | void PrefixScanInit(DBBloomFilterTest* dbtest) { |
1087 | 2 | char buf[100]; |
1088 | 2 | std::string keystr; |
1089 | 2 | const int small_range_sstfiles = 5; |
1090 | 2 | const int big_range_sstfiles = 5; |
1091 | | |
1092 | | // Generate 11 sst files with the following prefix ranges. |
1093 | | // GROUP 0: [0,10] (level 1) |
1094 | | // GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0) |
1095 | | // GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0) |
1096 | | // |
1097 | | // A seek with the previous API would do 11 random I/Os (to all the |
1098 | | // files). With the new API and a prefix filter enabled, we should |
1099 | | // only do 2 random I/O, to the 2 files containing the key. |
1100 | | |
1101 | | // GROUP 0 |
1102 | 2 | snprintf(buf, sizeof(buf), "%02d______:start", 0); |
1103 | 2 | keystr = std::string(buf); |
1104 | 2 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1105 | 2 | snprintf(buf, sizeof(buf), "%02d______:end", 10); |
1106 | 2 | keystr = std::string(buf); |
1107 | 2 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1108 | 2 | ASSERT_OK(dbtest->Flush()); |
1109 | 2 | ASSERT_OK(dbtest->dbfull()->CompactRange( |
1110 | 2 | CompactRangeOptions(), nullptr, nullptr)); // move to level 1 |
1111 | | |
1112 | | // GROUP 1 |
1113 | 12 | for (int i = 1; i <= small_range_sstfiles; i++) { |
1114 | 10 | snprintf(buf, sizeof(buf), "%02d______:start", i); |
1115 | 10 | keystr = std::string(buf); |
1116 | 10 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1117 | 10 | snprintf(buf, sizeof(buf), "%02d______:end", i + 1); |
1118 | 10 | keystr = std::string(buf); |
1119 | 10 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1120 | 10 | ASSERT_OK(dbtest->Flush()); |
1121 | 10 | } |
1122 | | |
1123 | | // GROUP 2 |
1124 | 12 | for (int i = 1; i <= big_range_sstfiles; i++) { |
1125 | 10 | snprintf(buf, sizeof(buf), "%02d______:start", 0); |
1126 | 10 | keystr = std::string(buf); |
1127 | 10 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1128 | 10 | snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); |
1129 | 10 | keystr = std::string(buf); |
1130 | 10 | ASSERT_OK(dbtest->Put(keystr, keystr)); |
1131 | 10 | ASSERT_OK(dbtest->Flush()); |
1132 | 10 | } |
1133 | 2 | } |
1134 | | } // namespace |
1135 | | |
1136 | 1 | TEST_F(DBBloomFilterTest, PrefixScan) { |
1137 | 1 | XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, |
1138 | 1 | kSkipNoPrefix); |
1139 | 3 | while (ChangeFilterOptions()) { |
1140 | 2 | int count; |
1141 | 2 | Slice prefix; |
1142 | 2 | Slice key; |
1143 | 2 | char buf[100]; |
1144 | 2 | Iterator* iter; |
1145 | 2 | snprintf(buf, sizeof(buf), "03______:"); |
1146 | 2 | prefix = Slice(buf, 8); |
1147 | 2 | key = Slice(buf, 9); |
1148 | 2 | ASSERT_EQ(key.difference_offset(prefix), 8); |
1149 | 2 | ASSERT_EQ(prefix.difference_offset(key), 8); |
1150 | | // db configs |
1151 | 2 | env_->count_random_reads_ = true; |
1152 | 2 | Options options = CurrentOptions(); |
1153 | 2 | options.env = env_; |
1154 | 2 | options.prefix_extractor.reset(NewFixedPrefixTransform(8)); |
1155 | 2 | options.disable_auto_compactions = true; |
1156 | 2 | options.max_background_compactions = 2; |
1157 | 2 | options.create_if_missing = true; |
1158 | 2 | options.memtable_factory.reset(NewHashSkipListRepFactory(16)); |
1159 | | |
1160 | 2 | BlockBasedTableOptions table_options; |
1161 | 2 | table_options.filter_policy.reset(NewBloomFilterPolicy(10)); |
1162 | | // Fixed-size bloom filter is not used without block cache. |
1163 | 2 | table_options.no_block_cache = |
1164 | 2 | table_options.filter_policy->GetFilterType() != FilterPolicy::kFixedSizeFilter; |
1165 | 2 | table_options.whole_key_filtering = false; |
1166 | 2 | options.table_factory.reset(NewBlockBasedTableFactory(table_options)); |
1167 | | |
1168 | | // 11 RAND I/Os |
1169 | 2 | DestroyAndReopen(options); |
1170 | 2 | PrefixScanInit(this); |
1171 | 2 | count = 0; |
1172 | 2 | env_->random_read_counter_.Reset(); |
1173 | 2 | iter = db_->NewIterator(ReadOptions()); |
1174 | 6 | for (iter->Seek(prefix); iter->Valid(); iter->Next()) { |
1175 | 6 | if (!iter->key().starts_with(prefix)) { |
1176 | 2 | break; |
1177 | 2 | } |
1178 | 4 | count++; |
1179 | 4 | } |
1180 | 2 | ASSERT_OK(iter->status()); |
1181 | 2 | delete iter; |
1182 | 2 | ASSERT_EQ(count, 2); |
1183 | 2 | ASSERT_EQ(env_->random_read_counter_.Read(), 2); |
1184 | 2 | Close(); |
1185 | 2 | } // end of while |
1186 | 1 | XFUNC_TEST("", "dbtest_prefix", prefix_skip1, XFuncPoint::SetSkip, 0); |
1187 | 1 | } |
1188 | | |
1189 | 1 | TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) { |
1190 | 1 | Options options = CurrentOptions(); |
1191 | 1 | options.write_buffer_size = 64 * 1024; |
1192 | 1 | options.arena_block_size = 4 * 1024; |
1193 | 1 | options.target_file_size_base = 64 * 1024; |
1194 | 1 | options.level0_file_num_compaction_trigger = 2; |
1195 | 1 | options.level0_slowdown_writes_trigger = 2; |
1196 | 1 | options.level0_stop_writes_trigger = 4; |
1197 | 1 | options.max_bytes_for_level_base = 256 * 1024; |
1198 | 1 | options.max_write_buffer_number = 2; |
1199 | 1 | options.max_background_compactions = 8; |
1200 | 1 | options.max_background_flushes = 8; |
1201 | 1 | options.compression = kNoCompression; |
1202 | 1 | options.compaction_style = kCompactionStyleLevel; |
1203 | 1 | options.level_compaction_dynamic_level_bytes = true; |
1204 | 1 | BlockBasedTableOptions bbto; |
1205 | 1 | bbto.cache_index_and_filter_blocks = true; |
1206 | 1 | bbto.filter_policy.reset(NewBloomFilterPolicy(10, true)); |
1207 | 1 | bbto.whole_key_filtering = true; |
1208 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1209 | 1 | options.optimize_filters_for_hits = true; |
1210 | 1 | options.statistics = rocksdb::CreateDBStatisticsForTests(); |
1211 | 1 | CreateAndReopenWithCF({"mypikachu"}, options); |
1212 | | |
1213 | 1 | int numkeys = 200000; |
1214 | | |
1215 | | // Generate randomly shuffled keys, so the updates are almost |
1216 | | // random. |
1217 | 1 | std::vector<int> keys; |
1218 | 1 | keys.reserve(numkeys); |
1219 | 100k | for (int i = 0; i < numkeys; i += 2) { |
1220 | 100k | keys.push_back(i); |
1221 | 100k | } |
1222 | 1 | std::random_shuffle(std::begin(keys), std::end(keys)); |
1223 | | |
1224 | 1 | int num_inserted = 0; |
1225 | 100k | for (int key : keys) { |
1226 | 100k | ASSERT_OK(Put(1, Key(key), "val")); |
1227 | 100k | if (++num_inserted % 1000 == 0) { |
1228 | 100 | ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); |
1229 | 100 | ASSERT_OK(dbfull()->TEST_WaitForCompact()); |
1230 | 100 | } |
1231 | 100k | } |
1232 | 1 | ASSERT_OK(Put(1, Key(0), "val")); |
1233 | 1 | ASSERT_OK(Put(1, Key(numkeys), "val")); |
1234 | 1 | ASSERT_OK(Flush(1)); |
1235 | 1 | ASSERT_OK(dbfull()->TEST_WaitForCompact()); |
1236 | | |
1237 | 1 | if (NumTableFilesAtLevel(0, 1) == 0) { |
1238 | | // No Level 0 file. Create one. |
1239 | 0 | ASSERT_OK(Put(1, Key(0), "val")); |
1240 | 0 | ASSERT_OK(Put(1, Key(numkeys), "val")); |
1241 | 0 | ASSERT_OK(Flush(1)); |
1242 | 0 | ASSERT_OK(dbfull()->TEST_WaitForCompact()); |
1243 | 0 | } |
1244 | | |
1245 | 100k | for (int i = 1; i < numkeys; i += 2) { |
1246 | 100k | ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND"); |
1247 | 100k | } |
1248 | | |
1249 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0)); |
1250 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1)); |
1251 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP)); |
1252 | | |
1253 | | // Now we have three sorted run, L0, L5 and L6 with most files in L6 have |
1254 | | // no bloom filter. Most keys be checked bloom filters twice. |
1255 | 1 | ASSERT_GT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 65000 * 2); |
1256 | 1 | ASSERT_LT(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 120000 * 2); |
1257 | | |
1258 | 100k | for (int i = 0; i < numkeys; i += 2) { |
1259 | 100k | ASSERT_EQ(Get(1, Key(i)), "val"); |
1260 | 100k | } |
1261 | | |
1262 | | // Part 2 (read path): rewrite last level with blooms, then verify they get |
1263 | | // cached only if !optimize_filters_for_hits |
1264 | 1 | options.disable_auto_compactions = true; |
1265 | 1 | options.num_levels = 9; |
1266 | 1 | options.optimize_filters_for_hits = false; |
1267 | 1 | options.statistics = CreateDBStatisticsForTests(); |
1268 | 1 | bbto.block_cache.reset(); |
1269 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1270 | | |
1271 | 1 | ReopenWithColumnFamilies({"default", "mypikachu"}, options); |
1272 | 1 | MoveFilesToLevel(7 /* level */, 1 /* column family index */); |
1273 | | |
1274 | 1 | std::string value = Get(1, Key(0)); |
1275 | 1 | uint64_t prev_cache_filter_hits = |
1276 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); |
1277 | 1 | value = Get(1, Key(0)); |
1278 | 1 | ASSERT_EQ(prev_cache_filter_hits + 1, |
1279 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); |
1280 | | |
1281 | | // Now that we know the filter blocks exist in the last level files, see if |
1282 | | // filter caching is skipped for this optimization |
1283 | 1 | options.optimize_filters_for_hits = true; |
1284 | 1 | options.statistics = CreateDBStatisticsForTests(); |
1285 | 1 | bbto.block_cache.reset(); |
1286 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1287 | | |
1288 | 1 | ReopenWithColumnFamilies({"default", "mypikachu"}, options); |
1289 | | |
1290 | 1 | value = Get(1, Key(0)); |
1291 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); |
1292 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); |
1293 | 1 | ASSERT_EQ(2 /* index and data block */, |
1294 | 1 | TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
1295 | 1 | ASSERT_EQ(2, |
1296 | 1 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
1297 | 1 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
1298 | | |
1299 | | // Check filter block ignored for files preloaded during DB::Open() |
1300 | 1 | options.max_open_files = -1; |
1301 | 1 | options.statistics = CreateDBStatisticsForTests(); |
1302 | 1 | bbto.block_cache.reset(); |
1303 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1304 | | |
1305 | 1 | ReopenWithColumnFamilies({"default", "mypikachu"}, options); |
1306 | | |
1307 | 1 | uint64_t prev_cache_filter_misses = |
1308 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); |
1309 | 1 | prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); |
1310 | 1 | Get(1, Key(0)); |
1311 | 1 | ASSERT_EQ(prev_cache_filter_misses, |
1312 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); |
1313 | 1 | ASSERT_EQ(prev_cache_filter_hits, |
1314 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); |
1315 | | |
1316 | | // Check filter block ignored for file trivially-moved to bottom level |
1317 | 1 | bbto.block_cache.reset(); |
1318 | 1 | options.max_open_files = 100; // setting > -1 makes it not preload all files |
1319 | 1 | options.statistics = CreateDBStatisticsForTests(); |
1320 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1321 | | |
1322 | 1 | ReopenWithColumnFamilies({"default", "mypikachu"}, options); |
1323 | | |
1324 | 1 | ASSERT_OK(Put(1, Key(numkeys + 1), "val")); |
1325 | 1 | ASSERT_OK(Flush(1)); |
1326 | | |
1327 | 1 | int32_t trivial_move = 0; |
1328 | 1 | int32_t non_trivial_move = 0; |
1329 | 1 | rocksdb::SyncPoint::GetInstance()->SetCallBack( |
1330 | 1 | "DBImpl::BackgroundCompaction:TrivialMove", |
1331 | 1 | [&](void* arg) { trivial_move++; }); |
1332 | 1 | rocksdb::SyncPoint::GetInstance()->SetCallBack( |
1333 | 1 | "DBImpl::BackgroundCompaction:NonTrivial", |
1334 | 0 | [&](void* arg) { non_trivial_move++; }); |
1335 | 1 | rocksdb::SyncPoint::GetInstance()->EnableProcessing(); |
1336 | | |
1337 | 1 | CompactRangeOptions compact_options; |
1338 | 1 | compact_options.bottommost_level_compaction = |
1339 | 1 | BottommostLevelCompaction::kSkip; |
1340 | 1 | compact_options.change_level = true; |
1341 | 1 | compact_options.target_level = 7; |
1342 | 1 | ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)); |
1343 | | |
1344 | 1 | ASSERT_EQ(trivial_move, 1); |
1345 | 1 | ASSERT_EQ(non_trivial_move, 0); |
1346 | | |
1347 | 1 | prev_cache_filter_hits = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT); |
1348 | 1 | prev_cache_filter_misses = |
1349 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS); |
1350 | 1 | value = Get(1, Key(numkeys + 1)); |
1351 | 1 | ASSERT_EQ(prev_cache_filter_hits, |
1352 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); |
1353 | 1 | ASSERT_EQ(prev_cache_filter_misses, |
1354 | 1 | TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); |
1355 | | |
1356 | | // Check filter block not cached for iterator |
1357 | 1 | bbto.block_cache.reset(); |
1358 | 1 | options.statistics = CreateDBStatisticsForTests(); |
1359 | 1 | options.table_factory.reset(NewBlockBasedTableFactory(bbto)); |
1360 | | |
1361 | 1 | ReopenWithColumnFamilies({"default", "mypikachu"}, options); |
1362 | | |
1363 | 1 | std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions(), handles_[1])); |
1364 | 1 | iter->SeekToFirst(); |
1365 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS)); |
1366 | 1 | ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT)); |
1367 | 1 | ASSERT_EQ(2 /* index and data block */, |
1368 | 1 | TestGetTickerCount(options, BLOCK_CACHE_ADD)); |
1369 | 1 | ASSERT_EQ(2, |
1370 | 1 | TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_ADD) + |
1371 | 1 | TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_ADD)); |
1372 | 1 | } |
1373 | | |
1374 | | #endif // ROCKSDB_LITE |
1375 | | |
1376 | | } // namespace rocksdb |
1377 | | |
1378 | 13.2k | int main(int argc, char** argv) { |
1379 | 13.2k | rocksdb::port::InstallStackTraceHandler(); |
1380 | 13.2k | ::testing::InitGoogleTest(&argc, argv); |
1381 | 13.2k | return RUN_ALL_TESTS(); |
1382 | 13.2k | } |