/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | |
21 | | #include <stdio.h> |
22 | | |
23 | | #include <string> |
24 | | #include <vector> |
25 | | |
26 | | #include <gtest/gtest.h> |
27 | | |
28 | | #include "yb/rocksdb/db.h" |
29 | | #include "yb/rocksdb/env.h" |
30 | | #include "yb/rocksdb/iterator.h" |
31 | | #include "yb/rocksdb/slice_transform.h" |
32 | | #include "yb/rocksdb/table/block.h" |
33 | | #include "yb/rocksdb/table/block_builder.h" |
34 | | #include "yb/rocksdb/table/block_builder_internal.h" |
35 | | #include "yb/rocksdb/table/block_hash_index.h" |
36 | | #include "yb/rocksdb/table/block_internal.h" |
37 | | #include "yb/rocksdb/util/random.h" |
38 | | #include "yb/rocksdb/util/testutil.h" |
39 | | |
40 | | #include "yb/util/logging.h" |
41 | | #include "yb/util/random_util.h" |
42 | | #include "yb/util/test_macros.h" |
43 | | |
44 | | DECLARE_int32(v); |
45 | | |
46 | | namespace rocksdb { |
47 | | |
48 | | std::string GenerateKey(int primary_key, int secondary_key, int padding_size, |
49 | 999k | Random *rnd) { |
50 | 999k | char buf[50]; |
51 | 999k | char *p = &buf[0]; |
52 | 999k | snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key); |
53 | 999k | std::string k(p); |
54 | 999k | if (padding_size) { |
55 | 600k | k += RandomString(rnd, padding_size); |
56 | 600k | } |
57 | | |
58 | 999k | return k; |
59 | 999k | } |
60 | | |
61 | | // Generate random key value pairs. |
62 | | // The generated key will be sorted. You can tune the parameters to generated |
63 | | // different kinds of test key/value pairs for different scenario. |
64 | | void GenerateRandomKVs(std::vector<std::string> *keys, |
65 | | std::vector<std::string> *values, const int from, |
66 | | const int len, const int step = 1, |
67 | | const int padding_size = 0, |
68 | 6 | const int keys_share_prefix = 1) { |
69 | 6 | Random rnd(302); |
70 | | |
71 | | // generate different prefix |
72 | 400k | for (int i = from; i < from + len; i += step) { |
73 | | // generating keys that shares the prefix |
74 | 1.20M | for (int j = 0; j < keys_share_prefix; ++j) { |
75 | 800k | keys->emplace_back(GenerateKey(i, j, padding_size, &rnd)); |
76 | | |
77 | | // 100 bytes values |
78 | 800k | values->emplace_back(RandomString(&rnd, 100)); |
79 | 800k | } |
80 | 400k | } |
81 | 6 | } |
82 | | |
83 | | class BlockTest : public RocksDBTest {}; |
84 | | |
85 | | // block test |
86 | 1 | TEST_F(BlockTest, SimpleTest) { |
87 | 2 | for (auto key_value_encoding_format : kKeyValueEncodingFormatList) { |
88 | 2 | Random rnd(301); |
89 | 2 | Options options = Options(); |
90 | 2 | std::unique_ptr<InternalKeyComparator> ic; |
91 | 2 | ic.reset(new test::PlainInternalKeyComparator(options.comparator)); |
92 | | |
93 | 2 | std::vector<std::string> keys; |
94 | 2 | std::vector<std::string> values; |
95 | 2 | BlockBuilder builder(16, key_value_encoding_format); |
96 | 2 | int num_records = 100000; |
97 | | |
98 | 2 | GenerateRandomKVs(&keys, &values, 0, num_records); |
99 | | // add a bunch of records to a block |
100 | 200k | for (int i = 0; i < num_records; i++) { |
101 | 200k | builder.Add(keys[i], values[i]); |
102 | 200k | } |
103 | | |
104 | | // read serialized contents of the block |
105 | 2 | Slice rawblock = builder.Finish(); |
106 | | |
107 | | // create block reader |
108 | 2 | BlockContents contents; |
109 | 2 | contents.data = rawblock; |
110 | 2 | contents.cachable = false; |
111 | 2 | Block reader(std::move(contents)); |
112 | | |
113 | | // read contents of block sequentially |
114 | 2 | int count = 0; |
115 | 2 | InternalIterator *iter = reader.NewIterator(options.comparator, key_value_encoding_format); |
116 | 200k | for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) { |
117 | | |
118 | | // read kv from block |
119 | 200k | Slice k = iter->key(); |
120 | 200k | Slice v = iter->value(); |
121 | | |
122 | | // compare with lookaside array |
123 | 200k | ASSERT_EQ(k.ToString().compare(keys[count]), 0); |
124 | 200k | ASSERT_EQ(v.ToString().compare(values[count]), 0); |
125 | 200k | } |
126 | 2 | delete iter; |
127 | | |
128 | | // read block contents randomly |
129 | 2 | iter = reader.NewIterator(options.comparator, key_value_encoding_format); |
130 | 200k | for (int i = 0; i < num_records; i++) { |
131 | | |
132 | | // find a random key in the lookaside array |
133 | 200k | int index = rnd.Uniform(num_records); |
134 | 200k | Slice k(keys[index]); |
135 | | |
136 | | // search in block for this key |
137 | 200k | iter->Seek(k); |
138 | 200k | ASSERT_TRUE(iter->Valid()); |
139 | 200k | Slice v = iter->value(); |
140 | 200k | ASSERT_EQ(v.ToString().compare(values[index]), 0); |
141 | 200k | } |
142 | 2 | delete iter; |
143 | 2 | } |
144 | 1 | } |
145 | | |
146 | | // return the block contents |
147 | | BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder, |
148 | | const std::vector<std::string> &keys, |
149 | | const std::vector<std::string> &values, |
150 | | const KeyValueEncodingFormat key_value_encoding_format, |
151 | 4 | const int prefix_group_size = 1) { |
152 | 4 | builder->reset(new BlockBuilder(1 /* restart interval */, key_value_encoding_format)); |
153 | | |
154 | | // Add only half of the keys |
155 | 600k | for (size_t i = 0; i < keys.size(); ++i) { |
156 | 600k | (*builder)->Add(keys[i], values[i]); |
157 | 600k | } |
158 | 4 | Slice rawblock = (*builder)->Finish(); |
159 | | |
160 | 4 | BlockContents contents; |
161 | 4 | contents.data = rawblock; |
162 | 4 | contents.cachable = false; |
163 | | |
164 | 4 | return contents; |
165 | 4 | } |
166 | | |
167 | | void CheckBlockContents( |
168 | | BlockContents contents, const KeyValueEncodingFormat key_value_encoding_format, |
169 | 24 | const std::vector<std::string>& keys, const std::vector<std::string>& values) { |
170 | 24 | Block reader(std::move(contents)); |
171 | 24 | std::unique_ptr<InternalIterator> iter( |
172 | 24 | reader.NewIterator(BytewiseComparator(), key_value_encoding_format)); |
173 | | |
174 | | // Scan through the block and compare with the data loaded. |
175 | 24 | { |
176 | 24 | size_t i = 0; |
177 | 620k | for (iter->SeekToFirst(); iter->Valid(); i++, iter->Next()) { |
178 | 620k | const auto k = iter->key(); |
179 | 620k | const auto v = iter->value(); |
180 | 1.24M | ASSERT_EQ(k.ToString(), keys[i]) |
181 | 1.24M | << "i: " << i << "\nexpected: " << Slice(keys[i]).ToDebugHexString() |
182 | 1.24M | << "\n actual: " << k.ToDebugHexString(); |
183 | 1.24M | ASSERT_EQ(v.ToString(), values[i]) |
184 | 1.24M | << "i: " << i << "\nkey: " << Slice(keys[i]).ToDebugHexString() |
185 | 1.24M | << "\nexpected value: " << Slice(values[i]).ToDebugHexString() |
186 | 1.24M | << "\n actual value: " << v.ToDebugHexString(); |
187 | 620k | } |
188 | 24 | ASSERT_EQ(i, keys.size()); |
189 | 24 | } |
190 | | |
191 | | // Seek to and near existing keys. |
192 | 24 | std::string higher_key; |
193 | 620k | for (size_t i = 0; i < keys.size(); i++) { |
194 | 620k | const auto& key = keys[i]; |
195 | | |
196 | | // Duplicate keys should not appear in a real workload, but RocksDB does not prohibit duplicate |
197 | | // keys, although RocksDB's Seek behavior in that case is non-deterministic. |
198 | 620k | const bool is_duplicated_key = |
199 | 620k | (i > 0 && key == keys[i - 1]) || (i + 1 < keys.size() && key == keys[i + 1]); |
200 | | |
201 | 620k | iter->Seek(key); |
202 | 620k | ASSERT_OK(iter->status()); |
203 | 620k | ASSERT_TRUE(iter->Valid()); |
204 | | |
205 | 620k | const auto k = iter->key(); |
206 | 1.24M | ASSERT_EQ(k.ToString(), key) |
207 | 1.24M | << "i: " << i << " is_duplicated_key: " << is_duplicated_key |
208 | 1.24M | << "\nexpected key: " << Slice(key).ToDebugHexString() |
209 | 1.24M | << "\n actual key: " << k.ToDebugHexString(); |
210 | | |
211 | 620k | if (is_duplicated_key) { |
212 | | // For duplicated keys we might seek to a different key-value for the equal key. |
213 | 0 | continue; |
214 | 0 | } |
215 | | |
216 | 620k | const auto v = iter->value(); |
217 | 1.24M | ASSERT_EQ(v.ToString(), values[i]) |
218 | 1.24M | << "i: " << i |
219 | 1.24M | << "\nkey: " << Slice(key).ToDebugHexString() |
220 | 1.24M | << "\nexpected value: " << Slice(values[i]).ToDebugHexString() |
221 | 1.24M | << "\n actual value: " << v.ToDebugHexString(); |
222 | | |
223 | 620k | if (!key.empty()) { |
224 | | // Seek to slightly lower key. |
225 | 619k | const auto lower_key = Slice(key.data(), key.size() - 1); |
226 | 619k | auto j = i; |
227 | 622k | while (j > 0 && lower_key.compare(keys[j - 1]) <= 0) { |
228 | 2.20k | --j; |
229 | 2.20k | } |
230 | 619k | iter->Seek(lower_key); |
231 | 619k | ASSERT_OK(iter->status()); |
232 | 619k | ASSERT_TRUE(iter->Valid()); |
233 | | |
234 | 1.23M | ASSERT_EQ(iter->key().ToString(), keys[j]) |
235 | 1.23M | << "j: " << j |
236 | 1.23M | << "\nexpected key: " << Slice(keys[j]).ToDebugHexString() |
237 | 1.23M | << "\n actual key: " << iter->key().ToDebugHexString(); |
238 | 1.23M | ASSERT_EQ(iter->value().ToString(), values[j]) |
239 | 1.23M | << "j: " << j |
240 | 1.23M | << "\nkey: " << iter->key().ToDebugHexString() |
241 | 1.23M | << "\nexpected value: " << Slice(values[j]).ToDebugHexString() |
242 | 1.23M | << "\n actual value: " << iter->value().ToDebugHexString(); |
243 | 620k | } |
244 | | |
245 | | // Seek to slightly higher key. |
246 | 620k | higher_key = key; |
247 | 620k | higher_key.push_back(0); |
248 | 620k | auto j = i + 1; |
249 | 620k | while (j < keys.size() && keys[j].compare(higher_key) < 0) { |
250 | 0 | ++j; |
251 | 0 | } |
252 | 620k | iter->Seek(higher_key); |
253 | 620k | ASSERT_OK(iter->status()); |
254 | 620k | if (j == keys.size()) { |
255 | 24 | ASSERT_FALSE(iter->Valid()); |
256 | 24 | continue; |
257 | 619k | } |
258 | 619k | ASSERT_TRUE(iter->Valid()); |
259 | | |
260 | 1.23M | ASSERT_EQ(iter->key().ToString(), keys[j]) |
261 | 1.23M | << "j: " << j |
262 | 1.23M | << "\nexpected key: " << Slice(keys[j]).ToDebugHexString() |
263 | 1.23M | << "\n actual key: " << iter->key().ToDebugHexString(); |
264 | 1.23M | ASSERT_EQ(iter->value().ToString(), values[j]) |
265 | 1.23M | << "j: " << j |
266 | 1.23M | << "\nkey: " << Slice(keys[j]).ToDebugHexString() |
267 | 1.23M | << "\nexpected value: " << Slice(values[j]).ToDebugHexString() |
268 | 1.23M | << "\n actual value: " << iter->value().ToDebugHexString(); |
269 | 619k | } |
270 | 24 | } |
271 | | |
272 | | void CheckBlockContents(BlockContents contents, |
273 | | const KeyValueEncodingFormat key_value_encoding_format, |
274 | | const int max_key, |
275 | | const std::vector<std::string> &keys, |
276 | 4 | const std::vector<std::string> &values) { |
277 | 4 | ASSERT_EQ(keys.size(), values.size()); |
278 | | |
279 | 4 | CheckBlockContents( |
280 | 4 | BlockContents(contents.data, contents.cachable, contents.compression_type), |
281 | 4 | key_value_encoding_format, keys, values); |
282 | | |
283 | 4 | const size_t prefix_size = 6; |
284 | | // create block reader |
285 | 4 | BlockContents contents_ref(contents.data, contents.cachable, |
286 | 4 | contents.compression_type); |
287 | 4 | Block reader1(std::move(contents)); |
288 | 4 | Block reader2(std::move(contents_ref)); |
289 | | |
290 | 4 | std::unique_ptr<const SliceTransform> prefix_extractor( |
291 | 4 | NewFixedPrefixTransform(prefix_size)); |
292 | | |
293 | 4 | { |
294 | 4 | auto iter1 = reader1.NewIterator(nullptr, key_value_encoding_format); |
295 | 4 | auto iter2 = reader1.NewIterator(nullptr, key_value_encoding_format); |
296 | 4 | reader1.SetBlockHashIndex(CreateBlockHashIndexOnTheFly( |
297 | 4 | iter1, iter2, static_cast<uint32_t>(keys.size()), BytewiseComparator(), |
298 | 4 | prefix_extractor.get())); |
299 | | |
300 | 4 | delete iter1; |
301 | 4 | delete iter2; |
302 | 4 | } |
303 | | |
304 | 4 | std::unique_ptr<InternalIterator> hash_iter( |
305 | 4 | reader1.NewIterator(BytewiseComparator(), key_value_encoding_format, nullptr, false)); |
306 | | |
307 | 4 | std::unique_ptr<InternalIterator> regular_iter( |
308 | 4 | reader2.NewIterator(BytewiseComparator(), key_value_encoding_format)); |
309 | | |
310 | | // Seek existent keys |
311 | 600k | for (size_t i = 0; i < keys.size(); i++) { |
312 | 600k | hash_iter->Seek(keys[i]); |
313 | 600k | ASSERT_OK(hash_iter->status()); |
314 | 600k | ASSERT_TRUE(hash_iter->Valid()); |
315 | | |
316 | 600k | Slice v = hash_iter->value(); |
317 | 600k | ASSERT_EQ(v.ToString().compare(values[i]), 0); |
318 | 600k | } |
319 | | |
320 | | // Seek non-existent keys. |
321 | | // For hash index, if no key with a given prefix is not found, iterator will |
322 | | // simply be set as invalid; whereas the binary search based iterator will |
323 | | // return the one that is closest. |
324 | 200k | for (int i = 1; i < max_key - 1; i += 2) { |
325 | 199k | auto key = GenerateKey(i, 0, 0, nullptr); |
326 | 199k | hash_iter->Seek(key); |
327 | 199k | ASSERT_TRUE(!hash_iter->Valid()); |
328 | | |
329 | 199k | regular_iter->Seek(key); |
330 | 199k | ASSERT_TRUE(regular_iter->Valid()); |
331 | 199k | } |
332 | 4 | } |
333 | | |
334 | | // In this test case, no two key share same prefix. |
335 | 1 | TEST_F(BlockTest, SimpleIndexHash) { |
336 | 1 | const int kMaxKey = 100000; |
337 | | |
338 | 2 | for (auto key_value_encoding_format : kKeyValueEncodingFormatList) { |
339 | 2 | std::vector<std::string> keys; |
340 | 2 | std::vector<std::string> values; |
341 | 2 | GenerateRandomKVs(&keys, &values, 0 /* first key id */, |
342 | 2 | kMaxKey /* last key id */, 2 /* step */, |
343 | 2 | 8 /* padding size (8 bytes randomly generated suffix) */); |
344 | | |
345 | 2 | std::unique_ptr<BlockBuilder> builder; |
346 | 2 | auto contents = GetBlockContents(&builder, keys, values, key_value_encoding_format); |
347 | | |
348 | 2 | CheckBlockContents( |
349 | 2 | std::move(contents), key_value_encoding_format, kMaxKey, keys, values); |
350 | 2 | } |
351 | 1 | } |
352 | | |
353 | 1 | TEST_F(BlockTest, IndexHashWithSharedPrefix) { |
354 | 1 | const int kMaxKey = 100000; |
355 | | // for each prefix, there will be 5 keys starts with it. |
356 | 1 | const int kPrefixGroup = 5; |
357 | | |
358 | 2 | for (auto key_value_encoding_format : kKeyValueEncodingFormatList) { |
359 | 2 | std::vector<std::string> keys; |
360 | 2 | std::vector<std::string> values; |
361 | | // Generate keys with same prefix. |
362 | 2 | GenerateRandomKVs( |
363 | 2 | &keys, &values, 0, // first key id |
364 | 2 | kMaxKey, // last key id |
365 | 2 | 2, // step |
366 | 2 | 10, // padding size, |
367 | 2 | kPrefixGroup); |
368 | | |
369 | 2 | std::unique_ptr<BlockBuilder> builder; |
370 | 2 | auto contents = |
371 | 2 | GetBlockContents(&builder, keys, values, key_value_encoding_format, kPrefixGroup); |
372 | | |
373 | 2 | CheckBlockContents( |
374 | 2 | std::move(contents), key_value_encoding_format, kMaxKey, keys, values); |
375 | 2 | } |
376 | 1 | } |
377 | | |
378 | | namespace { |
379 | | |
380 | 84 | std::string GetPaddedNum(int i) { |
381 | 84 | return StringPrintf("%010d", i); |
382 | 84 | } |
383 | | |
384 | | yb::Result<std::string> GetMiddleKey( |
385 | | const KeyValueEncodingFormat key_value_encoding_format, const int num_keys, |
386 | 12 | const int block_restart_interval) { |
387 | 12 | BlockBuilder builder(block_restart_interval, key_value_encoding_format); |
388 | | |
389 | 86 | for (int i = 1; i <= num_keys; ++i) { |
390 | 74 | const auto padded_num = GetPaddedNum(i); |
391 | 74 | builder.Add("k" + padded_num, "v" + padded_num); |
392 | 74 | } |
393 | | |
394 | 12 | BlockContents contents; |
395 | 12 | contents.data = builder.Finish(); |
396 | 12 | contents.cachable = false; |
397 | 12 | Block reader(std::move(contents)); |
398 | | |
399 | 10 | return VERIFY_RESULT(reader.GetMiddleKey(key_value_encoding_format)).ToString(); |
400 | 12 | } |
401 | | |
402 | | void CheckMiddleKey( |
403 | | const KeyValueEncodingFormat key_value_encoding_format, const int num_keys, |
404 | 10 | const int block_restart_interval, const int expected_middle_key) { |
405 | 10 | const auto middle_key = |
406 | 10 | ASSERT_RESULT(GetMiddleKey(key_value_encoding_format, num_keys, block_restart_interval)); |
407 | 20 | ASSERT_EQ(middle_key, "k" + GetPaddedNum(expected_middle_key)) << "For num_keys = " << num_keys; |
408 | 10 | } |
409 | | |
410 | | } // namespace |
411 | | |
412 | 1 | TEST_F(BlockTest, GetMiddleKey) { |
413 | 1 | const auto block_restart_interval = 1; |
414 | | |
415 | 2 | for (auto key_value_encoding_format : kKeyValueEncodingFormatList) { |
416 | 2 | const auto empty_block_middle_key = |
417 | 2 | GetMiddleKey(key_value_encoding_format, /* num_keys =*/0, block_restart_interval); |
418 | 4 | ASSERT_NOK(empty_block_middle_key) << empty_block_middle_key; |
419 | 4 | ASSERT_TRUE(empty_block_middle_key.status().IsIncomplete()) << empty_block_middle_key; |
420 | | |
421 | 2 | CheckMiddleKey( |
422 | 2 | key_value_encoding_format, /* num_keys = */ 1, block_restart_interval, |
423 | 2 | /* expected_middle_key = */ 1); |
424 | 2 | CheckMiddleKey( |
425 | 2 | key_value_encoding_format, /* num_keys = */ 2, block_restart_interval, |
426 | 2 | /* expected_middle_key = */ 1); |
427 | 2 | CheckMiddleKey( |
428 | 2 | key_value_encoding_format, /* num_keys = */ 3, block_restart_interval, |
429 | 2 | /* expected_middle_key = */ 2); |
430 | 2 | CheckMiddleKey( |
431 | 2 | key_value_encoding_format, /* num_keys = */ 15, block_restart_interval, |
432 | 2 | /* expected_middle_key = */ 8); |
433 | 2 | CheckMiddleKey( |
434 | 2 | key_value_encoding_format, /* num_keys = */ 16, block_restart_interval, |
435 | 2 | /* expected_middle_key = */ 8); |
436 | 2 | } |
437 | 1 | } |
438 | | |
439 | 1 | TEST_F(BlockTest, EncodeThreeSharedPartsSizes) { |
440 | 1 | constexpr auto kNumIters = 100000; |
441 | | |
442 | 1 | constexpr auto kBigMaxCompSize = std::numeric_limits<uint32_t>::max() / 16; |
443 | 1 | constexpr auto kSmallMaxCompSize = 16; |
444 | | |
445 | 700k | auto gen_comp_size = []() { |
446 | 349k | return yb::RandomActWithProbability(0.5) ? 0 |
447 | 350k | : yb::RandomActWithProbability(0.5) |
448 | 174k | ? yb::RandomUniformInt<uint32_t>(0, kBigMaxCompSize) |
449 | 175k | : yb::RandomUniformInt<uint32_t>(0, kSmallMaxCompSize); |
450 | 700k | }; |
451 | 1 | std::string buffer; |
452 | | |
453 | 100k | for (auto i = 0; i < kNumIters; ++i) { |
454 | 100k | YB_LOG_EVERY_N_SECS(INFO, 5) << "Iterations completed: " << i; |
455 | 100k | buffer.clear(); |
456 | | // Simulate there is something in buffer before encoded key. |
457 | 100k | buffer.append('X', yb::RandomUniformInt<size_t>(0, 8)); |
458 | 100k | const auto encoded_sizes_start_offset = buffer.size(); |
459 | | |
460 | 100k | size_t shared_prefix_size = gen_comp_size(); |
461 | 100k | size_t last_internal_component_reuse_size; |
462 | 100k | bool is_last_internal_component_inc; |
463 | 100k | switch (yb::RandomUniformInt<int>(0, 2)) { |
464 | 33.2k | case 0: |
465 | 33.2k | last_internal_component_reuse_size = 0; |
466 | 33.2k | is_last_internal_component_inc = false; |
467 | 33.2k | break; |
468 | 33.3k | case 1: |
469 | 33.3k | last_internal_component_reuse_size = kLastInternalComponentSize; |
470 | 33.3k | is_last_internal_component_inc = false; |
471 | 33.3k | break; |
472 | 33.3k | case 2: |
473 | 33.3k | last_internal_component_reuse_size = kLastInternalComponentSize; |
474 | 33.3k | is_last_internal_component_inc = true; |
475 | 33.3k | break; |
476 | 0 | default: |
477 | 0 | FAIL(); |
478 | 100k | } |
479 | 100k | ComponentSizes rest_sizes { |
480 | 100k | .prev_key_non_shared_1_size = gen_comp_size(), |
481 | 100k | .non_shared_1_size = gen_comp_size(), |
482 | 100k | .shared_middle_size = gen_comp_size(), |
483 | 100k | .prev_key_non_shared_2_size = gen_comp_size(), |
484 | 100k | .non_shared_2_size = gen_comp_size(), |
485 | 100k | }; |
486 | 100k | if (rest_sizes.shared_middle_size == 0) { |
487 | 51.4k | rest_sizes.non_shared_2_size = 0; |
488 | 51.4k | rest_sizes.prev_key_non_shared_2_size = 0; |
489 | 51.4k | } |
490 | 100k | const auto prev_key_size = |
491 | 100k | shared_prefix_size + rest_sizes.prev_key_non_shared_1_size + rest_sizes.shared_middle_size + |
492 | 100k | rest_sizes.prev_key_non_shared_2_size + last_internal_component_reuse_size; |
493 | 100k | const auto key_size = shared_prefix_size + rest_sizes.non_shared_1_size + |
494 | 100k | rest_sizes.shared_middle_size + rest_sizes.non_shared_2_size + |
495 | 100k | last_internal_component_reuse_size; |
496 | 100k | const auto value_size = gen_comp_size(); |
497 | | |
498 | 100k | EncodeThreeSharedPartsSizes( |
499 | 100k | shared_prefix_size, last_internal_component_reuse_size, is_last_internal_component_inc, |
500 | 100k | rest_sizes, prev_key_size, key_size, value_size, &buffer); |
501 | | |
502 | 100k | const auto encoded_sizes_end_offset = buffer.size(); |
503 | 100k | const auto payload_size = |
504 | 100k | rest_sizes.non_shared_1_size + rest_sizes.non_shared_2_size + value_size; |
505 | | |
506 | 100k | uint32_t decoded_shared_prefix_size, decoded_non_shared_1_size, decoded_non_shared_2_size, |
507 | 100k | decoded_shared_last_component_size, decoded_value_size; |
508 | 100k | bool decoded_is_something_shared; |
509 | 100k | int64_t decoded_non_shared_1_size_delta, decoded_non_shared_2_size_delta; |
510 | 100k | uint64_t decoded_shared_last_component_increase; |
511 | | |
512 | 100k | auto* result = DecodeEntryThreeSharedParts( |
513 | 100k | buffer.data() + encoded_sizes_start_offset, buffer.data() + buffer.size() + payload_size, |
514 | 100k | buffer.data(), &decoded_shared_prefix_size, &decoded_non_shared_1_size, |
515 | 100k | &decoded_non_shared_1_size_delta, &decoded_is_something_shared, &decoded_non_shared_2_size, |
516 | 100k | &decoded_non_shared_2_size_delta, &decoded_shared_last_component_size, |
517 | 100k | &decoded_shared_last_component_increase, &decoded_value_size); |
518 | | |
519 | 100k | ASSERT_NE(result, nullptr); |
520 | 100k | EXPECT_EQ(decoded_shared_last_component_size, last_internal_component_reuse_size); |
521 | 100k | EXPECT_EQ(decoded_shared_last_component_increase, 0x100 * is_last_internal_component_inc); |
522 | 100k | EXPECT_EQ( |
523 | 100k | decoded_is_something_shared, |
524 | 100k | shared_prefix_size + rest_sizes.shared_middle_size + last_internal_component_reuse_size > |
525 | 100k | 0); |
526 | 100k | EXPECT_EQ(decoded_non_shared_1_size, rest_sizes.non_shared_1_size); |
527 | | |
528 | 100k | if (decoded_is_something_shared) { |
529 | 91.1k | EXPECT_EQ(decoded_non_shared_2_size, rest_sizes.non_shared_2_size); |
530 | 182k | EXPECT_EQ( |
531 | 182k | rest_sizes.prev_key_non_shared_1_size + decoded_non_shared_1_size_delta, |
532 | 182k | rest_sizes.non_shared_1_size) |
533 | 182k | << " prev_key_non_shared_1_size: " << rest_sizes.prev_key_non_shared_1_size |
534 | 182k | << " decoded_non_shared_1_size_delta: " << decoded_non_shared_1_size_delta |
535 | 182k | << " non_shared_1_size: " << rest_sizes.non_shared_1_size; |
536 | 182k | EXPECT_EQ( |
537 | 182k | rest_sizes.prev_key_non_shared_2_size + decoded_non_shared_2_size_delta, |
538 | 182k | rest_sizes.non_shared_2_size) |
539 | 182k | << " prev_key_non_shared_2_size: " << rest_sizes.prev_key_non_shared_2_size |
540 | 182k | << " decoded_non_shared_2_size_delta: " << decoded_non_shared_2_size_delta |
541 | 182k | << " non_shared_2_size: " << rest_sizes.non_shared_2_size; |
542 | 91.1k | EXPECT_EQ(decoded_shared_prefix_size, shared_prefix_size); |
543 | 91.1k | } |
544 | | |
545 | 100k | EXPECT_EQ(decoded_value_size, value_size); |
546 | 100k | EXPECT_EQ(result, buffer.data() + encoded_sizes_end_offset); |
547 | | |
548 | 100k | if (testing::Test::HasFailure()) { |
549 | 0 | FLAGS_v = 4; |
550 | 0 | EncodeThreeSharedPartsSizes( |
551 | 0 | shared_prefix_size, last_internal_component_reuse_size, is_last_internal_component_inc, |
552 | 0 | rest_sizes, prev_key_size, key_size, value_size, &buffer); |
553 | 0 | result = DecodeEntryThreeSharedParts( |
554 | 0 | buffer.data() + encoded_sizes_start_offset, buffer.data() + buffer.capacity(), |
555 | 0 | buffer.data(), &decoded_shared_prefix_size, &decoded_non_shared_1_size, |
556 | 0 | &decoded_non_shared_1_size_delta, &decoded_is_something_shared, |
557 | 0 | &decoded_non_shared_2_size, &decoded_non_shared_2_size_delta, |
558 | 0 | &decoded_shared_last_component_size, &decoded_shared_last_component_increase, |
559 | 0 | &decoded_value_size); |
560 | 0 | FAIL(); |
561 | 0 | } |
562 | 100k | } |
563 | 1 | } |
564 | | |
565 | 1 | TEST_F(BlockTest, EncodeThreeSharedParts) { |
566 | 1 | constexpr auto kNumIters = 20; |
567 | 1 | constexpr auto kKeysPerBlock = 1000; |
568 | 1 | constexpr auto kMaxKeySize = 1_KB; |
569 | 1 | constexpr auto kMaxValueSize = 1_KB; |
570 | 1 | constexpr auto kBlockRestartInterval = 16; |
571 | 1 | constexpr auto kKeyValueEncodingFormat = |
572 | 1 | KeyValueEncodingFormat::kKeyDeltaEncodingThreeSharedParts; |
573 | | |
574 | 100k | auto gen_comp_size = [](size_t* size_left) { |
575 | 100k | auto result = |
576 | 50.1k | yb::RandomActWithProbability(0.5) ? 0 : yb::RandomUniformInt<size_t>(0, *size_left); |
577 | 100k | CHECK_GE(*size_left, result); |
578 | 100k | *size_left -= result; |
579 | 100k | return result; |
580 | 100k | }; |
581 | 60.0k | auto append_random_string = [](std::string* buf, size_t size) { |
582 | 16.3M | while (size > 0) { |
583 | 16.3M | *buf += yb::RandomUniformInt<char>(); |
584 | 16.3M | size--; |
585 | 16.3M | } |
586 | 60.0k | }; |
587 | | |
588 | 1 | std::vector<std::string> keys; |
589 | 21 | for (auto iter = 0; iter < kNumIters; ++iter) { |
590 | 20 | const bool use_delta_encoding = iter % 2 == 0; |
591 | 20 | keys.clear(); |
592 | 20 | { |
593 | 20 | const std::string empty; |
594 | 20 | std::string key; |
595 | 20.0k | for (auto i = 0; i < kKeysPerBlock; ++i) { |
596 | 19.9k | const auto& prev_key = keys.empty() ? empty : keys.back(); |
597 | | |
598 | | // Generate key based on prev_key in the following format: |
599 | | // <shared_prefix><non_shared_1><shared_middle><non_shared_2><shared_last> |
600 | | // Each component might be empty. |
601 | | // shared_prefix, shared_middle, shared_last are taken from the beginning, middle and |
602 | | // end of prev_key and have random size (+ random position for the middle). |
603 | | // |
604 | | // Since we generate non_shared_1 and non_shared_2 randomly we can still have |
605 | | // beginning/end/whole of those shared with prev key, but that doesn't matter, because |
606 | | // we will still have enough non-shared components for the test. |
607 | 20.0k | auto prev_key_size_left = prev_key.size(); |
608 | 20.0k | const auto shared_prefix_size = gen_comp_size(&prev_key_size_left); |
609 | 20.0k | const auto shared_middle_size = gen_comp_size(&prev_key_size_left); |
610 | 20.0k | const auto shared_last_size = gen_comp_size(&prev_key_size_left); |
611 | 20.0k | const auto shared_size = shared_prefix_size + shared_middle_size + shared_last_size; |
612 | | |
613 | 20.0k | auto key_size_left = yb::RandomUniformInt<size_t>(0, kMaxKeySize - shared_size); |
614 | 20.0k | const auto non_shared_1_size = gen_comp_size(&key_size_left); |
615 | 20.0k | const auto non_shared_2_size = key_size_left; |
616 | 0 | DVLOG(4) << "shared_prefix_size: " << shared_prefix_size |
617 | 0 | << " shared_middle_size: " << shared_middle_size |
618 | 0 | << " shared_last_size: " << shared_last_size |
619 | 0 | << " non_shared_1_size: " << non_shared_1_size |
620 | 0 | << " non_shared_2_size: " << non_shared_2_size; |
621 | | |
622 | 20.0k | const auto prev_key_non_shared_1_size = gen_comp_size(&prev_key_size_left); |
623 | 20.0k | const auto prev_key_non_shared_2_size = prev_key_size_left; |
624 | | |
625 | 20.0k | key = prev_key.substr(0, shared_prefix_size); |
626 | 20.0k | append_random_string(&key, non_shared_1_size); |
627 | 20.0k | key.append( |
628 | 20.0k | prev_key.substr(shared_prefix_size + prev_key_non_shared_1_size, shared_middle_size)); |
629 | 20.0k | append_random_string(&key, non_shared_2_size); |
630 | 20.0k | key.append(prev_key.substr( |
631 | 20.0k | shared_prefix_size + prev_key_non_shared_1_size + shared_middle_size + |
632 | 20.0k | prev_key_non_shared_2_size, |
633 | 20.0k | shared_last_size)); |
634 | | |
635 | 20.0k | keys.emplace_back(std::move(key)); |
636 | 20.0k | } |
637 | 20 | } |
638 | | |
639 | 20 | std::sort(keys.begin(), keys.end()); |
640 | | |
641 | 20 | BlockBuilder builder(kBlockRestartInterval, kKeyValueEncodingFormat, use_delta_encoding); |
642 | | |
643 | 20 | std::vector<std::string> values; |
644 | 20 | { |
645 | 20 | std::string value; |
646 | 20.0k | for (const auto& key : keys) { |
647 | 20.0k | value.clear(); |
648 | 20.0k | append_random_string(&value, yb::RandomUniformInt<size_t>(0, kMaxValueSize)); |
649 | 20.0k | builder.Add(key, value); |
650 | 20.0k | values.emplace_back(std::move(value)); |
651 | 20.0k | } |
652 | 20 | } |
653 | | |
654 | 20 | auto rawblock = builder.Finish(); |
655 | | |
656 | 20 | BlockContents contents; |
657 | 20 | contents.data = rawblock; |
658 | 20 | contents.cachable = false; |
659 | | |
660 | 20 | CheckBlockContents(std::move(contents), kKeyValueEncodingFormat, keys, values); |
661 | 20 | } |
662 | 1 | } |
663 | | |
664 | | } // namespace rocksdb |
665 | | |
666 | 13.2k | int main(int argc, char **argv) { |
667 | 13.2k | ::testing::InitGoogleTest(&argc, argv); |
668 | 13.2k | google::ParseCommandLineFlags(&argc, &argv, true); |
669 | 13.2k | return RUN_ALL_TESTS(); |
670 | 13.2k | } |