YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_test.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
21
#include <stdio.h>
22
23
#include <string>
24
#include <vector>
25
26
#include <gtest/gtest.h>
27
28
#include "yb/rocksdb/db.h"
29
#include "yb/rocksdb/env.h"
30
#include "yb/rocksdb/iterator.h"
31
#include "yb/rocksdb/slice_transform.h"
32
#include "yb/rocksdb/table/block.h"
33
#include "yb/rocksdb/table/block_builder.h"
34
#include "yb/rocksdb/table/block_builder_internal.h"
35
#include "yb/rocksdb/table/block_hash_index.h"
36
#include "yb/rocksdb/table/block_internal.h"
37
#include "yb/rocksdb/util/random.h"
38
#include "yb/rocksdb/util/testutil.h"
39
40
#include "yb/util/logging.h"
41
#include "yb/util/random_util.h"
42
#include "yb/util/test_macros.h"
43
44
DECLARE_int32(v);
45
46
namespace rocksdb {
47
48
std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
49
999k
                        Random *rnd) {
50
999k
  char buf[50];
51
999k
  char *p = &buf[0];
52
999k
  snprintf(buf, sizeof(buf), "%6d%4d", primary_key, secondary_key);
53
999k
  std::string k(p);
54
999k
  if (padding_size) {
55
600k
    k += RandomString(rnd, padding_size);
56
600k
  }
57
58
999k
  return k;
59
999k
}
60
61
// Generate random key value pairs.
62
// The generated key will be sorted. You can tune the parameters to generated
63
// different kinds of test key/value pairs for different scenario.
64
void GenerateRandomKVs(std::vector<std::string> *keys,
65
                       std::vector<std::string> *values, const int from,
66
                       const int len, const int step = 1,
67
                       const int padding_size = 0,
68
6
                       const int keys_share_prefix = 1) {
69
6
  Random rnd(302);
70
71
  // generate different prefix
72
400k
  for (int i = from; i < from + len; i += step) {
73
    // generating keys that shares the prefix
74
1.20M
    for (int j = 0; j < keys_share_prefix; ++j) {
75
800k
      keys->emplace_back(GenerateKey(i, j, padding_size, &rnd));
76
77
      // 100 bytes values
78
800k
      values->emplace_back(RandomString(&rnd, 100));
79
800k
    }
80
400k
  }
81
6
}
82
83
class BlockTest : public RocksDBTest {};
84
85
// block test
86
1
TEST_F(BlockTest, SimpleTest) {
87
2
  for (auto key_value_encoding_format : kKeyValueEncodingFormatList) {
88
2
    Random rnd(301);
89
2
    Options options = Options();
90
2
    std::unique_ptr<InternalKeyComparator> ic;
91
2
    ic.reset(new test::PlainInternalKeyComparator(options.comparator));
92
93
2
    std::vector<std::string> keys;
94
2
    std::vector<std::string> values;
95
2
    BlockBuilder builder(16, key_value_encoding_format);
96
2
    int num_records = 100000;
97
98
2
    GenerateRandomKVs(&keys, &values, 0, num_records);
99
    // add a bunch of records to a block
100
200k
    for (int i = 0; i < num_records; i++) {
101
200k
      builder.Add(keys[i], values[i]);
102
200k
    }
103
104
    // read serialized contents of the block
105
2
    Slice rawblock = builder.Finish();
106
107
    // create block reader
108
2
    BlockContents contents;
109
2
    contents.data = rawblock;
110
2
    contents.cachable = false;
111
2
    Block reader(std::move(contents));
112
113
    // read contents of block sequentially
114
2
    int count = 0;
115
2
    InternalIterator *iter = reader.NewIterator(options.comparator, key_value_encoding_format);
116
200k
    for (iter->SeekToFirst(); iter->Valid(); count++, iter->Next()) {
117
118
      // read kv from block
119
200k
      Slice k = iter->key();
120
200k
      Slice v = iter->value();
121
122
      // compare with lookaside array
123
200k
      ASSERT_EQ(k.ToString().compare(keys[count]), 0);
124
200k
      ASSERT_EQ(v.ToString().compare(values[count]), 0);
125
200k
    }
126
2
    delete iter;
127
128
    // read block contents randomly
129
2
    iter = reader.NewIterator(options.comparator, key_value_encoding_format);
130
200k
    for (int i = 0; i < num_records; i++) {
131
132
      // find a random key in the lookaside array
133
200k
      int index = rnd.Uniform(num_records);
134
200k
      Slice k(keys[index]);
135
136
      // search in block for this key
137
200k
      iter->Seek(k);
138
200k
      ASSERT_TRUE(iter->Valid());
139
200k
      Slice v = iter->value();
140
200k
      ASSERT_EQ(v.ToString().compare(values[index]), 0);
141
200k
    }
142
2
    delete iter;
143
2
  }
144
1
}
145
146
// return the block contents
147
BlockContents GetBlockContents(std::unique_ptr<BlockBuilder> *builder,
148
                               const std::vector<std::string> &keys,
149
                               const std::vector<std::string> &values,
150
                               const KeyValueEncodingFormat key_value_encoding_format,
151
4
                               const int prefix_group_size = 1) {
152
4
  builder->reset(new BlockBuilder(1 /* restart interval */, key_value_encoding_format));
153
154
  // Add only half of the keys
155
600k
  for (size_t i = 0; i < keys.size(); ++i) {
156
600k
    (*builder)->Add(keys[i], values[i]);
157
600k
  }
158
4
  Slice rawblock = (*builder)->Finish();
159
160
4
  BlockContents contents;
161
4
  contents.data = rawblock;
162
4
  contents.cachable = false;
163
164
4
  return contents;
165
4
}
166
167
void CheckBlockContents(
168
    BlockContents contents, const KeyValueEncodingFormat key_value_encoding_format,
169
24
    const std::vector<std::string>& keys, const std::vector<std::string>& values) {
170
24
  Block reader(std::move(contents));
171
24
  std::unique_ptr<InternalIterator> iter(
172
24
      reader.NewIterator(BytewiseComparator(), key_value_encoding_format));
173
174
  // Scan through the block and compare with the data loaded.
175
24
  {
176
24
    size_t i = 0;
177
620k
    for (iter->SeekToFirst(); iter->Valid(); i++, iter->Next()) {
178
620k
      const auto k = iter->key();
179
620k
      const auto v = iter->value();
180
1.24M
      ASSERT_EQ(k.ToString(), keys[i])
181
1.24M
          << "i: " << i << "\nexpected: " << Slice(keys[i]).ToDebugHexString()
182
1.24M
          << "\n  actual: " << k.ToDebugHexString();
183
1.24M
      ASSERT_EQ(v.ToString(), values[i])
184
1.24M
          << "i: " << i << "\nkey: " << Slice(keys[i]).ToDebugHexString()
185
1.24M
          << "\nexpected value: " << Slice(values[i]).ToDebugHexString()
186
1.24M
          << "\n  actual value: " << v.ToDebugHexString();
187
620k
    }
188
24
    ASSERT_EQ(i, keys.size());
189
24
  }
190
191
  // Seek to and near existing keys.
192
24
  std::string higher_key;
193
620k
  for (size_t i = 0; i < keys.size(); i++) {
194
620k
    const auto& key = keys[i];
195
196
    // Duplicate keys should not appear in a real workload, but RocksDB does not prohibit duplicate
197
    // keys, although RocksDB's Seek behavior in that case is non-deterministic.
198
620k
    const bool is_duplicated_key =
199
620k
        (i > 0 && key == keys[i - 1]) || (i + 1 < keys.size() && key == keys[i + 1]);
200
201
620k
    iter->Seek(key);
202
620k
    ASSERT_OK(iter->status());
203
620k
    ASSERT_TRUE(iter->Valid());
204
205
620k
    const auto k = iter->key();
206
1.24M
    ASSERT_EQ(k.ToString(), key)
207
1.24M
        << "i: " << i << " is_duplicated_key: " << is_duplicated_key
208
1.24M
        << "\nexpected key: " << Slice(key).ToDebugHexString()
209
1.24M
        << "\n  actual key: " << k.ToDebugHexString();
210
211
620k
    if (is_duplicated_key) {
212
      // For duplicated keys we might seek to a different key-value for the equal key.
213
0
      continue;
214
0
    }
215
216
620k
    const auto v = iter->value();
217
1.24M
    ASSERT_EQ(v.ToString(), values[i])
218
1.24M
        << "i: " << i
219
1.24M
        << "\nkey: " << Slice(key).ToDebugHexString()
220
1.24M
        << "\nexpected value: " << Slice(values[i]).ToDebugHexString()
221
1.24M
        << "\n  actual value: " << v.ToDebugHexString();
222
223
620k
    if (!key.empty()) {
224
      // Seek to slightly lower key.
225
619k
      const auto lower_key = Slice(key.data(), key.size() - 1);
226
619k
      auto j = i;
227
622k
      while (j > 0 && lower_key.compare(keys[j - 1]) <= 0) {
228
2.20k
        --j;
229
2.20k
      }
230
619k
      iter->Seek(lower_key);
231
619k
      ASSERT_OK(iter->status());
232
619k
      ASSERT_TRUE(iter->Valid());
233
234
1.23M
      ASSERT_EQ(iter->key().ToString(), keys[j])
235
1.23M
          << "j: " << j
236
1.23M
          << "\nexpected key: " << Slice(keys[j]).ToDebugHexString()
237
1.23M
          << "\n  actual key: " << iter->key().ToDebugHexString();
238
1.23M
      ASSERT_EQ(iter->value().ToString(), values[j])
239
1.23M
          << "j: " << j
240
1.23M
          << "\nkey: " << iter->key().ToDebugHexString()
241
1.23M
          << "\nexpected value: " << Slice(values[j]).ToDebugHexString()
242
1.23M
          << "\n  actual value: " << iter->value().ToDebugHexString();
243
620k
    }
244
245
    // Seek to slightly higher key.
246
620k
    higher_key = key;
247
620k
    higher_key.push_back(0);
248
620k
    auto j = i + 1;
249
620k
    while (j < keys.size() && keys[j].compare(higher_key) < 0) {
250
0
      ++j;
251
0
    }
252
620k
    iter->Seek(higher_key);
253
620k
    ASSERT_OK(iter->status());
254
620k
    if (j == keys.size()) {
255
24
      ASSERT_FALSE(iter->Valid());
256
24
      continue;
257
619k
    }
258
619k
    ASSERT_TRUE(iter->Valid());
259
260
1.23M
    ASSERT_EQ(iter->key().ToString(), keys[j])
261
1.23M
        << "j: " << j
262
1.23M
        << "\nexpected key: " << Slice(keys[j]).ToDebugHexString()
263
1.23M
        << "\n  actual key: " << iter->key().ToDebugHexString();
264
1.23M
    ASSERT_EQ(iter->value().ToString(), values[j])
265
1.23M
        << "j: " << j
266
1.23M
        << "\nkey: " << Slice(keys[j]).ToDebugHexString()
267
1.23M
        << "\nexpected value: " << Slice(values[j]).ToDebugHexString()
268
1.23M
        << "\n  actual value: " << iter->value().ToDebugHexString();
269
619k
  }
270
24
}
271
272
void CheckBlockContents(BlockContents contents,
273
                        const KeyValueEncodingFormat key_value_encoding_format,
274
                        const int max_key,
275
                        const std::vector<std::string> &keys,
276
4
                        const std::vector<std::string> &values) {
277
4
  ASSERT_EQ(keys.size(), values.size());
278
279
4
  CheckBlockContents(
280
4
      BlockContents(contents.data, contents.cachable, contents.compression_type),
281
4
      key_value_encoding_format, keys, values);
282
283
4
  const size_t prefix_size = 6;
284
  // create block reader
285
4
  BlockContents contents_ref(contents.data, contents.cachable,
286
4
                             contents.compression_type);
287
4
  Block reader1(std::move(contents));
288
4
  Block reader2(std::move(contents_ref));
289
290
4
  std::unique_ptr<const SliceTransform> prefix_extractor(
291
4
      NewFixedPrefixTransform(prefix_size));
292
293
4
  {
294
4
    auto iter1 = reader1.NewIterator(nullptr, key_value_encoding_format);
295
4
    auto iter2 = reader1.NewIterator(nullptr, key_value_encoding_format);
296
4
    reader1.SetBlockHashIndex(CreateBlockHashIndexOnTheFly(
297
4
        iter1, iter2, static_cast<uint32_t>(keys.size()), BytewiseComparator(),
298
4
        prefix_extractor.get()));
299
300
4
    delete iter1;
301
4
    delete iter2;
302
4
  }
303
304
4
  std::unique_ptr<InternalIterator> hash_iter(
305
4
      reader1.NewIterator(BytewiseComparator(), key_value_encoding_format, nullptr, false));
306
307
4
  std::unique_ptr<InternalIterator> regular_iter(
308
4
      reader2.NewIterator(BytewiseComparator(), key_value_encoding_format));
309
310
  // Seek existent keys
311
600k
  for (size_t i = 0; i < keys.size(); i++) {
312
600k
    hash_iter->Seek(keys[i]);
313
600k
    ASSERT_OK(hash_iter->status());
314
600k
    ASSERT_TRUE(hash_iter->Valid());
315
316
600k
    Slice v = hash_iter->value();
317
600k
    ASSERT_EQ(v.ToString().compare(values[i]), 0);
318
600k
  }
319
320
  // Seek non-existent keys.
321
  // For hash index, if no key with a given prefix is not found, iterator will
322
  // simply be set as invalid; whereas the binary search based iterator will
323
  // return the one that is closest.
324
200k
  for (int i = 1; i < max_key - 1; i += 2) {
325
199k
    auto key = GenerateKey(i, 0, 0, nullptr);
326
199k
    hash_iter->Seek(key);
327
199k
    ASSERT_TRUE(!hash_iter->Valid());
328
329
199k
    regular_iter->Seek(key);
330
199k
    ASSERT_TRUE(regular_iter->Valid());
331
199k
  }
332
4
}
333
334
// In this test case, no two key share same prefix.
335
1
TEST_F(BlockTest, SimpleIndexHash) {
336
1
  const int kMaxKey = 100000;
337
338
2
  for (auto key_value_encoding_format : kKeyValueEncodingFormatList) {
339
2
    std::vector<std::string> keys;
340
2
    std::vector<std::string> values;
341
2
    GenerateRandomKVs(&keys, &values, 0 /* first key id */,
342
2
                      kMaxKey /* last key id */, 2 /* step */,
343
2
                      8 /* padding size (8 bytes randomly generated suffix) */);
344
345
2
    std::unique_ptr<BlockBuilder> builder;
346
2
    auto contents = GetBlockContents(&builder, keys, values, key_value_encoding_format);
347
348
2
    CheckBlockContents(
349
2
        std::move(contents), key_value_encoding_format, kMaxKey, keys, values);
350
2
  }
351
1
}
352
353
1
TEST_F(BlockTest, IndexHashWithSharedPrefix) {
354
1
  const int kMaxKey = 100000;
355
  // for each prefix, there will be 5 keys starts with it.
356
1
  const int kPrefixGroup = 5;
357
358
2
  for (auto key_value_encoding_format : kKeyValueEncodingFormatList) {
359
2
    std::vector<std::string> keys;
360
2
    std::vector<std::string> values;
361
    // Generate keys with same prefix.
362
2
    GenerateRandomKVs(
363
2
        &keys, &values, 0,  // first key id
364
2
        kMaxKey,            // last key id
365
2
        2,                  // step
366
2
        10,                 // padding size,
367
2
        kPrefixGroup);
368
369
2
    std::unique_ptr<BlockBuilder> builder;
370
2
    auto contents =
371
2
        GetBlockContents(&builder, keys, values, key_value_encoding_format, kPrefixGroup);
372
373
2
    CheckBlockContents(
374
2
        std::move(contents), key_value_encoding_format, kMaxKey, keys, values);
375
2
  }
376
1
}
377
378
namespace {
379
380
84
std::string GetPaddedNum(int i) {
381
84
  return StringPrintf("%010d", i);
382
84
}
383
384
yb::Result<std::string> GetMiddleKey(
385
    const KeyValueEncodingFormat key_value_encoding_format, const int num_keys,
386
12
    const int block_restart_interval) {
387
12
  BlockBuilder builder(block_restart_interval, key_value_encoding_format);
388
389
86
  for (int i = 1; i <= num_keys; ++i) {
390
74
    const auto padded_num = GetPaddedNum(i);
391
74
    builder.Add("k" + padded_num, "v" + padded_num);
392
74
  }
393
394
12
  BlockContents contents;
395
12
  contents.data = builder.Finish();
396
12
  contents.cachable = false;
397
12
  Block reader(std::move(contents));
398
399
10
  return VERIFY_RESULT(reader.GetMiddleKey(key_value_encoding_format)).ToString();
400
12
}
401
402
void CheckMiddleKey(
403
    const KeyValueEncodingFormat key_value_encoding_format, const int num_keys,
404
10
    const int block_restart_interval, const int expected_middle_key) {
405
10
  const auto middle_key =
406
10
      ASSERT_RESULT(GetMiddleKey(key_value_encoding_format, num_keys, block_restart_interval));
407
20
  ASSERT_EQ(middle_key, "k" + GetPaddedNum(expected_middle_key)) << "For num_keys = " << num_keys;
408
10
}
409
410
} // namespace
411
412
1
TEST_F(BlockTest, GetMiddleKey) {
413
1
  const auto block_restart_interval = 1;
414
415
2
  for (auto key_value_encoding_format : kKeyValueEncodingFormatList) {
416
2
    const auto empty_block_middle_key =
417
2
        GetMiddleKey(key_value_encoding_format, /* num_keys =*/0, block_restart_interval);
418
4
    ASSERT_NOK(empty_block_middle_key) << empty_block_middle_key;
419
4
    ASSERT_TRUE(empty_block_middle_key.status().IsIncomplete()) << empty_block_middle_key;
420
421
2
    CheckMiddleKey(
422
2
        key_value_encoding_format, /* num_keys = */ 1, block_restart_interval,
423
2
        /* expected_middle_key = */ 1);
424
2
    CheckMiddleKey(
425
2
        key_value_encoding_format, /* num_keys = */ 2, block_restart_interval,
426
2
        /* expected_middle_key = */ 1);
427
2
    CheckMiddleKey(
428
2
        key_value_encoding_format, /* num_keys = */ 3, block_restart_interval,
429
2
        /* expected_middle_key = */ 2);
430
2
    CheckMiddleKey(
431
2
        key_value_encoding_format, /* num_keys = */ 15, block_restart_interval,
432
2
        /* expected_middle_key = */ 8);
433
2
    CheckMiddleKey(
434
2
        key_value_encoding_format, /* num_keys = */ 16, block_restart_interval,
435
2
        /* expected_middle_key = */ 8);
436
2
  }
437
1
}
438
439
1
TEST_F(BlockTest, EncodeThreeSharedPartsSizes) {
440
1
  constexpr auto kNumIters = 100000;
441
442
1
  constexpr auto kBigMaxCompSize = std::numeric_limits<uint32_t>::max() / 16;
443
1
  constexpr auto kSmallMaxCompSize = 16;
444
445
700k
  auto gen_comp_size = []() {
446
349k
    return yb::RandomActWithProbability(0.5) ? 0
447
350k
           : yb::RandomActWithProbability(0.5)
448
174k
               ? yb::RandomUniformInt<uint32_t>(0, kBigMaxCompSize)
449
175k
               : yb::RandomUniformInt<uint32_t>(0, kSmallMaxCompSize);
450
700k
  };
451
1
  std::string buffer;
452
453
100k
  for (auto i = 0; i < kNumIters; ++i) {
454
100k
    YB_LOG_EVERY_N_SECS(INFO, 5) << "Iterations completed: " << i;
455
100k
    buffer.clear();
456
    // Simulate there is something in buffer before encoded key.
457
100k
    buffer.append('X', yb::RandomUniformInt<size_t>(0, 8));
458
100k
    const auto encoded_sizes_start_offset = buffer.size();
459
460
100k
    size_t shared_prefix_size = gen_comp_size();
461
100k
    size_t last_internal_component_reuse_size;
462
100k
    bool is_last_internal_component_inc;
463
100k
    switch (yb::RandomUniformInt<int>(0, 2)) {
464
33.2k
      case 0:
465
33.2k
        last_internal_component_reuse_size = 0;
466
33.2k
        is_last_internal_component_inc = false;
467
33.2k
        break;
468
33.3k
      case 1:
469
33.3k
        last_internal_component_reuse_size = kLastInternalComponentSize;
470
33.3k
        is_last_internal_component_inc = false;
471
33.3k
        break;
472
33.3k
      case 2:
473
33.3k
        last_internal_component_reuse_size = kLastInternalComponentSize;
474
33.3k
        is_last_internal_component_inc = true;
475
33.3k
        break;
476
0
      default:
477
0
        FAIL();
478
100k
    }
479
100k
    ComponentSizes rest_sizes {
480
100k
        .prev_key_non_shared_1_size = gen_comp_size(),
481
100k
        .non_shared_1_size = gen_comp_size(),
482
100k
        .shared_middle_size = gen_comp_size(),
483
100k
        .prev_key_non_shared_2_size = gen_comp_size(),
484
100k
        .non_shared_2_size = gen_comp_size(),
485
100k
    };
486
100k
    if (rest_sizes.shared_middle_size == 0) {
487
51.4k
      rest_sizes.non_shared_2_size = 0;
488
51.4k
      rest_sizes.prev_key_non_shared_2_size = 0;
489
51.4k
    }
490
100k
    const auto prev_key_size =
491
100k
        shared_prefix_size + rest_sizes.prev_key_non_shared_1_size + rest_sizes.shared_middle_size +
492
100k
        rest_sizes.prev_key_non_shared_2_size + last_internal_component_reuse_size;
493
100k
    const auto key_size = shared_prefix_size + rest_sizes.non_shared_1_size +
494
100k
                          rest_sizes.shared_middle_size + rest_sizes.non_shared_2_size +
495
100k
                          last_internal_component_reuse_size;
496
100k
    const auto value_size = gen_comp_size();
497
498
100k
    EncodeThreeSharedPartsSizes(
499
100k
        shared_prefix_size, last_internal_component_reuse_size, is_last_internal_component_inc,
500
100k
        rest_sizes, prev_key_size, key_size, value_size, &buffer);
501
502
100k
    const auto encoded_sizes_end_offset = buffer.size();
503
100k
    const auto payload_size =
504
100k
        rest_sizes.non_shared_1_size + rest_sizes.non_shared_2_size + value_size;
505
506
100k
    uint32_t decoded_shared_prefix_size, decoded_non_shared_1_size, decoded_non_shared_2_size,
507
100k
        decoded_shared_last_component_size, decoded_value_size;
508
100k
    bool decoded_is_something_shared;
509
100k
    int64_t decoded_non_shared_1_size_delta, decoded_non_shared_2_size_delta;
510
100k
    uint64_t decoded_shared_last_component_increase;
511
512
100k
    auto* result = DecodeEntryThreeSharedParts(
513
100k
        buffer.data() + encoded_sizes_start_offset, buffer.data() + buffer.size() + payload_size,
514
100k
        buffer.data(), &decoded_shared_prefix_size, &decoded_non_shared_1_size,
515
100k
        &decoded_non_shared_1_size_delta, &decoded_is_something_shared, &decoded_non_shared_2_size,
516
100k
        &decoded_non_shared_2_size_delta, &decoded_shared_last_component_size,
517
100k
        &decoded_shared_last_component_increase, &decoded_value_size);
518
519
100k
    ASSERT_NE(result, nullptr);
520
100k
    EXPECT_EQ(decoded_shared_last_component_size, last_internal_component_reuse_size);
521
100k
    EXPECT_EQ(decoded_shared_last_component_increase, 0x100 * is_last_internal_component_inc);
522
100k
    EXPECT_EQ(
523
100k
        decoded_is_something_shared,
524
100k
        shared_prefix_size + rest_sizes.shared_middle_size + last_internal_component_reuse_size >
525
100k
            0);
526
100k
    EXPECT_EQ(decoded_non_shared_1_size, rest_sizes.non_shared_1_size);
527
528
100k
    if (decoded_is_something_shared) {
529
91.1k
      EXPECT_EQ(decoded_non_shared_2_size, rest_sizes.non_shared_2_size);
530
182k
      EXPECT_EQ(
531
182k
          rest_sizes.prev_key_non_shared_1_size + decoded_non_shared_1_size_delta,
532
182k
          rest_sizes.non_shared_1_size)
533
182k
          << " prev_key_non_shared_1_size: " << rest_sizes.prev_key_non_shared_1_size
534
182k
          << " decoded_non_shared_1_size_delta: " << decoded_non_shared_1_size_delta
535
182k
          << " non_shared_1_size: " << rest_sizes.non_shared_1_size;
536
182k
      EXPECT_EQ(
537
182k
          rest_sizes.prev_key_non_shared_2_size + decoded_non_shared_2_size_delta,
538
182k
          rest_sizes.non_shared_2_size)
539
182k
          << " prev_key_non_shared_2_size: " << rest_sizes.prev_key_non_shared_2_size
540
182k
          << " decoded_non_shared_2_size_delta: " << decoded_non_shared_2_size_delta
541
182k
          << " non_shared_2_size: " << rest_sizes.non_shared_2_size;
542
91.1k
      EXPECT_EQ(decoded_shared_prefix_size, shared_prefix_size);
543
91.1k
    }
544
545
100k
    EXPECT_EQ(decoded_value_size, value_size);
546
100k
    EXPECT_EQ(result, buffer.data() + encoded_sizes_end_offset);
547
548
100k
    if (testing::Test::HasFailure()) {
549
0
      FLAGS_v = 4;
550
0
      EncodeThreeSharedPartsSizes(
551
0
          shared_prefix_size, last_internal_component_reuse_size, is_last_internal_component_inc,
552
0
          rest_sizes, prev_key_size, key_size, value_size, &buffer);
553
0
      result = DecodeEntryThreeSharedParts(
554
0
          buffer.data() + encoded_sizes_start_offset, buffer.data() + buffer.capacity(),
555
0
          buffer.data(), &decoded_shared_prefix_size, &decoded_non_shared_1_size,
556
0
          &decoded_non_shared_1_size_delta, &decoded_is_something_shared,
557
0
          &decoded_non_shared_2_size, &decoded_non_shared_2_size_delta,
558
0
          &decoded_shared_last_component_size, &decoded_shared_last_component_increase,
559
0
          &decoded_value_size);
560
0
      FAIL();
561
0
    }
562
100k
  }
563
1
}
564
565
1
TEST_F(BlockTest, EncodeThreeSharedParts) {
566
1
  constexpr auto kNumIters = 20;
567
1
  constexpr auto kKeysPerBlock = 1000;
568
1
  constexpr auto kMaxKeySize = 1_KB;
569
1
  constexpr auto kMaxValueSize = 1_KB;
570
1
  constexpr auto kBlockRestartInterval = 16;
571
1
  constexpr auto kKeyValueEncodingFormat =
572
1
      KeyValueEncodingFormat::kKeyDeltaEncodingThreeSharedParts;
573
574
100k
  auto gen_comp_size = [](size_t* size_left) {
575
100k
    auto result =
576
50.1k
        yb::RandomActWithProbability(0.5) ? 0 : yb::RandomUniformInt<size_t>(0, *size_left);
577
100k
    CHECK_GE(*size_left, result);
578
100k
    *size_left -= result;
579
100k
    return result;
580
100k
  };
581
60.0k
  auto append_random_string = [](std::string* buf, size_t size) {
582
16.3M
    while (size > 0) {
583
16.3M
      *buf += yb::RandomUniformInt<char>();
584
16.3M
      size--;
585
16.3M
    }
586
60.0k
  };
587
588
1
  std::vector<std::string> keys;
589
21
  for (auto iter = 0; iter < kNumIters; ++iter) {
590
20
    const bool use_delta_encoding = iter % 2 == 0;
591
20
    keys.clear();
592
20
    {
593
20
      const std::string empty;
594
20
      std::string key;
595
20.0k
      for (auto i = 0; i < kKeysPerBlock; ++i) {
596
19.9k
        const auto& prev_key = keys.empty() ? empty : keys.back();
597
598
        // Generate key based on prev_key in the following format:
599
        // <shared_prefix><non_shared_1><shared_middle><non_shared_2><shared_last>
600
        // Each component might be empty.
601
        // shared_prefix, shared_middle, shared_last are taken from the beginning, middle and
602
        // end of prev_key and have random size (+ random position for the middle).
603
        //
604
        // Since we generate non_shared_1 and non_shared_2 randomly we can still have
605
        // beginning/end/whole of those shared with prev key, but that doesn't matter, because
606
        // we will still have enough non-shared components for the test.
607
20.0k
        auto prev_key_size_left = prev_key.size();
608
20.0k
        const auto shared_prefix_size = gen_comp_size(&prev_key_size_left);
609
20.0k
        const auto shared_middle_size = gen_comp_size(&prev_key_size_left);
610
20.0k
        const auto shared_last_size = gen_comp_size(&prev_key_size_left);
611
20.0k
        const auto shared_size = shared_prefix_size + shared_middle_size + shared_last_size;
612
613
20.0k
        auto key_size_left = yb::RandomUniformInt<size_t>(0, kMaxKeySize - shared_size);
614
20.0k
        const auto non_shared_1_size = gen_comp_size(&key_size_left);
615
20.0k
        const auto non_shared_2_size = key_size_left;
616
0
        DVLOG(4) << "shared_prefix_size: " << shared_prefix_size
617
0
                 << " shared_middle_size: " << shared_middle_size
618
0
                 << " shared_last_size: " << shared_last_size
619
0
                 << " non_shared_1_size: " << non_shared_1_size
620
0
                 << " non_shared_2_size: " << non_shared_2_size;
621
622
20.0k
        const auto prev_key_non_shared_1_size = gen_comp_size(&prev_key_size_left);
623
20.0k
        const auto prev_key_non_shared_2_size = prev_key_size_left;
624
625
20.0k
        key = prev_key.substr(0, shared_prefix_size);
626
20.0k
        append_random_string(&key, non_shared_1_size);
627
20.0k
        key.append(
628
20.0k
            prev_key.substr(shared_prefix_size + prev_key_non_shared_1_size, shared_middle_size));
629
20.0k
        append_random_string(&key, non_shared_2_size);
630
20.0k
        key.append(prev_key.substr(
631
20.0k
            shared_prefix_size + prev_key_non_shared_1_size + shared_middle_size +
632
20.0k
                prev_key_non_shared_2_size,
633
20.0k
            shared_last_size));
634
635
20.0k
        keys.emplace_back(std::move(key));
636
20.0k
      }
637
20
    }
638
639
20
    std::sort(keys.begin(), keys.end());
640
641
20
    BlockBuilder builder(kBlockRestartInterval, kKeyValueEncodingFormat, use_delta_encoding);
642
643
20
    std::vector<std::string> values;
644
20
    {
645
20
      std::string value;
646
20.0k
      for (const auto& key : keys) {
647
20.0k
        value.clear();
648
20.0k
        append_random_string(&value, yb::RandomUniformInt<size_t>(0, kMaxValueSize));
649
20.0k
        builder.Add(key, value);
650
20.0k
        values.emplace_back(std::move(value));
651
20.0k
      }
652
20
    }
653
654
20
    auto rawblock = builder.Finish();
655
656
20
    BlockContents contents;
657
20
    contents.data = rawblock;
658
20
    contents.cachable = false;
659
660
20
    CheckBlockContents(std::move(contents), kKeyValueEncodingFormat, keys, values);
661
20
  }
662
1
}
663
664
}  // namespace rocksdb
665
666
13.2k
int main(int argc, char **argv) {
667
13.2k
  ::testing::InitGoogleTest(&argc, argv);
668
13.2k
  google::ParseCommandLineFlags(&argc, &argv, true);
669
13.2k
  return RUN_ALL_TESTS();
670
13.2k
}