YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/tablet/tablet-split-test.cc
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
16
#include <boost/algorithm/string/join.hpp>
17
18
#include "yb/common/partition.h"
19
#include "yb/common/ql_protocol_util.h"
20
#include "yb/common/ql_rowblock.h"
21
#include "yb/common/ql_value.h"
22
23
#include "yb/docdb/doc_key.h"
24
#include "yb/docdb/docdb_debug.h"
25
26
#include "yb/rocksdb/db.h"
27
28
#include "yb/tablet/local_tablet_writer.h"
29
#include "yb/tablet/read_result.h"
30
#include "yb/tablet/tablet-test-util.h"
31
#include "yb/tablet/tablet_metadata.h"
32
#include "yb/tablet/tablet.h"
33
34
#include "yb/util/random_util.h"
35
#include "yb/util/size_literals.h"
36
37
DECLARE_int64(db_write_buffer_size);
38
DECLARE_bool(rocksdb_disable_compactions);
39
DECLARE_int32(rocksdb_level0_file_num_compaction_trigger);
40
41
namespace yb {
42
namespace tablet {
43
44
class TabletSplitTest : public YBTabletTest {
45
 public:
46
  TabletSplitTest() : YBTabletTest(Schema({ ColumnSchema("key", INT32, false, true),
47
                                            ColumnSchema("val", STRING) },
48
1
                                          1)) {}
49
50
1
  void SetUp() override {
51
1
    FLAGS_db_write_buffer_size = 1_MB;
52
1
    FLAGS_rocksdb_level0_file_num_compaction_trigger = -1;
53
1
    YBTabletTest::SetUp();
54
1
    writer_.reset(new LocalTabletWriter(tablet().get()));
55
1
  }
56
57
 protected:
58
59
10.0k
  docdb::DocKeyHash InsertRow(int key, const std::string& val, LocalTabletWriter::Batch* batch) {
60
10.0k
    QLWriteRequestPB* req = batch->Add();
61
10.0k
    req->set_type(QLWriteRequestPB::QL_STMT_INSERT);
62
10.0k
    QLAddInt32HashValue(req, key);
63
10.0k
    QLAddStringColumnValue(req, kFirstColumnId + 1, val);
64
10.0k
    QLSetHashCode(req);
65
10.0k
    return req->hash_code();
66
10.0k
  }
67
68
13
  Result<std::vector<QLRow>> SelectAll(Tablet* tablet) {
69
13
    ReadHybridTime read_time = ReadHybridTime::SingleTime(VERIFY_RESULT(tablet->SafeTime()));
70
13
    QLReadRequestPB req;
71
13
    QLAddColumns(schema_, {}, &req);
72
13
    QLReadRequestResult result;
73
13
    EXPECT_OK(tablet->HandleQLReadRequest(
74
13
        CoarseTimePoint::max(), read_time, req, TransactionMetadataPB(), &result));
75
76
13
    EXPECT_EQ(QLResponsePB::YQL_STATUS_OK, result.response.status());
77
78
13
    return CreateRowBlock(QLClient::YQL_CLIENT_CQL, schema_, result.rows_data)->rows();
79
13
  }
80
81
10.0k
  docdb::DocKeyHash GetRowHashCode(const QLRow& row) {
82
10.0k
    std::string tmp;
83
10.0k
    AppendToKey(row.column(0).value(), &tmp);
84
10.0k
    return YBPartition::HashColumnCompoundValue(tmp);
85
10.0k
  }
86
87
  std::unique_ptr<LocalTabletWriter> writer_;
88
};
89
90
namespace {
91
92
12
// Converts an encoded partition key into its hash code.
// An empty key yields boost::none; any other key is decoded with
// PartitionSchema::DecodeMultiColumnHashValue.
boost::optional<docdb::DocKeyHash> PartitionKeyToHash(const std::string& partition_key) {
  if (!partition_key.empty()) {
    return PartitionSchema::DecodeMultiColumnHashValue(partition_key);
  }
  return boost::none;
}
99
100
} // namespace
101
102
1
// Fills the source tablet with kNumRows rows, creates kNumSplits + 1 subtablets from it at evenly
// spaced hash codes, and checks for every subtablet that:
//   - before compaction its DocDB dump equals the source dump, yet reads return only rows whose
//     hash code lies inside the subtablet's partition;
//   - after compaction both its DocDB entries and its rows are subsets of the source data that do
//     not overlap with any other subtablet;
//   - its SST data size is below the source size divided by kNumSplits.
// Finally verifies that the subtablets together cover all source rows and DocDB entries.
TEST_F(TabletSplitTest, SplitTablet) {
  constexpr auto kNumRows = 10000;
  constexpr auto kValuePrefixLength = 1024;
  constexpr auto kRowsPerSourceFlush = kNumRows / 7;
  constexpr auto kNumSplits = 5;

  // Every value shares one random prefix and ends with "_<key>".
  const auto value_format = RandomHumanReadableString(kValuePrefixLength) + "_$0";

  // Range of hash codes actually produced by the inserted keys; split points are derived from it.
  docdb::DocKeyHash lowest_hash = std::numeric_limits<docdb::DocKeyHash>::max();
  docdb::DocKeyHash highest_hash = std::numeric_limits<docdb::DocKeyHash>::min();
  {
    LocalTabletWriter::Batch pending;
    for (int row_key = 1; row_key <= kNumRows; ++row_key) {
      const auto hash_code = InsertRow(row_key, Format(value_format, row_key), &pending);
      lowest_hash = std::min(lowest_hash, hash_code);
      highest_hash = std::max(highest_hash, hash_code);
      if (row_key % kRowsPerSourceFlush != 0) {
        continue;
      }
      // Write and synchronously flush after every kRowsPerSourceFlush rows.
      ASSERT_OK(writer_->WriteBatch(&pending));
      pending.Clear();
      ASSERT_OK(tablet()->Flush(FlushMode::kSync));
    }
    // Rows left over after the last flush boundary are written without an explicit flush.
    if (!pending.empty()) {
      ASSERT_OK(writer_->WriteBatch(&pending));
    }
  }

  VLOG(1) << "Source tablet:" << std::endl
          << docdb::DocDBDebugDumpToStr(tablet()->doc_db(), docdb::IncludeBinary::kTrue);

  // Snapshot of the source tablet: the DocDB dump as one string and as a set of entries.
  const auto source_dump_text = tablet()->TEST_DocDBDumpStr(IncludeIntents::kTrue);
  std::unordered_set<std::string> unmatched_source_entries;
  tablet()->TEST_DocDBDumpToContainer(IncludeIntents::kTrue, &unmatched_source_entries);

  // Rows of the source tablet. One copy is consumed by the pre-compaction checks and the other by
  // the post-compaction checks.
  std::unordered_set<std::string> unseen_rows_before_compaction;
  for (const auto& row : ASSERT_RESULT(SelectAll(tablet().get()))) {
    unseen_rows_before_compaction.insert(row.ToString());
  }
  auto unseen_rows_after_compaction = unseen_rows_before_compaction;

  std::vector<TabletPtr> subtablets;

  // NOTE(review): this is the pointer handed out by the source tablet's metadata and it is
  // modified in place below - confirm that mutating the shared Partition is intended.
  std::shared_ptr<Partition> sub_partition = tablet()->metadata()->partition();
  docdb::KeyBounds bounds;
  for (int sub_idx = 1; sub_idx <= kNumSplits + 1; ++sub_idx) {
    const auto subtablet_id = Format("$0-sub-$1", tablet()->tablet_id(), yb::ToString(sub_idx));

    if (sub_idx > kNumSplits) {
      // The last subtablet has no upper bound. Its lower bound is the kNumSplits-th split point,
      // which equals highest_hash, so it contains only one hash; this case is tested explicitly.
      sub_partition->set_partition_key_end("");
      bounds.upper.Clear();
    } else {
      const auto hash_span = static_cast<uint32>(highest_hash - lowest_hash);
      const docdb::DocKeyHash split_hash_code = lowest_hash + sub_idx * hash_span / kNumSplits;
      LOG(INFO) << "Split hash code: " << split_hash_code;

      const auto partition_key = PartitionSchema::EncodeMultiColumnHashValue(split_hash_code);
      docdb::KeyBytes encoded_doc_key;
      docdb::DocKeyEncoderAfterTableIdStep(&encoded_doc_key).Hash(
          split_hash_code, std::vector<docdb::PrimitiveValue>());

      sub_partition->set_partition_key_end(partition_key);
      bounds.upper = encoded_doc_key;
    }

    ASSERT_OK(tablet()->CreateSubtablet(
        subtablet_id, *sub_partition, bounds, yb::OpId() /* split_op_id */,
        HybridTime() /* split_hybrid_time */));
    subtablets.push_back(ASSERT_RESULT(harness_->OpenTablet(subtablet_id)));

    // The next subtablet starts where this one ended.
    sub_partition->set_partition_key_start(sub_partition->partition_key_end());
    bounds.lower = bounds.upper;
  }

  for (auto subtablet : subtablets) {
    {
      RaftGroupReplicaSuperBlockPB super_block;
      subtablet->metadata()->ToSuperBlock(&super_block);
      ASSERT_EQ(subtablet->tablet_id(), super_block.kv_store().kv_store_id());
    }
    const auto sub_dump_text = subtablet->TEST_DocDBDumpStr(IncludeIntents::kTrue);

    // Before compaction underlying DocDB dump should be the same.
    ASSERT_EQ(source_dump_text, sub_dump_text);

    // But split tablets should only return relevant data without overlap and no unexpected data.
    const auto& owned_partition = subtablet->metadata()->partition();
    const auto start_hash = PartitionKeyToHash(owned_partition->partition_key_start());
    const auto end_hash = PartitionKeyToHash(owned_partition->partition_key_end());

    for (const auto& row : ASSERT_RESULT(SelectAll(subtablet.get()))) {
      const auto hash_code = GetRowHashCode(row);
      if (start_hash) {
        ASSERT_GE(hash_code, *start_hash);
      }
      if (end_hash) {
        ASSERT_LT(hash_code, *end_hash);
      }
      // Each row must be returned by exactly one subtablet.
      ASSERT_EQ(unseen_rows_before_compaction.erase(row.ToString()), 1);
    }

    subtablet->ForceRocksDBCompactInTest();

    VLOG(1) << subtablet->tablet_id() << " compacted:" << std::endl
            << subtablet->TEST_DocDBDumpStr(IncludeIntents::kTrue);

    // After compaction split tablets' RocksDB instances should have no overlap and no unexpected
    // data.
    std::unordered_set<std::string> sub_entries;
    subtablet->TEST_DocDBDumpToContainer(IncludeIntents::kTrue, &sub_entries);
    for (const auto& entry : sub_entries) {
      ASSERT_EQ(unmatched_source_entries.erase(entry), 1);
    }

    // Check data returned by tablet.
    for (const auto& row : ASSERT_RESULT(SelectAll(subtablet.get()))) {
      ASSERT_EQ(unseen_rows_after_compaction.erase(row.ToString()), 1);
    }

    // Each split tablet data size should be less than original data size divided by number
    // of split points.
    ASSERT_LT(
        subtablet->doc_db().regular->GetCurrentVersionDataSstFilesSize(),
        tablet()->doc_db().regular->GetCurrentVersionDataSstFilesSize() / kNumSplits);
  }

  // Split tablets should have all data from the source tablet.
  ASSERT_TRUE(unseen_rows_before_compaction.empty())
      << boost::algorithm::join(unseen_rows_before_compaction, "\n");
  ASSERT_TRUE(unseen_rows_after_compaction.empty())
      << boost::algorithm::join(unseen_rows_after_compaction, "\n");
  ASSERT_TRUE(unmatched_source_entries.empty())
      << boost::algorithm::join(unmatched_source_entries, "\n");
}
229
230
// TODO: Need to test with distributed transactions both pending and committed
231
// (but not yet applied) during split.
232
// Split tablets should not return unexpected data for not yet applied, but committed transactions
233
// before and after compaction.
234
// Also check that non-relevant intents are cleaned from split intents DB after compaction.
235
//
236
// This test would be possible as an integration test when upper layers of tablet splitting are
237
// implemented.
238
239
} // namespace tablet
240
} // namespace yb