/Users/deen/code/yugabyte-db/src/yb/tablet/tablet-split-test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // Copyright (c) YugaByte, Inc. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
5 | | // in compliance with the License. You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
10 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
11 | | // or implied. See the License for the specific language governing permissions and limitations |
12 | | // under the License. |
13 | | // |
14 | | // |
15 | | |
16 | | #include <boost/algorithm/string/join.hpp> |
17 | | |
18 | | #include "yb/common/partition.h" |
19 | | #include "yb/common/ql_protocol_util.h" |
20 | | #include "yb/common/ql_rowblock.h" |
21 | | #include "yb/common/ql_value.h" |
22 | | |
23 | | #include "yb/docdb/doc_key.h" |
24 | | #include "yb/docdb/docdb_debug.h" |
25 | | |
26 | | #include "yb/rocksdb/db.h" |
27 | | |
28 | | #include "yb/tablet/local_tablet_writer.h" |
29 | | #include "yb/tablet/read_result.h" |
30 | | #include "yb/tablet/tablet-test-util.h" |
31 | | #include "yb/tablet/tablet_metadata.h" |
32 | | #include "yb/tablet/tablet.h" |
33 | | |
34 | | #include "yb/util/random_util.h" |
35 | | #include "yb/util/size_literals.h" |
36 | | |
37 | | DECLARE_int64(db_write_buffer_size); |
38 | | DECLARE_bool(rocksdb_disable_compactions); |
39 | | DECLARE_int32(rocksdb_level0_file_num_compaction_trigger); |
40 | | |
41 | | namespace yb { |
42 | | namespace tablet { |
43 | | /* Test fixture: a tablet whose schema has an INT32 "key" column and a STRING "val" column, written via a LocalTabletWriter. */ |
44 | | class TabletSplitTest : public YBTabletTest { |
45 | | public: |
46 | | TabletSplitTest() : YBTabletTest(Schema({ ColumnSchema("key", INT32, false, true), |
47 | | ColumnSchema("val", STRING) }, |
48 | 1 | 1)) {} |
49 | | /* Sets a 1 MB write buffer and a level-0 compaction trigger of -1, runs the base SetUp(), then creates writer_. NOTE(review): -1 presumably turns off automatic compaction - confirm. */ |
50 | 1 | void SetUp() override { |
51 | 1 | FLAGS_db_write_buffer_size = 1_MB; |
52 | 1 | FLAGS_rocksdb_level0_file_num_compaction_trigger = -1; |
53 | 1 | YBTabletTest::SetUp(); |
54 | 1 | writer_.reset(new LocalTabletWriter(tablet().get())); |
55 | 1 | } |
56 | | |
57 | | protected: |
58 | | /* Appends a QL INSERT of (key, val) to `batch` without writing it, and returns the hash code set on that request. */ |
59 | 10.0k | docdb::DocKeyHash InsertRow(int key, const std::string& val, LocalTabletWriter::Batch* batch) { |
60 | 10.0k | QLWriteRequestPB* req = batch->Add(); |
61 | 10.0k | req->set_type(QLWriteRequestPB::QL_STMT_INSERT); |
62 | 10.0k | QLAddInt32HashValue(req, key); |
63 | 10.0k | QLAddStringColumnValue(req, kFirstColumnId + 1, val); |
64 | 10.0k | QLSetHashCode(req); |
65 | 10.0k | return req->hash_code(); |
66 | 10.0k | } |
67 | | /* Runs a QL read on `tablet` at its safe time and returns the decoded rows. A SafeTime() error is returned; read/status failures are only reported through EXPECT_*. */ |
68 | 13 | Result<std::vector<QLRow>> SelectAll(Tablet* tablet) { |
69 | 13 | ReadHybridTime read_time = ReadHybridTime::SingleTime(VERIFY_RESULT(tablet->SafeTime())); |
70 | 13 | QLReadRequestPB req; |
71 | 13 | QLAddColumns(schema_, {}, &req); |
72 | 13 | QLReadRequestResult result; |
73 | 13 | EXPECT_OK(tablet->HandleQLReadRequest( |
74 | 13 | CoarseTimePoint::max(), read_time, req, TransactionMetadataPB(), &result)); |
75 | | |
76 | 13 | EXPECT_EQ(QLResponsePB::YQL_STATUS_OK, result.response.status()); |
77 | | |
78 | 13 | return CreateRowBlock(QLClient::YQL_CLIENT_CQL, schema_, result.rows_data)->rows(); |
79 | 13 | } |
80 | | /* Recomputes the hash code of `row` by encoding column 0 and passing it to YBPartition::HashColumnCompoundValue. */ |
81 | 10.0k | docdb::DocKeyHash GetRowHashCode(const QLRow& row) { |
82 | 10.0k | std::string tmp; |
83 | 10.0k | AppendToKey(row.column(0).value(), &tmp); |
84 | 10.0k | return YBPartition::HashColumnCompoundValue(tmp); |
85 | 10.0k | } |
86 | | /* Writer bound to the fixture's tablet; (re)created in SetUp(). */ |
87 | | std::unique_ptr<LocalTabletWriter> writer_; |
88 | | }; |
89 | | |
90 | | namespace { |
91 | | /* Decodes `partition_key` into its hash code via PartitionSchema::DecodeMultiColumnHashValue; an empty key yields boost::none. */ |
92 | 12 | boost::optional<docdb::DocKeyHash> PartitionKeyToHash(const std::string& partition_key) { |
93 | 12 | if (partition_key.empty()) { |
94 | 2 | return boost::none; |
95 | 10 | } else { |
96 | 10 | return PartitionSchema::DecodeMultiColumnHashValue(partition_key); |
97 | 10 | } |
98 | 12 | } |
99 | | |
100 | | } // namespace |
101 | | /* Writes kNumRows rows, creates kNumSplits + 1 subtablets over consecutive hash ranges, and verifies that together they hold exactly the source data. */ |
102 | 1 | TEST_F(TabletSplitTest, SplitTablet) { |
103 | 1 | constexpr auto kNumRows = 10000; |
104 | 1 | constexpr auto kValuePrefixLength = 1024; |
105 | 1 | constexpr auto kRowsPerSourceFlush = kNumRows / 7; |
106 | 1 | constexpr auto kNumSplits = 5; |
107 | | /* Insert the rows in batches, writing and flushing every kRowsPerSourceFlush rows, while tracking the min/max hash codes seen. */ |
108 | 1 | const auto value_format = RandomHumanReadableString(kValuePrefixLength) + "_$0"; |
109 | 1 | docdb::DocKeyHash min_hash_code = std::numeric_limits<docdb::DocKeyHash>::max(); |
110 | 1 | docdb::DocKeyHash max_hash_code = std::numeric_limits<docdb::DocKeyHash>::min(); |
111 | 1 | { |
112 | 1 | LocalTabletWriter::Batch batch; |
113 | 10.0k | for (auto i = 1; i <= kNumRows; ++i) { |
114 | 10.0k | const auto hash_code = InsertRow(i, Format(value_format, i), &batch); |
115 | 10.0k | min_hash_code = std::min(min_hash_code, hash_code); |
116 | 10.0k | max_hash_code = std::max(max_hash_code, hash_code); |
117 | 10.0k | if (i % kRowsPerSourceFlush == 0) { |
118 | 7 | ASSERT_OK(writer_->WriteBatch(&batch)); |
119 | 7 | batch.Clear(); |
120 | 7 | ASSERT_OK(tablet()->Flush(FlushMode::kSync)); |
121 | 7 | } |
122 | 10.0k | } |
123 | 1 | if (!batch.empty()) { |
124 | 1 | ASSERT_OK(writer_->WriteBatch(&batch)); |
125 | 1 | } |
126 | 1 | } |
127 | | /* Capture the source tablet's DocDB dump both as one string and as a set of entries for the comparisons below. */ |
128 | 0 | VLOG(1) << "Source tablet:" << std::endl |
129 | 0 | << docdb::DocDBDebugDumpToStr(tablet()->doc_db(), docdb::IncludeBinary::kTrue); |
130 | 1 | const auto source_docdb_dump_str = tablet()->TEST_DocDBDumpStr(IncludeIntents::kTrue); |
131 | 1 | std::unordered_set<std::string> source_docdb_dump; |
132 | 1 | tablet()->TEST_DocDBDumpToContainer(IncludeIntents::kTrue, &source_docdb_dump); |
133 | | /* Collect the source rows twice: one set is consumed by the pre-compaction check, the copy by the post-compaction check. */ |
134 | 1 | std::unordered_set<std::string> source_rows; |
135 | 10.0k | for (const auto& row : ASSERT_RESULT(SelectAll(tablet().get()))) { |
136 | 10.0k | source_rows.insert(row.ToString()); |
137 | 10.0k | } |
138 | 1 | auto source_rows2 = source_rows; |
139 | | |
140 | 1 | std::vector<TabletPtr> split_tablets; |
141 | | /* Walk the hash range in increasing order: each subtablet's end bound becomes the next subtablet's start bound. */ |
142 | 1 | std::shared_ptr<Partition> partition = tablet()->metadata()->partition(); |
143 | 1 | docdb::KeyBounds key_bounds; |
144 | 7 | for (auto i = 1; i <= kNumSplits + 1; ++i) { |
145 | 6 | const auto subtablet_id = Format("$0-sub-$1", tablet()->tablet_id(), yb::ToString(i)); |
146 | | |
147 | | // For i == kNumSplits the split hash equals max_hash_code, so the last subtablet (open upper bound) holds only that one hash code; this case is tested deliberately. |
148 | 6 | if (i <= kNumSplits) { |
149 | 5 | const docdb::DocKeyHash split_hash_code = |
150 | 5 | min_hash_code + i * static_cast<uint32>(max_hash_code - min_hash_code) / kNumSplits; |
151 | 5 | LOG(INFO) << "Split hash code: " << split_hash_code; |
152 | 5 | const auto partition_key = PartitionSchema::EncodeMultiColumnHashValue(split_hash_code); |
153 | 5 | docdb::KeyBytes encoded_doc_key; |
154 | 5 | docdb::DocKeyEncoderAfterTableIdStep(&encoded_doc_key).Hash( |
155 | 5 | split_hash_code, std::vector<docdb::PrimitiveValue>()); |
156 | 5 | partition->set_partition_key_end(partition_key); |
157 | 5 | key_bounds.upper = encoded_doc_key; |
158 | 1 | } else { |
159 | 1 | partition->set_partition_key_end(""); |
160 | 1 | key_bounds.upper.Clear(); |
161 | 1 | } |
162 | | |
163 | 6 | ASSERT_OK(tablet()->CreateSubtablet( |
164 | 6 | subtablet_id, *partition, key_bounds, yb::OpId() /* split_op_id */, |
165 | 6 | HybridTime() /* split_hybrid_time */)); |
166 | 6 | split_tablets.push_back(ASSERT_RESULT(harness_->OpenTablet(subtablet_id))); |
167 | | /* The next subtablet starts where this one ended. */ |
168 | 6 | partition->set_partition_key_start(partition->partition_key_end()); |
169 | 6 | key_bounds.lower = key_bounds.upper; |
170 | 6 | } |
171 | | |
172 | 6 | for (auto split_tablet : split_tablets) { |
173 | 6 | { |
174 | 6 | RaftGroupReplicaSuperBlockPB super_block; |
175 | 6 | split_tablet->metadata()->ToSuperBlock(&super_block); |
176 | 6 | ASSERT_EQ(split_tablet->tablet_id(), super_block.kv_store().kv_store_id()); |
177 | 6 | } |
178 | 6 | const auto split_docdb_dump_str = split_tablet->TEST_DocDBDumpStr(IncludeIntents::kTrue); |
179 | | |
180 | | // Before compaction the subtablet's DocDB dump should be identical to the source tablet's. |
181 | 6 | ASSERT_EQ(source_docdb_dump_str, split_docdb_dump_str); |
182 | | |
183 | | // Reads, however, must return only rows with hash in [start_hash, end_hash); erasing each row from source_rows also catches overlap and unexpected rows. |
184 | 6 | const auto& split_partition = split_tablet->metadata()->partition(); |
185 | 6 | const auto start_hash = PartitionKeyToHash(split_partition->partition_key_start()); |
186 | 6 | const auto end_hash = PartitionKeyToHash(split_partition->partition_key_end()); |
187 | | |
188 | 10.0k | for (const auto& row : ASSERT_RESULT(SelectAll(split_tablet.get()))) { |
189 | 10.0k | const auto hash_code = GetRowHashCode(row); |
190 | 10.0k | if (start_hash) { |
191 | 7.96k | ASSERT_GE(hash_code, *start_hash); |
192 | 7.96k | } |
193 | 10.0k | if (end_hash) { |
194 | 9.99k | ASSERT_LT(hash_code, *end_hash); |
195 | 9.99k | } |
196 | 10.0k | ASSERT_EQ(source_rows.erase(row.ToString()), 1); |
197 | 10.0k | } |
198 | | |
199 | 6 | split_tablet->ForceRocksDBCompactInTest(); |
200 | | |
201 | 0 | VLOG(1) << split_tablet->tablet_id() << " compacted:" << std::endl |
202 | 0 | << split_tablet->TEST_DocDBDumpStr(IncludeIntents::kTrue); |
203 | | |
204 | | // After compaction the split tablets' DocDB dumps should have no overlap and no unexpected |
205 | | // data: every dumped entry must erase exactly one entry from source_docdb_dump. |
206 | 6 | std::unordered_set<std::string> split_docdb_dump; |
207 | 6 | split_tablet->TEST_DocDBDumpToContainer(IncludeIntents::kTrue, &split_docdb_dump); |
208 | 20.0k | for (const auto& entry : split_docdb_dump) { |
209 | 20.0k | ASSERT_EQ(source_docdb_dump.erase(entry), 1); |
210 | 20.0k | } |
211 | | |
212 | | // Check the rows returned by the tablet after compaction, against the second copy of the source rows. |
213 | 10.0k | for (const auto& row : ASSERT_RESULT(SelectAll(split_tablet.get()))) { |
214 | 10.0k | ASSERT_EQ(source_rows2.erase(row.ToString()), 1); |
215 | 10.0k | } |
216 | | |
217 | | // Each split tablet's SST data size should be less than the source tablet's size divided by the |
218 | | // number of split points (kNumSplits). |
219 | 6 | ASSERT_LT( |
220 | 6 | split_tablet->doc_db().regular->GetCurrentVersionDataSstFilesSize(), |
221 | 6 | tablet()->doc_db().regular->GetCurrentVersionDataSstFilesSize() / kNumSplits); |
222 | 6 | } |
223 | | |
224 | | // Together the split tablets should have covered all data from the source tablet, leaving every set empty. |
225 | 2 | ASSERT_TRUE(source_rows.empty()) << boost::algorithm::join(source_rows, "\n"); |
226 | 2 | ASSERT_TRUE(source_rows2.empty()) << boost::algorithm::join(source_rows2, "\n"); |
227 | 2 | ASSERT_TRUE(source_docdb_dump.empty()) << boost::algorithm::join(source_docdb_dump, "\n"); |
228 | 1 | } |
229 | | |
230 | | // TODO: Need to test with distributed transactions both pending and committed |
231 | | // (but not yet applied) during split. |
232 | | // Split tablets should not return unexpected data for not yet applied, but committed transactions |
233 | | // before and after compaction. |
234 | | // Also check that non-relevant intents are cleaned from split intents DB after compaction. |
235 | | // |
236 | | // This test would be possible as an integration test when upper layers of tablet splitting are |
237 | | // implemented. |
238 | | |
239 | | } // namespace tablet |
240 | | } // namespace yb |