/Users/deen/code/yugabyte-db/ent/src/yb/integration-tests/snapshot-test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | |
13 | | #include <gtest/gtest.h> |
14 | | |
15 | | #include "yb/client/table_handle.h" |
16 | | #include "yb/client/yb_table_name.h" |
17 | | |
18 | | #include "yb/common/ql_value.h" |
19 | | #include "yb/common/wire_protocol.h" |
20 | | |
21 | | #include "yb/consensus/consensus.h" |
22 | | |
23 | | #include "yb/integration-tests/cluster_itest_util.h" |
24 | | #include "yb/integration-tests/mini_cluster.h" |
25 | | #include "yb/integration-tests/test_workload.h" |
26 | | #include "yb/integration-tests/yb_mini_cluster_test_base.h" |
27 | | |
28 | | #include "yb/master/catalog_entity_info.h" |
29 | | #include "yb/master/catalog_manager_if.h" |
30 | | #include "yb/master/master_backup.proxy.h" |
31 | | #include "yb/master/master_ddl.proxy.h" |
32 | | #include "yb/master/master_types.pb.h" |
33 | | #include "yb/master/mini_master.h" |
34 | | #include "yb/master/master-test-util.h" |
35 | | |
36 | | #include "yb/rpc/messenger.h" |
37 | | #include "yb/rpc/proxy.h" |
38 | | #include "yb/rpc/rpc_controller.h" |
39 | | |
40 | | #include "yb/tablet/tablet.h" |
41 | | #include "yb/tablet/tablet_metadata.h" |
42 | | #include "yb/tablet/tablet_peer.h" |
43 | | #include "yb/tablet/tablet_snapshots.h" |
44 | | |
45 | | #include "yb/tools/yb-admin_util.h" |
46 | | |
47 | | #include "yb/tserver/mini_tablet_server.h" |
48 | | #include "yb/tserver/tablet_server.h" |
49 | | #include "yb/tserver/ts_tablet_manager.h" |
50 | | |
51 | | #include "yb/util/cast.h" |
52 | | #include "yb/util/pb_util.h" |
53 | | #include "yb/util/scope_exit.h" |
54 | | #include "yb/util/status_format.h" |
55 | | #include "yb/util/test_util.h" |
56 | | |
57 | | using namespace std::literals; |
58 | | |
59 | | DECLARE_uint64(log_segment_size_bytes); |
60 | | DECLARE_int32(log_min_seconds_to_retain); |
61 | | DECLARE_bool(TEST_tablet_verify_flushed_frontier_after_modifying); |
62 | | DECLARE_bool(enable_ysql); |
63 | | |
64 | | namespace yb { |
65 | | |
66 | | using std::make_shared; |
67 | | using std::shared_ptr; |
68 | | using std::unique_ptr; |
69 | | using std::tuple; |
70 | | using std::set; |
71 | | using std::vector; |
72 | | |
73 | | using google::protobuf::RepeatedPtrField; |
74 | | |
75 | | using client::YBTableName; |
76 | | using master::MasterBackupProxy; |
77 | | using master::SysRowEntry; |
78 | | using master::SysRowEntryType; |
79 | | using master::BackupRowEntryPB; |
80 | | using master::TableInfo; |
81 | | using master::TabletInfo; |
82 | | using rpc::Messenger; |
83 | | using rpc::MessengerBuilder; |
84 | | using rpc::RpcController; |
85 | | using tablet::Tablet; |
86 | | using tablet::TabletPeer; |
87 | | using tserver::MiniTabletServer; |
88 | | |
89 | | using master::CreateSnapshotRequestPB; |
90 | | using master::CreateSnapshotResponsePB; |
91 | | using master::IdPairPB; |
92 | | using master::ImportSnapshotMetaRequestPB; |
93 | | using master::ImportSnapshotMetaResponsePB; |
94 | | using master::ImportSnapshotMetaResponsePB_TableMetaPB; |
95 | | using master::IsCreateTableDoneRequestPB; |
96 | | using master::IsCreateTableDoneResponsePB; |
97 | | using master::ListSnapshotsRequestPB; |
98 | | using master::ListSnapshotsResponsePB; |
99 | | using master::ListSnapshotRestorationsRequestPB; |
100 | | using master::ListSnapshotRestorationsResponsePB; |
101 | | using master::RestoreSnapshotRequestPB; |
102 | | using master::RestoreSnapshotResponsePB; |
103 | | using master::SnapshotInfoPB; |
104 | | using master::SysNamespaceEntryPB; |
105 | | using master::SysTablesEntryPB; |
106 | | using master::SysSnapshotEntryPB; |
107 | | using master::TableIdentifierPB; |
108 | | |
// Single table shared by all tests in this file; created through SetupWorkload()
// and dropped in SnapshotTest::DoTearDown() if it still exists.
const YBTableName kTableName(YQL_DATABASE_CQL, "my_keyspace", "snapshot_test_table");
110 | | |
111 | | class SnapshotTest : public YBMiniClusterTestBase<MiniCluster> { |
112 | | public: |
113 | 0 | void SetUp() override { |
114 | 0 | YBMiniClusterTestBase::SetUp(); |
115 | |
|
116 | 0 | FLAGS_log_min_seconds_to_retain = 5; |
117 | 0 | FLAGS_TEST_tablet_verify_flushed_frontier_after_modifying = true; |
118 | 0 | FLAGS_enable_ysql = false; |
119 | |
|
120 | 0 | MiniClusterOptions opts; |
121 | 0 | opts.num_tablet_servers = 3; |
122 | 0 | cluster_.reset(new MiniCluster(opts)); |
123 | 0 | ASSERT_OK(cluster_->Start()); |
124 | |
|
125 | 0 | messenger_ = ASSERT_RESULT( |
126 | 0 | MessengerBuilder("test-msgr").set_num_reactors(1).Build()); |
127 | 0 | rpc::ProxyCache proxy_cache(messenger_.get()); |
128 | 0 | proxy_ddl_.reset(new master::MasterDdlProxy( |
129 | 0 | &proxy_cache, cluster_->mini_master()->bound_rpc_addr())); |
130 | 0 | proxy_backup_.reset(new MasterBackupProxy( |
131 | 0 | &proxy_cache, cluster_->mini_master()->bound_rpc_addr())); |
132 | | |
133 | | // Connect to the cluster. |
134 | 0 | client_ = ASSERT_RESULT(cluster_->CreateClient()); |
135 | 0 | } |
136 | | |
137 | 0 | void DoTearDown() override { |
138 | 0 | Result<bool> exist = client_->TableExists(kTableName); |
139 | 0 | ASSERT_OK(exist); |
140 | |
|
141 | 0 | if (exist.get()) { |
142 | 0 | ASSERT_OK(client_->DeleteTable(kTableName)); |
143 | 0 | } |
144 | |
|
145 | 0 | client_.reset(); |
146 | |
|
147 | 0 | messenger_->Shutdown(); |
148 | |
|
149 | 0 | if (cluster_) { |
150 | 0 | cluster_->Shutdown(); |
151 | 0 | cluster_.reset(); |
152 | 0 | } |
153 | |
|
154 | 0 | YBMiniClusterTestBase::DoTearDown(); |
155 | 0 | } |
156 | | |
157 | 0 | RpcController* ResetAndGetController() { |
158 | 0 | controller_.Reset(); |
159 | 0 | controller_.set_timeout(10s); |
160 | 0 | return &controller_; |
161 | 0 | } |
162 | | |
163 | | void CheckAllSnapshots( |
164 | 0 | const std::map<TxnSnapshotId, SysSnapshotEntryPB::State>& snapshot_info) { |
165 | 0 | ListSnapshotsRequestPB list_req; |
166 | 0 | ListSnapshotsResponsePB list_resp; |
167 | |
|
168 | 0 | LOG(INFO) << "Requested available snapshots."; |
169 | 0 | const Status s = proxy_backup_->ListSnapshots( |
170 | 0 | list_req, &list_resp, ResetAndGetController()); |
171 | |
|
172 | 0 | ASSERT_TRUE(s.ok()); |
173 | 0 | SCOPED_TRACE(list_resp.DebugString()); |
174 | 0 | ASSERT_FALSE(list_resp.has_error()); |
175 | |
|
176 | 0 | LOG(INFO) << "Number of snapshots: " << list_resp.snapshots_size(); |
177 | 0 | ASSERT_EQ(list_resp.snapshots_size(), snapshot_info.size()); |
178 | | |
179 | | // Current snapshot is available for non-transaction aware snapshots only. |
180 | 0 | ASSERT_FALSE(list_resp.has_current_snapshot_id()); |
181 | |
|
182 | 0 | for (int i = 0; i < list_resp.snapshots_size(); ++i) { |
183 | 0 | LOG(INFO) << "Snapshot " << i << ": " << list_resp.snapshots(i).DebugString(); |
184 | 0 | auto id = ASSERT_RESULT(FullyDecodeTxnSnapshotId(list_resp.snapshots(i).id())); |
185 | |
|
186 | 0 | auto it = snapshot_info.find(id); |
187 | 0 | ASSERT_NE(it, snapshot_info.end()) << "Unknown snapshot: " << id; |
188 | 0 | ASSERT_EQ(list_resp.snapshots(i).entry().state(), it->second); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | | template <typename THandler> |
193 | 0 | Status WaitTillComplete(const string& handler_name, THandler handler) { |
194 | 0 | return LoggedWaitFor(handler, 30s, handler_name, 100ms, 1.5); |
195 | 0 | } Unexecuted instantiation: _ZN2yb12SnapshotTest16WaitTillCompleteIZNS0_30WaitForSnapshotRestorationDoneERKNS_17StronglyTypedUuidINS_28TxnSnapshotRestorationId_TagEEEEUlvE_EENS_6StatusERKNSt3__112basic_stringIcNS9_11char_traitsIcEENS9_9allocatorIcEEEET_ Unexecuted instantiation: _ZN2yb12SnapshotTest16WaitTillCompleteIZNS0_21WaitForSnapshotOpDoneERKNSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEERKNS_17StronglyTypedUuidINS_17TxnSnapshotId_TagEEEEUlvE_EENS_6StatusESA_T_ Unexecuted instantiation: _ZN2yb12SnapshotTest16WaitTillCompleteIZNS0_22WaitForCreateTableDoneERKNS_6client11YBTableNameEEUlvE_EENS_6StatusERKNSt3__112basic_stringIcNS8_11char_traitsIcEENS8_9allocatorIcEEEET_ |
196 | | |
197 | 0 | Status WaitForSnapshotOpDone(const string& op_name, const TxnSnapshotId& snapshot_id) { |
198 | 0 | return WaitTillComplete( |
199 | 0 | op_name, |
200 | 0 | [this, &snapshot_id]() -> Result<bool> { |
201 | 0 | ListSnapshotsRequestPB list_req; |
202 | 0 | ListSnapshotsResponsePB list_resp; |
203 | 0 | list_req.set_snapshot_id(snapshot_id.AsSlice().ToBuffer()); |
204 | |
|
205 | 0 | RETURN_NOT_OK(proxy_backup_->ListSnapshots( |
206 | 0 | list_req, &list_resp, ResetAndGetController())); |
207 | 0 | SCHECK(!list_resp.has_error(), IllegalState, "Expected response without error"); |
208 | 0 | SCHECK_FORMAT(list_resp.snapshots_size() == 1, IllegalState, |
209 | 0 | "Wrong number of snapshots: ", list_resp.snapshots_size()); |
210 | 0 | return list_resp.snapshots(0).entry().state() == SysSnapshotEntryPB::COMPLETE; |
211 | 0 | }); |
212 | 0 | } |
213 | | |
214 | 0 | Status WaitForSnapshotRestorationDone(const TxnSnapshotRestorationId& restoration_id) { |
215 | 0 | return WaitTillComplete( |
216 | 0 | "IsRestorationDone", |
217 | 0 | [this, &restoration_id]() -> Result<bool> { |
218 | 0 | SCHECK(restoration_id, InvalidArgument, "Invalid restoration id"); |
219 | 0 | ListSnapshotRestorationsRequestPB list_req; |
220 | 0 | ListSnapshotRestorationsResponsePB list_resp; |
221 | 0 | list_req.set_restoration_id(restoration_id.data(), restoration_id.size()); |
222 | |
|
223 | 0 | RETURN_NOT_OK(proxy_backup_->ListSnapshotRestorations( |
224 | 0 | list_req, &list_resp, ResetAndGetController())); |
225 | 0 | if (list_resp.has_status()) { |
226 | 0 | auto status = StatusFromPB(list_resp.status()); |
227 | | // If master is not yet ready, just wait and try another one. |
228 | 0 | if (status.IsServiceUnavailable()) { |
229 | 0 | return false; |
230 | 0 | } |
231 | 0 | RETURN_NOT_OK(status); |
232 | 0 | } |
233 | 0 | SCHECK_FORMAT(list_resp.restorations_size() == 1, IllegalState, |
234 | 0 | "Wrong number of restorations: ", list_resp.restorations_size()); |
235 | 0 | return list_resp.restorations(0).entry().state() == SysSnapshotEntryPB::RESTORED; |
236 | 0 | }); |
237 | 0 | } |
238 | | |
239 | 0 | Status WaitForCreateTableDone(const YBTableName& table_name) { |
240 | 0 | return WaitTillComplete( |
241 | 0 | "IsCreateTableDone", |
242 | 0 | [this, &table_name]() -> Result<bool> { |
243 | 0 | IsCreateTableDoneRequestPB req; |
244 | 0 | IsCreateTableDoneResponsePB resp; |
245 | 0 | table_name.SetIntoTableIdentifierPB(req.mutable_table()); |
246 | |
|
247 | 0 | RETURN_NOT_OK(proxy_ddl_->IsCreateTableDone(req, &resp, ResetAndGetController())); |
248 | 0 | SCHECK(!resp.has_error(), IllegalState, "Expected response without error"); |
249 | 0 | SCHECK(resp.has_done(), IllegalState, "Response must have 'done'"); |
250 | 0 | return resp.done(); |
251 | 0 | }); |
252 | 0 | } |
253 | | |
254 | 0 | TxnSnapshotId CreateSnapshot() { |
255 | 0 | CreateSnapshotRequestPB req; |
256 | 0 | CreateSnapshotResponsePB resp; |
257 | 0 | req.set_transaction_aware(true); |
258 | 0 | TableIdentifierPB* const table = req.mutable_tables()->Add(); |
259 | 0 | table->set_table_name(kTableName.table_name()); |
260 | 0 | table->mutable_namespace_()->set_name(kTableName.namespace_name()); |
261 | | |
262 | | // Check the request. |
263 | 0 | EXPECT_OK(proxy_backup_->CreateSnapshot(req, &resp, ResetAndGetController())); |
264 | | |
265 | | // Check the response. |
266 | 0 | SCOPED_TRACE(resp.DebugString()); |
267 | 0 | EXPECT_FALSE(resp.has_error()); |
268 | 0 | EXPECT_TRUE(resp.has_snapshot_id()); |
269 | 0 | const auto snapshot_id = EXPECT_RESULT(FullyDecodeTxnSnapshotId(resp.snapshot_id())); |
270 | |
|
271 | 0 | LOG(INFO) << "Started snapshot creation: ID=" << snapshot_id; |
272 | | |
273 | | // Check the snapshot creation is complete. |
274 | 0 | EXPECT_OK(WaitForSnapshotOpDone("IsCreateSnapshotDone", snapshot_id)); |
275 | |
|
276 | 0 | CheckAllSnapshots( |
277 | 0 | { |
278 | 0 | { snapshot_id, SysSnapshotEntryPB::COMPLETE } |
279 | 0 | }); |
280 | |
|
281 | 0 | return snapshot_id; |
282 | 0 | } |
283 | | |
284 | 0 | void VerifySnapshotFiles(const TxnSnapshotId& snapshot_id) { |
285 | 0 | std::unordered_map<TabletId, OpId> last_tablet_op; |
286 | |
|
287 | 0 | size_t max_tablets = 0; |
288 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) { |
289 | 0 | MiniTabletServer* const ts = cluster_->mini_tablet_server(i); |
290 | 0 | auto ts_tablet_peers = ts->server()->tablet_manager()->GetTabletPeers(); |
291 | 0 | max_tablets = std::max(max_tablets, ts_tablet_peers.size()); |
292 | 0 | for (const auto& tablet_peer : ts_tablet_peers) { |
293 | 0 | EXPECT_OK(tablet_peer->WaitUntilConsensusRunning(15s)); |
294 | 0 | last_tablet_op[tablet_peer->tablet_id()].MakeAtLeast( |
295 | 0 | tablet_peer->consensus()->GetLastReceivedOpId()); |
296 | 0 | } |
297 | 0 | } |
298 | |
|
299 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) { |
300 | 0 | MiniTabletServer* ts = cluster_->mini_tablet_server(i); |
301 | 0 | auto predicate = [max_tablets, ts]() { |
302 | 0 | return ts->server()->tablet_manager()->GetTabletPeers().size() >= max_tablets; |
303 | 0 | }; |
304 | 0 | ASSERT_OK(WaitFor(predicate, 15s, "Wait for peers to be up")); |
305 | 0 | } |
306 | | |
307 | | // Check snapshot files existence. |
308 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) { |
309 | 0 | MiniTabletServer* const ts = cluster_->mini_tablet_server(i); |
310 | 0 | auto ts_tablet_peers = ts->server()->tablet_manager()->GetTabletPeers(); |
311 | 0 | SCOPED_TRACE(Format("TServer: $0", i)); |
312 | | |
313 | | // Iterate through all available tablets (on this TabletServer), because there is |
314 | | // only one table here (testtb). And snapshot was created for this table. |
315 | 0 | for (const auto& tablet_peer : ts_tablet_peers) { |
316 | 0 | SCOPED_TRACE(Format("Tablet: $0", tablet_peer->tablet_id())); |
317 | 0 | auto last_op_id = last_tablet_op[tablet_peer->tablet_id()]; |
318 | 0 | ASSERT_OK(WaitFor([tablet_peer, last_op_id]() { |
319 | 0 | EXPECT_OK(tablet_peer->WaitUntilConsensusRunning(15s)); |
320 | 0 | return tablet_peer->consensus()->GetLastCommittedOpId() >= last_op_id; |
321 | 0 | }, |
322 | 0 | 15s, |
323 | 0 | "Wait for op id commit" |
324 | 0 | )); |
325 | 0 | FsManager* const fs = tablet_peer->tablet_metadata()->fs_manager(); |
326 | 0 | const auto rocksdb_dir = tablet_peer->tablet_metadata()->rocksdb_dir(); |
327 | 0 | const auto top_snapshots_dir = tablet_peer->tablet_metadata()->snapshots_dir(); |
328 | 0 | const auto snapshot_dir = JoinPathSegments(top_snapshots_dir, snapshot_id.ToString()); |
329 | |
|
330 | 0 | LOG(INFO) << "Checking tablet snapshot folder: " << snapshot_dir; |
331 | 0 | ASSERT_TRUE(fs->Exists(rocksdb_dir)); |
332 | 0 | ASSERT_TRUE(fs->Exists(top_snapshots_dir)); |
333 | 0 | ASSERT_TRUE(fs->Exists(snapshot_dir)); |
334 | | // Check existence of snapshot files: |
335 | 0 | auto list = ASSERT_RESULT(fs->ListDir(snapshot_dir)); |
336 | 0 | ASSERT_TRUE(std::find(list.begin(), list.end(), "CURRENT") != list.end()); |
337 | 0 | bool has_manifest = false; |
338 | 0 | for (const auto& file : list) { |
339 | 0 | SCOPED_TRACE("File: " + file); |
340 | 0 | if (file.find("MANIFEST-") == 0) { |
341 | 0 | has_manifest = true; |
342 | 0 | } |
343 | 0 | if (file.find(".sst") != std::string::npos) { |
344 | 0 | auto snapshot_path = JoinPathSegments(snapshot_dir, file); |
345 | 0 | auto rocksdb_path = JoinPathSegments(rocksdb_dir, file); |
346 | 0 | auto snapshot_inode = ASSERT_RESULT(fs->env()->GetFileINode(snapshot_path)); |
347 | 0 | auto rocksdb_inode = ASSERT_RESULT(fs->env()->GetFileINode(rocksdb_path)); |
348 | 0 | ASSERT_EQ(snapshot_inode, rocksdb_inode); |
349 | 0 | LOG(INFO) << "Snapshot: " << snapshot_path << " vs " << rocksdb_path |
350 | 0 | << ", inode: " << snapshot_inode << " vs " << rocksdb_inode; |
351 | 0 | } |
352 | 0 | } |
353 | 0 | ASSERT_TRUE(has_manifest); |
354 | 0 | } |
355 | 0 | } |
356 | 0 | } |
357 | | |
358 | 0 | TestWorkload SetupWorkload() { |
359 | 0 | TestWorkload workload(cluster_.get()); |
360 | 0 | workload.set_table_name(kTableName); |
361 | 0 | workload.set_sequential_write(true); |
362 | 0 | workload.set_insert_failures_allowed(false); |
363 | 0 | workload.set_num_write_threads(1); |
364 | 0 | workload.set_write_batch_size(10); |
365 | 0 | workload.Setup(); |
366 | 0 | return workload; |
367 | 0 | } |
368 | | |
369 | | protected: |
370 | | std::unique_ptr<Messenger> messenger_; |
371 | | unique_ptr<MasterBackupProxy> proxy_backup_; |
372 | | unique_ptr<master::MasterDdlProxy> proxy_ddl_; |
373 | | RpcController controller_; |
374 | | std::unique_ptr<client::YBClient> client_; |
375 | | }; |
376 | | |
377 | 0 | TEST_F(SnapshotTest, CreateSnapshot) { |
378 | 0 | SetupWorkload(); // Used to create table |
379 | | |
380 | | // Check tablet folders before the snapshot creation. |
381 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) { |
382 | 0 | MiniTabletServer* const ts = cluster_->mini_tablet_server(i); |
383 | 0 | auto ts_tablet_peers = ts->server()->tablet_manager()->GetTabletPeers(); |
384 | | |
385 | | // Iterate through all available tablets (on this TabletServer). |
386 | | // There is only one table here (testtb). |
387 | 0 | for (std::shared_ptr<TabletPeer>& tablet_peer : ts_tablet_peers) { |
388 | 0 | FsManager* const fs = tablet_peer->tablet_metadata()->fs_manager(); |
389 | 0 | const string rocksdb_dir = tablet_peer->tablet_metadata()->rocksdb_dir(); |
390 | 0 | const string top_snapshots_dir = tablet_peer->tablet_metadata()->snapshots_dir(); |
391 | |
|
392 | 0 | ASSERT_TRUE(fs->Exists(rocksdb_dir)); |
393 | 0 | ASSERT_TRUE(fs->Exists(top_snapshots_dir)); |
394 | 0 | } |
395 | 0 | } |
396 | |
|
397 | 0 | CheckAllSnapshots({}); |
398 | | |
399 | | // Check CreateSnapshot(). |
400 | 0 | const auto snapshot_id = CreateSnapshot(); |
401 | |
|
402 | 0 | ASSERT_NO_FATALS(VerifySnapshotFiles(snapshot_id)); |
403 | |
|
404 | 0 | ASSERT_OK(cluster_->RestartSync()); |
405 | 0 | } |
406 | | |
407 | 0 | TEST_F(SnapshotTest, RestoreSnapshot) { |
408 | 0 | TestWorkload workload = SetupWorkload(); |
409 | 0 | workload.Start(); |
410 | |
|
411 | 0 | workload.WaitInserted(100); |
412 | |
|
413 | 0 | CheckAllSnapshots({}); |
414 | |
|
415 | 0 | int64_t min_inserted = workload.rows_inserted(); |
416 | | // Check CreateSnapshot(). |
417 | 0 | const auto snapshot_id = CreateSnapshot(); |
418 | 0 | int64_t max_inserted = workload.rows_inserted(); |
419 | |
|
420 | 0 | workload.WaitInserted(max_inserted + 100); |
421 | |
|
422 | 0 | workload.StopAndJoin(); |
423 | | |
424 | | // Check RestoreSnapshot(). |
425 | 0 | TxnSnapshotRestorationId restoration_id = TxnSnapshotRestorationId::Nil(); |
426 | 0 | { |
427 | 0 | RestoreSnapshotRequestPB req; |
428 | 0 | RestoreSnapshotResponsePB resp; |
429 | 0 | req.set_snapshot_id(snapshot_id.AsSlice().ToBuffer()); |
430 | | |
431 | | // Check the request. |
432 | 0 | ASSERT_OK(proxy_backup_->RestoreSnapshot(req, &resp, ResetAndGetController())); |
433 | | |
434 | | // Check the response. |
435 | 0 | SCOPED_TRACE(resp.DebugString()); |
436 | 0 | ASSERT_FALSE(resp.has_error()); |
437 | 0 | ASSERT_TRUE(resp.has_restoration_id()); |
438 | 0 | restoration_id = TryFullyDecodeTxnSnapshotRestorationId(resp.restoration_id()); |
439 | 0 | LOG(INFO) << "Started snapshot restoring: ID=" << snapshot_id |
440 | 0 | << " Restoration ID=" << restoration_id; |
441 | 0 | } |
442 | | |
443 | | // Check the snapshot restoring is complete. |
444 | 0 | ASSERT_OK(WaitForSnapshotRestorationDone(restoration_id)); |
445 | |
|
446 | 0 | CheckAllSnapshots( |
447 | 0 | { |
448 | 0 | { snapshot_id, SysSnapshotEntryPB::COMPLETE } |
449 | 0 | }); |
450 | |
|
451 | 0 | client::TableHandle table; |
452 | 0 | ASSERT_OK(table.Open(kTableName, client_.get())); |
453 | 0 | int64_t inserted_before_min = 0; |
454 | 0 | for (const auto& row : client::TableRange(table)) { |
455 | 0 | auto key = row.column(0).int32_value(); |
456 | 0 | ASSERT_LE(key, max_inserted); |
457 | 0 | ASSERT_GE(key, 1); |
458 | 0 | if (key <= min_inserted) { |
459 | 0 | ++inserted_before_min; |
460 | 0 | } |
461 | 0 | } |
462 | 0 | ASSERT_EQ(inserted_before_min, min_inserted); |
463 | 0 | } |
464 | | |
// Verifies that snapshot files survive remote bootstrap: stop one tserver,
// write enough data (and wait past WAL retention) that it cannot catch up from
// logs, snapshot the table, then restart it and check the snapshot files it
// obtains via remote bootstrap.
TEST_F(SnapshotTest, SnapshotRemoteBootstrap) {
  auto* const ts0 = cluster_->mini_tablet_server(0);

  // Shutdown one node, so remote bootstrap will be required after its start.
  ts0->Shutdown();
  // Outermost guard: runs last, after the inner workload guard below.
  auto se = ScopeExit([ts0] {
    // Restart the node in the end, because we need to perform table deletion, etc.
    LOG(INFO) << "Restarting the stopped tserver";
    ASSERT_OK(ts0->RestartStoppedServer());
    ASSERT_OK(ts0->WaitStarted());
  });

  TxnSnapshotId snapshot_id = TxnSnapshotId::Nil();
  {
    LOG(INFO) << "Setting up workload";
    TestWorkload workload = SetupWorkload();
    workload.Start();
    // Inner guard (shadows the outer 'se'): stops the workload when this
    // block exits, before any of the flush/bootstrap steps below.
    auto se = ScopeExit([&workload] {
      LOG(INFO) << "Stopping workload";
      workload.StopAndJoin();
    });
    LOG(INFO) << "Waiting for data to be inserted";
    workload.WaitInserted(1000);

    LOG(INFO) << "Creating snapshot";
    snapshot_id = CreateSnapshot();

    LOG(INFO) << "Wait to make sure that we would need remote bootstrap";
    // Sleep slightly past FLAGS_log_min_seconds_to_retain (set to 5 in SetUp)
    // so the WAL segments the stopped tserver would need can be GCed.
    std::this_thread::sleep_for(std::chrono::seconds(FLAGS_log_min_seconds_to_retain) * 1.1);

    // Workload will stop here at the latest.
  }

  // Flushing tablets for all tablet servers except for the one that we stopped.
  for (size_t i = 1; i < cluster_->num_tablet_servers(); ++i) {
    ASSERT_OK(cluster_->mini_tablet_server(i)->FlushTablets());
  }

  // Drop retained WAL segments so ts0 cannot catch up via log replay and must
  // remote-bootstrap instead.
  ASSERT_OK(cluster_->CleanTabletLogs());

  ASSERT_OK(ts0->Start());
  // The remotely bootstrapped replica must have the snapshot files too.
  ASSERT_NO_FATALS(VerifySnapshotFiles(snapshot_id));
}
508 | | |
// Exercises the backup/restore metadata round trip: snapshot the table,
// capture its exported metadata (prepare_for_backup), delete the table and
// namespace, re-import the metadata, and verify the recreated objects get
// fresh ids but the same names and tablet count.
TEST_F(SnapshotTest, ImportSnapshotMeta) {
  TestWorkload workload = SetupWorkload();
  workload.Start();
  workload.WaitInserted(100);

  CheckAllSnapshots({});

  Result<bool> result_exist = client_->TableExists(kTableName);
  ASSERT_OK(result_exist);
  ASSERT_TRUE(result_exist.get());

  // Check CreateSnapshot().
  const auto snapshot_id = CreateSnapshot();

  workload.StopAndJoin();

  // Check the snapshot creating is complete.
  ASSERT_OK(WaitForSnapshotOpDone("IsCreateSnapshotDone", snapshot_id));

  CheckAllSnapshots(
      {
        { snapshot_id, SysSnapshotEntryPB::COMPLETE }
      });

  // Fetch the snapshot with prepare_for_backup=true so the response carries
  // the full backup_entries (namespace/table/tablet metadata) for import.
  ListSnapshotsRequestPB list_req;
  ListSnapshotsResponsePB list_resp;
  list_req.set_snapshot_id(snapshot_id.AsSlice().ToBuffer());
  list_req.set_prepare_for_backup(true);
  ASSERT_OK(proxy_backup_->ListSnapshots(list_req, &list_resp, ResetAndGetController()));
  LOG(INFO) << "Requested available snapshots.";
  SCOPED_TRACE(list_resp.DebugString());
  ASSERT_FALSE(list_resp.has_error());

  ASSERT_EQ(list_resp.snapshots_size(), 1);
  const SnapshotInfoPB& snapshot = list_resp.snapshots(0);

  // Get snapshot items names.
  const SysSnapshotEntryPB& snapshot_pb = snapshot.entry();
  const int old_table_num_tablets = snapshot_pb.tablet_snapshots_size();
  string old_table_name, old_namespace_name;

  // Decode the exported entries to record the original namespace and table
  // names; these must match the objects recreated by the import below.
  for (const BackupRowEntryPB& backup_entry : snapshot.backup_entries()) {
    const SysRowEntry& entry = backup_entry.entry();
    switch (entry.type()) {
      case SysRowEntryType::NAMESPACE: { // Get NAMESPACE name.
        SysNamespaceEntryPB meta;
        const string& data = entry.data();
        ASSERT_OK(pb_util::ParseFromArray(&meta, to_uchar_ptr(data.data()), data.size()));
        ASSERT_TRUE(old_namespace_name.empty()); // One namespace allowed.
        old_namespace_name = meta.name();
        break;
      }
      case SysRowEntryType::TABLE: { // Recreate TABLE.
        SysTablesEntryPB meta;
        const string& data = entry.data();
        ASSERT_OK(pb_util::ParseFromArray(&meta, to_uchar_ptr(data.data()), data.size()));
        ASSERT_TRUE(old_table_name.empty()); // One table allowed.
        old_table_name = meta.name();
        break;
      }
      case SysRowEntryType::TABLET: // No need to get tablet info. Ignore.
        break;
      default:
        ASSERT_OK(STATUS_SUBSTITUTE(
            IllegalState, "Unexpected snapshot entry type $0", entry.type()));
    }
  }

  // Drop both objects so the import has to recreate them from metadata alone.
  LOG(INFO) << "Deleting table & namespace: " << kTableName.ToString();
  ASSERT_OK(client_->DeleteTable(kTableName));
  ASSERT_OK(client_->DeleteNamespace(kTableName.namespace_name()));

  result_exist = client_->TableExists(kTableName);
  ASSERT_OK(result_exist);
  ASSERT_FALSE(result_exist.get());

  result_exist = client_->NamespaceExists(kTableName.namespace_name());
  ASSERT_OK(result_exist);
  ASSERT_FALSE(result_exist.get());

  // Check ImportSnapshotMeta().
  {
    ImportSnapshotMetaRequestPB req;
    ImportSnapshotMetaResponsePB resp;
    *req.mutable_snapshot() = snapshot;

    // Check the request.
    ASSERT_OK(proxy_backup_->ImportSnapshotMeta(req, &resp, ResetAndGetController()));

    // Check the response.
    SCOPED_TRACE(resp.DebugString());
    ASSERT_FALSE(resp.has_error());
    LOG(INFO) << "Imported snapshot: ID=" << snapshot_id << ". ID map:";

    const RepeatedPtrField<ImportSnapshotMetaResponsePB_TableMetaPB>& tables_meta =
        resp.tables_meta();

    // Each imported object must get a NEW id, while names and the tablet
    // count must match the originals captured before the delete.
    for (int i = 0; i < tables_meta.size(); ++i) {
      const ImportSnapshotMetaResponsePB_TableMetaPB& table_meta = tables_meta.Get(i);

      const IdPairPB& ns_pair = table_meta.namespace_ids();
      LOG(INFO) << "Keyspace: " << ns_pair.old_id() << " -> " << ns_pair.new_id();
      ASSERT_NE(ns_pair.old_id(), ns_pair.new_id());

      const string new_namespace_name = cluster_->mini_master()->catalog_manager().
          GetNamespaceName(ns_pair.new_id());
      ASSERT_EQ(old_namespace_name, new_namespace_name);

      const IdPairPB& table_pair = table_meta.table_ids();
      LOG(INFO) << "Table: " << table_pair.old_id() << " -> " << table_pair.new_id();
      ASSERT_NE(table_pair.old_id(), table_pair.new_id());
      scoped_refptr<TableInfo> info = cluster_->mini_master()->catalog_manager().
          GetTableInfo(table_pair.new_id());
      ASSERT_EQ(old_table_name, info->name());
      auto tablets = info->GetTablets();
      ASSERT_EQ(old_table_num_tablets, tablets.size());

      const RepeatedPtrField<IdPairPB>& tablets_map = table_meta.tablets_ids();
      for (int j = 0; j < tablets_map.size(); ++j) {
        const IdPairPB& pair = tablets_map.Get(j);
        LOG(INFO) << "Tablet " << j << ": " << pair.old_id() << " -> " << pair.new_id();
        ASSERT_NE(pair.old_id(), pair.new_id());
      }
    }
  }

  // Check imported table creating is complete.
  ASSERT_OK(WaitForCreateTableDone(kTableName));

  result_exist = client_->TableExists(kTableName);
  ASSERT_OK(result_exist);
  ASSERT_TRUE(result_exist.get());

  result_exist = client_->NamespaceExists(kTableName.namespace_name());
  ASSERT_OK(result_exist);
  ASSERT_TRUE(result_exist.get());

  LOG(INFO) << "Test ImportSnapshotMeta finished.";
}
648 | | |
649 | | } // namespace yb |