/Users/deen/code/yugabyte-db/src/yb/integration-tests/delete_table-test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include <memory> |
34 | | #include <string> |
35 | | #include <thread> |
36 | | |
37 | | #include <boost/optional.hpp> |
38 | | #include <gtest/gtest.h> |
39 | | |
40 | | #include "yb/client/client-test-util.h" |
41 | | #include "yb/client/schema.h" |
42 | | #include "yb/client/table_creator.h" |
43 | | #include "yb/client/yb_table_name.h" |
44 | | |
45 | | #include "yb/common/partition.h" |
46 | | #include "yb/common/wire_protocol.h" |
47 | | #include "yb/common/wire_protocol-test-util.h" |
48 | | |
49 | | #include "yb/gutil/stl_util.h" |
50 | | #include "yb/gutil/strings/split.h" |
51 | | #include "yb/gutil/strings/substitute.h" |
52 | | |
53 | | #include "yb/integration-tests/cluster_verifier.h" |
54 | | #include "yb/integration-tests/external_mini_cluster-itest-base.h" |
55 | | #include "yb/integration-tests/test_workload.h" |
56 | | |
57 | | #include "yb/master/master_defaults.h" |
58 | | #include "yb/master/master_client.proxy.h" |
59 | | |
60 | | #include "yb/rpc/rpc_controller.h" |
61 | | |
62 | | #include "yb/tablet/tablet.pb.h" |
63 | | |
64 | | #include "yb/tserver/tserver_admin.proxy.h" |
65 | | #include "yb/tserver/tserver.pb.h" |
66 | | |
67 | | #include "yb/util/curl_util.h" |
68 | | #include "yb/util/status_log.h" |
69 | | #include "yb/util/subprocess.h" |
70 | | #include "yb/util/tsan_util.h" |
71 | | |
72 | | using yb::client::YBClient; |
73 | | using yb::client::YBClientBuilder; |
74 | | using yb::client::YBSchema; |
75 | | using yb::client::YBSchemaFromSchema; |
76 | | using yb::client::YBTableCreator; |
77 | | using yb::client::YBTableName; |
78 | | using yb::consensus::CONSENSUS_CONFIG_COMMITTED; |
79 | | using yb::consensus::ConsensusMetadataPB; |
80 | | using yb::consensus::ConsensusStatePB; |
81 | | using yb::consensus::PeerMemberType; |
82 | | using yb::consensus::RaftPeerPB; |
83 | | using yb::itest::TServerDetails; |
84 | | using yb::tablet::TABLET_DATA_COPYING; |
85 | | using yb::tablet::TABLET_DATA_DELETED; |
86 | | using yb::tablet::TABLET_DATA_READY; |
87 | | using yb::tablet::TABLET_DATA_TOMBSTONED; |
88 | | using yb::tablet::TabletDataState; |
89 | | using yb::tablet::RaftGroupReplicaSuperBlockPB; |
90 | | using yb::tserver::ListTabletsResponsePB; |
91 | | using yb::tserver::TabletServerErrorPB; |
92 | | using std::numeric_limits; |
93 | | using std::string; |
94 | | using std::unordered_map; |
95 | | using std::vector; |
96 | | using strings::Substitute; |
97 | | |
98 | | using namespace std::literals; |
99 | | |
100 | | namespace yb { |
101 | | |
102 | | class DeleteTableTest : public ExternalMiniClusterITestBase { |
103 | | protected: |
104 | | enum IsCMetaExpected { |
105 | | CMETA_NOT_EXPECTED = 0, |
106 | | CMETA_EXPECTED = 1 |
107 | | }; |
108 | | |
109 | | enum IsSuperBlockExpected { |
110 | | SUPERBLOCK_NOT_EXPECTED = 0, |
111 | | SUPERBLOCK_EXPECTED = 1 |
112 | | }; |
113 | | |
114 | | // Get the UUID of the leader of the specified tablet, as seen by the TS with |
115 | | // the given 'ts_uuid'. |
116 | | string GetLeaderUUID(const string& ts_uuid, const string& tablet_id); |
117 | | |
118 | | Status CheckTabletTombstonedOrDeletedOnTS( |
119 | | size_t index, |
120 | | const string& tablet_id, |
121 | | TabletDataState data_state, |
122 | | IsCMetaExpected is_cmeta_expected, |
123 | | IsSuperBlockExpected is_superblock_expected); |
124 | | |
125 | | Status CheckTabletTombstonedOnTS(size_t index, |
126 | | const string& tablet_id, |
127 | | IsCMetaExpected is_cmeta_expected); |
128 | | |
129 | | Status CheckTabletDeletedOnTS(size_t index, |
130 | | const string& tablet_id, |
131 | | IsSuperBlockExpected is_superblock_expected); |
132 | | |
133 | | void WaitForTabletTombstonedOnTS(size_t index, |
134 | | const string& tablet_id, |
135 | | IsCMetaExpected is_cmeta_expected); |
136 | | |
137 | | void WaitForTabletDeletedOnTS(size_t index, |
138 | | const string& tablet_id, |
139 | | IsSuperBlockExpected is_superblock_expected); |
140 | | |
141 | | void WaitForAllTSToCrash(); |
142 | | void WaitUntilTabletRunning(size_t index, const std::string& tablet_id); |
143 | | |
144 | | // Delete the given table. If the operation times out, dumps the master stacks |
145 | | // to help debug master-side deadlocks. |
146 | | void DeleteTable(const YBTableName& table_name); |
147 | | |
148 | | // Repeatedly try to delete the tablet, retrying on failure up to the |
149 | | // specified timeout. Deletion can fail when other operations, such as |
150 | | // bootstrap, are running. |
151 | | void DeleteTabletWithRetries(const TServerDetails* ts, const string& tablet_id, |
152 | | TabletDataState delete_type, const MonoDelta& timeout); |
153 | | |
154 | | void WaitForLoadBalanceCompletion(yb::MonoDelta timeout); |
155 | | |
156 | | // Returns a list of all tablet servers registered with the master leader. |
157 | | CHECKED_STATUS ListAllLiveTabletServersRegisteredWithMaster(const MonoDelta& timeout, |
158 | | vector<string>* ts_list); |
159 | | |
160 | | Result<bool> VerifyTableCompletelyDeleted(const YBTableName& table, const string& tablet_id); |
161 | | }; |
162 | | |
163 | 0 | string DeleteTableTest::GetLeaderUUID(const string& ts_uuid, const string& tablet_id) { |
164 | 0 | ConsensusStatePB cstate; |
165 | 0 | auto deadline = MonoTime::Now() + 10s; |
166 | 0 | for (;;) { |
167 | 0 | CHECK_OK(itest::GetConsensusState( |
168 | 0 | ts_map_[ts_uuid].get(), |
169 | 0 | tablet_id, |
170 | 0 | CONSENSUS_CONFIG_COMMITTED, |
171 | 0 | deadline - MonoTime::Now(), |
172 | 0 | &cstate)); |
173 | 0 | if (!cstate.leader_uuid().empty()) { |
174 | 0 | break; |
175 | 0 | } |
176 | 0 | CHECK(MonoTime::Now() <= deadline); |
177 | 0 | std::this_thread::sleep_for(100ms); |
178 | 0 | } |
179 | 0 | CHECK(!cstate.leader_uuid().empty()); |
180 | 0 | return cstate.leader_uuid(); |
181 | 0 | } |
182 | | |
183 | | Status DeleteTableTest::CheckTabletTombstonedOrDeletedOnTS( |
184 | | size_t index, |
185 | | const string& tablet_id, |
186 | | TabletDataState data_state, |
187 | | IsCMetaExpected is_cmeta_expected, |
188 | 0 | IsSuperBlockExpected is_superblock_expected) { |
189 | 0 | CHECK(data_state == TABLET_DATA_TOMBSTONED || data_state == TABLET_DATA_DELETED) << data_state; |
190 | | // There should be no WALs and no cmeta. |
191 | 0 | if (inspect_->CountWALSegmentsForTabletOnTS(index, tablet_id) > 0) { |
192 | 0 | return STATUS(IllegalState, "WAL segments exist for tablet", tablet_id); |
193 | 0 | } |
194 | 0 | if (is_cmeta_expected == CMETA_EXPECTED && |
195 | 0 | !inspect_->DoesConsensusMetaExistForTabletOnTS(index, tablet_id)) { |
196 | 0 | return STATUS(IllegalState, "Expected cmeta for tablet " + tablet_id + " but it doesn't exist"); |
197 | 0 | } |
198 | 0 | if (is_superblock_expected == SUPERBLOCK_EXPECTED) { |
199 | 0 | RETURN_NOT_OK(inspect_->CheckTabletDataStateOnTS(index, tablet_id, data_state)); |
200 | 0 | } else { |
201 | 0 | RaftGroupReplicaSuperBlockPB superblock_pb; |
202 | 0 | Status s = inspect_->ReadTabletSuperBlockOnTS(index, tablet_id, &superblock_pb); |
203 | 0 | if (!s.IsNotFound()) { |
204 | 0 | return STATUS(IllegalState, "Found unexpected superblock for tablet " + tablet_id); |
205 | 0 | } |
206 | 0 | } |
207 | 0 | return Status::OK(); |
208 | 0 | } |
209 | | |
210 | | Status DeleteTableTest::CheckTabletTombstonedOnTS(size_t index, |
211 | | const string& tablet_id, |
212 | 0 | IsCMetaExpected is_cmeta_expected) { |
213 | 0 | return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_TOMBSTONED, |
214 | 0 | is_cmeta_expected, SUPERBLOCK_EXPECTED); |
215 | 0 | } |
216 | | |
217 | | Status DeleteTableTest::CheckTabletDeletedOnTS(size_t index, |
218 | | const string& tablet_id, |
219 | 0 | IsSuperBlockExpected is_superblock_expected) { |
220 | 0 | return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_DELETED, |
221 | 0 | CMETA_NOT_EXPECTED, is_superblock_expected); |
222 | 0 | } |
223 | | |
224 | | void DeleteTableTest::WaitForTabletTombstonedOnTS(size_t index, |
225 | | const string& tablet_id, |
226 | 0 | IsCMetaExpected is_cmeta_expected) { |
227 | 0 | Status s; |
228 | 0 | for (int i = 0; i < 6000; i++) { |
229 | 0 | s = CheckTabletTombstonedOnTS(index, tablet_id, is_cmeta_expected); |
230 | 0 | if (s.ok()) return; |
231 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
232 | 0 | } |
233 | 0 | ASSERT_OK(s); |
234 | 0 | } |
235 | | |
236 | | void DeleteTableTest::WaitForTabletDeletedOnTS(size_t index, |
237 | | const string& tablet_id, |
238 | 0 | IsSuperBlockExpected is_superblock_expected) { |
239 | 0 | Status s; |
240 | 0 | for (int i = 0; i < 6000; i++) { |
241 | 0 | s = CheckTabletDeletedOnTS(index, tablet_id, is_superblock_expected); |
242 | 0 | if (s.ok()) return; |
243 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
244 | 0 | } |
245 | 0 | ASSERT_OK(s); |
246 | 0 | } |
247 | | |
248 | 0 | void DeleteTableTest::WaitForAllTSToCrash() { |
249 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) { |
250 | 0 | ASSERT_OK(cluster_->WaitForTSToCrash(i)); |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | 0 | void DeleteTableTest::WaitUntilTabletRunning(size_t index, const std::string& tablet_id) { |
255 | 0 | ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(index)->uuid()].get(), |
256 | 0 | tablet_id, |
257 | 0 | MonoDelta::FromSeconds(30))); |
258 | 0 | } |
259 | | |
260 | 0 | void DeleteTableTest::DeleteTable(const YBTableName& table_name) { |
261 | 0 | Status s = client_->DeleteTable(table_name); |
262 | 0 | if (s.IsTimedOut()) { |
263 | 0 | WARN_NOT_OK(PstackWatcher::DumpPidStacks(cluster_->master()->pid()), |
264 | 0 | "Couldn't dump stacks"); |
265 | 0 | } |
266 | 0 | ASSERT_OK(s); |
267 | 0 | } |
268 | | |
269 | | void DeleteTableTest::DeleteTabletWithRetries(const TServerDetails* ts, |
270 | | const string& tablet_id, |
271 | | TabletDataState delete_type, |
272 | 0 | const MonoDelta& timeout) { |
273 | 0 | MonoTime start(MonoTime::Now()); |
274 | 0 | MonoTime deadline = start; |
275 | 0 | deadline.AddDelta(timeout); |
276 | 0 | Status s; |
277 | 0 | while (true) { |
278 | 0 | s = itest::DeleteTablet(ts, tablet_id, delete_type, boost::none, timeout); |
279 | 0 | if (s.ok()) return; |
280 | 0 | if (deadline.ComesBefore(MonoTime::Now())) { |
281 | 0 | break; |
282 | 0 | } |
283 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
284 | 0 | } |
285 | 0 | ASSERT_OK(s); |
286 | 0 | } |
287 | | |
288 | 0 | void DeleteTableTest::WaitForLoadBalanceCompletion(yb::MonoDelta timeout) { |
289 | 0 | ASSERT_OK(LoggedWaitFor([&]() -> Result<bool> { |
290 | 0 | return !VERIFY_RESULT(client_->IsLoadBalancerIdle()); |
291 | 0 | }, timeout, "IsLoadBalancerActive")); |
292 | |
|
293 | 0 | ASSERT_OK(LoggedWaitFor([&]() -> Result<bool> { |
294 | 0 | return client_->IsLoadBalancerIdle(); |
295 | 0 | }, timeout, "IsLoadBalancerIdle")); |
296 | 0 | } |
297 | | |
298 | | Status DeleteTableTest::ListAllLiveTabletServersRegisteredWithMaster(const MonoDelta& timeout, |
299 | 0 | vector<string>* ts_list) { |
300 | 0 | master::ListTabletServersRequestPB req; |
301 | 0 | master::ListTabletServersResponsePB resp; |
302 | 0 | rpc::RpcController rpc; |
303 | 0 | rpc.set_timeout(timeout); |
304 | 0 | auto leader_idx = VERIFY_RESULT(cluster_->GetLeaderMasterIndex()); |
305 | |
|
306 | 0 | auto proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(leader_idx); |
307 | 0 | RETURN_NOT_OK(proxy.ListTabletServers(req, &resp, &rpc)); |
308 | |
|
309 | 0 | for (const auto& nodes : resp.servers()) { |
310 | 0 | if (nodes.alive()) { |
311 | 0 | (*ts_list).push_back(nodes.instance_id().permanent_uuid()); |
312 | 0 | } |
313 | 0 | } |
314 | |
|
315 | 0 | return Status::OK(); |
316 | 0 | } |
317 | | |
318 | | Result<bool> DeleteTableTest::VerifyTableCompletelyDeleted( |
319 | 0 | const YBTableName& table, const string& tablet_id) { |
320 | | // 1) Should not list it in ListTables. |
321 | 0 | const auto tables = VERIFY_RESULT(client_->ListTables(table.table_name(), true)); |
322 | 0 | if (tables.size() != 0) { |
323 | 0 | return false; |
324 | 0 | } |
325 | | |
326 | | // 2) Should respond to GetTableSchema with a NotFound error. |
327 | 0 | YBSchema schema; |
328 | 0 | PartitionSchema partition_schema; |
329 | 0 | Status s = client_->GetTableSchema(table, &schema, &partition_schema); |
330 | 0 | if (!s.IsNotFound()) { |
331 | 0 | return false; |
332 | 0 | } |
333 | | |
334 | | // 3) Should return an error for GetTabletLocations RPCs. |
335 | 0 | { |
336 | 0 | rpc::RpcController rpc; |
337 | 0 | master::GetTabletLocationsRequestPB req; |
338 | 0 | master::GetTabletLocationsResponsePB resp; |
339 | 0 | rpc.set_timeout(MonoDelta::FromSeconds(10)); |
340 | 0 | req.add_tablet_ids()->assign(tablet_id); |
341 | 0 | auto leader_idx = VERIFY_RESULT(cluster_->GetLeaderMasterIndex()); |
342 | 0 | RETURN_NOT_OK(cluster_->GetMasterProxy<master::MasterClientProxy>( |
343 | 0 | leader_idx).GetTabletLocations(req, &resp, &rpc)); |
344 | |
|
345 | 0 | if (resp.errors(0).ShortDebugString().find("code: NOT_FOUND") == std::string::npos) { |
346 | 0 | return false; |
347 | 0 | } |
348 | 0 | } |
349 | 0 | return true; |
350 | 0 | } |
351 | | |
352 | 1 | TEST_F(DeleteTableTest, TestPendingDeleteStateClearedOnFailure) { |
353 | 1 | vector<string> tserver_flags, master_flags; |
354 | 1 | master_flags.push_back("--unresponsive_ts_rpc_timeout_ms=5000"); |
355 | | // Disable tablet delete operations. |
356 | 1 | tserver_flags.push_back("--TEST_rpc_delete_tablet_fail=true"); |
357 | 1 | ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags, 3)); |
358 | | // Create a table on the cluster. We're just using TestWorkload |
359 | | // as a convenient way to create it. |
360 | 1 | auto test_workload = TestWorkload(cluster_.get()); |
361 | 1 | test_workload.Setup(); |
362 | | |
363 | | // The table should have replicas on all three tservers. |
364 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
365 | | |
366 | 0 | client_->TEST_set_admin_operation_timeout(MonoDelta::FromSeconds(10)); |
367 | | |
368 | | // Delete the table. |
369 | 0 | DeleteTable(TestWorkloadOptions::kDefaultTableName); |
370 | | |
371 | | // Wait for the load balancer to report no pending deletes after the delete table fails. |
372 | 0 | ASSERT_OK(WaitFor([&] () { return client_->IsLoadBalanced(3); }, |
373 | 0 | MonoDelta::FromSeconds(30), "IsLoadBalanced")); |
374 | 0 | } |
375 | | |
376 | | // Test deleting an empty table, and ensure that the tablets get removed, |
377 | | // and the master no longer shows the table as existing. |
378 | 1 | TEST_F(DeleteTableTest, TestDeleteEmptyTable) { |
379 | 1 | ASSERT_NO_FATALS(StartCluster()); |
380 | | // Create a table on the cluster. We're just using TestWorkload |
381 | | // as a convenient way to create it. |
382 | 1 | TestWorkload(cluster_.get()).Setup(); |
383 | | |
384 | | // The table should have replicas on all three tservers. |
385 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
386 | | |
387 | | // Grab the tablet ID (used later). |
388 | 0 | vector<string> tablets = inspect_->ListTabletsOnTS(1); |
389 | 0 | ASSERT_EQ(1, tablets.size()); |
390 | 0 | const string& tablet_id = tablets[0]; |
391 | | |
392 | | // Delete it and wait for the replicas to get deleted. |
393 | 0 | ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName)); |
394 | 0 | for (int i = 0; i < 3; i++) { |
395 | 0 | ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED)); |
396 | 0 | } |
397 | | |
398 | | // Restart the cluster, the superblocks should be deleted on startup. |
399 | 0 | cluster_->Shutdown(); |
400 | 0 | ASSERT_OK(cluster_->Restart()); |
401 | 0 | ASSERT_OK(inspect_->WaitForNoData()); |
402 | | |
403 | | // Check that the master no longer exposes the table in any way: |
404 | | |
405 | | // 1) Should not list it in ListTables. |
406 | 0 | const auto tables = ASSERT_RESULT(client_->ListTables(/* filter */ "", /* exclude_ysql */ true)); |
407 | 0 | ASSERT_EQ(master::kNumSystemTables, tables.size()); |
408 | | |
409 | | // 2) Should respond to GetTableSchema with a NotFound error. |
410 | 0 | YBSchema schema; |
411 | 0 | PartitionSchema partition_schema; |
412 | 0 | Status s = client_->GetTableSchema( |
413 | 0 | TestWorkloadOptions::kDefaultTableName, &schema, &partition_schema); |
414 | 0 | ASSERT_TRUE(s.IsNotFound()) << s.ToString(); |
415 | | |
416 | | // 3) Should return an error for GetTabletLocations RPCs. |
417 | 0 | { |
418 | 0 | rpc::RpcController rpc; |
419 | 0 | master::GetTabletLocationsRequestPB req; |
420 | 0 | master::GetTabletLocationsResponsePB resp; |
421 | 0 | rpc.set_timeout(MonoDelta::FromSeconds(10)); |
422 | 0 | req.add_tablet_ids()->assign(tablet_id); |
423 | 0 | ASSERT_OK(cluster_->GetMasterProxy<master::MasterClientProxy>().GetTabletLocations( |
424 | 0 | req, &resp, &rpc)); |
425 | 0 | SCOPED_TRACE(resp.DebugString()); |
426 | 0 | ASSERT_EQ(1, resp.errors_size()); |
427 | 0 | ASSERT_STR_CONTAINS(resp.errors(0).ShortDebugString(), "code: NOT_FOUND"); |
428 | 0 | } |
429 | | |
430 | | // 4) The master 'dump-entities' page should not list the deleted table or tablets. |
431 | 0 | EasyCurl c; |
432 | 0 | faststring entities_buf; |
433 | 0 | ASSERT_OK(c.FetchURL(Substitute("http://$0/dump-entities", |
434 | 0 | cluster_->master()->bound_http_hostport().ToString()), |
435 | 0 | &entities_buf)); |
436 | 0 | ASSERT_TRUE(entities_buf.ToString().find( |
437 | 0 | TestWorkloadOptions::kDefaultTableName.table_name()) == std::string::npos); |
438 | 0 | } |
439 | | |
440 | | // Test that a DeleteTable RPC is rejected without a matching destination UUID. |
441 | 1 | TEST_F(DeleteTableTest, TestDeleteTableDestUuidValidation) { |
442 | 1 | ASSERT_NO_FATALS(StartCluster()); |
443 | | // Create a table on the cluster. We're just using TestWorkload |
444 | | // as a convenient way to create it. |
445 | 1 | TestWorkload(cluster_.get()).Setup(); |
446 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
447 | | |
448 | 0 | vector<string> tablets = inspect_->ListTabletsOnTS(1); |
449 | 0 | ASSERT_EQ(1, tablets.size()); |
450 | 0 | const string& tablet_id = tablets[0]; |
451 | |
|
452 | 0 | TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get(); |
453 | |
|
454 | 0 | tserver::DeleteTabletRequestPB req; |
455 | 0 | tserver::DeleteTabletResponsePB resp; |
456 | 0 | rpc::RpcController rpc; |
457 | 0 | rpc.set_timeout(MonoDelta::FromSeconds(20)); |
458 | |
|
459 | 0 | req.set_dest_uuid("fake-uuid"); |
460 | 0 | req.set_tablet_id(tablet_id); |
461 | 0 | req.set_delete_type(TABLET_DATA_TOMBSTONED); |
462 | 0 | ASSERT_OK(ts->tserver_admin_proxy->DeleteTablet(req, &resp, &rpc)); |
463 | 0 | ASSERT_TRUE(resp.has_error()); |
464 | 0 | ASSERT_EQ(tserver::TabletServerErrorPB::WRONG_SERVER_UUID, resp.error().code()) |
465 | 0 | << resp.ShortDebugString(); |
466 | 0 | ASSERT_STR_CONTAINS(StatusFromPB(resp.error().status()).ToString(), |
467 | 0 | "Wrong destination UUID"); |
468 | 0 | } |
469 | | |
470 | | // Test the atomic CAS argument to DeleteTablet(). |
471 | 1 | TEST_F(DeleteTableTest, TestAtomicDeleteTablet) { |
472 | 1 | MonoDelta timeout = MonoDelta::FromSeconds(30); |
473 | 1 | ASSERT_NO_FATALS(StartCluster()); |
474 | | // Create a table on the cluster. We're just using TestWorkload |
475 | | // as a convenient way to create it. |
476 | 1 | TestWorkload(cluster_.get()).Setup(); |
477 | | |
478 | | // The table should have replicas on all three tservers. |
479 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
480 | | |
481 | | // Grab the tablet ID (used later). |
482 | 0 | vector<string> tablets = inspect_->ListTabletsOnTS(1); |
483 | 0 | ASSERT_EQ(1, tablets.size()); |
484 | 0 | const string& tablet_id = tablets[0]; |
485 | |
|
486 | 0 | const int kTsIndex = 0; |
487 | 0 | TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get(); |
488 | | |
489 | | // The committed config starts off with an opid_index of -1, so choose something lower. |
490 | 0 | boost::optional<int64_t> opid_index(-2); |
491 | 0 | tserver::TabletServerErrorPB::Code error_code; |
492 | 0 | ASSERT_OK(itest::WaitUntilTabletRunning(ts, tablet_id, timeout)); |
493 | |
|
494 | 0 | Status s; |
495 | 0 | for (int i = 0; i < 100; i++) { |
496 | 0 | s = itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout, |
497 | 0 | &error_code); |
498 | 0 | if (error_code == TabletServerErrorPB::CAS_FAILED) break; |
499 | | // If we didn't get the expected CAS_FAILED error, it's OK to get 'TABLET_NOT_RUNNING' |
500 | | // because the "creating" maintenance state persists just slightly after it starts to |
501 | | // expose 'RUNNING' state in ListTablets() |
502 | 0 | ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_RUNNING, error_code) |
503 | 0 | << "unexpected error: " << s.ToString(); |
504 | 0 | SleepFor(MonoDelta::FromMilliseconds(100)); |
505 | 0 | } |
506 | |
|
507 | 0 | ASSERT_EQ(TabletServerErrorPB::CAS_FAILED, error_code) << "unexpected error: " << s.ToString(); |
508 | 0 | ASSERT_STR_CONTAINS(s.ToString(), "of -2 but the committed config has opid_index of -1"); |
509 | | |
510 | | // Now use the "latest", which is -1. |
511 | 0 | opid_index = -1; |
512 | 0 | ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout, |
513 | 0 | &error_code)); |
514 | 0 | ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_TOMBSTONED)); |
515 | | |
516 | | // Now that the tablet is already tombstoned, our opid_index should be |
517 | | // ignored (because it's impossible to check it). |
518 | 0 | ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, -9999, timeout, |
519 | 0 | &error_code)); |
520 | 0 | ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_TOMBSTONED)); |
521 | | |
522 | | // Same with TOMBSTONED -> DELETED. |
523 | 0 | ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, -9999, timeout, |
524 | 0 | &error_code)); |
525 | 0 | ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_DELETED)); |
526 | 0 | } |
527 | | |
528 | 1 | TEST_F(DeleteTableTest, TestDeleteTableWithConcurrentWrites) { |
529 | 1 | ASSERT_NO_FATALS(StartCluster()); |
530 | 1 | int n_iters = AllowSlowTests() ? 20 : 1; |
531 | 1 | for (int i = 0; i < n_iters; i++) { |
532 | 1 | TestWorkload workload(cluster_.get()); |
533 | 1 | workload.set_table_name(YBTableName(YQL_DATABASE_CQL, "my_keyspace", |
534 | 1 | Substitute("table-$0", i))); |
535 | | |
536 | | // We'll delete the table underneath the writers, so we expcted |
537 | | // a NotFound error during the writes. |
538 | 1 | workload.set_not_found_allowed(true); |
539 | 1 | workload.Setup(); |
540 | | |
541 | | // Start the workload, and wait to see some rows actually inserted |
542 | 1 | workload.Start(); |
543 | 1 | while (workload.rows_inserted() < 100) { |
544 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
545 | 0 | } |
546 | | |
547 | 1 | vector<string> tablets = inspect_->ListTabletsOnTS(1); |
548 | 1 | ASSERT_EQ(1, tablets.size()); |
549 | 0 | const string& tablet_id = tablets[0]; |
550 | | |
551 | | // Delete it and wait for the replicas to get deleted. |
552 | 0 | ASSERT_NO_FATALS(DeleteTable(workload.table_name())); |
553 | 0 | for (int i = 0; i < 3; i++) { |
554 | 0 | ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED)); |
555 | 0 | } |
556 | | |
557 | | // Sleep just a little longer to make sure client threads send |
558 | | // requests to the missing tablets. |
559 | 0 | SleepFor(MonoDelta::FromMilliseconds(50)); |
560 | |
|
561 | 0 | workload.StopAndJoin(); |
562 | 0 | cluster_->AssertNoCrashes(); |
563 | | |
564 | | // Restart the cluster, the superblocks should be deleted on startup. |
565 | 0 | cluster_->Shutdown(); |
566 | 0 | ASSERT_OK(cluster_->Restart()); |
567 | 0 | ASSERT_OK(inspect_->WaitForNoData()); |
568 | 0 | } |
569 | 1 | } |
570 | | |
571 | 1 | TEST_F(DeleteTableTest, DeleteTableWithConcurrentWritesNoRestarts) { |
572 | 1 | ASSERT_NO_FATALS(StartCluster()); |
573 | 1 | constexpr auto kNumIters = 10; |
574 | 1 | for (int iter = 0; iter < kNumIters; iter++) { |
575 | 1 | TestWorkload workload(cluster_.get()); |
576 | 1 | workload.set_table_name(YBTableName(YQL_DATABASE_CQL, "my_keyspace", Format("table-$0", iter))); |
577 | | |
578 | | // We'll delete the table underneath the writers, so we expect a NotFound error during the |
579 | | // writes. |
580 | 1 | workload.set_not_found_allowed(true); |
581 | 1 | workload.Setup(); |
582 | 1 | workload.Start(); |
583 | | |
584 | 1 | ASSERT_OK(LoggedWaitFor( |
585 | 1 | [&workload] { return workload.rows_inserted() > 100; }, 60s, |
586 | 1 | "Waiting until we have inserted some data...", 10ms)); |
587 | | |
588 | 0 | auto tablets = inspect_->ListTabletsWithDataOnTS(1); |
589 | 0 | ASSERT_EQ(1, tablets.size()); |
590 | 0 | const auto& tablet_id = tablets[0]; |
591 | |
|
592 | 0 | ASSERT_NO_FATALS(DeleteTable(workload.table_name())); |
593 | 0 | for (size_t ts_idx = 0; ts_idx < cluster_->num_tablet_servers(); ts_idx++) { |
594 | 0 | ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(ts_idx, tablet_id, SUPERBLOCK_EXPECTED)); |
595 | 0 | } |
596 | |
|
597 | 0 | workload.StopAndJoin(); |
598 | 0 | cluster_->AssertNoCrashes(); |
599 | 0 | } |
600 | 1 | } |
601 | | |
602 | | // Test that a tablet replica is automatically tombstoned on startup if a local |
603 | | // crash occurs in the middle of remote bootstrap. |
604 | 1 | TEST_F(DeleteTableTest, TestAutoTombstoneAfterCrashDuringRemoteBootstrap) { |
605 | 1 | vector<string> tserver_flags, master_flags; |
606 | 1 | master_flags.push_back("--replication_factor=2"); |
607 | 1 | ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags)); |
608 | 1 | const MonoDelta timeout = MonoDelta::FromSeconds(40); |
609 | 1 | const int kTsIndex = 0; // We'll test with the first TS. |
610 | | |
611 | | // We'll do a config change to remote bootstrap a replica here later. For |
612 | | // now, shut it down. |
613 | 1 | LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid(); |
614 | 1 | cluster_->tablet_server(kTsIndex)->Shutdown(); |
615 | | |
616 | | // Bounce the Master so it gets new tablet reports and doesn't try to assign |
617 | | // a replica to the dead TS. |
618 | 1 | cluster_->master()->Shutdown(); |
619 | 1 | ASSERT_OK(cluster_->master()->Restart()); |
620 | 1 | ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout)); |
621 | | |
622 | | // Start a workload on the cluster, and run it for a little while. |
623 | 1 | TestWorkload workload(cluster_.get()); |
624 | 1 | workload.Setup(); |
625 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(2)); |
626 | | |
627 | 0 | workload.Start(); |
628 | 0 | while (workload.rows_inserted() < 100) { |
629 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
630 | 0 | } |
631 | 0 | workload.StopAndJoin(); |
632 | | |
633 | | // Enable a fault crash when remote bootstrap occurs on TS 0. |
634 | 0 | ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart()); |
635 | 0 | const string& kFaultFlag = "TEST_fault_crash_after_rb_files_fetched"; |
636 | 0 | ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), kFaultFlag, "1.0")); |
637 | | |
638 | | // Figure out the tablet id to remote bootstrap. |
639 | 0 | vector<string> tablets = inspect_->ListTabletsOnTS(1); |
640 | 0 | ASSERT_EQ(1, tablets.size()); |
641 | 0 | const string& tablet_id = tablets[0]; |
642 | | |
643 | | // Add our TS 0 to the config and wait for it to crash. |
644 | 0 | string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id); |
645 | 0 | TServerDetails* leader = DCHECK_NOTNULL(ts_map_[leader_uuid].get()); |
646 | 0 | TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get(); |
647 | 0 | ASSERT_OK(itest::AddServer( |
648 | 0 | leader, tablet_id, ts, PeerMemberType::PRE_VOTER, boost::none, timeout)); |
649 | 0 | ASSERT_OK(cluster_->WaitForTSToCrash(kTsIndex)); |
650 | | |
651 | | // The superblock should be in TABLET_DATA_COPYING state on disk. |
652 | 0 | ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_COPYING)); |
653 | | |
654 | | // Kill the other tablet servers so the leader doesn't try to remote |
655 | | // bootstrap it again during our verification here. |
656 | 0 | cluster_->tablet_server(1)->Shutdown(); |
657 | 0 | cluster_->tablet_server(2)->Shutdown(); |
658 | | |
659 | | // Now we restart the TS. It will clean up the failed remote bootstrap and |
660 | | // convert it to TABLET_DATA_TOMBSTONED. It crashed, so we have to call |
661 | | // Shutdown() then Restart() to bring it back up. |
662 | 0 | cluster_->tablet_server(kTsIndex)->Shutdown(); |
663 | 0 | ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart()); |
664 | 0 | ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED)); |
665 | 0 | } |
666 | | |
// Test that a tablet replica automatically tombstones itself if the remote
// bootstrap source server fails in the middle of the remote bootstrap process.
// Also test that we can remotely bootstrap a tombstoned tablet.
//
// Flow: TS 0 is added to the config and remote-bootstrapped from a leader that
// is rigged (via a fault flag) to crash while serving the data. The orphaned
// replica on TS 0 must self-tombstone. The tombstoned replica is then
// bootstrapped twice more: once when it has no consensus metadata, and once
// when it does.
TEST_F(DeleteTableTest, TestAutoTombstoneAfterRemoteBootstrapRemoteFails) {
  vector<string> tserver_flags, master_flags;

  tserver_flags.push_back("--log_segment_size_mb=1"); // Faster log rolls.

  master_flags.push_back("--enable_load_balancing=false");
  master_flags.push_back("--replication_factor=2");

  // Start the cluster with load balancer turned off.
  ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags));
  const MonoDelta timeout = MonoDelta::FromSeconds(40);
  const int kTsIndex = 0; // We'll test with the first TS.

  // We'll do a config change to remote bootstrap a replica here later. For
  // now, shut it down.
  LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
  cluster_->tablet_server(kTsIndex)->Shutdown();

  // Bounce the Master so it gets new tablet reports and doesn't try to assign
  // a replica to the dead TS.
  cluster_->master()->Shutdown();
  ASSERT_OK(cluster_->master()->Restart());
  ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout));

  // Start a workload on the cluster, and run it for a little while.
  TestWorkload workload(cluster_.get());
  workload.set_sequential_write(true);
  workload.Setup();
  ASSERT_OK(inspect_->WaitForReplicaCount(2));

  vector<string> tablets = inspect_->ListTabletsOnTS(1);
  ASSERT_EQ(1, tablets.size());
  const string& tablet_id = tablets[0];

  workload.Start();
  while (workload.rows_inserted() < 100) {
    SleepFor(MonoDelta::FromMilliseconds(10));
  }

  // Remote bootstrap doesn't see the active WAL segment, and we need to
  // download a file to trigger the fault in this test. Due to the log index
  // chunks, that means 3 files minimum: One in-flight WAL segment, one index
  // chunk file (these files grow much more slowly than the WAL segments), and
  // one completed WAL segment.
  string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
  int leader_index = cluster_->tablet_server_index_by_uuid(leader_uuid);
  ASSERT_NE(-1, leader_index);
  ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(leader_index, tablet_id, 3));
  workload.StopAndJoin();

  // Cause the leader to crash when a follower tries to remotely bootstrap from it.
  const string& fault_flag = "TEST_fault_crash_on_handle_rb_fetch_data";
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(leader_index), fault_flag, "1.0"));

  // Add our TS 0 to the config and wait for the leader to crash.
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
  TServerDetails* leader = ts_map_[leader_uuid].get();
  TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get();
  ASSERT_OK(itest::AddServer(
      leader, tablet_id, ts, PeerMemberType::PRE_VOTER, boost::none, timeout));
  ASSERT_OK(cluster_->WaitForTSToCrash(leader_index));

  // The tablet server will detect that the leader failed, and automatically
  // tombstone its replica. Shut down the other non-leader replica to avoid
  // interference while we wait for this to happen.
  cluster_->tablet_server(1)->Shutdown();
  cluster_->tablet_server(2)->Shutdown();
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));

  // Now bring the other replicas back, and wait for the leader to remote
  // bootstrap the tombstoned replica. This will have replaced a tablet with no
  // consensus metadata.
  ASSERT_OK(cluster_->tablet_server(1)->Restart());
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));

  // Because the object deleter (whose destructor will unset the variable transition_in_progress_)
  // is created before rb_client in TsTabletManager::StartRemoteBootstrap, rb_client will be
  // destroyed before deleter. Before rb_client is destroyed, the remote bootstrap session has to be
  // destroyed too. With the new PRE_VOTER member_type, a remote bootstrap session won't finish
  // until we have successfully started a ChangeConfig. This will delay the destruction of
  // rb_client. Thus we need to wait until we know that tablet_server(0) has been promoted to a
  // VOTER role before we continue. Otherwise, we might send the DeleteTablet request before
  // transition_in_progress_ has been cleared and we'll get error
  // "State transition of tablet XXX already in progress: remote bootstrapping tablet".
  leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
  auto leader_it = ts_map_.find(leader_uuid);
  ASSERT_NE(leader_it, ts_map_.end())
      << "Leader UUID: " << leader_uuid << ", ts map: " << yb::ToString(ts_map_);
  leader = leader_it->second.get();
  ASSERT_OK(WaitUntilCommittedConfigNumVotersIs(3, leader, tablet_id, timeout));

  ClusterVerifier cluster_verifier(cluster_.get());
  ASSERT_NO_FATALS(cluster_verifier.CheckCluster());
  ASSERT_NO_FATALS(cluster_verifier.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
                                                  workload.rows_inserted()));

  // For now there is no way to know if the server has finished its remote bootstrap (by verifying
  // that its role has changed in its consensus object). As a workaround, sleep for 10 seconds
  // before pausing the other two servers which are needed to propagate the consensus to the new
  // server.
  SleepFor(MonoDelta::FromSeconds(10));
  // Now pause the other replicas and tombstone our replica again.
  ASSERT_OK(cluster_->tablet_server(1)->Pause());
  ASSERT_OK(cluster_->tablet_server(2)->Pause());

  // If we send the request before the lock in StartRemoteBootstrap is released (not really a lock,
  // but effectively it serves as one), we need to retry.
  ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));

  // Bring them back again, let them yet again bootstrap our tombstoned replica.
  // This time, the leader will have replaced a tablet with consensus metadata.
  ASSERT_OK(cluster_->tablet_server(1)->Resume());
  ASSERT_OK(cluster_->tablet_server(2)->Resume());
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));

  ASSERT_NO_FATALS(cluster_verifier.CheckCluster());
  ASSERT_NO_FATALS(cluster_verifier.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
                                                  workload.rows_inserted()));
}
791 | | |
// Test for correct remote bootstrap merge of consensus metadata.
//
// The test tombstones and re-bootstraps TS 0 twice, checking that a vote cast
// before the tombstone is preserved across the first remote bootstrap (same
// term) but wiped out by the second one (new term on the source).
TEST_F(DeleteTableTest, TestMergeConsensusMetadata) {
  // Enable manual leader selection.
  std::vector<std::string> ts_flags = {
    "--enable_leader_failure_detection=false"s,
    // Disable pre-elections since we wait for term to become 2,
    // that does not happen with pre-elections
    "--use_preelection=false"s
  };

  std::vector<std::string> master_flags = {
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
    "--use_create_table_leader_hint=false"s,
  };
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
  const MonoDelta timeout = MonoDelta::FromSeconds(10);
  const int kTsIndex = 0;

  TestWorkload workload(cluster_.get());
  workload.Setup();
  ASSERT_OK(inspect_->WaitForReplicaCount(3));

  // Figure out the tablet id to remote bootstrap.
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
  ASSERT_EQ(1, tablets.size());
  const string& tablet_id = tablets[0];

  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    ASSERT_NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
  }

  // Elect a leader and run some data through the cluster.
  int leader_index = 1;
  string leader_uuid = cluster_->tablet_server(leader_index)->uuid();
  ASSERT_OK(itest::StartElection(ts_map_[leader_uuid].get(), tablet_id, timeout));
  workload.Start();
  while (workload.rows_inserted() < 100) {
    SleepFor(MonoDelta::FromMilliseconds(10));
  }
  workload.StopAndJoin();
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, workload.batches_completed()));

  // Verify that TS 0 voted for the chosen leader.
  ConsensusMetadataPB cmeta_pb;
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
  ASSERT_EQ(1, cmeta_pb.current_term());
  ASSERT_EQ(leader_uuid, cmeta_pb.voted_for());

  // Shut down all but TS 0 and try to elect TS 0. The election will fail but
  // the TS will record a vote for itself as well as a new term (term 2).
  cluster_->tablet_server(1)->Shutdown();
  cluster_->tablet_server(2)->Shutdown();
  ASSERT_NO_FATALS(WaitUntilTabletRunning(kTsIndex, tablet_id));
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
  ASSERT_OK(itest::StartElection(ts, tablet_id, timeout));
  // Poll (6000 x 10ms = up to ~60s) for the new term and the self-vote to be
  // persisted to the on-disk consensus metadata.
  for (int i = 0; i < 6000; i++) {
    Status s = inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb);
    if (s.ok() &&
        cmeta_pb.current_term() == 2 &&
        cmeta_pb.voted_for() == ts->uuid()) {
      break;
    }
    SleepFor(MonoDelta::FromMilliseconds(10));
  }
  ASSERT_EQ(2, cmeta_pb.current_term());
  ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());

  // Tombstone our special little guy, then shut him down.
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
  cluster_->tablet_server(kTsIndex)->Shutdown();

  // Restart the other dudes and re-elect the same leader.
  ASSERT_OK(cluster_->tablet_server(1)->Restart());
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
  TServerDetails* leader = ts_map_[leader_uuid].get();
  ASSERT_NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
  ASSERT_NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
  ASSERT_OK(itest::WaitUntilLeader(leader, tablet_id, timeout));

  // Bring our special little guy back up.
  // Wait until he gets remote bootstrapped.
  LOG(INFO) << "Bringing TS " << cluster_->tablet_server(kTsIndex)->uuid()
            << " back up...";
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));

  // Assert that the election history is retained (voted for self).
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
  ASSERT_EQ(2, cmeta_pb.current_term());
  ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());

  // Now do the same thing as above, where we tombstone TS 0 then trigger a new
  // term (term 3) on the other machines. TS 0 will get remotely bootstrapped
  // again, but this time the vote record on TS 0 for term 2 should not be
  // retained after remote bootstrap occurs.
  cluster_->tablet_server(1)->Shutdown();
  cluster_->tablet_server(2)->Shutdown();

  // Delete with retries because the tablet might still be bootstrapping.
  ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));

  ASSERT_OK(cluster_->tablet_server(1)->Restart());
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
  ASSERT_NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
  ASSERT_NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));

  // The election history should have been wiped out.
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
  ASSERT_EQ(3, cmeta_pb.current_term());
  ASSERT_TRUE(!cmeta_pb.has_voted_for()) << cmeta_pb.ShortDebugString();
}
908 | | |
// Regression test for KUDU-987, a bug where followers with transactions in
// REPLICATING state, which means they have not yet been committed to a
// majority, cannot shut down during a DeleteTablet() call.
//
// Setup: 5 TS, majority killed so a write can replicate to only 2 of 5 peers
// and therefore stays uncommitted; the follower must still tombstone cleanly.
TEST_F(DeleteTableTest, TestDeleteFollowerWithReplicatingOperation) {
  if (!AllowSlowTests()) {
    // We will typically wait at least 5 seconds for timeouts to occur.
    LOG(INFO) << "Skipping test in fast-test mode.";
    return;
  }

  const MonoDelta timeout = MonoDelta::FromSeconds(10);

  const int kNumTabletServers = 5;
  std::vector<std::string> ts_flags = {
    "--enable_leader_failure_detection=false"s,
    "--maintenance_manager_polling_interval_ms=100"s,
  };
  std::vector<std::string> master_flags = {
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
    "--use_create_table_leader_hint=false"s,
  };
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));

  const int kTsIndex = 0; // We'll test with the first TS.
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();

  // Create the table.
  TestWorkload workload(cluster_.get());
  workload.Setup();

  // Figure out the tablet ids of the created tablets.
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
  ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, timeout, &tablets));
  const string& tablet_id = tablets[0].tablet_status().tablet_id();

  // Wait until all replicas are up and running.
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
                                            tablet_id, timeout));
  }

  // Elect TS 1 as leader.
  const int kLeaderIndex = 1;
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
  TServerDetails* leader = ts_map_[kLeaderUuid].get();
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));

  // Kill a majority, but leave the leader and a single follower.
  LOG(INFO) << "Killing majority";
  for (int i = 2; i < kNumTabletServers; i++) {
    cluster_->tablet_server(i)->Shutdown();
  }

  // Now write a single row to the leader.
  // We give 5 seconds for the timeout to pretty much guarantee that a flush
  // will occur due to the low flush threshold we set.
  LOG(INFO) << "Writing a row";
  Status s = WriteSimpleTestRow(leader, tablet_id, 1, 1, "hola, world", MonoDelta::FromSeconds(5));
  // With the majority down the write cannot commit, so it must time out,
  // leaving the operation in REPLICATING state on the follower.
  ASSERT_TRUE(s.IsTimedOut());
  ASSERT_STR_CONTAINS(s.ToString(), "timed out");

  LOG(INFO) << "Killing the leader...";
  cluster_->tablet_server(kLeaderIndex)->Shutdown();

  // Now tombstone the follower tablet. This should succeed even though there
  // are uncommitted operations on the replica.
  LOG(INFO) << "Tombstoning tablet " << tablet_id << " on TS " << ts->uuid();
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
}
979 | | |
// Verify that memtable is not flushed when tablet is deleted.
// A single-node RF1 cluster writes one row (so the memtable is non-empty),
// then arms TEST_rocksdb_crash_on_flush so that any flush attempt crashes the
// tserver; the DeleteTablet call must therefore complete without flushing.
TEST_F(DeleteTableTest, TestMemtableNoFlushOnTabletDelete) {
  const MonoDelta timeout = MonoDelta::FromSeconds(10);

  const int kNumTabletServers = 1;
  vector<string> ts_flags, master_flags;
  master_flags.push_back("--replication_factor=1");
  master_flags.push_back("--yb_num_shards_per_tserver=1");
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));

  const int kTsIndex = 0; // We'll test with the first TS.
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();

  // Create the table.
  TestWorkload workload(cluster_.get());
  workload.Setup();

  // Figure out the tablet ids of the created tablets.
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
  ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, timeout, &tablets));
  const string& tablet_id = tablets[0].tablet_status().tablet_id();

  // Wait until all replicas are up and running.
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
                                            tablet_id, timeout));
  }

  // Elect TS 0 as leader.
  const int kLeaderIndex = 0;
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
  TServerDetails* leader = ts_map_[kLeaderUuid].get();
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));

  // Now write a single row to the leader.
  LOG(INFO) << "Writing a row";
  ASSERT_OK(WriteSimpleTestRow(leader, tablet_id, 1, 1, "hola, world", MonoDelta::FromSeconds(5)));

  // Set test flag to detect that memtable should not be flushed on table delete.
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex),
                              "TEST_rocksdb_crash_on_flush", "true"));

  // Now delete the tablet.
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, boost::none, timeout));

  // Sleep to allow background memtable flush to be scheduled (in case).
  SleepFor(MonoDelta::FromMilliseconds(5 * 1000));

  // Unset test flag to allow other memtable flushes (if any) in teardown
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex),
                              "TEST_rocksdb_crash_on_flush", "false"));
}
1033 | | |
// Test that orphaned blocks are cleared from the superblock when a tablet is tombstoned.
TEST_F(DeleteTableTest, TestOrphanedBlocksClearedOnDelete) {
  const MonoDelta timeout = MonoDelta::FromSeconds(30);
  std::vector<std::string> ts_flags = {
    "--enable_leader_failure_detection=false"s,
    "--maintenance_manager_polling_interval_ms=100"s,
  };
  std::vector<std::string> master_flags = {
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
    "--use_create_table_leader_hint=false"s,
  };
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));

  const int kFollowerIndex = 0;
  TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()].get();

  // Create the table.
  TestWorkload workload(cluster_.get());
  workload.Setup();

  // Figure out the tablet id of the created tablet.
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
  ASSERT_OK(WaitForNumTabletsOnTS(follower_ts, 1, timeout, &tablets));
  const string& tablet_id = tablets[0].tablet_status().tablet_id();

  // Wait until all replicas are up and running.
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
                                            tablet_id, timeout));
  }

  // Elect TS 1 as leader.
  const int kLeaderIndex = 1;
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
  TServerDetails* leader_ts = ts_map_[kLeaderUuid].get();
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));

  // Run a write workload and wait some time for the workload to add data.
  workload.Start();
  SleepFor(MonoDelta::FromMilliseconds(2000));
  ASSERT_GT(workload.rows_inserted(), 20);
  // Shut down the leader so it doesn't try to bootstrap our follower later.
  workload.StopAndJoin();
  cluster_->tablet_server(kLeaderIndex)->Shutdown();

  // Tombstone the follower and check that follower superblock is still accessible.
  ASSERT_OK(itest::DeleteTablet(follower_ts, tablet_id, TABLET_DATA_TOMBSTONED,
                                boost::none, timeout));
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kFollowerIndex, tablet_id, CMETA_EXPECTED));
  RaftGroupReplicaSuperBlockPB superblock_pb;
  ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
}
1087 | | |
// Returns pointers to every entry of 'haystack' whose text contains 'needle'
// as a substring. The returned pointers reference elements owned by
// 'haystack' and stay valid only while 'haystack' is alive and unmodified.
std::vector<const std::string*> Grep(
    const std::string& needle, const std::vector<std::string>& haystack) {
  std::vector<const std::string*> matches;
  for (const auto& candidate : haystack) {
    if (candidate.find(needle) != std::string::npos) {
      matches.push_back(&candidate);
    }
  }
  return matches;
}
1097 | | |
1098 | 2 | vector<string> ListOpenFiles(pid_t pid) { |
1099 | 2 | string cmd = strings::Substitute("export PATH=$$PATH:/usr/bin:/usr/sbin; lsof -n -p $0", pid); |
1100 | 2 | vector<string> argv = { "bash", "-c", cmd }; |
1101 | 2 | string out; |
1102 | 2 | CHECK_OK(Subprocess::Call(argv, &out)); |
1103 | 2 | vector<string> lines = strings::Split(out, "\n"); |
1104 | 2 | return lines; |
1105 | 2 | } |
1106 | | |
1107 | 2 | size_t PrintOpenTabletFiles(pid_t pid, const string& tablet_id) { |
1108 | 2 | vector<string> lines = ListOpenFiles(pid); |
1109 | 2 | vector<const string*> wal_lines = Grep(tablet_id, lines); |
1110 | 2 | LOG(INFO) << "There are " << wal_lines.size() << " open WAL files for pid " << pid << ":"; |
1111 | 0 | for (const string* l : wal_lines) { |
1112 | 0 | LOG(INFO) << *l; |
1113 | 0 | } |
1114 | 2 | return wal_lines.size(); |
1115 | 2 | } |
1116 | | |
// Regression test for tablet deletion FD leak. See KUDU-1288.
// After tombstoning (and again after a restart), lsof must show no open file
// whose path mentions the tablet id.
TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
  const MonoDelta timeout = MonoDelta::FromSeconds(30);

  vector<string> ts_flags, master_flags;
  master_flags.push_back("--replication_factor=1");
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, 1));

  // Create the table and insert enough rows that the tablet has open files.
  TestWorkload workload(cluster_.get());
  workload.Setup();
  workload.Start();
  while (workload.rows_inserted() < 1000) {
    SleepFor(MonoDelta::FromMilliseconds(10));
  }
  workload.StopAndJoin();

  // Figure out the tablet id of the created tablet.
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
  ASSERT_OK(WaitForNumTabletsOnTS(ts_map_.begin()->second.get(), 1, timeout, &tablets));
  const string& tablet_id = tablets[0].tablet_status().tablet_id();

  // Tombstone the tablet and then ensure that lsof does not list any
  // tablet-related paths.
  ExternalTabletServer* ets = cluster_->tablet_server(0);
  ASSERT_OK(itest::DeleteTablet(ts_map_[ets->uuid()].get(),
                                tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));

  // Restart the TS after deletion and then do the same lsof check again.
  ets->Shutdown();
  ASSERT_OK(ets->Restart());
  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
}
1151 | | |
// This test simulates the following scenario.
// 1. Create an RF 3 with 3 TS and 3 masters.
// 2. Add a fourth TS.
// 3. Create a table.
// 4. Stop one of the TS completely (i.e. replicate its data to the TS created in (2)).
// 5. Delete the table.
// 6. Failover the master leader so that in-memory table/tablet maps are deleted.
// 7. Restart the tserver stopped in (4).
// Expectation: There shouldn't be any relic of the table on the TS.
TEST_F(DeleteTableTest, TestRemoveUnknownTablets) {
  // Default timeout to be used for operations.
  const MonoDelta kTimeout = MonoDelta::FromSeconds(30);

  // Reduce the timeouts after which TS is DEAD.
  vector<string> extra_tserver_flags = {
    "--follower_unavailable_considered_failed_sec=18"
  };
  vector<string> extra_master_flags = {
    "--tserver_unresponsive_timeout_ms=15000"
  };
  // Start a cluster with 3 TS and 3 masters.
  ASSERT_NO_FATALS(StartCluster(
    extra_tserver_flags, extra_master_flags, 3, 3, false
  ));
  LOG(INFO) << "Cluster with 3 masters and 3 tservers started successfully";

  // Create a table on the cluster. We're just using TestWorkload
  // as a convenient way to create it.
  TestWorkload(cluster_.get()).Setup();
  // The table should have replicas on all three tservers.
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
  LOG(INFO) << "Table with 1 tablet and 3 replicas created successfully";

  // Add a 4th TS. The load should stay [1, 1, 1, 0].
  // This new TS will get the replica when we delete one
  // of the old TS.
  ASSERT_OK(cluster_->AddTabletServer(true, extra_tserver_flags));
  ASSERT_OK(cluster_->WaitForTabletServerCount(4, kTimeout));
  LOG(INFO) << "Added a fourth tserver successfully";

  // Grab the tablet ID (used later).
  vector<string> tablets = inspect_->ListTabletsOnTS(0);
  ASSERT_EQ(1, tablets.size());
  const TabletId& tablet_id = tablets[0];
  const string& ts_uuid = cluster_->tablet_server(0)->uuid();

  // Shutdown TS 0. We'll restart it back later.
  cluster_->tablet_server(0)->Shutdown();

  // Wait for the master to mark this TS as failed.
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
    vector<string> ts_list;
    if (!ListAllLiveTabletServersRegisteredWithMaster(kTimeout, &ts_list).ok()) {
      return false;
    }
    return std::find(ts_list.begin(), ts_list.end(), ts_uuid) == ts_list.end();
  }, kTimeout, "Wait for TS to be marked dead by master"));
  // Wait for its replicas to be migrated to another tserver.
  WaitForLoadBalanceCompletion(kTimeout);
  LOG(INFO) << "Tablet Server with id 0 removed completely and successfully";

  // Delete the table now and wait for the replicas to get deleted.
  ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
  for (int i = 1; i < 3; i++) {
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
  }
  // Verify that the table is deleted completely.
  bool deleted = ASSERT_RESULT(VerifyTableCompletelyDeleted(
      TestWorkloadOptions::kDefaultTableName, tablet_id));
  ASSERT_EQ(deleted, true);
  LOG(INFO) << "Table deleted successfully";

  // Failover the master leader for the table to be removed from in-memory maps.
  ASSERT_OK(cluster_->StepDownMasterLeaderAndWaitForNewLeader());

  // Now restart the TServer and wait for the replica to be deleted.
  ASSERT_OK(cluster_->tablet_server(0)->Restart());

  ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(0, tablet_id, SUPERBLOCK_EXPECTED));
}
1232 | | |
// Variant of TestRemoveUnknownTablets: delete the table while one TS is dead
// (and marked dead by the master), without adding an extra TS or waiting for
// its replica to be migrated first. When the dead TS restarts, its stale
// replica must still be removed.
TEST_F(DeleteTableTest, DeleteWithDeadTS) {
  vector<string> extra_master_flags = {
    "--tserver_unresponsive_timeout_ms=5000"
  };
  // Start a cluster with 3 TS and 3 masters.
  ASSERT_NO_FATALS(StartCluster(
    {}, extra_master_flags, 3, 3, false
  ));
  LOG(INFO) << "Cluster with 3 masters and 3 tservers started successfully";

  // Create a table on the cluster. We're just using TestWorkload
  // as a convenient way to create it.
  TestWorkload(cluster_.get()).Setup();
  // The table should have replicas on all three tservers.
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
  LOG(INFO) << "Table with 1 tablet and 3 replicas created successfully";

  // Grab the tablet ID (used later).
  vector<string> tablets = inspect_->ListTabletsOnTS(0);
  ASSERT_EQ(1, tablets.size());
  const TabletId& tablet_id = tablets[0];
  const string& ts_uuid = cluster_->tablet_server(0)->uuid();

  // Shutdown TS 0. We'll restart it back later.
  cluster_->tablet_server(0)->Shutdown();

  // Wait for the master to mark this TS as failed.
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
    vector<string> ts_list;
    if (!ListAllLiveTabletServersRegisteredWithMaster(30s * kTimeMultiplier, &ts_list).ok()) {
      return false;
    }
    return std::find(ts_list.begin(), ts_list.end(), ts_uuid) == ts_list.end();
  }, 60s * kTimeMultiplier, "Wait for TS to be marked dead by master"));

  LOG(INFO) << "Tablet Server with index 0 removed completely and successfully";

  // Delete the table now and wait for the replicas to get deleted.
  ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
  for (int i = 1; i < 3; i++) {
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
  }

  // Check that the table is deleted completely.
  bool deleted = ASSERT_RESULT(VerifyTableCompletelyDeleted(
      TestWorkloadOptions::kDefaultTableName, tablet_id));
  ASSERT_EQ(deleted, true);
  LOG(INFO) << "Table deleted successfully";

  // Now restart the TServer and wait for the replica to be deleted.
  ASSERT_OK(cluster_->tablet_server(0)->Restart());

  ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(0, tablet_id, SUPERBLOCK_EXPECTED));
}
1287 | | |
1288 | | // Parameterized test case for TABLET_DATA_DELETED deletions. |
// Parameterized test case for TABLET_DATA_DELETED deletions.
// The parameter is the name of a fault-injection gflag (see deleted_faults
// below); each value makes the tablet server crash at a different point
// while it is processing a DeleteTablet() request.
class DeleteTableDeletedParamTest : public DeleteTableTest,
                                    public ::testing::WithParamInterface<const char*> {
};
1292 | | |
1293 | | // Test that if a server crashes mid-delete that the delete will be rolled |
1294 | | // forward on startup. Parameterized by different fault flags that cause a |
1295 | | // crash at various points. |
1296 | 3 | TEST_P(DeleteTableDeletedParamTest, TestRollForwardDelete) { |
1297 | 3 | ASSERT_NO_FATALS(StartCluster()); |
1298 | 3 | const string fault_flag = GetParam(); |
1299 | 3 | LOG(INFO) << "Running with fault flag: " << fault_flag; |
1300 | | |
1301 | | // Dynamically set the fault flag so they crash when DeleteTablet() is called |
1302 | | // by the Master. |
1303 | 12 | for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) { |
1304 | 9 | ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(i), fault_flag, "1.0")); |
1305 | 9 | } |
1306 | | |
1307 | | // Create a table on the cluster. We're just using TestWorkload |
1308 | | // as a convenient way to create it. |
1309 | 3 | TestWorkload(cluster_.get()).Setup(); |
1310 | | |
1311 | | // The table should have replicas on all three tservers. |
1312 | 3 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
1313 | | |
1314 | | // Delete it and wait for the tablet servers to crash. |
1315 | | // The DeleteTable() call can be blocking, so it should be called in a separate thread. |
1316 | 0 | std::thread delete_table_thread([&]() { |
1317 | 0 | ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName)); |
1318 | 0 | }); |
1319 | |
|
1320 | 0 | SleepFor(MonoDelta::FromMilliseconds(50)); |
1321 | 0 | ASSERT_NO_FATALS(WaitForAllTSToCrash()); |
1322 | | |
1323 | | // There should still be data left on disk. |
1324 | 0 | Status s = inspect_->CheckNoData(); |
1325 | 0 | ASSERT_TRUE(s.IsIllegalState()) << s.ToString(); |
1326 | | |
1327 | | // Now restart the tablet servers. They should roll forward their deletes. |
1328 | | // We don't have to reset the fault flag here because it was set dynamically. |
1329 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) { |
1330 | 0 | cluster_->tablet_server(i)->Shutdown(); |
1331 | 0 | ASSERT_OK(cluster_->tablet_server(i)->Restart()); |
1332 | 0 | } |
1333 | |
|
1334 | 0 | delete_table_thread.join(); |
1335 | 0 | ASSERT_OK(inspect_->WaitForNoData()); |
1336 | 0 | } |
1337 | | |
// Faults appropriate for the TABLET_DATA_DELETED case.
// Each flag (per its name) crashes the tablet server at a successive stage of
// the delete: after the data blocks are removed, after the WAL is removed,
// and after the consensus metadata is removed.
const char* deleted_faults[] = {"TEST_fault_crash_after_blocks_deleted",
                                "TEST_fault_crash_after_wal_deleted",
                                "TEST_fault_crash_after_cmeta_deleted"};

// Run TestRollForwardDelete once per fault flag above.
INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableDeletedParamTest,
                        ::testing::ValuesIn(deleted_faults));
1345 | | |
// Parameterized test case for TABLET_DATA_TOMBSTONED deletions.
// The parameter is the name of a fault-injection gflag (see tombstoned_faults
// below) that crashes the tablet server partway through tombstoning a tablet.
class DeleteTableTombstonedParamTest : public DeleteTableTest,
                                       public ::testing::WithParamInterface<const char*> {
};
1350 | | |
1351 | | // Regression test for tablet tombstoning. Tests: |
1352 | | // 1. basic creation & tombstoning of a tablet. |
1353 | | // 2. roll-forward (crash recovery) of a partially-completed tombstoning of a tablet. |
1354 | | // 3. permanent deletion of a TOMBSTONED tablet |
1355 | | // (transition from TABLET_DATA_TOMBSTONED to TABLET_DATA_DELETED). |
1356 | 2 | TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) { |
1357 | 2 | vector<string> flags; |
1358 | 2 | flags.push_back("--log_segment_size_mb=1"); // Faster log rolls. |
1359 | 2 | ASSERT_NO_FATALS(StartCluster(flags)); |
1360 | 2 | const string fault_flag = GetParam(); |
1361 | 2 | LOG(INFO) << "Running with fault flag: " << fault_flag; |
1362 | | |
1363 | 2 | MonoDelta timeout = MonoDelta::FromSeconds(30); |
1364 | | |
1365 | | // Create a table with 2 tablets. We delete the first tablet without |
1366 | | // injecting any faults, then we delete the second tablet while exercising |
1367 | | // several fault injection points. |
1368 | 2 | ASSERT_OK(client_->CreateNamespaceIfNotExists( |
1369 | 2 | TestWorkloadOptions::kDefaultTableName.namespace_name(), |
1370 | 2 | TestWorkloadOptions::kDefaultTableName.namespace_type())); |
1371 | 2 | const int kNumTablets = 2; |
1372 | 2 | Schema schema(GetSimpleTestSchema()); |
1373 | 2 | client::YBSchema client_schema(client::YBSchemaFromSchema(schema)); |
1374 | 2 | std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator()); |
1375 | 2 | ASSERT_OK(table_creator->table_name(TestWorkloadOptions::kDefaultTableName) |
1376 | 2 | .num_tablets(kNumTablets) |
1377 | 2 | .schema(&client_schema) |
1378 | 2 | .Create()); |
1379 | | |
1380 | | // Start a workload on the cluster, and run it until we find WALs on disk. |
1381 | 2 | TestWorkload workload(cluster_.get()); |
1382 | 2 | workload.Setup(); |
1383 | | |
1384 | | // The table should have 2 tablets (1 split) on all 3 tservers (for a total of 6). |
1385 | 2 | ASSERT_OK(inspect_->WaitForReplicaCount(6)); |
1386 | | |
1387 | | // Set up the proxies so we can easily send DeleteTablet() RPCs. |
1388 | 0 | TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get(); |
1389 | | |
1390 | | // Ensure the tablet server is reporting 2 tablets. |
1391 | 0 | vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets; |
1392 | 0 | ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets)); |
1393 | | |
1394 | | // Run the workload against whoever the leader is until WALs appear on TS 0 |
1395 | | // for the tablets we created. |
1396 | 0 | const int kTsIndex = 0; // Index of the tablet server we'll use for the test. |
1397 | 0 | workload.Start(); |
1398 | 0 | while (workload.rows_inserted() < 100) { |
1399 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
1400 | 0 | } |
1401 | 0 | ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex, |
1402 | 0 | tablets[0].tablet_status().tablet_id(), 3)); |
1403 | 0 | ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex, |
1404 | 0 | tablets[1].tablet_status().tablet_id(), 3)); |
1405 | 0 | workload.StopAndJoin(); |
1406 | | |
1407 | | // Shut down the master and the other tablet servers so they don't interfere |
1408 | | // by attempting to create tablets or remote bootstrap while we delete tablets. |
1409 | 0 | cluster_->master()->Shutdown(); |
1410 | 0 | cluster_->tablet_server(1)->Shutdown(); |
1411 | 0 | cluster_->tablet_server(2)->Shutdown(); |
1412 | | |
1413 | | // Tombstone the first tablet. |
1414 | 0 | string tablet_id = tablets[0].tablet_status().tablet_id(); |
1415 | 0 | LOG(INFO) << "Tombstoning first tablet " << tablet_id << "..."; |
1416 | 0 | ASSERT_TRUE(inspect_->DoesConsensusMetaExistForTabletOnTS(kTsIndex, tablet_id)) << tablet_id; |
1417 | 0 | ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout)); |
1418 | 0 | LOG(INFO) << "Waiting for first tablet to be tombstoned..."; |
1419 | 0 | ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED)); |
1420 | |
|
1421 | 0 | ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets)); |
1422 | 0 | for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) { |
1423 | 0 | if (t.tablet_status().tablet_id() == tablet_id) { |
1424 | 0 | ASSERT_EQ(tablet::SHUTDOWN, t.tablet_status().state()); |
1425 | 0 | ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state()) |
1426 | 0 | << t.tablet_status().tablet_id() << " not tombstoned"; |
1427 | 0 | } |
1428 | 0 | } |
1429 | | |
1430 | | // Now tombstone the 2nd tablet, causing a fault. |
1431 | 0 | ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), fault_flag, "1.0")); |
1432 | 0 | tablet_id = tablets[1].tablet_status().tablet_id(); |
1433 | 0 | LOG(INFO) << "Tombstoning second tablet " << tablet_id << "..."; |
1434 | 0 | WARN_NOT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout), |
1435 | 0 | "Delete tablet failed"); |
1436 | 0 | ASSERT_OK(cluster_->WaitForTSToCrash(kTsIndex)); |
1437 | | |
1438 | | // Restart the tablet server and wait for the WALs to be deleted and for the |
1439 | | // superblock to show that it is tombstoned. |
1440 | 0 | cluster_->tablet_server(kTsIndex)->Shutdown(); |
1441 | | // Don't start the CQL proxy, since it'll try to connect to the master. |
1442 | 0 | ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart(false)); |
1443 | 0 | LOG(INFO) << "Waiting for second tablet to be tombstoned..."; |
1444 | 0 | ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED)); |
1445 | | |
1446 | | // The tombstoned tablets will still show up in ListTablets(), |
1447 | | // just with their data state set as TOMBSTONED. They should also be listed |
1448 | | // as NOT_STARTED because we restarted the server. |
1449 | 0 | ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets)); |
1450 | 0 | for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) { |
1451 | 0 | ASSERT_EQ(tablet::NOT_STARTED, t.tablet_status().state()); |
1452 | 0 | ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state()) |
1453 | 0 | << t.tablet_status().tablet_id() << " not tombstoned"; |
1454 | 0 | } |
1455 | | |
1456 | | // Check that, upon restart of the tablet server with a tombstoned tablet, |
1457 | | // we don't unnecessary "roll forward" and rewrite the tablet metadata file |
1458 | | // when it is already fully deleted. |
1459 | 0 | int64_t orig_mtime = inspect_->GetTabletSuperBlockMTimeOrDie(kTsIndex, tablet_id); |
1460 | 0 | cluster_->tablet_server(kTsIndex)->Shutdown(); |
1461 | 0 | ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart()); |
1462 | 0 | int64_t new_mtime = inspect_->GetTabletSuperBlockMTimeOrDie(kTsIndex, tablet_id); |
1463 | 0 | ASSERT_EQ(orig_mtime, new_mtime) |
1464 | 0 | << "Tablet superblock should not have been re-flushed unnecessarily"; |
1465 | | |
1466 | | // Finally, delete all tablets on the TS, and wait for all data to be gone. |
1467 | 0 | LOG(INFO) << "Deleting all tablets..."; |
1468 | 0 | for (const ListTabletsResponsePB::StatusAndSchemaPB& tablet : tablets) { |
1469 | 0 | string tablet_id = tablet.tablet_status().tablet_id(); |
1470 | | // We need retries here, since some of the tablets may still be |
1471 | | // bootstrapping after being restarted above. |
1472 | 0 | ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_DELETED, timeout)); |
1473 | 0 | ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(kTsIndex, tablet_id, SUPERBLOCK_EXPECTED)); |
1474 | 0 | } |
1475 | | |
1476 | | // Restart the TS, the superblock should be deleted on startup. |
1477 | 0 | cluster_->tablet_server(kTsIndex)->Shutdown(); |
1478 | | // Don't start the CQL proxy, since it'll try to connect to the master. |
1479 | 0 | ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart(false)); |
1480 | 0 | ASSERT_OK(inspect_->WaitForNoDataOnTS(kTsIndex)); |
1481 | 0 | } |
1482 | | |
// Faults appropriate for the TABLET_DATA_TOMBSTONED case.
// Tombstoning a tablet does not delete the consensus metadata, so (unlike
// deleted_faults above) there is no "after cmeta deleted" crash point here.
const char* tombstoned_faults[] = {"TEST_fault_crash_after_blocks_deleted",
                                   "TEST_fault_crash_after_wal_deleted"};

// Run TestTabletTombstone once per fault flag above.
INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableTombstonedParamTest,
                        ::testing::ValuesIn(tombstoned_faults));
1490 | | |
1491 | | } // namespace yb |