YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/integration-tests/delete_table-test.cc
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include <memory>
34
#include <string>
35
#include <thread>
36
37
#include <boost/optional.hpp>
38
#include <gtest/gtest.h>
39
40
#include "yb/client/client-test-util.h"
41
#include "yb/client/schema.h"
42
#include "yb/client/table_creator.h"
43
#include "yb/client/yb_table_name.h"
44
45
#include "yb/common/partition.h"
46
#include "yb/common/wire_protocol.h"
47
#include "yb/common/wire_protocol-test-util.h"
48
49
#include "yb/gutil/stl_util.h"
50
#include "yb/gutil/strings/split.h"
51
#include "yb/gutil/strings/substitute.h"
52
53
#include "yb/integration-tests/cluster_verifier.h"
54
#include "yb/integration-tests/external_mini_cluster-itest-base.h"
55
#include "yb/integration-tests/test_workload.h"
56
57
#include "yb/master/master_defaults.h"
58
#include "yb/master/master_client.proxy.h"
59
60
#include "yb/rpc/rpc_controller.h"
61
62
#include "yb/tablet/tablet.pb.h"
63
64
#include "yb/tserver/tserver_admin.proxy.h"
65
#include "yb/tserver/tserver.pb.h"
66
67
#include "yb/util/curl_util.h"
68
#include "yb/util/status_log.h"
69
#include "yb/util/subprocess.h"
70
#include "yb/util/tsan_util.h"
71
72
using yb::client::YBClient;
73
using yb::client::YBClientBuilder;
74
using yb::client::YBSchema;
75
using yb::client::YBSchemaFromSchema;
76
using yb::client::YBTableCreator;
77
using yb::client::YBTableName;
78
using yb::consensus::CONSENSUS_CONFIG_COMMITTED;
79
using yb::consensus::ConsensusMetadataPB;
80
using yb::consensus::ConsensusStatePB;
81
using yb::consensus::PeerMemberType;
82
using yb::consensus::RaftPeerPB;
83
using yb::itest::TServerDetails;
84
using yb::tablet::TABLET_DATA_COPYING;
85
using yb::tablet::TABLET_DATA_DELETED;
86
using yb::tablet::TABLET_DATA_READY;
87
using yb::tablet::TABLET_DATA_TOMBSTONED;
88
using yb::tablet::TabletDataState;
89
using yb::tablet::RaftGroupReplicaSuperBlockPB;
90
using yb::tserver::ListTabletsResponsePB;
91
using yb::tserver::TabletServerErrorPB;
92
using std::numeric_limits;
93
using std::string;
94
using std::unordered_map;
95
using std::vector;
96
using strings::Substitute;
97
98
using namespace std::literals;
99
100
namespace yb {
101
102
class DeleteTableTest : public ExternalMiniClusterITestBase {
103
 protected:
104
  enum IsCMetaExpected {
105
    CMETA_NOT_EXPECTED = 0,
106
    CMETA_EXPECTED = 1
107
  };
108
109
  enum IsSuperBlockExpected {
110
    SUPERBLOCK_NOT_EXPECTED = 0,
111
    SUPERBLOCK_EXPECTED = 1
112
  };
113
114
  // Get the UUID of the leader of the specified tablet, as seen by the TS with
115
  // the given 'ts_uuid'.
116
  string GetLeaderUUID(const string& ts_uuid, const string& tablet_id);
117
118
  Status CheckTabletTombstonedOrDeletedOnTS(
119
      size_t index,
120
      const string& tablet_id,
121
      TabletDataState data_state,
122
      IsCMetaExpected is_cmeta_expected,
123
      IsSuperBlockExpected is_superblock_expected);
124
125
  Status CheckTabletTombstonedOnTS(size_t index,
126
                                   const string& tablet_id,
127
                                   IsCMetaExpected is_cmeta_expected);
128
129
  Status CheckTabletDeletedOnTS(size_t index,
130
                                const string& tablet_id,
131
                                IsSuperBlockExpected is_superblock_expected);
132
133
  void WaitForTabletTombstonedOnTS(size_t index,
134
                                   const string& tablet_id,
135
                                   IsCMetaExpected is_cmeta_expected);
136
137
  void WaitForTabletDeletedOnTS(size_t index,
138
                                const string& tablet_id,
139
                                IsSuperBlockExpected is_superblock_expected);
140
141
  void WaitForAllTSToCrash();
142
  void WaitUntilTabletRunning(size_t index, const std::string& tablet_id);
143
144
  // Delete the given table. If the operation times out, dumps the master stacks
145
  // to help debug master-side deadlocks.
146
  void DeleteTable(const YBTableName& table_name);
147
148
  // Repeatedly try to delete the tablet, retrying on failure up to the
149
  // specified timeout. Deletion can fail when other operations, such as
150
  // bootstrap, are running.
151
  void DeleteTabletWithRetries(const TServerDetails* ts, const string& tablet_id,
152
                               TabletDataState delete_type, const MonoDelta& timeout);
153
154
  void WaitForLoadBalanceCompletion(yb::MonoDelta timeout);
155
156
  // Returns a list of all tablet servers registered with the master leader.
157
  CHECKED_STATUS ListAllLiveTabletServersRegisteredWithMaster(const MonoDelta& timeout,
158
                                                          vector<string>* ts_list);
159
160
  Result<bool> VerifyTableCompletelyDeleted(const YBTableName& table, const string& tablet_id);
161
};
162
163
0
string DeleteTableTest::GetLeaderUUID(const string& ts_uuid, const string& tablet_id) {
164
0
  ConsensusStatePB cstate;
165
0
  auto deadline = MonoTime::Now() + 10s;
166
0
  for (;;) {
167
0
    CHECK_OK(itest::GetConsensusState(
168
0
        ts_map_[ts_uuid].get(),
169
0
        tablet_id,
170
0
        CONSENSUS_CONFIG_COMMITTED,
171
0
        deadline - MonoTime::Now(),
172
0
        &cstate));
173
0
    if (!cstate.leader_uuid().empty()) {
174
0
      break;
175
0
    }
176
0
    CHECK(MonoTime::Now() <= deadline);
177
0
    std::this_thread::sleep_for(100ms);
178
0
  }
179
0
  CHECK(!cstate.leader_uuid().empty());
180
0
  return cstate.leader_uuid();
181
0
}
182
183
Status DeleteTableTest::CheckTabletTombstonedOrDeletedOnTS(
184
      size_t index,
185
      const string& tablet_id,
186
      TabletDataState data_state,
187
      IsCMetaExpected is_cmeta_expected,
188
0
      IsSuperBlockExpected is_superblock_expected) {
189
0
  CHECK(data_state == TABLET_DATA_TOMBSTONED || data_state == TABLET_DATA_DELETED) << data_state;
190
  // There should be no WALs and no cmeta.
191
0
  if (inspect_->CountWALSegmentsForTabletOnTS(index, tablet_id) > 0) {
192
0
    return STATUS(IllegalState, "WAL segments exist for tablet", tablet_id);
193
0
  }
194
0
  if (is_cmeta_expected == CMETA_EXPECTED &&
195
0
      !inspect_->DoesConsensusMetaExistForTabletOnTS(index, tablet_id)) {
196
0
    return STATUS(IllegalState, "Expected cmeta for tablet " + tablet_id + " but it doesn't exist");
197
0
  }
198
0
  if (is_superblock_expected == SUPERBLOCK_EXPECTED) {
199
0
    RETURN_NOT_OK(inspect_->CheckTabletDataStateOnTS(index, tablet_id, data_state));
200
0
  } else {
201
0
    RaftGroupReplicaSuperBlockPB superblock_pb;
202
0
    Status s = inspect_->ReadTabletSuperBlockOnTS(index, tablet_id, &superblock_pb);
203
0
    if (!s.IsNotFound()) {
204
0
      return STATUS(IllegalState, "Found unexpected superblock for tablet " + tablet_id);
205
0
    }
206
0
  }
207
0
  return Status::OK();
208
0
}
209
210
Status DeleteTableTest::CheckTabletTombstonedOnTS(size_t index,
211
                                                  const string& tablet_id,
212
0
                                                  IsCMetaExpected is_cmeta_expected) {
213
0
  return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_TOMBSTONED,
214
0
                                            is_cmeta_expected, SUPERBLOCK_EXPECTED);
215
0
}
216
217
Status DeleteTableTest::CheckTabletDeletedOnTS(size_t index,
218
                                               const string& tablet_id,
219
0
                                               IsSuperBlockExpected is_superblock_expected) {
220
0
  return CheckTabletTombstonedOrDeletedOnTS(index, tablet_id, TABLET_DATA_DELETED,
221
0
                                            CMETA_NOT_EXPECTED, is_superblock_expected);
222
0
}
223
224
void DeleteTableTest::WaitForTabletTombstonedOnTS(size_t index,
225
                                                  const string& tablet_id,
226
0
                                                  IsCMetaExpected is_cmeta_expected) {
227
0
  Status s;
228
0
  for (int i = 0; i < 6000; i++) {
229
0
    s = CheckTabletTombstonedOnTS(index, tablet_id, is_cmeta_expected);
230
0
    if (s.ok()) return;
231
0
    SleepFor(MonoDelta::FromMilliseconds(10));
232
0
  }
233
0
  ASSERT_OK(s);
234
0
}
235
236
void DeleteTableTest::WaitForTabletDeletedOnTS(size_t index,
237
                                               const string& tablet_id,
238
0
                                               IsSuperBlockExpected is_superblock_expected) {
239
0
  Status s;
240
0
  for (int i = 0; i < 6000; i++) {
241
0
    s = CheckTabletDeletedOnTS(index, tablet_id, is_superblock_expected);
242
0
    if (s.ok()) return;
243
0
    SleepFor(MonoDelta::FromMilliseconds(10));
244
0
  }
245
0
  ASSERT_OK(s);
246
0
}
247
248
0
void DeleteTableTest::WaitForAllTSToCrash() {
249
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
250
0
    ASSERT_OK(cluster_->WaitForTSToCrash(i));
251
0
  }
252
0
}
253
254
0
void DeleteTableTest::WaitUntilTabletRunning(size_t index, const std::string& tablet_id) {
255
0
  ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(index)->uuid()].get(),
256
0
                                          tablet_id,
257
0
                                          MonoDelta::FromSeconds(30)));
258
0
}
259
260
0
void DeleteTableTest::DeleteTable(const YBTableName& table_name) {
261
0
  Status s = client_->DeleteTable(table_name);
262
0
  if (s.IsTimedOut()) {
263
0
    WARN_NOT_OK(PstackWatcher::DumpPidStacks(cluster_->master()->pid()),
264
0
                        "Couldn't dump stacks");
265
0
  }
266
0
  ASSERT_OK(s);
267
0
}
268
269
void DeleteTableTest::DeleteTabletWithRetries(const TServerDetails* ts,
270
                                              const string& tablet_id,
271
                                              TabletDataState delete_type,
272
0
                                              const MonoDelta& timeout) {
273
0
  MonoTime start(MonoTime::Now());
274
0
  MonoTime deadline = start;
275
0
  deadline.AddDelta(timeout);
276
0
  Status s;
277
0
  while (true) {
278
0
    s = itest::DeleteTablet(ts, tablet_id, delete_type, boost::none, timeout);
279
0
    if (s.ok()) return;
280
0
    if (deadline.ComesBefore(MonoTime::Now())) {
281
0
      break;
282
0
    }
283
0
    SleepFor(MonoDelta::FromMilliseconds(10));
284
0
  }
285
0
  ASSERT_OK(s);
286
0
}
287
288
0
void DeleteTableTest::WaitForLoadBalanceCompletion(yb::MonoDelta timeout) {
289
0
  ASSERT_OK(LoggedWaitFor([&]() -> Result<bool> {
290
0
    return !VERIFY_RESULT(client_->IsLoadBalancerIdle());
291
0
  }, timeout, "IsLoadBalancerActive"));
292
293
0
  ASSERT_OK(LoggedWaitFor([&]() -> Result<bool> {
294
0
    return client_->IsLoadBalancerIdle();
295
0
  }, timeout, "IsLoadBalancerIdle"));
296
0
}
297
298
Status DeleteTableTest::ListAllLiveTabletServersRegisteredWithMaster(const MonoDelta& timeout,
299
0
                                                                     vector<string>* ts_list) {
300
0
    master::ListTabletServersRequestPB req;
301
0
    master::ListTabletServersResponsePB resp;
302
0
    rpc::RpcController rpc;
303
0
    rpc.set_timeout(timeout);
304
0
    auto leader_idx = VERIFY_RESULT(cluster_->GetLeaderMasterIndex());
305
306
0
    auto proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(leader_idx);
307
0
    RETURN_NOT_OK(proxy.ListTabletServers(req, &resp, &rpc));
308
309
0
    for (const auto& nodes : resp.servers()) {
310
0
      if (nodes.alive()) {
311
0
        (*ts_list).push_back(nodes.instance_id().permanent_uuid());
312
0
      }
313
0
    }
314
315
0
    return Status::OK();
316
0
}
317
318
Result<bool> DeleteTableTest::VerifyTableCompletelyDeleted(
319
0
    const YBTableName& table, const string& tablet_id) {
320
  // 1) Should not list it in ListTables.
321
0
  const auto tables = VERIFY_RESULT(client_->ListTables(table.table_name(), true));
322
0
  if (tables.size() != 0) {
323
0
    return false;
324
0
  }
325
326
  // 2) Should respond to GetTableSchema with a NotFound error.
327
0
  YBSchema schema;
328
0
  PartitionSchema partition_schema;
329
0
  Status s = client_->GetTableSchema(table, &schema, &partition_schema);
330
0
  if (!s.IsNotFound()) {
331
0
    return false;
332
0
  }
333
334
  // 3) Should return an error for GetTabletLocations RPCs.
335
0
  {
336
0
    rpc::RpcController rpc;
337
0
    master::GetTabletLocationsRequestPB req;
338
0
    master::GetTabletLocationsResponsePB resp;
339
0
    rpc.set_timeout(MonoDelta::FromSeconds(10));
340
0
    req.add_tablet_ids()->assign(tablet_id);
341
0
    auto leader_idx = VERIFY_RESULT(cluster_->GetLeaderMasterIndex());
342
0
    RETURN_NOT_OK(cluster_->GetMasterProxy<master::MasterClientProxy>(
343
0
        leader_idx).GetTabletLocations(req, &resp, &rpc));
344
345
0
    if (resp.errors(0).ShortDebugString().find("code: NOT_FOUND") == std::string::npos) {
346
0
      return false;
347
0
    }
348
0
  }
349
0
  return true;
350
0
}
351
352
1
TEST_F(DeleteTableTest, TestPendingDeleteStateClearedOnFailure) {
353
1
  vector<string> tserver_flags, master_flags;
354
1
  master_flags.push_back("--unresponsive_ts_rpc_timeout_ms=5000");
355
  // Disable tablet delete operations.
356
1
  tserver_flags.push_back("--TEST_rpc_delete_tablet_fail=true");
357
1
  ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags, 3));
358
  // Create a table on the cluster. We're just using TestWorkload
359
  // as a convenient way to create it.
360
1
  auto test_workload = TestWorkload(cluster_.get());
361
1
  test_workload.Setup();
362
363
  // The table should have replicas on all three tservers.
364
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
365
366
0
  client_->TEST_set_admin_operation_timeout(MonoDelta::FromSeconds(10));
367
368
  // Delete the table.
369
0
  DeleteTable(TestWorkloadOptions::kDefaultTableName);
370
371
  // Wait for the load balancer to report no pending deletes after the delete table fails.
372
0
  ASSERT_OK(WaitFor([&] () { return client_->IsLoadBalanced(3); },
373
0
            MonoDelta::FromSeconds(30), "IsLoadBalanced"));
374
0
}
375
376
// Test deleting an empty table, and ensure that the tablets get removed,
377
// and the master no longer shows the table as existing.
378
1
TEST_F(DeleteTableTest, TestDeleteEmptyTable) {
379
1
  ASSERT_NO_FATALS(StartCluster());
380
  // Create a table on the cluster. We're just using TestWorkload
381
  // as a convenient way to create it.
382
1
  TestWorkload(cluster_.get()).Setup();
383
384
  // The table should have replicas on all three tservers.
385
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
386
387
  // Grab the tablet ID (used later).
388
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
389
0
  ASSERT_EQ(1, tablets.size());
390
0
  const string& tablet_id = tablets[0];
391
392
  // Delete it and wait for the replicas to get deleted.
393
0
  ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
394
0
  for (int i = 0; i < 3; i++) {
395
0
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
396
0
  }
397
398
  // Restart the cluster; the superblocks should be deleted on startup.
399
0
  cluster_->Shutdown();
400
0
  ASSERT_OK(cluster_->Restart());
401
0
  ASSERT_OK(inspect_->WaitForNoData());
402
403
  // Check that the master no longer exposes the table in any way:
404
405
  // 1) Should not list it in ListTables.
406
0
  const auto tables = ASSERT_RESULT(client_->ListTables(/* filter */ "", /* exclude_ysql */ true));
407
0
  ASSERT_EQ(master::kNumSystemTables, tables.size());
408
409
  // 2) Should respond to GetTableSchema with a NotFound error.
410
0
  YBSchema schema;
411
0
  PartitionSchema partition_schema;
412
0
  Status s = client_->GetTableSchema(
413
0
      TestWorkloadOptions::kDefaultTableName, &schema, &partition_schema);
414
0
  ASSERT_TRUE(s.IsNotFound()) << s.ToString();
415
416
  // 3) Should return an error for GetTabletLocations RPCs.
417
0
  {
418
0
    rpc::RpcController rpc;
419
0
    master::GetTabletLocationsRequestPB req;
420
0
    master::GetTabletLocationsResponsePB resp;
421
0
    rpc.set_timeout(MonoDelta::FromSeconds(10));
422
0
    req.add_tablet_ids()->assign(tablet_id);
423
0
    ASSERT_OK(cluster_->GetMasterProxy<master::MasterClientProxy>().GetTabletLocations(
424
0
        req, &resp, &rpc));
425
0
    SCOPED_TRACE(resp.DebugString());
426
0
    ASSERT_EQ(1, resp.errors_size());
427
0
    ASSERT_STR_CONTAINS(resp.errors(0).ShortDebugString(), "code: NOT_FOUND");
428
0
  }
429
430
  // 4) The master 'dump-entities' page should not list the deleted table or tablets.
431
0
  EasyCurl c;
432
0
  faststring entities_buf;
433
0
  ASSERT_OK(c.FetchURL(Substitute("http://$0/dump-entities",
434
0
                                  cluster_->master()->bound_http_hostport().ToString()),
435
0
                       &entities_buf));
436
0
  ASSERT_TRUE(entities_buf.ToString().find(
437
0
      TestWorkloadOptions::kDefaultTableName.table_name()) == std::string::npos);
438
0
}
439
440
// Test that a DeleteTablet RPC is rejected without a matching destination UUID.
441
1
TEST_F(DeleteTableTest, TestDeleteTableDestUuidValidation) {
442
1
  ASSERT_NO_FATALS(StartCluster());
443
  // Create a table on the cluster. We're just using TestWorkload
444
  // as a convenient way to create it.
445
1
  TestWorkload(cluster_.get()).Setup();
446
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
447
448
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
449
0
  ASSERT_EQ(1, tablets.size());
450
0
  const string& tablet_id = tablets[0];
451
452
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get();
453
454
0
  tserver::DeleteTabletRequestPB req;
455
0
  tserver::DeleteTabletResponsePB resp;
456
0
  rpc::RpcController rpc;
457
0
  rpc.set_timeout(MonoDelta::FromSeconds(20));
458
459
0
  req.set_dest_uuid("fake-uuid");
460
0
  req.set_tablet_id(tablet_id);
461
0
  req.set_delete_type(TABLET_DATA_TOMBSTONED);
462
0
  ASSERT_OK(ts->tserver_admin_proxy->DeleteTablet(req, &resp, &rpc));
463
0
  ASSERT_TRUE(resp.has_error());
464
0
  ASSERT_EQ(tserver::TabletServerErrorPB::WRONG_SERVER_UUID, resp.error().code())
465
0
      << resp.ShortDebugString();
466
0
  ASSERT_STR_CONTAINS(StatusFromPB(resp.error().status()).ToString(),
467
0
                      "Wrong destination UUID");
468
0
}
469
470
// Test the atomic CAS argument to DeleteTablet().
471
1
TEST_F(DeleteTableTest, TestAtomicDeleteTablet) {
472
1
  MonoDelta timeout = MonoDelta::FromSeconds(30);
473
1
  ASSERT_NO_FATALS(StartCluster());
474
  // Create a table on the cluster. We're just using TestWorkload
475
  // as a convenient way to create it.
476
1
  TestWorkload(cluster_.get()).Setup();
477
478
  // The table should have replicas on all three tservers.
479
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
480
481
  // Grab the tablet ID (used later).
482
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
483
0
  ASSERT_EQ(1, tablets.size());
484
0
  const string& tablet_id = tablets[0];
485
486
0
  const int kTsIndex = 0;
487
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
488
489
  // The committed config starts off with an opid_index of -1, so choose something lower.
490
0
  boost::optional<int64_t> opid_index(-2);
491
0
  tserver::TabletServerErrorPB::Code error_code;
492
0
  ASSERT_OK(itest::WaitUntilTabletRunning(ts, tablet_id, timeout));
493
494
0
  Status s;
495
0
  for (int i = 0; i < 100; i++) {
496
0
    s = itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
497
0
                            &error_code);
498
0
    if (error_code == TabletServerErrorPB::CAS_FAILED) break;
499
    // If we didn't get the expected CAS_FAILED error, it's OK to get 'TABLET_NOT_RUNNING'
500
    // because the "creating" maintenance state persists just slightly after it starts to
501
    // expose 'RUNNING' state in ListTablets()
502
0
    ASSERT_EQ(TabletServerErrorPB::TABLET_NOT_RUNNING, error_code)
503
0
        << "unexpected error: " << s.ToString();
504
0
    SleepFor(MonoDelta::FromMilliseconds(100));
505
0
  }
506
507
0
  ASSERT_EQ(TabletServerErrorPB::CAS_FAILED, error_code) << "unexpected error: " << s.ToString();
508
0
  ASSERT_STR_CONTAINS(s.ToString(), "of -2 but the committed config has opid_index of -1");
509
510
  // Now use the "latest", which is -1.
511
0
  opid_index = -1;
512
0
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, opid_index, timeout,
513
0
                                &error_code));
514
0
  ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_TOMBSTONED));
515
516
  // Now that the tablet is already tombstoned, our opid_index should be
517
  // ignored (because it's impossible to check it).
518
0
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, -9999, timeout,
519
0
                                &error_code));
520
0
  ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_TOMBSTONED));
521
522
  // Same with TOMBSTONED -> DELETED.
523
0
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, -9999, timeout,
524
0
                                &error_code));
525
0
  ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_DELETED));
526
0
}
527
528
1
TEST_F(DeleteTableTest, TestDeleteTableWithConcurrentWrites) {
529
1
  ASSERT_NO_FATALS(StartCluster());
530
1
  int n_iters = AllowSlowTests() ? 20 : 1;
531
1
  for (int i = 0; i < n_iters; i++) {
532
1
    TestWorkload workload(cluster_.get());
533
1
    workload.set_table_name(YBTableName(YQL_DATABASE_CQL, "my_keyspace",
534
1
        Substitute("table-$0", i)));
535
536
    // We'll delete the table underneath the writers, so we expect
537
    // a NotFound error during the writes.
538
1
    workload.set_not_found_allowed(true);
539
1
    workload.Setup();
540
541
    // Start the workload, and wait to see some rows actually inserted
542
1
    workload.Start();
543
1
    while (workload.rows_inserted() < 100) {
544
0
      SleepFor(MonoDelta::FromMilliseconds(10));
545
0
    }
546
547
1
    vector<string> tablets = inspect_->ListTabletsOnTS(1);
548
1
    ASSERT_EQ(1, tablets.size());
549
0
    const string& tablet_id = tablets[0];
550
551
    // Delete it and wait for the replicas to get deleted.
552
0
    ASSERT_NO_FATALS(DeleteTable(workload.table_name()));
553
0
    for (int i = 0; i < 3; i++) {
554
0
      ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
555
0
    }
556
557
    // Sleep just a little longer to make sure client threads send
558
    // requests to the missing tablets.
559
0
    SleepFor(MonoDelta::FromMilliseconds(50));
560
561
0
    workload.StopAndJoin();
562
0
    cluster_->AssertNoCrashes();
563
564
    // Restart the cluster; the superblocks should be deleted on startup.
565
0
    cluster_->Shutdown();
566
0
    ASSERT_OK(cluster_->Restart());
567
0
    ASSERT_OK(inspect_->WaitForNoData());
568
0
  }
569
1
}
570
571
1
TEST_F(DeleteTableTest, DeleteTableWithConcurrentWritesNoRestarts) {
572
1
  ASSERT_NO_FATALS(StartCluster());
573
1
  constexpr auto kNumIters = 10;
574
1
  for (int iter = 0; iter < kNumIters; iter++) {
575
1
    TestWorkload workload(cluster_.get());
576
1
    workload.set_table_name(YBTableName(YQL_DATABASE_CQL, "my_keyspace", Format("table-$0", iter)));
577
578
    // We'll delete the table underneath the writers, so we expect a NotFound error during the
579
    // writes.
580
1
    workload.set_not_found_allowed(true);
581
1
    workload.Setup();
582
1
    workload.Start();
583
584
1
    ASSERT_OK(LoggedWaitFor(
585
1
        [&workload] { return workload.rows_inserted() > 100; }, 60s,
586
1
        "Waiting until we have inserted some data...", 10ms));
587
588
0
    auto tablets = inspect_->ListTabletsWithDataOnTS(1);
589
0
    ASSERT_EQ(1, tablets.size());
590
0
    const auto& tablet_id = tablets[0];
591
592
0
    ASSERT_NO_FATALS(DeleteTable(workload.table_name()));
593
0
    for (size_t ts_idx = 0; ts_idx < cluster_->num_tablet_servers(); ts_idx++) {
594
0
      ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(ts_idx, tablet_id, SUPERBLOCK_EXPECTED));
595
0
    }
596
597
0
    workload.StopAndJoin();
598
0
    cluster_->AssertNoCrashes();
599
0
  }
600
1
}
601
602
// Test that a tablet replica is automatically tombstoned on startup if a local
603
// crash occurs in the middle of remote bootstrap.
604
1
TEST_F(DeleteTableTest, TestAutoTombstoneAfterCrashDuringRemoteBootstrap) {
605
1
  vector<string> tserver_flags, master_flags;
606
1
  master_flags.push_back("--replication_factor=2");
607
1
  ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags));
608
1
  const MonoDelta timeout = MonoDelta::FromSeconds(40);
609
1
  const int kTsIndex = 0;  // We'll test with the first TS.
610
611
  // We'll do a config change to remote bootstrap a replica here later. For
612
  // now, shut it down.
613
1
  LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
614
1
  cluster_->tablet_server(kTsIndex)->Shutdown();
615
616
  // Bounce the Master so it gets new tablet reports and doesn't try to assign
617
  // a replica to the dead TS.
618
1
  cluster_->master()->Shutdown();
619
1
  ASSERT_OK(cluster_->master()->Restart());
620
1
  ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout));
621
622
  // Start a workload on the cluster, and run it for a little while.
623
1
  TestWorkload workload(cluster_.get());
624
1
  workload.Setup();
625
1
  ASSERT_OK(inspect_->WaitForReplicaCount(2));
626
627
0
  workload.Start();
628
0
  while (workload.rows_inserted() < 100) {
629
0
    SleepFor(MonoDelta::FromMilliseconds(10));
630
0
  }
631
0
  workload.StopAndJoin();
632
633
  // Enable a fault crash when remote bootstrap occurs on TS 0.
634
0
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
635
0
  const string& kFaultFlag = "TEST_fault_crash_after_rb_files_fetched";
636
0
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), kFaultFlag, "1.0"));
637
638
  // Figure out the tablet id to remote bootstrap.
639
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
640
0
  ASSERT_EQ(1, tablets.size());
641
0
  const string& tablet_id = tablets[0];
642
643
  // Add our TS 0 to the config and wait for it to crash.
644
0
  string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
645
0
  TServerDetails* leader = DCHECK_NOTNULL(ts_map_[leader_uuid].get());
646
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
647
0
  ASSERT_OK(itest::AddServer(
648
0
      leader, tablet_id, ts, PeerMemberType::PRE_VOTER, boost::none, timeout));
649
0
  ASSERT_OK(cluster_->WaitForTSToCrash(kTsIndex));
650
651
  // The superblock should be in TABLET_DATA_COPYING state on disk.
652
0
  ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_COPYING));
653
654
  // Kill the other tablet servers so the leader doesn't try to remote
655
  // bootstrap it again during our verification here.
656
0
  cluster_->tablet_server(1)->Shutdown();
657
0
  cluster_->tablet_server(2)->Shutdown();
658
659
  // Now we restart the TS. It will clean up the failed remote bootstrap and
660
  // convert it to TABLET_DATA_TOMBSTONED. It crashed, so we have to call
661
  // Shutdown() then Restart() to bring it back up.
662
0
  cluster_->tablet_server(kTsIndex)->Shutdown();
663
0
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
664
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
665
0
}
666
667
// Test that a tablet replica automatically tombstones itself if the remote
668
// bootstrap source server fails in the middle of the remote bootstrap process.
669
// Also test that we can remotely bootstrap a tombstoned tablet.
670
1
TEST_F(DeleteTableTest, TestAutoTombstoneAfterRemoteBootstrapRemoteFails) {
671
1
  vector<string> tserver_flags, master_flags;
672
673
1
  tserver_flags.push_back("--log_segment_size_mb=1");  // Faster log rolls.
674
675
1
  master_flags.push_back("--enable_load_balancing=false");
676
1
  master_flags.push_back("--replication_factor=2");
677
678
  // Start the cluster with load balancer turned off.
679
1
  ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags));
680
1
  const MonoDelta timeout = MonoDelta::FromSeconds(40);
681
1
  const int kTsIndex = 0;  // We'll test with the first TS.
682
683
  // We'll do a config change to remote bootstrap a replica here later. For
684
  // now, shut it down.
685
1
  LOG(INFO) << "Shutting down TS " << cluster_->tablet_server(kTsIndex)->uuid();
686
1
  cluster_->tablet_server(kTsIndex)->Shutdown();
687
688
  // Bounce the Master so it gets new tablet reports and doesn't try to assign
689
  // a replica to the dead TS.
690
1
  cluster_->master()->Shutdown();
691
1
  ASSERT_OK(cluster_->master()->Restart());
692
1
  ASSERT_OK(cluster_->WaitForTabletServerCount(2, timeout));
693
694
  // Start a workload on the cluster, and run it for a little while.
695
1
  TestWorkload workload(cluster_.get());
696
1
  workload.set_sequential_write(true);
697
1
  workload.Setup();
698
1
  ASSERT_OK(inspect_->WaitForReplicaCount(2));
699
700
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
701
0
  ASSERT_EQ(1, tablets.size());
702
0
  const string& tablet_id = tablets[0];
703
704
0
  workload.Start();
705
0
  while (workload.rows_inserted() < 100) {
706
0
    SleepFor(MonoDelta::FromMilliseconds(10));
707
0
  }
708
709
  // Remote bootstrap doesn't see the active WAL segment, and we need to
710
  // download a file to trigger the fault in this test. Due to the log index
711
  // chunks, that means 3 files minimum: One in-flight WAL segment, one index
712
  // chunk file (these files grow much more slowly than the WAL segments), and
713
  // one completed WAL segment.
714
0
  string leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
715
0
  int leader_index = cluster_->tablet_server_index_by_uuid(leader_uuid);
716
0
  ASSERT_NE(-1, leader_index);
717
0
  ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(leader_index, tablet_id, 3));
718
0
  workload.StopAndJoin();
719
720
  // Cause the leader to crash when a follower tries to remotely bootstrap from it.
721
0
  const string& fault_flag = "TEST_fault_crash_on_handle_rb_fetch_data";
722
0
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(leader_index), fault_flag, "1.0"));
723
724
  // Add our TS 0 to the config and wait for the leader to crash.
725
0
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
726
0
  TServerDetails* leader = ts_map_[leader_uuid].get();
727
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get();
728
0
  ASSERT_OK(itest::AddServer(
729
0
      leader, tablet_id, ts, PeerMemberType::PRE_VOTER, boost::none, timeout));
730
0
  ASSERT_OK(cluster_->WaitForTSToCrash(leader_index));
731
732
  // The tablet server will detect that the leader failed, and automatically
733
  // tombstone its replica. Shut down the other non-leader replica to avoid
734
  // interference while we wait for this to happen.
735
0
  cluster_->tablet_server(1)->Shutdown();
736
0
  cluster_->tablet_server(2)->Shutdown();
737
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
738
739
  // Now bring the other replicas back, and wait for the leader to remote
740
  // bootstrap the tombstoned replica. This will have replaced a tablet with no
741
  // consensus metadata.
742
0
  ASSERT_OK(cluster_->tablet_server(1)->Restart());
743
0
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
744
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));
745
746
  // Because the object deleter (whose destructor will unset the variable transition_in_progress_)
747
  // is created before rb_client in TsTabletManager::StartRemoteBootstrap, rb_client will be
748
  // destroyed before deleter. Before rb_client is destroyed, the remote bootstrap session has to be
749
  // destroyed too. With the new PRE_VOTER member_type, a remote bootstrap session won't finish
750
  // until we have successfully started a ChangeConfig. This will delay the destruction of
751
  // rb_client. Thus we need to wait until we know that tablet_server(0) has been promoted to a
752
  // VOTER role before we continue. Otherwise, we might send the DeleteTablet request before
753
  // transition_in_progress_ has been cleared and we'll get error
754
  // "State transition of tablet XXX already in progress: remote bootstrapping tablet".
755
0
  leader_uuid = GetLeaderUUID(cluster_->tablet_server(1)->uuid(), tablet_id);
756
0
  auto leader_it = ts_map_.find(leader_uuid);
757
0
  ASSERT_NE(leader_it, ts_map_.end())
758
0
      << "Leader UUID: " << leader_uuid << ", ts map: " << yb::ToString(ts_map_);
759
0
  leader = leader_it->second.get();
760
0
  ASSERT_OK(WaitUntilCommittedConfigNumVotersIs(3, leader, tablet_id, timeout));
761
762
0
  ClusterVerifier cluster_verifier(cluster_.get());
763
0
  ASSERT_NO_FATALS(cluster_verifier.CheckCluster());
764
0
  ASSERT_NO_FATALS(cluster_verifier.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
765
0
                                                  workload.rows_inserted()));
766
767
  // For now there is no way to know if the server has finished its remote bootstrap (by verifying
768
  // that its role has changed in its consensus object). As a workaround, sleep for 10 seconds
769
  // before pausing the other two servers which are needed to propagate the consensus to the new
770
  // server.
771
0
  SleepFor(MonoDelta::FromSeconds(10));
772
  // Now pause the other replicas and tombstone our replica again.
773
0
  ASSERT_OK(cluster_->tablet_server(1)->Pause());
774
0
  ASSERT_OK(cluster_->tablet_server(2)->Pause());
775
776
  // If we send the request before the lock in StartRemoteBootstrap is released (not really a lock,
777
  // but effectively it serves as one), we need to retry.
778
0
  ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
779
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_NOT_EXPECTED));
780
781
  // Bring them back again, let them yet again bootstrap our tombstoned replica.
782
  // This time, the leader will have replaced a tablet with consensus metadata.
783
0
  ASSERT_OK(cluster_->tablet_server(1)->Resume());
784
0
  ASSERT_OK(cluster_->tablet_server(2)->Resume());
785
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));
786
787
0
  ASSERT_NO_FATALS(cluster_verifier.CheckCluster());
788
0
  ASSERT_NO_FATALS(cluster_verifier.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
789
0
                            workload.rows_inserted()));
790
0
}
791
792
// Test for correct remote bootstrap merge of consensus metadata.
793
1
TEST_F(DeleteTableTest, TestMergeConsensusMetadata) {
794
  // Enable manual leader selection.
795
1
  std::vector<std::string> ts_flags = {
796
1
    "--enable_leader_failure_detection=false"s,
797
    // Disable pre-elections since we wait for term to become 2,
798
    // which does not happen with pre-elections.
799
1
    "--use_preelection=false"s
800
1
  };
801
802
1
  std::vector<std::string> master_flags = {
803
1
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
804
1
    "--use_create_table_leader_hint=false"s,
805
1
  };
806
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
807
1
  const MonoDelta timeout = MonoDelta::FromSeconds(10);
808
1
  const int kTsIndex = 0;
809
810
1
  TestWorkload workload(cluster_.get());
811
1
  workload.Setup();
812
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
813
814
  // Figure out the tablet id to remote bootstrap.
815
0
  vector<string> tablets = inspect_->ListTabletsOnTS(1);
816
0
  ASSERT_EQ(1, tablets.size());
817
0
  const string& tablet_id = tablets[0];
818
819
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
820
0
    ASSERT_NO_FATALS(WaitUntilTabletRunning(i, tablet_id));
821
0
  }
822
823
  // Elect a leader and run some data through the cluster.
824
0
  int leader_index = 1;
825
0
  string leader_uuid = cluster_->tablet_server(leader_index)->uuid();
826
0
  ASSERT_OK(itest::StartElection(ts_map_[leader_uuid].get(), tablet_id, timeout));
827
0
  workload.Start();
828
0
  while (workload.rows_inserted() < 100) {
829
0
    SleepFor(MonoDelta::FromMilliseconds(10));
830
0
  }
831
0
  workload.StopAndJoin();
832
0
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, workload.batches_completed()));
833
834
  // Verify that TS 0 voted for the chosen leader.
835
0
  ConsensusMetadataPB cmeta_pb;
836
0
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
837
0
  ASSERT_EQ(1, cmeta_pb.current_term());
838
0
  ASSERT_EQ(leader_uuid, cmeta_pb.voted_for());
839
840
  // Shut down all but TS 0 and try to elect TS 0. The election will fail but
841
  // the TS will record a vote for itself as well as a new term (term 2).
842
0
  cluster_->tablet_server(1)->Shutdown();
843
0
  cluster_->tablet_server(2)->Shutdown();
844
0
  ASSERT_NO_FATALS(WaitUntilTabletRunning(kTsIndex, tablet_id));
845
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
846
0
  ASSERT_OK(itest::StartElection(ts, tablet_id, timeout));
847
0
  for (int i = 0; i < 6000; i++) {
848
0
    Status s = inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb);
849
0
    if (s.ok() &&
850
0
        cmeta_pb.current_term() == 2 &&
851
0
        cmeta_pb.voted_for() == ts->uuid()) {
852
0
      break;
853
0
    }
854
0
    SleepFor(MonoDelta::FromMilliseconds(10));
855
0
  }
856
0
  ASSERT_EQ(2, cmeta_pb.current_term());
857
0
  ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());
858
859
  // Tombstone our special little guy, then shut him down.
860
0
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
861
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
862
0
  cluster_->tablet_server(kTsIndex)->Shutdown();
863
864
  // Restart the other dudes and re-elect the same leader.
865
0
  ASSERT_OK(cluster_->tablet_server(1)->Restart());
866
0
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
867
0
  TServerDetails* leader = ts_map_[leader_uuid].get();
868
0
  ASSERT_NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
869
0
  ASSERT_NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
870
0
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
871
0
  ASSERT_OK(itest::WaitUntilLeader(leader, tablet_id, timeout));
872
873
  // Bring our special little guy back up.
874
  // Wait until he gets remote bootstrapped.
875
0
  LOG(INFO) << "Bringing TS " << cluster_->tablet_server(kTsIndex)->uuid()
876
0
            << " back up...";
877
0
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
878
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));
879
880
  // Assert that the election history is retained (voted for self).
881
0
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
882
0
  ASSERT_EQ(2, cmeta_pb.current_term());
883
0
  ASSERT_EQ(ts->uuid(), cmeta_pb.voted_for());
884
885
  // Now do the same thing as above, where we tombstone TS 0 then trigger a new
886
  // term (term 3) on the other machines. TS 0 will get remotely bootstrapped
887
  // again, but this time the vote record on TS 0 for term 2 should not be
888
  // retained after remote bootstrap occurs.
889
0
  cluster_->tablet_server(1)->Shutdown();
890
0
  cluster_->tablet_server(2)->Shutdown();
891
892
  // Delete with retries because the tablet might still be bootstrapping.
893
0
  ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_TOMBSTONED, timeout));
894
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));
895
896
0
  ASSERT_OK(cluster_->tablet_server(1)->Restart());
897
0
  ASSERT_OK(cluster_->tablet_server(2)->Restart());
898
0
  ASSERT_NO_FATALS(WaitUntilTabletRunning(1, tablet_id));
899
0
  ASSERT_NO_FATALS(WaitUntilTabletRunning(2, tablet_id));
900
0
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
901
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kTsIndex, tablet_id, TABLET_DATA_READY));
902
903
  // The election history should have been wiped out.
904
0
  ASSERT_OK(inspect_->ReadConsensusMetadataOnTS(kTsIndex, tablet_id, &cmeta_pb));
905
0
  ASSERT_EQ(3, cmeta_pb.current_term());
906
0
  ASSERT_TRUE(!cmeta_pb.has_voted_for()) << cmeta_pb.ShortDebugString();
907
0
}
908
909
// Regression test for KUDU-987, a bug where followers with transactions in
910
// REPLICATING state, which means they have not yet been committed to a
911
// majority, cannot shut down during a DeleteTablet() call.
912
1
TEST_F(DeleteTableTest, TestDeleteFollowerWithReplicatingOperation) {
913
1
  if (!AllowSlowTests()) {
914
    // We will typically wait at least 5 seconds for timeouts to occur.
915
1
    LOG(INFO) << "Skipping test in fast-test mode.";
916
1
    return;
917
1
  }
918
919
0
  const MonoDelta timeout = MonoDelta::FromSeconds(10);
920
921
0
  const int kNumTabletServers = 5;
922
0
  std::vector<std::string> ts_flags = {
923
0
    "--enable_leader_failure_detection=false"s,
924
0
    "--maintenance_manager_polling_interval_ms=100"s,
925
0
  };
926
0
  std::vector<std::string> master_flags = {
927
0
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
928
0
    "--use_create_table_leader_hint=false"s,
929
0
  };
930
0
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));
931
932
0
  const int kTsIndex = 0;  // We'll test with the first TS.
933
0
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
934
935
  // Create the table.
936
0
  TestWorkload workload(cluster_.get());
937
0
  workload.Setup();
938
939
  // Figure out the tablet ids of the created tablets.
940
0
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
941
0
  ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, timeout, &tablets));
942
0
  const string& tablet_id = tablets[0].tablet_status().tablet_id();
943
944
  // Wait until all replicas are up and running.
945
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
946
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
947
0
                                            tablet_id, timeout));
948
0
  }
949
950
  // Elect TS 1 as leader.
951
0
  const int kLeaderIndex = 1;
952
0
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
953
0
  TServerDetails* leader = ts_map_[kLeaderUuid].get();
954
0
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
955
0
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
956
957
  // Kill a majority, but leave the leader and a single follower.
958
0
  LOG(INFO) << "Killing majority";
959
0
  for (int i = 2; i < kNumTabletServers; i++) {
960
0
    cluster_->tablet_server(i)->Shutdown();
961
0
  }
962
963
  // Now write a single row to the leader.
964
  // We give 5 seconds for the timeout to pretty much guarantee that a flush
965
  // will occur due to the low flush threshold we set.
966
0
  LOG(INFO) << "Writing a row";
967
0
  Status s = WriteSimpleTestRow(leader, tablet_id, 1, 1, "hola, world", MonoDelta::FromSeconds(5));
968
0
  ASSERT_TRUE(s.IsTimedOut());
969
0
  ASSERT_STR_CONTAINS(s.ToString(), "timed out");
970
971
0
  LOG(INFO) << "Killing the leader...";
972
0
  cluster_->tablet_server(kLeaderIndex)->Shutdown();
973
974
  // Now tombstone the follower tablet. This should succeed even though there
975
  // are uncommitted operations on the replica.
976
0
  LOG(INFO) << "Tombstoning tablet " << tablet_id << " on TS " << ts->uuid();
977
0
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
978
0
}
979
980
// Verify that the memtable is not flushed when a tablet is deleted.
981
1
TEST_F(DeleteTableTest, TestMemtableNoFlushOnTabletDelete) {
982
1
  const MonoDelta timeout = MonoDelta::FromSeconds(10);
983
984
1
  const int kNumTabletServers = 1;
985
1
  vector<string> ts_flags, master_flags;
986
1
  master_flags.push_back("--replication_factor=1");
987
1
  master_flags.push_back("--yb_num_shards_per_tserver=1");
988
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumTabletServers));
989
990
1
  const int kTsIndex = 0;  // We'll test with the first TS.
991
1
  TServerDetails* ts = ts_map_[cluster_->tablet_server(kTsIndex)->uuid()].get();
992
993
  // Create the table.
994
1
  TestWorkload workload(cluster_.get());
995
1
  workload.Setup();
996
997
  // Figure out the tablet ids of the created tablets.
998
1
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
999
1
  ASSERT_OK(WaitForNumTabletsOnTS(ts, 1, timeout, &tablets));
1000
1
  const string& tablet_id = tablets[0].tablet_status().tablet_id();
1001
1002
  // Wait until all replicas are up and running.
1003
2
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
1004
1
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
1005
1
                                            tablet_id, timeout));
1006
1
  }
1007
1008
  // Elect TS 0 as leader.
1009
1
  const int kLeaderIndex = 0;
1010
1
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
1011
1
  TServerDetails* leader = ts_map_[kLeaderUuid].get();
1012
1
  ASSERT_OK(itest::StartElection(leader, tablet_id, timeout));
1013
1
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
1014
1015
  // Now write a single row to the leader.
1016
1
  LOG(INFO) << "Writing a row";
1017
1
  ASSERT_OK(WriteSimpleTestRow(leader, tablet_id, 1, 1, "hola, world", MonoDelta::FromSeconds(5)));
1018
1019
  // Set a test flag that crashes on flush, to verify that the memtable is not flushed on tablet delete.
1020
1
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex),
1021
1
        "TEST_rocksdb_crash_on_flush", "true"));
1022
1023
  // Now delete the tablet.
1024
1
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_DELETED, boost::none, timeout));
1025
1026
  // Sleep to give any background memtable flush a chance to be scheduled, in case one was triggered.
1027
1
  SleepFor(MonoDelta::FromMilliseconds(5 * 1000));
1028
1029
  // Unset the test flag to allow any other memtable flushes during teardown.
1030
1
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kLeaderIndex),
1031
1
        "TEST_rocksdb_crash_on_flush", "false"));
1032
1
}
1033
1034
// Test that orphaned blocks are cleared from the superblock when a tablet is tombstoned.
1035
1
TEST_F(DeleteTableTest, TestOrphanedBlocksClearedOnDelete) {
1036
1
  const MonoDelta timeout = MonoDelta::FromSeconds(30);
1037
1
  std::vector<std::string> ts_flags = {
1038
1
    "--enable_leader_failure_detection=false"s,
1039
1
    "--maintenance_manager_polling_interval_ms=100"s,
1040
1
  };
1041
1
  std::vector<std::string> master_flags = {
1042
1
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
1043
1
    "--use_create_table_leader_hint=false"s,
1044
1
  };
1045
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
1046
1047
1
  const int kFollowerIndex = 0;
1048
1
  TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()].get();
1049
1050
  // Create the table.
1051
1
  TestWorkload workload(cluster_.get());
1052
1
  workload.Setup();
1053
1054
  // Figure out the tablet id of the created tablet.
1055
1
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
1056
1
  ASSERT_OK(WaitForNumTabletsOnTS(follower_ts, 1, timeout, &tablets));
1057
0
  const string& tablet_id = tablets[0].tablet_status().tablet_id();
1058
1059
  // Wait until all replicas are up and running.
1060
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
1061
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
1062
0
                                            tablet_id, timeout));
1063
0
  }
1064
1065
  // Elect TS 1 as leader.
1066
0
  const int kLeaderIndex = 1;
1067
0
  const string kLeaderUuid = cluster_->tablet_server(kLeaderIndex)->uuid();
1068
0
  TServerDetails* leader_ts = ts_map_[kLeaderUuid].get();
1069
0
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
1070
0
  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
1071
1072
  // Run a write workload and wait some time for the workload to add data.
1073
0
  workload.Start();
1074
0
  SleepFor(MonoDelta::FromMilliseconds(2000));
1075
0
  ASSERT_GT(workload.rows_inserted(), 20);
1076
  // Shut down the leader so it doesn't try to bootstrap our follower later.
1077
0
  workload.StopAndJoin();
1078
0
  cluster_->tablet_server(kLeaderIndex)->Shutdown();
1079
1080
  // Tombstone the follower and check that follower superblock is still accessible.
1081
0
  ASSERT_OK(itest::DeleteTablet(follower_ts, tablet_id, TABLET_DATA_TOMBSTONED,
1082
0
                                boost::none, timeout));
1083
0
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kFollowerIndex, tablet_id, CMETA_EXPECTED));
1084
0
  RaftGroupReplicaSuperBlockPB superblock_pb;
1085
0
  ASSERT_OK(inspect_->ReadTabletSuperBlockOnTS(kFollowerIndex, tablet_id, &superblock_pb));
1086
0
}
1087
1088
2
vector<const string*> Grep(const string& needle, const vector<string>& haystack) {
1089
2
  vector<const string*> results;
1090
672
  for (const string& s : haystack) {
1091
672
    if (s.find(needle) != string::npos) {
1092
0
      results.push_back(&s);
1093
0
    }
1094
672
  }
1095
2
  return results;
1096
2
}
1097
1098
2
vector<string> ListOpenFiles(pid_t pid) {
1099
2
  string cmd = strings::Substitute("export PATH=$$PATH:/usr/bin:/usr/sbin; lsof -n -p $0", pid);
1100
2
  vector<string> argv = { "bash", "-c", cmd };
1101
2
  string out;
1102
2
  CHECK_OK(Subprocess::Call(argv, &out));
1103
2
  vector<string> lines = strings::Split(out, "\n");
1104
2
  return lines;
1105
2
}
1106
1107
2
size_t PrintOpenTabletFiles(pid_t pid, const string& tablet_id) {
1108
2
  vector<string> lines = ListOpenFiles(pid);
1109
2
  vector<const string*> wal_lines = Grep(tablet_id, lines);
1110
2
  LOG(INFO) << "There are " << wal_lines.size() << " open WAL files for pid " << pid << ":";
1111
0
  for (const string* l : wal_lines) {
1112
0
    LOG(INFO) << *l;
1113
0
  }
1114
2
  return wal_lines.size();
1115
2
}
1116
1117
// Regression test for tablet deletion FD leak. See KUDU-1288.
1118
1
TEST_F(DeleteTableTest, TestFDsNotLeakedOnTabletTombstone) {
1119
1
  const MonoDelta timeout = MonoDelta::FromSeconds(30);
1120
1121
1
  vector<string> ts_flags, master_flags;
1122
1
  master_flags.push_back("--replication_factor=1");
1123
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, 1));
1124
1125
  // Create the table.
1126
1
  TestWorkload workload(cluster_.get());
1127
1
  workload.Setup();
1128
1
  workload.Start();
1129
4
  while (workload.rows_inserted() < 1000) {
1130
3
    SleepFor(MonoDelta::FromMilliseconds(10));
1131
3
  }
1132
1
  workload.StopAndJoin();
1133
1134
  // Figure out the tablet id of the created tablet.
1135
1
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
1136
1
  ASSERT_OK(WaitForNumTabletsOnTS(ts_map_.begin()->second.get(), 1, timeout, &tablets));
1137
1
  const string& tablet_id = tablets[0].tablet_status().tablet_id();
1138
1139
  // Tombstone the tablet and then ensure that lsof does not list any
1140
  // tablet-related paths.
1141
1
  ExternalTabletServer* ets = cluster_->tablet_server(0);
1142
1
  ASSERT_OK(itest::DeleteTablet(ts_map_[ets->uuid()].get(),
1143
1
                                tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
1144
1
  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
1145
1146
  // Restart the TS after deletion and then do the same lsof check again.
1147
1
  ets->Shutdown();
1148
1
  ASSERT_OK(ets->Restart());
1149
1
  ASSERT_EQ(0, PrintOpenTabletFiles(ets->pid(), tablet_id));
1150
1
}
1151
1152
// This test simulates the following scenario.
1153
// 1. Create an RF 3 cluster with 3 TS and 3 masters.
1154
// 2. Add a fourth TS.
1155
// 3. Create a table.
1156
// 4. Stop one of the TS completely (so that its data gets re-replicated to the TS added in (2)).
1157
// 5. Delete the table.
1158
// 6. Fail over the master leader so that the in-memory table/tablet maps are deleted.
1159
// 7. Restart the tserver stopped in (4).
1160
// Expectation: There shouldn't be any relic of the table on the TS.
1161
1
TEST_F(DeleteTableTest, TestRemoveUnknownTablets) {
1162
  // Default timeout to be used for operations.
1163
1
  const MonoDelta kTimeout = MonoDelta::FromSeconds(30);
1164
1165
  // Reduce the timeouts after which a TS is considered DEAD.
1166
1
  vector<string> extra_tserver_flags = {
1167
1
    "--follower_unavailable_considered_failed_sec=18"
1168
1
  };
1169
1
  vector<string> extra_master_flags = {
1170
1
    "--tserver_unresponsive_timeout_ms=15000"
1171
1
  };
1172
  // Start a cluster with 3 TS and 3 masters.
1173
1
  ASSERT_NO_FATALS(StartCluster(
1174
1
    extra_tserver_flags, extra_master_flags, 3, 3, false
1175
1
  ));
1176
1
  LOG(INFO) << "Cluster with 3 masters and 3 tservers started successfully";
1177
1178
  // Create a table on the cluster. We're just using TestWorkload
1179
  // as a convenient way to create it.
1180
1
  TestWorkload(cluster_.get()).Setup();
1181
  // The table should have replicas on all three tservers.
1182
1
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
1183
0
  LOG(INFO) << "Table with 1 tablet and 3 replicas created successfully";
1184
1185
  // Add a 4th TS. The load should stay [1, 1, 1, 0].
1186
  // This new TS will get the replica when we delete one
1187
  // of the old TS.
1188
0
  ASSERT_OK(cluster_->AddTabletServer(true, extra_tserver_flags));
1189
0
  ASSERT_OK(cluster_->WaitForTabletServerCount(4, kTimeout));
1190
0
  LOG(INFO) << "Added a fourth tserver successfully";
1191
1192
  // Grab the tablet ID (used later).
1193
0
  vector<string> tablets = inspect_->ListTabletsOnTS(0);
1194
0
  ASSERT_EQ(1, tablets.size());
1195
0
  const TabletId& tablet_id = tablets[0];
1196
0
  const string& ts_uuid = cluster_->tablet_server(0)->uuid();
1197
1198
  // Shut down TS 0. We'll restart it later.
1199
0
  cluster_->tablet_server(0)->Shutdown();
1200
1201
  // Wait for the master to mark this TS as failed.
1202
0
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
1203
0
    vector<string> ts_list;
1204
0
    if (!ListAllLiveTabletServersRegisteredWithMaster(kTimeout, &ts_list).ok()) {
1205
0
      return false;
1206
0
    }
1207
0
    return std::find(ts_list.begin(), ts_list.end(), ts_uuid) == ts_list.end();
1208
0
  }, kTimeout, "Wait for TS to be marked dead by master"));
1209
  // Wait for its replicas to be migrated to another tserver.
1210
0
  WaitForLoadBalanceCompletion(kTimeout);
1211
0
  LOG(INFO) << "Tablet Server with id 0 removed completely and successfully";

  // Delete the table now and wait for the replicas to get deleted.
  ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
  for (int i = 1; i < 3; i++) {
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
  }
  // Verify that the table is deleted completely.
  bool deleted = ASSERT_RESULT(VerifyTableCompletelyDeleted(
      TestWorkloadOptions::kDefaultTableName, tablet_id));
  ASSERT_EQ(deleted, true);
  LOG(INFO) << "Table deleted successfully";

  // Fail over the master leader so that the table is removed from its in-memory maps.
  ASSERT_OK(cluster_->StepDownMasterLeaderAndWaitForNewLeader());

  // Now restart the TServer and wait for the replica to be deleted.
  ASSERT_OK(cluster_->tablet_server(0)->Restart());

  ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(0, tablet_id, SUPERBLOCK_EXPECTED));
}

TEST_F(DeleteTableTest, DeleteWithDeadTS) {
  vector<string> extra_master_flags = {
    "--tserver_unresponsive_timeout_ms=5000"
  };
  // Start a cluster with 3 TS and 3 masters.
  ASSERT_NO_FATALS(StartCluster(
    {}, extra_master_flags, 3, 3, false
  ));
  LOG(INFO) << "Cluster with 3 masters and 3 tservers started successfully";

  // Create a table on the cluster. We're just using TestWorkload
  // as a convenient way to create it.
  TestWorkload(cluster_.get()).Setup();
  // The table should have replicas on all three tservers.
  ASSERT_OK(inspect_->WaitForReplicaCount(3));
  LOG(INFO) << "Table with 1 tablet and 3 replicas created successfully";

  // Grab the tablet ID (used later).
  vector<string> tablets = inspect_->ListTabletsOnTS(0);
  ASSERT_EQ(1, tablets.size());
  const TabletId& tablet_id = tablets[0];
  const string& ts_uuid = cluster_->tablet_server(0)->uuid();

  // Shut down TS 0. We'll restart it later.
  cluster_->tablet_server(0)->Shutdown();

  // Wait for the master to mark this TS as failed.
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
    vector<string> ts_list;
    if (!ListAllLiveTabletServersRegisteredWithMaster(30s * kTimeMultiplier, &ts_list).ok()) {
      return false;
    }
    return std::find(ts_list.begin(), ts_list.end(), ts_uuid) == ts_list.end();
  }, 60s * kTimeMultiplier, "Wait for TS to be marked dead by master"));

  LOG(INFO) << "Tablet Server with index 0 removed completely and successfully";

  // Delete the table now and wait for the replicas to get deleted.
  ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
  for (int i = 1; i < 3; i++) {
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(i, tablet_id, SUPERBLOCK_EXPECTED));
  }

  // Check that the table is deleted completely.
  bool deleted = ASSERT_RESULT(VerifyTableCompletelyDeleted(
      TestWorkloadOptions::kDefaultTableName, tablet_id));
  ASSERT_EQ(deleted, true);
  LOG(INFO) << "Table deleted successfully";

  // Now restart the TServer and wait for the replica to be deleted.
  ASSERT_OK(cluster_->tablet_server(0)->Restart());

  ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(0, tablet_id, SUPERBLOCK_EXPECTED));
}

// Parameterized test case for TABLET_DATA_DELETED deletions.
class DeleteTableDeletedParamTest : public DeleteTableTest,
                                    public ::testing::WithParamInterface<const char*> {
};

// Test that if a server crashes mid-delete, the delete will be rolled
// forward on startup. Parameterized by different fault flags that cause a
// crash at various points.
TEST_P(DeleteTableDeletedParamTest, TestRollForwardDelete) {
  ASSERT_NO_FATALS(StartCluster());
  const string fault_flag = GetParam();
  LOG(INFO) << "Running with fault flag: " << fault_flag;

  // Dynamically set the fault flag so the tablet servers crash when DeleteTablet() is called
  // by the Master.
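  // The TEST_fault_crash_* flags are interpreted as a crash probability at the corresponding
  // fault-injection point, so setting them to "1.0" should make the crash deterministic.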
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(i), fault_flag, "1.0"));
  }

  // Create a table on the cluster. We're just using TestWorkload
  // as a convenient way to create it.
  TestWorkload(cluster_.get()).Setup();

  // The table should have replicas on all three tservers.
  ASSERT_OK(inspect_->WaitForReplicaCount(3));

  // Delete it and wait for the tablet servers to crash.
  // The DeleteTable() call can be blocking, so it should be called in a separate thread.
  std::thread delete_table_thread([&]() {
        ASSERT_NO_FATALS(DeleteTable(TestWorkloadOptions::kDefaultTableName));
      });

  SleepFor(MonoDelta::FromMilliseconds(50));
  ASSERT_NO_FATALS(WaitForAllTSToCrash());

  // There should still be data left on disk.
  Status s = inspect_->CheckNoData();
  ASSERT_TRUE(s.IsIllegalState()) << s.ToString();

  // Now restart the tablet servers. They should roll forward their deletes.
  // We don't have to reset the fault flag here because it was set dynamically.
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
    cluster_->tablet_server(i)->Shutdown();
    ASSERT_OK(cluster_->tablet_server(i)->Restart());
  }

  delete_table_thread.join();
  ASSERT_OK(inspect_->WaitForNoData());
}

// Faults appropriate for the TABLET_DATA_DELETED case.
const char* deleted_faults[] = {"TEST_fault_crash_after_blocks_deleted",
                                "TEST_fault_crash_after_wal_deleted",
                                "TEST_fault_crash_after_cmeta_deleted"};

INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableDeletedParamTest,
                        ::testing::ValuesIn(deleted_faults));
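// Note: INSTANTIATE_TEST_CASE_P is the older GoogleTest spelling of INSTANTIATE_TEST_SUITE_P;
// it instantiates TestRollForwardDelete once per entry in deleted_faults. A hypothetical extra
// fault point would be covered by simply extending the array, e.g.:
//
//   const char* deleted_faults[] = {"TEST_fault_crash_after_blocks_deleted",
//                                   "TEST_fault_crash_after_wal_deleted",
//                                   "TEST_fault_crash_after_cmeta_deleted",
//                                   "TEST_some_new_fault_flag"};  // hypothetical flag name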

// Parameterized test case for TABLET_DATA_TOMBSTONED deletions.
class DeleteTableTombstonedParamTest : public DeleteTableTest,
                                       public ::testing::WithParamInterface<const char*> {
};

// Regression test for tablet tombstoning. Tests:
// 1. basic creation & tombstoning of a tablet.
// 2. roll-forward (crash recovery) of a partially-completed tombstoning of a tablet.
// 3. permanent deletion of a TOMBSTONED tablet
//    (transition from TABLET_DATA_TOMBSTONED to TABLET_DATA_DELETED).
TEST_P(DeleteTableTombstonedParamTest, TestTabletTombstone) {
  vector<string> flags;
  flags.push_back("--log_segment_size_mb=1");  // Faster log rolls.
  ASSERT_NO_FATALS(StartCluster(flags));
  const string fault_flag = GetParam();
  LOG(INFO) << "Running with fault flag: " << fault_flag;

  MonoDelta timeout = MonoDelta::FromSeconds(30);

  // Create a table with 2 tablets. We delete the first tablet without
  // injecting any faults, then we delete the second tablet while exercising
  // several fault-injection points.
  ASSERT_OK(client_->CreateNamespaceIfNotExists(
      TestWorkloadOptions::kDefaultTableName.namespace_name(),
      TestWorkloadOptions::kDefaultTableName.namespace_type()));
  const int kNumTablets = 2;
  Schema schema(GetSimpleTestSchema());
  client::YBSchema client_schema(client::YBSchemaFromSchema(schema));
  std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator());
  ASSERT_OK(table_creator->table_name(TestWorkloadOptions::kDefaultTableName)
                          .num_tablets(kNumTablets)
                          .schema(&client_schema)
                          .Create());

  // Start a workload on the cluster, and run it until we find WALs on disk.
  TestWorkload workload(cluster_.get());
  workload.Setup();

  // The table should have 2 tablets (1 split) on all 3 tservers (for a total of 6 replicas).
  ASSERT_OK(inspect_->WaitForReplicaCount(6));

  // Set up the proxies so we can easily send DeleteTablet() RPCs.
  TServerDetails* ts = ts_map_[cluster_->tablet_server(0)->uuid()].get();

  // Ensure the tablet server is reporting 2 tablets.
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
  ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));

  // Run the workload against whoever the leader is until WALs appear on TS 0
  // for the tablets we created.
  const int kTsIndex = 0;  // Index of the tablet server we'll use for the test.
  workload.Start();
  while (workload.rows_inserted() < 100) {
    SleepFor(MonoDelta::FromMilliseconds(10));
  }
  ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
            tablets[0].tablet_status().tablet_id(), 3));
  ASSERT_OK(inspect_->WaitForMinFilesInTabletWalDirOnTS(kTsIndex,
            tablets[1].tablet_status().tablet_id(), 3));
  workload.StopAndJoin();

  // Shut down the master and the other tablet servers so they don't interfere
  // by attempting to create tablets or remote bootstrap while we delete tablets.
  cluster_->master()->Shutdown();
  cluster_->tablet_server(1)->Shutdown();
  cluster_->tablet_server(2)->Shutdown();

  // Tombstone the first tablet.
  string tablet_id = tablets[0].tablet_status().tablet_id();
  LOG(INFO) << "Tombstoning first tablet " << tablet_id << "...";
  ASSERT_TRUE(inspect_->DoesConsensusMetaExistForTabletOnTS(kTsIndex, tablet_id)) << tablet_id;
  ASSERT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout));
  LOG(INFO) << "Waiting for first tablet to be tombstoned...";
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));

  ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
  for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
    if (t.tablet_status().tablet_id() == tablet_id) {
      ASSERT_EQ(tablet::SHUTDOWN, t.tablet_status().state());
      ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
          << t.tablet_status().tablet_id() << " not tombstoned";
    }
  }

  // Now tombstone the 2nd tablet, causing a fault.
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server(kTsIndex), fault_flag, "1.0"));
  tablet_id = tablets[1].tablet_status().tablet_id();
  LOG(INFO) << "Tombstoning second tablet " << tablet_id << "...";
  WARN_NOT_OK(itest::DeleteTablet(ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none, timeout),
              "Delete tablet failed");
  ASSERT_OK(cluster_->WaitForTSToCrash(kTsIndex));

  // Restart the tablet server and wait for the WALs to be deleted and for the
  // superblock to show that it is tombstoned.
  cluster_->tablet_server(kTsIndex)->Shutdown();
  // Don't start the CQL proxy, since it'll try to connect to the master.
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart(false));
  LOG(INFO) << "Waiting for second tablet to be tombstoned...";
  ASSERT_NO_FATALS(WaitForTabletTombstonedOnTS(kTsIndex, tablet_id, CMETA_EXPECTED));

  // The tombstoned tablets will still show up in ListTablets(),
  // just with their data state set to TOMBSTONED. They should also be listed
  // as NOT_STARTED because we restarted the server.
  ASSERT_OK(itest::WaitForNumTabletsOnTS(ts, 2, timeout, &tablets));
  for (const ListTabletsResponsePB::StatusAndSchemaPB& t : tablets) {
    ASSERT_EQ(tablet::NOT_STARTED, t.tablet_status().state());
    ASSERT_EQ(TABLET_DATA_TOMBSTONED, t.tablet_status().tablet_data_state())
        << t.tablet_status().tablet_id() << " not tombstoned";
  }

  // Check that, upon restart of the tablet server with a tombstoned tablet,
  // we don't unnecessarily "roll forward" and rewrite the tablet metadata file
  // when the delete has already been fully applied.
  int64_t orig_mtime = inspect_->GetTabletSuperBlockMTimeOrDie(kTsIndex, tablet_id);
  cluster_->tablet_server(kTsIndex)->Shutdown();
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart());
  int64_t new_mtime = inspect_->GetTabletSuperBlockMTimeOrDie(kTsIndex, tablet_id);
  ASSERT_EQ(orig_mtime, new_mtime)
                << "Tablet superblock should not have been re-flushed unnecessarily";

  // Finally, delete all tablets on the TS, and wait for all data to be gone.
  LOG(INFO) << "Deleting all tablets...";
  for (const ListTabletsResponsePB::StatusAndSchemaPB& tablet : tablets) {
    string tablet_id = tablet.tablet_status().tablet_id();
    // We need retries here, since some of the tablets may still be
    // bootstrapping after being restarted above.
    ASSERT_NO_FATALS(DeleteTabletWithRetries(ts, tablet_id, TABLET_DATA_DELETED, timeout));
    ASSERT_NO_FATALS(WaitForTabletDeletedOnTS(kTsIndex, tablet_id, SUPERBLOCK_EXPECTED));
  }

  // Restart the TS; the superblock should be deleted on startup.
  cluster_->tablet_server(kTsIndex)->Shutdown();
  // Don't start the CQL proxy, since it'll try to connect to the master.
  ASSERT_OK(cluster_->tablet_server(kTsIndex)->Restart(false));
  ASSERT_OK(inspect_->WaitForNoDataOnTS(kTsIndex));
}

// Faults appropriate for the TABLET_DATA_TOMBSTONED case.
// Tombstoning a tablet does not delete the consensus metadata.
const char* tombstoned_faults[] = {"TEST_fault_crash_after_blocks_deleted",
                                   "TEST_fault_crash_after_wal_deleted"};

INSTANTIATE_TEST_CASE_P(FaultFlags, DeleteTableTombstonedParamTest,
                        ::testing::ValuesIn(tombstoned_faults));

}  // namespace yb