YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/integration-tests/master_failover-itest.cc
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include <functional>
34
#include <string>
35
#include <vector>
36
37
#include <glog/logging.h>
38
#include <gtest/gtest.h>
39
40
#include "yb/client/client-internal.h"
41
#include "yb/client/client-test-util.h"
42
#include "yb/client/client.h"
43
#include "yb/client/schema.h"
44
#include "yb/client/table.h"
45
#include "yb/client/table_alterer.h"
46
#include "yb/client/table_creator.h"
47
#include "yb/client/table_handle.h"
48
#include "yb/client/table_info.h"
49
#include "yb/client/tablet_server.h"
50
#include "yb/client/yb_table_name.h"
51
52
#include "yb/common/common.pb.h"
53
#include "yb/common/schema.h"
54
#include "yb/common/wire_protocol-test-util.h"
55
56
#include "yb/gutil/strings/substitute.h"
57
58
#include "yb/integration-tests/external_mini_cluster.h"
59
60
#include "yb/master/master_cluster.proxy.h"
61
62
#include "yb/rpc/rpc_controller.h"
63
64
#include "yb/tools/yb-admin_client.h"
65
66
#include "yb/util/logging.h"
67
#include "yb/util/monotime.h"
68
#include "yb/util/net/net_util.h"
69
#include "yb/util/result.h"
70
#include "yb/util/test_util.h"
71
#include "yb/util/tsan_util.h"
72
73
using namespace std::literals;
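// (Pulls in the std::chrono duration literals used below, e.g. the 90s and
// 900s deadlines.)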
74
75
DECLARE_int32(ycql_num_tablets);
76
DECLARE_int32(ysql_num_tablets);
77
DECLARE_int32(heartbeat_interval_ms);
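// (DECLARE_int32 references gflags defined elsewhere, letting the tests below
// assign FLAGS_ycql_num_tablets and FLAGS_ysql_num_tablets directly.)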
78
79
namespace yb {
80
81
// Note: this test needs to be in the client namespace in order for
82
// YBClient::Data class methods to be visible via the FRIEND_TEST macro.
83
namespace client {
84
85
const int kNumTabletServerReplicas = 3;
86
const int kHeartbeatIntervalMs = 500;
87
88
using std::shared_ptr;
89
using std::string;
90
using std::vector;
91
using client::YBTableName;
92
93
class MasterFailoverTest : public YBTest {
94
 public:
95
  enum CreateTableMode {
96
    kWaitForCreate = 0,
97
    kNoWaitForCreate = 1
98
  };
99
100
14
  MasterFailoverTest() {
101
14
    opts_.master_rpc_ports = { 0, 0, 0 };
102
14
    opts_.num_masters = num_masters_ = opts_.master_rpc_ports.size();
103
14
    opts_.num_tablet_servers = kNumTabletServerReplicas;
104
105
    // Reduce various timeouts below so as to make the detection of
106
    // leader master failures (specifically, failures as a result of
107
    // long pauses) more rapid.
108
109
    // Set the TS->master heartbeat timeout to 1 second (down from 15 seconds).
110
14
    opts_.extra_tserver_flags.push_back("--heartbeat_rpc_timeout_ms=1000");
111
    // Allow one TS heartbeat failure before retrying with back-off (down from 3).
112
14
    opts_.extra_tserver_flags.push_back("--heartbeat_max_failures_before_backoff=1");
113
    // Wait for 500 ms after 'max_consecutive_failed_heartbeats'
114
    // before trying again (down from 1 second).
115
14
    string heartbeat_interval_flag =
116
14
                "--heartbeat_interval_ms="+std::to_string(kHeartbeatIntervalMs);
117
14
    opts_.extra_tserver_flags.push_back(heartbeat_interval_flag);
118
14
  }
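  // With kHeartbeatIntervalMs = 500, the extra tserver flags assembled above
  // come out as:
  //   --heartbeat_rpc_timeout_ms=1000
  //   --heartbeat_max_failures_before_backoff=1
  //   --heartbeat_interval_ms=500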
119
120
14
  void SetUp() override {
121
14
    YBTest::SetUp();
122
14
    ASSERT_NO_FATALS(RestartCluster());
123
14
  }
124
125
13
  void TearDown() override {
126
13
    client_.reset();
127
13
    if (cluster_) {
128
13
      cluster_->Shutdown();
129
13
    }
130
13
    YBTest::TearDown();
131
13
  }
132
133
14
  void RestartCluster() {
134
14
    if (cluster_) {
135
0
      cluster_->Shutdown();
136
0
      cluster_.reset();
137
0
    }
138
14
    opts_.timeout = MonoDelta::FromSeconds(NonTsanVsTsan(20, 60));
139
14
    cluster_.reset(new ExternalMiniCluster(opts_));
140
14
    ASSERT_OK(cluster_->Start());
141
14
    client_ = ASSERT_RESULT(cluster_->CreateClient());
142
14
  }
143
144
11
  Status CreateTable(const YBTableName& table_name, CreateTableMode mode) {
145
11
    RETURN_NOT_OK_PREPEND(
146
11
        client_->CreateNamespaceIfNotExists(table_name.namespace_name()),
147
11
        "Unable to create namespace " + table_name.namespace_name());
148
11
    client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema()));
149
11
    std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator());
150
11
    return table_creator->table_name(table_name)
151
11
        .table_type(YBTableType::YQL_TABLE_TYPE)
152
11
        .schema(&client_schema)
153
11
        .hash_schema(YBHashSchema::kMultiColumnHash)
154
11
        .timeout(MonoDelta::FromSeconds(90))
155
11
        .wait(mode == kWaitForCreate)
156
11
        .Create();
157
11
  }
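  // Typical call site, mirroring the tests below (the table name here is
  // illustrative only):
  //   YBTableName name(YQL_DATABASE_CQL, "test", "my_table");
  //   ASSERT_OK(CreateTable(name, kWaitForCreate));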
158
159
  Status CreateIndex(
160
18
      const YBTableName& indexed_table_name, const YBTableName& index_name, CreateTableMode mode) {
161
18
    RETURN_NOT_OK_PREPEND(
162
18
      client_->CreateNamespaceIfNotExists(index_name.namespace_name()),
163
18
      "Unable to create namespace " + index_name.namespace_name());
164
18
    client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema()));
165
18
    client::TableHandle table;
166
18
    RETURN_NOT_OK_PREPEND(
167
18
        table.Open(indexed_table_name, client_.get()),
168
18
        "Unable to open table " + indexed_table_name.ToString());
169
170
18
    std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator());
171
18
    return table_creator->table_name(index_name)
172
18
        .table_type(YBTableType::YQL_TABLE_TYPE)
173
18
        .indexed_table_id(table->id())
174
18
        .schema(&client_schema)
175
18
        .hash_schema(YBHashSchema::kMultiColumnHash)
176
18
        .timeout(MonoDelta::FromSeconds(90))
177
18
        .wait(mode == kWaitForCreate)
178
        // In the new style create index request, the CQL proxy populates the
179
        // index info instead of the master. However, in these tests we bypass
180
        // the proxy and go directly to the master. We need to use the old
181
        // style create request to have the master generate the appropriate
182
        // index info.
183
18
        .TEST_use_old_style_create_request()
184
18
        .Create();
185
18
  }
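  // Typical call site, as in TestPauseAfterCreateIndexIssued below:
  //   ASSERT_OK(CreateIndex(table_name, index_table_name, kNoWaitForCreate));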
186
187
0
  Status RenameTable(const YBTableName& table_name_orig, const YBTableName& table_name_new) {
188
0
    std::unique_ptr<YBTableAlterer> table_alterer(client_->NewTableAlterer(table_name_orig));
189
0
    return table_alterer
190
0
      ->RenameTo(table_name_new)
191
0
      ->timeout(MonoDelta::FromSeconds(90))
192
0
      ->wait(true)
193
0
      ->Alter();
194
0
  }
195
196
  // Test that we can get the table location information from the
197
  // master and then open scanners on the tablet server. This involves
198
  // sending RPCs to both the master and the tablet servers and
199
  // requires that the table and tablet exist both on the masters and
200
  // the tablet servers.
201
0
  Status OpenTableAndScanner(const YBTableName& table_name) {
202
0
    client::TableHandle table;
203
0
    RETURN_NOT_OK_PREPEND(table.Open(table_name, client_.get()),
204
0
                          "Unable to open table " + table_name.ToString());
205
0
    client::TableRange range(table);
206
0
    auto it = range.begin();
207
0
    if (it != range.end()) {
208
0
      ++it;
209
0
    }
210
211
0
    return Status::OK();
212
0
  }
213
214
 protected:
215
  size_t num_masters_;
216
  ExternalMiniClusterOptions opts_;
217
  std::unique_ptr<ExternalMiniCluster> cluster_;
218
  std::unique_ptr<YBClient> client_;
219
};
220
221
class MasterFailoverTestIndexCreation : public MasterFailoverTest,
222
                                        public ::testing::WithParamInterface<int> {
223
 public:
224
9
  MasterFailoverTestIndexCreation() {
225
9
    opts_.extra_tserver_flags.push_back("--allow_index_table_read_write=true");
226
9
    opts_.extra_tserver_flags.push_back(
227
9
        "--index_backfill_upperbound_for_user_enforced_txn_duration_ms=100");
228
9
    opts_.extra_master_flags.push_back("--TEST_slowdown_backfill_alter_table_rpcs_ms=50");
229
9
    opts_.extra_master_flags.push_back("--disable_index_backfill=false");
230
    // Sometimes during master failover, the create index kicks in before the tservers have
231
    // checked in. By default we wait for enough TSs -- otherwise the create table/index request fails.
232
    // The tests here don't need to wait for that.
233
9
    opts_.extra_master_flags.push_back("--catalog_manager_check_ts_count_for_create_table=false");
234
9
  }
235
236
  // The master has to do 5 RPCs to tservers to create and backfill an index:
237
  // 4 to set each of the 4 IndexPermissions, and 1 for GetSafeTime.
238
  // We want to simulate a failure before and after each RPC, so total 10 stages.
239
  static constexpr int kNumMaxStages = 10;
240
};
241
242
INSTANTIATE_TEST_CASE_P(
243
    MasterFailoverTestIndexCreation, MasterFailoverTestIndexCreation,
244
    ::testing::Range(1, MasterFailoverTestIndexCreation::kNumMaxStages));
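// Note: ::testing::Range is half-open, so this instantiates the test for
// stages 1 through kNumMaxStages - 1, i.e. nine parameterized runs.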
245
// Test that synchronous CreateTable (issue CreateTable call and then
246
// wait until the table has been created) works even when the original
247
// leader master has been paused.
248
//
249
// Temporarily disabled since multi-master isn't supported yet.
250
// This test fails as of KUDU-1138, since the tablet servers haven't
251
// registered with the follower master, and thus it's likely to deny
252
// the CreateTable request, thinking there are no TSs available.
253
0
TEST_F(MasterFailoverTest, DISABLED_TestCreateTableSync) {
254
0
  if (!AllowSlowTests()) {
255
0
    LOG(INFO) << "This test can only be run in slow mode.";
256
0
    return;
257
0
  }
258
259
0
  auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
260
261
0
  LOG(INFO) << "Pausing leader master";
262
0
  ASSERT_OK(cluster_->master(leader_idx)->Pause());
263
0
  ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
264
265
0
  YBTableName table_name(YQL_DATABASE_CQL, "testCreateTableSync");
266
0
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
267
0
  ASSERT_OK(OpenTableAndScanner(table_name));
268
0
}
269
270
// Test that we can issue a CreateTable call, pause the leader master
271
// immediately after, then verify that the table has been created on
272
// the newly elected leader master.
273
//
274
// TODO enable this test once flakiness issues are worked out and
275
// eliminated on test machines.
276
0
TEST_F(MasterFailoverTest, DISABLED_TestPauseAfterCreateTableIssued) {
277
0
  if (!AllowSlowTests()) {
278
0
    LOG(INFO) << "This test can only be run in slow mode.";
279
0
    return;
280
0
  }
281
282
0
  auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
283
284
0
  YBTableName table_name(YQL_DATABASE_CQL, "testPauseAfterCreateTableIssued");
285
0
  LOG(INFO) << "Issuing CreateTable for " << table_name.ToString();
286
0
  ASSERT_OK(CreateTable(table_name, kNoWaitForCreate));
287
288
0
  LOG(INFO) << "Pausing leader master";
289
0
  ASSERT_OK(cluster_->master(leader_idx)->Pause());
290
0
  ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
291
292
0
  auto deadline = CoarseMonoClock::Now() + 90s;
293
0
  ASSERT_OK(client_->data_->WaitForCreateTableToFinish(client_.get(), table_name, "" /* table_id */,
294
0
                                                       deadline));
295
296
0
  ASSERT_OK(OpenTableAndScanner(table_name));
297
0
}
298
299
// Orchestrate a master failover at various points of a backfill,
300
// and ensure that the backfill eventually completes.
301
9
TEST_P(MasterFailoverTestIndexCreation, TestPauseAfterCreateIndexIssued) {
302
9
  const int kPauseAfterStage = GetParam();
303
9
  YBTableName table_name(YQL_DATABASE_CQL, "test", "testPauseAfterCreateTableIssued");
304
9
  LOG(INFO) << "Issuing CreateTable for " << table_name.ToString();
305
9
  FLAGS_ycql_num_tablets = 5;
306
9
  FLAGS_ysql_num_tablets = 5;
307
9
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
308
9
  LOG(INFO) << "CreateTable done for " << table_name.ToString();
309
310
9
  MonoDelta total_time_taken_for_one_iteration;
311
  // In the first run, we estimate the total time taken for one create index to complete.
312
  // The second run will pause the master at the desired point during create index.
313
27
  for (int i = 0; i < 2; i++) {
314
18
    auto start = ToSteady(CoarseMonoClock::Now());
315
18
    auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
316
18
    ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
317
318
18
    OpIdPB op_id;
319
18
    ASSERT_OK(cluster_->GetLastOpIdForLeader(&op_id));
320
18
    ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(static_cast<int>(op_id.index())));
321
322
18
    YBTableName index_table_name(
323
18
        YQL_DATABASE_CQL, "test", "testPauseAfterCreateTableIssuedIdx" + yb::ToString(i));
324
18
    LOG(INFO) << "Issuing CreateIndex for " << index_table_name.ToString();
325
18
    ASSERT_OK(CreateIndex(table_name, index_table_name, kNoWaitForCreate));
326
327
18
    if (i != 0) {
328
      // In the first run, we estimate how long it takes for an uninterrupted
329
      // backfill process to complete; the remaining iterations then pause the
330
      // master leader at various points to cause a master failover during
331
      // the different stages of index backfill.
332
9
      MonoDelta sleep_time = total_time_taken_for_one_iteration * kPauseAfterStage / kNumMaxStages;
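      // For example, if the baseline iteration took 20 s and kPauseAfterStage
      // is 3, this sleeps ~6 s (20 s * 3 / 10) before pausing the leader.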
333
9
      LOG(INFO) << "Sleeping for " << sleep_time << ", before master pause";
334
9
      SleepFor(sleep_time);
335
336
9
      LOG(INFO) << "Pausing leader master 0-based: " << leader_idx << " i.e. m-"
337
9
                << 1 + leader_idx;
338
9
      ASSERT_OK(cluster_->master(leader_idx)->Pause());
339
9
    }
340
341
18
    IndexInfoPB index_info_pb;
342
18
    TableId index_table_id;
343
18
    const auto deadline = CoarseMonoClock::Now() + 900s;
344
18
    do {
345
18
      ASSERT_OK(client_->data_->WaitForCreateTableToFinish(
346
18
          client_.get(), index_table_name, "" /* table_id */, deadline));
347
18
      ASSERT_OK(client_->data_->WaitForCreateTableToFinish(
348
18
          client_.get(), table_name, "" /* table_id */, deadline));
349
350
18
      Result<YBTableInfo> table_info = client_->GetYBTableInfo(table_name);
351
18
      ASSERT_TRUE(table_info);
352
18
      Result<YBTableInfo> index_table_info = client_->GetYBTableInfo(index_table_name);
353
18
      ASSERT_TRUE(index_table_info);
354
355
18
      index_table_id = index_table_info->table_id;
356
18
      index_info_pb.Clear();
357
18
      table_info->index_map[index_table_id].ToPB(&index_info_pb);
358
18
      YB_LOG_EVERY_N_SECS(INFO, 1) << "The index info for "
359
18
                                   << index_table_name.ToString() << " is "
360
18
                                   << yb::ToString(index_info_pb);
361
362
18
      ASSERT_TRUE(index_info_pb.has_index_permissions());
363
18
    } while (index_info_pb.index_permissions() <
364
18
                 IndexPermissions::INDEX_PERM_READ_WRITE_AND_DELETE &&
365
0
             CoarseMonoClock::Now() < deadline);
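    // INDEX_PERM_READ_WRITE_AND_DELETE is the terminal permission state of a
    // successful backfill, so the check below confirms the index became fully
    // usable despite the failover.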
366
367
18
    EXPECT_EQ(index_info_pb.index_permissions(),
368
18
              IndexPermissions::INDEX_PERM_READ_WRITE_AND_DELETE);
369
370
18
    LOG(INFO) << "All good for iteration " << i;
371
18
    ASSERT_OK(client_->DeleteIndexTable(index_table_name, nullptr, /* wait */ true));
372
18
    ASSERT_OK(client_->data_->WaitForDeleteTableToFinish(
373
18
        client_.get(), index_table_id, deadline));
374
375
    // For the first round, simply record how long one full iteration takes.
376
18
    if (i == 0) {
377
9
      total_time_taken_for_one_iteration = ToSteady(CoarseMonoClock::Now()) - start;
378
9
    }
379
18
  }
380
9
}
381
382
// Test the scenario where we create a table, pause the leader master,
383
// and then issue the DeleteTable call: DeleteTable should go to the newly
384
// elected leader master and succeed.
385
1
TEST_F(MasterFailoverTest, TestDeleteTableSync) {
386
1
  if (!AllowSlowTests()) {
387
1
    LOG(INFO) << "This test can only be run in slow mode.";
388
1
    return;
389
1
  }
390
391
0
  auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
392
393
0
  YBTableName table_name(YQL_DATABASE_CQL, "test", "testDeleteTableSync");
394
0
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
395
396
0
  LOG(INFO) << "Pausing leader master";
397
0
  ASSERT_OK(cluster_->master(leader_idx)->Pause());
398
0
  ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
399
400
0
  ASSERT_OK(client_->DeleteTable(table_name));
401
0
  shared_ptr<YBTable> table;
402
0
  Status s = client_->OpenTable(table_name, &table);
403
0
  ASSERT_TRUE(s.IsNotFound());
404
0
}
405
406
// Test the scenario where we create a table, pause the leader master,
407
// and then issue the AlterTable call renaming a table: AlterTable
408
// should go to the newly elected leader master and succeed, renaming
409
// the table.
410
//
411
// TODO: Add an equivalent async test. Add a test for adding and/or
412
// renaming a column in a table.
413
1
TEST_F(MasterFailoverTest, TestRenameTableSync) {
414
1
  if (!AllowSlowTests()) {
415
1
    LOG(INFO) << "This test can only be run in slow mode.";
416
1
    return;
417
1
  }
418
419
0
  auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
420
421
0
  YBTableName table_name_orig(YQL_DATABASE_CQL, "test", "testAlterTableSync");
422
0
  ASSERT_OK(CreateTable(table_name_orig, kWaitForCreate));
423
424
0
  LOG(INFO) << "Pausing leader master";
425
0
  ASSERT_OK(cluster_->master(leader_idx)->Pause());
426
0
  ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
427
428
0
  YBTableName table_name_new(YQL_DATABASE_CQL, "test", "testAlterTableSyncRenamed");
429
0
  ASSERT_OK(RenameTable(table_name_orig, table_name_new));
430
0
  shared_ptr<YBTable> table;
431
0
  ASSERT_OK(client_->OpenTable(table_name_new, &table));
432
433
0
  Status s = client_->OpenTable(table_name_orig, &table);
434
0
  ASSERT_TRUE(s.IsNotFound());
435
0
}
436
437
1
TEST_F(MasterFailoverTest, TestFailoverAfterTsFailure) {
438
3
  for (auto master : cluster_->master_daemons()) {
439
3
    ASSERT_OK(cluster_->SetFlag(master, "enable_register_ts_from_raft", "true"));
440
3
  }
441
1
  YBTableName table_name(YQL_DATABASE_CQL, "test", "testFailoverAfterTsFailure");
442
1
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
443
444
1
  cluster_->tablet_server(0)->Shutdown();
445
446
  // Roll over to a new master.
447
1
  ASSERT_OK(cluster_->ChangeConfig(cluster_->GetLeaderMaster(), consensus::REMOVE_SERVER));
448
449
  // Wait until the total tablet server count reaches 3.
450
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
451
1
    int tserver_count;
452
1
    RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, false /* primary_only */));
453
1
    return tserver_count == 3;
454
1
  }, MonoDelta::FromSeconds(30), "Wait for tablet server count"));
455
456
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
457
1
    int tserver_count;
458
1
    RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, true /* primary_only */));
459
1
    return tserver_count == 2;
460
1
  }, MonoDelta::FromSeconds(30), "Wait for tablet server count"));
461
462
1
  google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets;
463
1
  ASSERT_OK(client_->GetTablets(table_name, 0, &tablets, /* partition_list_version =*/ nullptr));
464
465
  // Assert that the master sees all tablets with 3 replicas.
466
3
  for (const auto& loc : tablets) {
467
3
    ASSERT_EQ(loc.replicas_size(), 3);
468
3
  }
469
470
  // Make sure we can issue a DeleteTable that doesn't crash with the fake ts. Then make sure
471
  // that when we restart the server, it properly re-registers and nothing crashes.
472
1
  ASSERT_OK(client_->DeleteTable(table_name, false /* wait */));
473
1
  ASSERT_OK(cluster_->tablet_server(0)->Start());
474
475
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
476
1
    int tserver_count;
477
1
    RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, true /* primary_only */));
478
1
    bool is_idle = VERIFY_RESULT(client_->IsLoadBalancerIdle());
479
    // We have registered the new tserver and the LB is idle.
480
1
    return tserver_count == 3 && is_idle;
481
1
  }, MonoDelta::FromSeconds(30), "Wait for LB idle"));
482
483
1
  cluster_->AssertNoCrashes();
484
1
}
485
486
1
TEST_F(MasterFailoverTest, TestLoadMoveCompletion) {
487
  // Original cluster is RF3 so add a TS.
488
1
  LOG(INFO) << "Adding a T-Server.";
489
1
  ASSERT_OK(cluster_->AddTabletServer());
490
491
  // Create a table to introduce some workload.
492
1
  YBTableName table_name(YQL_DATABASE_CQL, "test", "testLoadMoveCompletion");
493
1
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
494
495
  // Give the load balancer some time to balance tablets.
496
1
  std::function<Result<bool> ()> is_idle = [&]() -> Result<bool> {
497
1
    return client_->IsLoadBalancerIdle();
498
1
  };
499
1
  ASSERT_OK(WaitFor(is_idle,
500
1
                MonoDelta::FromSeconds(60),
501
1
                "Load Balancer Idle check failed"));
502
503
  // Disable TS heartbeats.
504
1
  LOG(INFO) << "Disabled Heartbeats";
505
1
  ASSERT_OK(cluster_->SetFlagOnTServers("TEST_tserver_disable_heartbeat", "true"));
506
507
  // Blacklist a TS.
508
1
  ExternalMaster *leader = cluster_->GetLeaderMaster();
509
1
  ExternalTabletServer *ts = cluster_->tablet_server(3);
510
1
  ASSERT_OK(cluster_->AddTServerToBlacklist(leader, ts));
511
1
  LOG(INFO) << "Blacklisted tserver#3";
512
513
  // Get the initial load.
514
1
  auto idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
515
516
1
  auto proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(idx);
517
518
1
  rpc::RpcController rpc;
519
1
  master::GetLoadMovePercentRequestPB req;
520
1
  master::GetLoadMovePercentResponsePB resp;
521
1
  ASSERT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc));
522
523
1
  auto initial_total_load = resp.total();
524
525
  // Failover the leader.
526
1
  LOG(INFO) << "Failing over master leader.";
527
1
  ASSERT_OK(cluster_->StepDownMasterLeaderAndWaitForNewLeader());
528
529
  // Get the final load and validate.
530
1
  req.Clear();
531
1
  resp.Clear();
532
1
  rpc.Reset();
533
534
1
  idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex());
535
536
1
  proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(idx);
537
1
  ASSERT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc));
538
1
  LOG(INFO) << "Initial loads. Before master leader failover: " <<  initial_total_load
539
1
            << " v/s after master leader failover: " << resp.total();
540
541
2
  EXPECT_EQ(resp.total(), initial_total_load)
542
2
      << "Expected the initial blacklisted load to be propagated to new leader master.";
543
544
  // The progress should be reported as 0 until tservers heartbeat
545
  // their tablet reports.
546
2
  EXPECT_EQ(resp.percent(), 0) << "Expected the initial progress"
547
2
                                  " to be zero.";
548
549
  // Now enable heartbeats.
550
1
  ASSERT_OK(cluster_->SetFlagOnTServers("TEST_tserver_disable_heartbeat", "false"));
551
0
  ASSERT_OK(cluster_->SetFlagOnMasters("blacklist_progress_initial_delay_secs",
552
0
                                        std::to_string((kHeartbeatIntervalMs * 20)/1000)));
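  // With kHeartbeatIntervalMs = 500, (500 * 20) / 1000 = 10, so the masters'
  // initial delay for reporting blacklist progress is set to 10 seconds.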
553
0
  LOG(INFO) << "Enabled heartbeats";
554
555
0
  ASSERT_OK(LoggedWaitFor(
556
0
    [&]() -> Result<bool> {
557
0
      req.Clear();
558
0
      resp.Clear();
559
0
      rpc.Reset();
560
0
      RETURN_NOT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc));
561
0
      return resp.percent() >= 100;
562
0
    },
563
0
    MonoDelta::FromSeconds(300),
564
0
    "Waiting for blacklist load transfer to complete"
565
0
  ));
566
0
}
567
568
class MasterFailoverTestWithPlacement : public MasterFailoverTest {
569
 public:
570
1
  virtual void SetUp() override {
571
1
    opts_.extra_tserver_flags.push_back("--placement_cloud=c");
572
1
    opts_.extra_tserver_flags.push_back("--placement_region=r");
573
1
    opts_.extra_tserver_flags.push_back("--placement_zone=z${index}");
574
1
    opts_.extra_tserver_flags.push_back("--placement_uuid=" + kLivePlacementUuid);
575
1
    opts_.extra_master_flags.push_back("--enable_register_ts_from_raft=true");
576
1
    MasterFailoverTest::SetUp();
577
1
    yb_admin_client_ = std::make_unique<tools::enterprise::ClusterAdminClient>(
578
1
        cluster_->GetMasterAddresses(), MonoDelta::FromSeconds(30));
579
1
    ASSERT_OK(yb_admin_client_->Init());
580
1
    ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0,c.r.z1,c.r.z2", 3, kLivePlacementUuid));
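    // "c.r.z0,c.r.z1,c.r.z2" lists cloud.region.zone triples matching the
    // --placement_* flags above; 3 is the replication factor for the live
    // placement.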
581
1
  }
582
583
0
  virtual void TearDown() override {
584
0
    yb_admin_client_.reset();
585
0
    MasterFailoverTest::TearDown();
586
0
  }
587
588
  void AssertTserverHasPlacementUuid(
589
      const string& ts_uuid, const string& placement_uuid,
590
0
      const std::vector<YBTabletServer>& tablet_servers) {
591
0
    auto it = std::find_if(tablet_servers.begin(), tablet_servers.end(), [&](const auto& ts) {
592
0
        return ts.uuid == ts_uuid;
593
0
    });
594
0
    ASSERT_TRUE(it != tablet_servers.end());
595
0
    ASSERT_EQ(it->placement_uuid, placement_uuid);
596
0
  }
597
598
 protected:
599
  const string kReadReplicaPlacementUuid = "read_replica";
600
  const string kLivePlacementUuid = "live";
601
  std::unique_ptr<tools::enterprise::ClusterAdminClient> yb_admin_client_;
602
};
603
604
1
TEST_F_EX(MasterFailoverTest, TestFailoverWithReadReplicas, MasterFailoverTestWithPlacement) {
605
1
  ASSERT_OK(yb_admin_client_->AddReadReplicaPlacementInfo(
606
1
      "c.r.z0:1", 1, kReadReplicaPlacementUuid));
607
608
  // Add a new read replica tserver to the cluster whose cloud info matches a live placement,
609
  // to test that we distinguish not just by cloud info but also by peer role.
610
1
  std::vector<std::string> extra_opts;
611
1
  extra_opts.push_back("--placement_cloud=c");
612
1
  extra_opts.push_back("--placement_region=r");
613
1
  extra_opts.push_back("--placement_zone=z0");
614
1
  extra_opts.push_back("--placement_uuid=" + kReadReplicaPlacementUuid);
615
1
  ASSERT_OK(cluster_->AddTabletServer(true, extra_opts));
616
617
0
  YBTableName table_name(YQL_DATABASE_CQL, "test", "testFailoverWithReadReplicas");
618
0
  ASSERT_OK(CreateTable(table_name, kWaitForCreate));
619
620
  // Shut down the live ts in c.r.z0.
621
0
  auto live_ts_uuid = cluster_->tablet_server(0)->instance_id().permanent_uuid();
622
0
  cluster_->tablet_server(0)->Shutdown();
623
624
  // Shut down the read replica ts in c.r.z0.
625
0
  auto rr_ts_uuid = cluster_->tablet_server(3)->instance_id().permanent_uuid();
626
0
  cluster_->tablet_server(3)->Shutdown();
627
628
  // Roll over to a new master.
629
0
  ASSERT_OK(cluster_->ChangeConfig(cluster_->GetLeaderMaster(), consensus::REMOVE_SERVER));
630
631
  // Wait until the total tablet server count reaches 4.
632
0
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
633
0
    int tserver_count;
634
0
    RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, false /* primary_only */));
635
0
    return tserver_count == 4;
636
0
  }, MonoDelta::FromSeconds(30), "Wait for tablet server count"));
637
638
0
  const auto tablet_servers = ASSERT_RESULT(client_->ListTabletServers());
639
640
0
  ASSERT_NO_FATALS(AssertTserverHasPlacementUuid(live_ts_uuid, kLivePlacementUuid, tablet_servers));
641
0
  ASSERT_NO_FATALS(AssertTserverHasPlacementUuid(
642
0
      rr_ts_uuid, kReadReplicaPlacementUuid, tablet_servers));
643
0
  cluster_->AssertNoCrashes();
644
0
}
645
646
}  // namespace client
647
}  // namespace yb