YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/integration-tests/load_balancer_multi_table-test.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include <gtest/gtest.h>
15
16
#include "yb/client/client.h"
17
#include "yb/client/schema.h"
18
#include "yb/client/table_creator.h"
19
#include "yb/client/yb_table_name.h"
20
21
#include "yb/consensus/consensus.pb.h"
22
#include "yb/consensus/consensus.proxy.h"
23
24
#include "yb/gutil/algorithm.h"
25
26
#include "yb/integration-tests/external_mini_cluster.h"
27
#include "yb/integration-tests/load_balancer_test_util.h"
28
#include "yb/integration-tests/mini_cluster.h"
29
#include "yb/integration-tests/yb_table_test_base.h"
30
31
#include "yb/tools/yb-admin_client.h"
32
33
#include "yb/util/monotime.h"
34
#include "yb/util/result.h"
35
#include "yb/util/tsan_util.h"
36
37
using namespace std::literals;
38
39
namespace yb {
40
namespace integration_tests {
41
42
const auto kDefaultTimeout = 30000ms;
43
constexpr int kNumTables = 3;
44
constexpr int kMovesPerTable = 1;
45
46
// We need multiple tables in order to test load_balancer_max_concurrent_moves_per_table.
47
class LoadBalancerMultiTableTest : public YBTableTestBase {
48
 protected:
49
11
  bool use_yb_admin_client() override { return true; }
50
51
122
  bool use_external_mini_cluster() override { return true; }
52
53
6
  bool enable_ysql() override {
54
    // Do not create the transaction status table.
55
6
    return false;
56
6
  }
57
58
44
  int num_tablets() override {
59
44
    return 5;
60
44
  }
61
62
6
  int num_drives() override {
63
6
    return 2;
64
6
  }
65
66
6
  client::YBTableName table_name() override {
67
6
    return table_names_[0];
68
6
  }
69
70
6
  void CustomizeExternalMiniCluster(ExternalMiniClusterOptions* opts) override {
71
6
    opts->extra_tserver_flags.push_back("--placement_cloud=c");
72
6
    opts->extra_tserver_flags.push_back("--placement_region=r");
73
6
    opts->extra_tserver_flags.push_back("--placement_zone=z${index}");
74
6
    opts->extra_master_flags.push_back("--load_balancer_max_concurrent_moves=10");
75
6
    opts->extra_master_flags.push_back("--load_balancer_max_concurrent_moves_per_table="
76
6
                                       + std::to_string(kMovesPerTable));
77
6
    opts->extra_master_flags.push_back("--enable_global_load_balancing=true");
78
6
  }
79
80
6
  void CreateTables() {
81
24
    for (int i = 1; i <= kNumTables; ++i) {
82
18
      table_names_.emplace_back(YQL_DATABASE_CQL,
83
18
                               "my_keyspace-" + std::to_string(i),
84
18
                               "kv-table-test-" + std::to_string(i));
85
18
    }
86
87
18
    for (const auto& tn : table_names_) {
88
18
      ASSERT_OK(client_->CreateNamespaceIfNotExists(tn.namespace_name(), tn.namespace_type()));
89
90
18
      client::YBSchemaBuilder b;
91
18
      b.AddColumn("k")->Type(BINARY)->NotNull()->HashPrimaryKey();
92
18
      b.AddColumn("v")->Type(BINARY)->NotNull();
93
18
      ASSERT_OK(b.Build(&schema_));
94
95
18
      ASSERT_OK(NewTableCreator()->table_name(tn).schema(&schema_).Create());
96
18
    }
97
6
  }
98
99
5
  void DeleteTables() {
100
15
    for (const auto& tn : table_names_) {
101
15
      ASSERT_OK(client_->DeleteTable(tn));
102
15
    }
103
5
    table_names_.clear();
104
5
  }
105
106
6
  void CreateTable() override {
107
6
    if (!table_exists_) {
108
6
      CreateTables();
109
6
      table_exists_ = true;
110
6
    }
111
6
  }
112
113
5
  void DeleteTable() override {
114
5
    if (table_exists_) {
115
5
      DeleteTables();
116
5
      table_exists_ = false;
117
5
    }
118
5
  }
119
};
120
121
1
TEST_F(LoadBalancerMultiTableTest, MultipleLeaderTabletMovesPerTable) {
122
1
  const int default_bg_task_wait_ms = std::stoi(ASSERT_RESULT(
123
1
      external_mini_cluster_->master(0)->GetFlag("catalog_manager_bg_task_wait_ms")));
124
1
  const int test_bg_task_wait_ms = 5 * default_bg_task_wait_ms;
125
126
  // Start with 3 tables each with 5 tablets on 3 servers.
127
1
  ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0,c.r.z1,c.r.z2", 3, ""));
128
129
1
  auto ts0 = external_mini_cluster_->tablet_server(0);
130
1
  string ts0_uuid = ts0->instance_id().permanent_uuid();
131
1
  LOG(INFO) << "Shutting down ts-0. UUID: " << ts0_uuid;
132
1
  ts0->Shutdown();
133
1
  SleepFor(MonoDelta::FromMilliseconds(1000));
134
135
  // Wait for load balancing to complete.
136
1
  WaitForLoadBalanceCompletion();
137
138
  // Get current leader counts.
139
1
  std::unordered_map<string, std::unordered_map<string, int>> initial_leader_counts;
140
3
  for (const auto& tn : table_names_) {
141
3
    initial_leader_counts[tn.table_name()] = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
142
    // Verify that ts-0 has no leaders.
143
3
    ASSERT_EQ(initial_leader_counts[tn.table_name()][ts0_uuid], 0);
144
3
  }
145
146
2
  for (size_t i = 0; i < num_masters(); ++i) {
147
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
148
1
                                              "catalog_manager_bg_task_wait_ms",
149
1
                                              std::to_string(test_bg_task_wait_ms)));
150
1
  }
151
152
  // Sleep for default_bg_task_wait_ms * 2 after setting the wait time to test_bg_task_wait_ms, to
153
  // ensure that the restart of ts-0 occurs while the load balancer is sleeping. Without this, we
154
  // might accidentally sleep through two load balancer runs and overcount leader moves.
155
1
  SleepFor(MonoDelta::FromMilliseconds(default_bg_task_wait_ms * 2));
156
157
1
  LOG(INFO) << "Restarting ts-0";
158
1
  ASSERT_OK(ts0->Restart());
159
1
  LOG(INFO) << "Done restarting ts-0";
160
161
  // Wait for one run of the load balancer to complete.
162
1
  SleepFor(MonoDelta::FromMilliseconds(test_bg_task_wait_ms));
163
164
  // Check new leader counts.
165
1
  int num_leader_moves = 0;
166
3
  for (const auto& tn : table_names_) {
167
3
    const auto new_leader_counts = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
168
    // Only count increases in leaders.
169
9
    for (const auto& lc : new_leader_counts) {
170
9
      num_leader_moves += max(0, lc.second - initial_leader_counts[tn.table_name()][lc.first]);
171
9
    }
172
3
  }
173
174
  // Ensure that we moved one run's worth of leaders (should be one leader move per table).
175
1
  LOG(INFO) << "Moved " << num_leader_moves << " leaders in total.";
176
1
  ASSERT_EQ(num_leader_moves, kMovesPerTable * kNumTables);
177
1
}
178
179
1
TEST_F(LoadBalancerMultiTableTest, GlobalLoadBalancing) {
180
1
  const int rf = 3;
181
1
  std::vector<uint32_t> z0_tserver_loads;
182
  // Start with 3 tables with 5 tablets.
183
1
  ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0,c.r.z1,c.r.z2", rf, ""));
184
185
  // Disable global load balancing.
186
2
  for (size_t i = 0; i < num_masters(); ++i) {
187
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
188
1
                                              "enable_global_load_balancing", "false"));
189
1
  }
190
191
  //// Two tservers:
192
  // Add a new tserver to c.r.z0.
193
  // This zone will then have 15 tablets on the old ts and 0 on the new one.
194
1
  std::vector<std::string> extra_opts;
195
1
  extra_opts.push_back("--placement_cloud=c");
196
1
  extra_opts.push_back("--placement_region=r");
197
1
  extra_opts.push_back("--placement_zone=z0");
198
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true, extra_opts));
199
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_tablet_servers() + 1,
200
1
      kDefaultTimeout));
201
202
  // Wait for load balancing to complete.
203
1
  WaitForLoadBalanceCompletion();
204
205
  // Zone 0 should have 9 tablets on the old ts and 6 on the new ts, since each table will be
206
  // balanced with 3 tablets on the old ts and 2 on the new one. This results in each table having
207
  // a balanced load, while the global load is skewed.
208
209
  // Assert that each table is balanced, but we are not globally balanced.
210
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
211
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 3 }));
212
1
  ASSERT_FALSE(AreLoadsBalanced(z0_tserver_loads));
213
214
  // Enable global load balancing.
215
2
  for (size_t i = 0; i < num_masters(); ++i) {
216
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
217
1
                                              "enable_global_load_balancing", "true"));
218
1
  }
219
220
  // Wait for load balancing to complete.
221
1
  WaitForLoadBalanceCompletion();
222
223
  // Assert that each table is balanced, and that we are now globally balanced.
224
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
225
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 3 }));
226
1
  ASSERT_TRUE(AreLoadsBalanced(z0_tserver_loads));
227
228
229
  //// Three tservers:
230
  // Disable global load balancing.
231
2
  for (size_t i = 0; i < num_masters(); ++i) {
232
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
233
1
                                              "enable_global_load_balancing", "false"));
234
1
  }
235
236
  // Add in a third tserver to zone 0.
237
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true, extra_opts));
238
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_tablet_servers() + 2,
239
1
      kDefaultTimeout));
240
241
  // Wait for load balancing to complete.
242
1
  WaitForLoadBalanceCompletion();
243
244
  // Load will not be evenly spread across these tservers: each table will be (2, 2, 1), leading to
245
  // a global load of (6, 6, 3) in zone 0.
246
247
  // Assert that each table is balanced, and that we are not globally balanced.
248
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
249
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 3, 4 }));
250
1
  ASSERT_FALSE(AreLoadsBalanced(z0_tserver_loads));
251
252
  // Enable global load balancing.
253
2
  for (size_t i = 0; i < num_masters(); ++i) {
254
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
255
1
                                              "enable_global_load_balancing", "true"));
256
1
  }
257
258
  // Wait for load balancing to complete.
259
1
  WaitForLoadBalanceCompletion();
260
261
  // Assert that each table is balanced, and that we are now globally balanced.
262
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
263
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 3, 4 }));
264
1
  ASSERT_TRUE(AreLoadsBalanced(z0_tserver_loads));
265
  // Each node should have exactly 5 tablets on it.
266
1
  ASSERT_EQ(z0_tserver_loads[0], 5);
267
1
  ASSERT_EQ(z0_tserver_loads[1], 5);
268
1
  ASSERT_EQ(z0_tserver_loads[2], 5);
269
1
}
270
271
1
TEST_F(LoadBalancerMultiTableTest, GlobalLoadBalancingWithBlacklist) {
272
1
  const int rf = 3;
273
1
  std::vector<uint32_t> z0_tserver_loads;
274
  // Start with 3 tables with 5 tablets.
275
1
  ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0,c.r.z1,c.r.z2", rf, ""));
276
277
  // Add two tservers to z0 and wait for everything to be balanced (globally and per table).
278
1
  std::vector<std::string> extra_opts;
279
1
  extra_opts.push_back("--placement_cloud=c");
280
1
  extra_opts.push_back("--placement_region=r");
281
1
  extra_opts.push_back("--placement_zone=z0");
282
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true, extra_opts));
283
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true, extra_opts));
284
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_tablet_servers() + 2,
285
1
      kDefaultTimeout));
286
287
  // Wait for load balancing to complete.
288
1
  WaitForLoadBalanceCompletion();
289
290
  // Assert that each table is balanced, and that we are globally balanced.
291
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
292
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 3, 4 }));
293
1
  ASSERT_TRUE(AreLoadsBalanced(z0_tserver_loads));
294
  // Each node should have exactly 5 tablets on it.
295
1
  ASSERT_EQ(z0_tserver_loads[0], 5);
296
1
  ASSERT_EQ(z0_tserver_loads[1], 5);
297
1
  ASSERT_EQ(z0_tserver_loads[2], 5);
298
299
  // Blacklist one tserver in z0.
300
1
  ASSERT_OK(external_mini_cluster()->AddTServerToBlacklist(
301
1
      external_mini_cluster()->master(),
302
1
      external_mini_cluster()->tablet_server(0)));
303
304
  // Wait for load balancing to complete.
305
1
  WaitForLoadBalanceCompletion();
306
307
  // Assert that the blacklisted tserver has no load.
308
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0 }));
309
1
  ASSERT_EQ(z0_tserver_loads[0], 0);
310
311
  // Assert that each table is balanced, and that we are globally balanced amongst the other nodes.
312
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
313
1
  z0_tserver_loads = ASSERT_RESULT(GetTserverLoads({ 3, 4 }));
314
1
  ASSERT_TRUE(AreLoadsBalanced(z0_tserver_loads));
315
1
}
316
317
1
TEST_F(LoadBalancerMultiTableTest, TestDeadNodesLeaderBalancing) {
318
1
  static const int rf = 3;
319
1
  const auto& ts2_id = external_mini_cluster()->tablet_server(2)->uuid();
320
1
  const auto& ts1_id = external_mini_cluster()->tablet_server(1)->uuid();
321
322
  // Reduce the time after which a TS is marked DEAD.
323
  // Logically, after a tserver is killed, we are giving tablets whose leaders are present
324
  // on the dead tserver, ~3x time (3s*3).
325
1
  static const int tserver_unresponsive_timeout_ms = 10000*kTimeMultiplier;
326
1
  bool allow_dead_node_lb = true;
327
2
  for (size_t i = 0; i < num_masters(); ++i) {
328
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
329
1
                                              "tserver_unresponsive_timeout_ms",
330
1
                                              std::to_string(tserver_unresponsive_timeout_ms)));
331
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
332
1
                                              "allow_leader_balancing_dead_node",
333
1
                                              std::to_string(allow_dead_node_lb)));
334
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
335
1
                                              "min_leader_stepdown_retry_interval_ms",
336
1
                                              "3000"));
337
1
  }
338
339
4
  for (size_t i = 0; i < num_tablet_servers(); ++i) {
340
3
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->tablet_server(i),
341
3
                                              "after_stepdown_delay_election_multiplier",
342
3
                                              "1"));
343
3
  }
344
345
  // Verify that the load is evenly distributed.
346
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
347
348
1
  std::vector<uint32_t> tserver_loads;
349
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2 }));
350
1
  ASSERT_TRUE(AreLoadsBalanced(tserver_loads));
351
352
  // Leader blacklist a TS.
353
1
  LOG(INFO) << "Blacklisting node#2 for leaders";
354
355
1
  ASSERT_OK(external_mini_cluster()->AddTServerToLeaderBlacklist(
356
1
      external_mini_cluster()->master(),
357
1
      external_mini_cluster()->tablet_server(2)));
358
359
  // Wait for LB to finish and then verify leaders and load evenly distributed.
360
1
  WaitForLoadBalanceCompletion();
361
362
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2 }));
363
1
  ASSERT_TRUE(AreLoadsBalanced(tserver_loads));
364
1
  ASSERT_EQ(tserver_loads[0], 15);
365
1
  ASSERT_EQ(tserver_loads[1], 15);
366
1
  ASSERT_EQ(tserver_loads[2], 15);
367
368
1
  std::unordered_set<TabletServerId> zero_load_ts;
369
1
  zero_load_ts.insert(ts2_id);
370
3
  for (const auto& tn : table_names_) {
371
3
    const auto new_leader_counts = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
372
3
    ASSERT_TRUE(AreLoadsAsExpected(new_leader_counts, zero_load_ts));
373
3
  }
374
375
  // Pause a TS; the blacklist is emptied later, once the master has marked this TS dead.
376
1
  LOG(INFO) << "Killing tablet server #" << 1;
377
1
  ASSERT_OK(external_mini_cluster()->tablet_server(1)->Pause());
378
379
  // Wait for the master leader to mark it dead.
380
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
381
1
    return external_mini_cluster()->is_ts_stale(1);
382
1
  },
383
1
  MonoDelta::FromMilliseconds(2 * tserver_unresponsive_timeout_ms),
384
1
  "Is TS dead",
385
1
  MonoDelta::FromSeconds(1)));
386
387
  // All the leaders should now be on the first TS.
388
1
  zero_load_ts.insert(ts1_id);
389
3
  for (const auto& tn : table_names_) {
390
3
    const auto new_leader_counts = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
391
3
    ASSERT_TRUE(AreLoadsAsExpected(new_leader_counts, zero_load_ts));
392
3
  }
393
394
  // Remove TS 2 from leader blacklist so that leader load gets transferred
395
  // to TS2 in the presence of a DEAD TS1.
396
1
  LOG(INFO) << "Emptying blacklist";
397
1
  ASSERT_OK(external_mini_cluster()->ClearBlacklist(
398
1
      external_mini_cluster()->master()));
399
400
1
  WaitForLoadBalanceCompletion();
401
402
  // Verify loads and leader loads.
403
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2 }));
404
1
  ASSERT_TRUE(AreLoadsBalanced(tserver_loads));
405
1
  ASSERT_EQ(tserver_loads[0], 15);
406
1
  ASSERT_EQ(tserver_loads[1], 15);
407
1
  ASSERT_EQ(tserver_loads[2], 15);
408
409
  // Check new leader counts. TS 0 and 2 should contain all the leaders.
410
1
  zero_load_ts.erase(ts2_id);
411
3
  for (const auto& tn : table_names_) {
412
3
    const auto new_leader_counts = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
413
3
    ASSERT_TRUE(AreLoadsAsExpected(new_leader_counts, zero_load_ts));
414
3
  }
415
416
1
  LOG(INFO) << "Resuming TS#1";
417
1
  ASSERT_OK(external_mini_cluster()->tablet_server(1)->Resume());
418
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_tablet_servers(),
419
1
                                                              kDefaultTimeout));
420
421
1
  WaitForLoadBalanceCompletion();
422
1
}
423
424
1
TEST_F(LoadBalancerMultiTableTest, TestLBWithDeadBlacklistedTS) {
425
1
  const int rf = 3;
426
1
  auto num_ts = num_tablet_servers();
427
428
  // Reduce the time after which a TS is marked DEAD.
429
1
  int tserver_unresponsive_timeout_ms = 5000;
430
2
  for (size_t i = 0; i < num_masters(); ++i) {
431
1
    ASSERT_OK(external_mini_cluster_->SetFlag(external_mini_cluster_->master(i),
432
1
                                              "tserver_unresponsive_timeout_ms",
433
1
                                              std::to_string(tserver_unresponsive_timeout_ms)));
434
1
  }
435
436
  // Add a TS and wait for LB to complete.
437
1
  LOG(INFO) << "Adding a TS";
438
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true));
439
1
  ++num_ts;
440
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_ts, kDefaultTimeout));
441
442
1
  WaitForLoadBalanceCompletion();
443
444
  // Load should be balanced.
445
1
  ASSERT_OK(client_->IsLoadBalanced(kNumTables * num_tablets() * rf));
446
447
1
  std::vector<uint32_t> tserver_loads;
448
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2, 3 }));
449
1
  ASSERT_TRUE(AreLoadsBalanced(tserver_loads));
450
451
  // Test 1: Test load movement with existing tservers.
452
  // Kill and blacklist a TS.
453
1
  LOG(INFO) << "Killing tablet server #" << 2;
454
1
  external_mini_cluster()->tablet_server(2)->Shutdown();
455
456
  // Wait for the master leader to mark it dead.
457
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
458
1
    return external_mini_cluster()->is_ts_stale(2);
459
1
  },
460
1
  MonoDelta::FromMilliseconds(2 * tserver_unresponsive_timeout_ms),
461
1
  "Is TS dead",
462
1
  MonoDelta::FromSeconds(1)));
463
464
1
  LOG(INFO) << "Node #2 dead. Blacklisting it.";
465
466
1
  ASSERT_OK(external_mini_cluster()->AddTServerToBlacklist(
467
1
      external_mini_cluster()->master(),
468
1
      external_mini_cluster()->tablet_server(2)));
469
470
  // Wait for LB to become idle.
471
1
  WaitForLoadBalanceCompletion();
472
473
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2, 3 }));
474
475
  // Each node should have exactly 15 tablets except node 2.
476
1
  ASSERT_EQ(tserver_loads[0], 15);
477
1
  ASSERT_EQ(tserver_loads[1], 15);
478
1
  ASSERT_EQ(tserver_loads[2], 0);
479
1
  ASSERT_EQ(tserver_loads[3], 15);
480
481
  // Test 2: Test adding a new node while blacklist+dead is in progress.
482
  // Kill and blacklist a TS.
483
1
  LOG(INFO) << "Killing tablet server #" << 3;
484
1
  external_mini_cluster()->tablet_server(3)->Shutdown();
485
486
  // Wait for the master leader to mark it dead.
487
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
488
1
    return external_mini_cluster()->is_ts_stale(3);
489
1
  },
490
1
  MonoDelta::FromMilliseconds(2 * tserver_unresponsive_timeout_ms),
491
1
  "Is TS dead",
492
1
  MonoDelta::FromSeconds(1)));
493
494
1
  LOG(INFO) << "Node #3 dead. Blacklisting it.";
495
496
1
  ASSERT_OK(external_mini_cluster()->AddTServerToBlacklist(
497
1
      external_mini_cluster()->master(),
498
1
      external_mini_cluster()->tablet_server(3)));
499
500
  // Add a TS now and check if load is transferred.
501
1
  LOG(INFO) << "Adding a TS";
502
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer(true));
503
1
  ++num_ts;
504
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_ts, kDefaultTimeout));
505
506
  // Wait for LB to become idle.
507
1
  WaitForLoadBalanceCompletion();
508
509
1
  tserver_loads = ASSERT_RESULT(GetTserverLoads({ 0, 1, 2, 3, 4 }));
510
511
  // Each node should have exactly 15 tablets on it except node 2 and 3.
512
1
  ASSERT_EQ(tserver_loads[0], 15);
513
1
  ASSERT_EQ(tserver_loads[1], 15);
514
1
  ASSERT_EQ(tserver_loads[2], 0);
515
1
  ASSERT_EQ(tserver_loads[3], 0);
516
1
  ASSERT_EQ(tserver_loads[4], 15);
517
1
}
518
519
1
TEST_F(LoadBalancerMultiTableTest, GlobalLeaderBalancing) {
520
1
  auto num_ts = num_tablet_servers();
521
522
1
  ASSERT_OK(WaitFor([&]() -> Result<bool> {
523
1
    return client_->IsLoadBalancerIdle();
524
1
  }, kDefaultTimeout, "IsLoadBalancerIdle"));
525
1
  ASSERT_OK(external_mini_cluster()->SetFlagOnMasters("enable_load_balancing", "false"));
526
527
  // Add a couple of TServers so that each node has 1 leader tablet per table.
528
1
  LOG(INFO) << "Adding 2 tservers";
529
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer());
530
1
  ++num_ts;
531
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer());
532
1
  ++num_ts;
533
1
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_ts, kDefaultTimeout));
534
535
1
  ASSERT_OK(external_mini_cluster()->SetFlagOnMasters("enable_load_balancing", "true"));
536
  // Wait for load balancing to complete.
537
1
  WaitForLoadBalanceCompletion();
538
539
  // Now add a new TS. Per table there won't be any leader transfer
540
  // as each node has 1 leader/table.
541
  // Total leader loads without global leader balancing will be:
542
  // 3, 3, 3, 3, 3, 0.
543
  // Global leader balancing should kick in and make it
544
  // 3, 3, 3, 2, 2, 2.
545
1
  LOG(INFO) << "Adding another tserver on which global leader load should be transferred";
546
1
  ASSERT_OK(external_mini_cluster()->AddTabletServer());
547
0
  ++num_ts;
548
0
  ASSERT_OK(external_mini_cluster()->WaitForTabletServerCount(num_ts, kDefaultTimeout));
549
0
  string new_ts = external_mini_cluster()->tablet_server(num_ts - 1)->uuid();
550
0
  ASSERT_FALSE(new_ts.empty());
551
0
  WaitForLoadBalanceCompletion();
552
553
  // Check for leader loads.
554
0
  std::vector<uint32_t> leader_tserver_loads;
555
0
  std::unordered_map<TabletServerId, int> per_ts_leader_loads;
556
0
  int total_leaders = 0;
557
0
  for (const auto& tn : table_names_) {
558
0
    const auto new_leader_counts = ASSERT_RESULT(yb_admin_client_->GetLeaderCounts(tn));
559
0
    for (const auto& lc : new_leader_counts) {
560
0
      per_ts_leader_loads[lc.first] += lc.second;
561
0
      total_leaders += lc.second;
562
0
    }
563
0
  }
564
565
0
  ASSERT_EQ(total_leaders, 15);
566
  // TODO: replace with AreLoadsAsExpected(per_ts_leader_loads) after
567
  // https://github.com/yugabyte/yugabyte-db/issues/10002 is fixed.
568
0
  ASSERT_GT(per_ts_leader_loads[new_ts], 0);
569
0
}
570
571
} // namespace integration_tests
572
} // namespace yb