/Users/deen/code/yugabyte-db/src/yb/integration-tests/master_failover-itest.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include <functional> |
34 | | #include <string> |
35 | | #include <vector> |
36 | | |
37 | | #include <glog/logging.h> |
38 | | #include <gtest/gtest.h> |
39 | | |
40 | | #include "yb/client/client-internal.h" |
41 | | #include "yb/client/client-test-util.h" |
42 | | #include "yb/client/client.h" |
43 | | #include "yb/client/schema.h" |
44 | | #include "yb/client/table.h" |
45 | | #include "yb/client/table_alterer.h" |
46 | | #include "yb/client/table_creator.h" |
47 | | #include "yb/client/table_handle.h" |
48 | | #include "yb/client/table_info.h" |
49 | | #include "yb/client/tablet_server.h" |
50 | | #include "yb/client/yb_table_name.h" |
51 | | |
52 | | #include "yb/common/common.pb.h" |
53 | | #include "yb/common/schema.h" |
54 | | #include "yb/common/wire_protocol-test-util.h" |
55 | | |
56 | | #include "yb/gutil/strings/substitute.h" |
57 | | |
58 | | #include "yb/integration-tests/external_mini_cluster.h" |
59 | | |
60 | | #include "yb/master/master_cluster.proxy.h" |
61 | | |
62 | | #include "yb/rpc/rpc_controller.h" |
63 | | |
64 | | #include "yb/tools/yb-admin_client.h" |
65 | | |
66 | | #include "yb/util/logging.h" |
67 | | #include "yb/util/monotime.h" |
68 | | #include "yb/util/net/net_util.h" |
69 | | #include "yb/util/result.h" |
70 | | #include "yb/util/test_util.h" |
71 | | #include "yb/util/tsan_util.h" |
72 | | |
73 | | using namespace std::literals; |
74 | | |
75 | | DECLARE_int32(ycql_num_tablets); |
76 | | DECLARE_int32(ysql_num_tablets); |
77 | | DECLARE_int32(heartbeat_interval_ms); |
78 | | |
79 | | namespace yb { |
80 | | |
81 | | // Note: this test needs to be in the client namespace in order for |
82 | | // YBClient::Data class methods to be visible via FRIEND_TEST macro. |
83 | | namespace client { |
84 | | |
85 | | const int kNumTabletServerReplicas = 3; |
86 | | const int kHeartbeatIntervalMs = 500; |
87 | | |
88 | | using std::shared_ptr; |
89 | | using std::string; |
90 | | using std::vector; |
91 | | using client::YBTableName; |
92 | | |
93 | | class MasterFailoverTest : public YBTest { |
94 | | public: |
95 | | enum CreateTableMode { |
96 | | kWaitForCreate = 0, |
97 | | kNoWaitForCreate = 1 |
98 | | }; |
99 | | |
100 | 14 | MasterFailoverTest() { |
101 | 14 | opts_.master_rpc_ports = { 0, 0, 0 }; |
102 | 14 | opts_.num_masters = num_masters_ = opts_.master_rpc_ports.size(); |
103 | 14 | opts_.num_tablet_servers = kNumTabletServerReplicas; |
104 | | |
105 | | // Reduce various timeouts below so as to make the detection of |
106 | | // leader master failures (specifically, failures as result of |
107 | | // long pauses) more rapid. |
108 | | |
109 | | // Set the TS->master heartbeat timeout to 1 second (down from 15 seconds). |
110 | 14 | opts_.extra_tserver_flags.push_back("--heartbeat_rpc_timeout_ms=1000"); |
111 | | // Allow one TS heartbeat failure before retrying with back-off (down from 3). |
112 | 14 | opts_.extra_tserver_flags.push_back("--heartbeat_max_failures_before_backoff=1"); |
113 | | // Wait for 500 ms after 'max_consecutive_failed_heartbeats' |
114 | | // before trying again (down from 1 second). |
115 | 14 | string heartbeat_interval_flag = |
116 | 14 | "--heartbeat_interval_ms="+std::to_string(kHeartbeatIntervalMs); |
117 | 14 | opts_.extra_tserver_flags.push_back(heartbeat_interval_flag); |
118 | 14 | } |
119 | | |
120 | 14 | void SetUp() override { |
121 | 14 | YBTest::SetUp(); |
122 | 14 | ASSERT_NO_FATALS(RestartCluster()); |
123 | 14 | } |
124 | | |
125 | 13 | void TearDown() override { |
126 | 13 | client_.reset(); |
127 | 13 | if (cluster_) { |
128 | 13 | cluster_->Shutdown(); |
129 | 13 | } |
130 | 13 | YBTest::TearDown(); |
131 | 13 | } |
132 | | |
133 | 14 | void RestartCluster() { |
134 | 14 | if (cluster_) { |
135 | 0 | cluster_->Shutdown(); |
136 | 0 | cluster_.reset(); |
137 | 0 | } |
138 | 14 | opts_.timeout = MonoDelta::FromSeconds(NonTsanVsTsan(20, 60)); |
139 | 14 | cluster_.reset(new ExternalMiniCluster(opts_)); |
140 | 14 | ASSERT_OK(cluster_->Start()); |
141 | 14 | client_ = ASSERT_RESULT(cluster_->CreateClient()); |
142 | 14 | } |
143 | | |
144 | 11 | Status CreateTable(const YBTableName& table_name, CreateTableMode mode) { |
145 | 11 | RETURN_NOT_OK_PREPEND( |
146 | 11 | client_->CreateNamespaceIfNotExists(table_name.namespace_name()), |
147 | 11 | "Unable to create namespace " + table_name.namespace_name()); |
148 | 11 | client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema())); |
149 | 11 | std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator()); |
150 | 11 | return table_creator->table_name(table_name) |
151 | 11 | .table_type(YBTableType::YQL_TABLE_TYPE) |
152 | 11 | .schema(&client_schema) |
153 | 11 | .hash_schema(YBHashSchema::kMultiColumnHash) |
154 | 11 | .timeout(MonoDelta::FromSeconds(90)) |
155 | 11 | .wait(mode == kWaitForCreate) |
156 | 11 | .Create(); |
157 | 11 | } |
158 | | |
159 | | Status CreateIndex( |
160 | 18 | const YBTableName& indexed_table_name, const YBTableName& index_name, CreateTableMode mode) { |
161 | 18 | RETURN_NOT_OK_PREPEND( |
162 | 18 | client_->CreateNamespaceIfNotExists(index_name.namespace_name()), |
163 | 18 | "Unable to create namespace " + index_name.namespace_name()); |
164 | 18 | client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema())); |
165 | 18 | client::TableHandle table; |
166 | 18 | RETURN_NOT_OK_PREPEND( |
167 | 18 | table.Open(indexed_table_name, client_.get()), |
168 | 18 | "Unable to open table " + indexed_table_name.ToString()); |
169 | | |
170 | 18 | std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator()); |
171 | 18 | return table_creator->table_name(index_name) |
172 | 18 | .table_type(YBTableType::YQL_TABLE_TYPE) |
173 | 18 | .indexed_table_id(table->id()) |
174 | 18 | .schema(&client_schema) |
175 | 18 | .hash_schema(YBHashSchema::kMultiColumnHash) |
176 | 18 | .timeout(MonoDelta::FromSeconds(90)) |
177 | 18 | .wait(mode == kWaitForCreate) |
178 | | // In the new style create index request, the CQL proxy populates the |
179 | | // index info instead of the master. However, in these tests we bypass |
180 | | // the proxy and go directly to the master. We need to use the old |
181 | | // style create request to have the master generate the appropriate |
182 | | // index info. |
183 | 18 | .TEST_use_old_style_create_request() |
184 | 18 | .Create(); |
185 | 18 | } |
186 | | |
187 | 0 | Status RenameTable(const YBTableName& table_name_orig, const YBTableName& table_name_new) { |
188 | 0 | std::unique_ptr<YBTableAlterer> table_alterer(client_->NewTableAlterer(table_name_orig)); |
189 | 0 | return table_alterer |
190 | 0 | ->RenameTo(table_name_new) |
191 | 0 | ->timeout(MonoDelta::FromSeconds(90)) |
192 | 0 | ->wait(true) |
193 | 0 | ->Alter(); |
194 | 0 | } |
195 | | |
196 | | // Test that we can get the table location information from the |
197 | | // master and then open scanners on the tablet server. This involves |
198 | | // sending RPCs to both the master and the tablet servers and |
199 | | // requires that the table and tablet exist both on the masters and |
200 | | // the tablet servers. |
201 | 0 | Status OpenTableAndScanner(const YBTableName& table_name) { |
202 | 0 | client::TableHandle table; |
203 | 0 | RETURN_NOT_OK_PREPEND(table.Open(table_name, client_.get()), |
204 | 0 | "Unable to open table " + table_name.ToString()); |
205 | 0 | client::TableRange range(table); |
206 | 0 | auto it = range.begin(); |
207 | 0 | if (it != range.end()) { |
208 | 0 | ++it; |
209 | 0 | } |
210 | |
|
211 | 0 | return Status::OK(); |
212 | 0 | } |
213 | | |
214 | | protected: |
215 | | size_t num_masters_; |
216 | | ExternalMiniClusterOptions opts_; |
217 | | std::unique_ptr<ExternalMiniCluster> cluster_; |
218 | | std::unique_ptr<YBClient> client_; |
219 | | }; |
220 | | |
221 | | class MasterFailoverTestIndexCreation : public MasterFailoverTest, |
222 | | public ::testing::WithParamInterface<int> { |
223 | | public: |
224 | 9 | MasterFailoverTestIndexCreation() { |
225 | 9 | opts_.extra_tserver_flags.push_back("--allow_index_table_read_write=true"); |
226 | 9 | opts_.extra_tserver_flags.push_back( |
227 | 9 | "--index_backfill_upperbound_for_user_enforced_txn_duration_ms=100"); |
228 | 9 | opts_.extra_master_flags.push_back("--TEST_slowdown_backfill_alter_table_rpcs_ms=50"); |
229 | 9 | opts_.extra_master_flags.push_back("--disable_index_backfill=false"); |
230 | | // Sometimes during master failover we have the create index kick in before the tservers have |
231 | | // checked in. By default we wait for enough TSs -- else we fail the create table/idx request. |
232 | | // We don't have to wait for that in the tests here. |
233 | 9 | opts_.extra_master_flags.push_back("--catalog_manager_check_ts_count_for_create_table=false"); |
234 | 9 | } |
235 | | |
236 | | // Master has to do 5 RPCs to TServers to create+backfill an index. |
237 | | // 4 of them set the 4 IndexPermissions (one each), and 1 is for GetSafeTime. |
238 | | // We want to simulate a failure before and after each RPC, so total 10 stages. |
239 | | static constexpr int kNumMaxStages = 10; |
240 | | }; |
241 | | |
242 | | INSTANTIATE_TEST_CASE_P( |
243 | | MasterFailoverTestIndexCreation, MasterFailoverTestIndexCreation, |
244 | | ::testing::Range(1, MasterFailoverTestIndexCreation::kNumMaxStages)); |
245 | | // Test that synchronous CreateTable (issue CreateTable call and then |
246 | | // wait until the table has been created) works even when the original |
247 | | // leader master has been paused. |
248 | | // |
249 | | // Temporarily disabled since multi-master isn't supported yet. |
250 | | // This test fails as of KUDU-1138, since the tablet servers haven't |
251 | | // registered with the follower master, and thus it's likely to deny |
252 | | // the CreateTable request thinking there are no TS available. |
253 | 0 | TEST_F(MasterFailoverTest, DISABLED_TestCreateTableSync) { |
254 | 0 | if (!AllowSlowTests()) { |
255 | 0 | LOG(INFO) << "This test can only be run in slow mode."; |
256 | 0 | return; |
257 | 0 | } |
258 | | |
259 | 0 | auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
260 | |
|
261 | 0 | LOG(INFO) << "Pausing leader master"; |
262 | 0 | ASSERT_OK(cluster_->master(leader_idx)->Pause()); |
263 | 0 | ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx)); |
264 | |
|
265 | 0 | YBTableName table_name(YQL_DATABASE_CQL, "testCreateTableSync"); |
266 | 0 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
267 | 0 | ASSERT_OK(OpenTableAndScanner(table_name)); |
268 | 0 | } |
269 | | |
270 | | // Test that we can issue a CreateTable call, pause the leader master |
271 | | // immediately after, then verify that the table has been created on |
272 | | // the newly elected leader master. |
273 | | // |
274 | | // TODO enable this test once flakiness issues are worked out and |
275 | | // eliminated on test machines. |
276 | 0 | TEST_F(MasterFailoverTest, DISABLED_TestPauseAfterCreateTableIssued) { |
277 | 0 | if (!AllowSlowTests()) { |
278 | 0 | LOG(INFO) << "This test can only be run in slow mode."; |
279 | 0 | return; |
280 | 0 | } |
281 | | |
282 | 0 | auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
283 | |
|
284 | 0 | YBTableName table_name(YQL_DATABASE_CQL, "testPauseAfterCreateTableIssued"); |
285 | 0 | LOG(INFO) << "Issuing CreateTable for " << table_name.ToString(); |
286 | 0 | ASSERT_OK(CreateTable(table_name, kNoWaitForCreate)); |
287 | |
|
288 | 0 | LOG(INFO) << "Pausing leader master"; |
289 | 0 | ASSERT_OK(cluster_->master(leader_idx)->Pause()); |
290 | 0 | ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx)); |
291 | |
|
292 | 0 | auto deadline = CoarseMonoClock::Now() + 90s; |
293 | 0 | ASSERT_OK(client_->data_->WaitForCreateTableToFinish(client_.get(), table_name, "" /* table_id */, |
294 | 0 | deadline)); |
295 | |
|
296 | 0 | ASSERT_OK(OpenTableAndScanner(table_name)); |
297 | 0 | } |
298 | | |
299 | | // Orchestrate a master failover at various points of an index backfill and |
300 | | // ensure that the backfill eventually completes. |
301 | 9 | TEST_P(MasterFailoverTestIndexCreation, TestPauseAfterCreateIndexIssued) { |
302 | 9 | const int kPauseAfterStage = GetParam(); |
303 | 9 | YBTableName table_name(YQL_DATABASE_CQL, "test", "testPauseAfterCreateTableIssued"); |
304 | 9 | LOG(INFO) << "Issuing CreateTable for " << table_name.ToString(); |
305 | 9 | FLAGS_ycql_num_tablets = 5; |
306 | 9 | FLAGS_ysql_num_tablets = 5; |
307 | 9 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
308 | 9 | LOG(INFO) << "CreateTable done for " << table_name.ToString(); |
309 | | |
310 | 9 | MonoDelta total_time_taken_for_one_iteration; |
311 | | // In the first run, we estimate the total time taken for one create index to complete. |
312 | | // The second run will pause the master at the desired point during create index. |
313 | 27 | for (int i = 0; i < 2; i++) { |
314 | 18 | auto start = ToSteady(CoarseMonoClock::Now()); |
315 | 18 | auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
316 | 18 | ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx)); |
317 | | |
318 | 18 | OpIdPB op_id; |
319 | 18 | ASSERT_OK(cluster_->GetLastOpIdForLeader(&op_id)); |
320 | 18 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(static_cast<int>(op_id.index()))); |
321 | | |
322 | 18 | YBTableName index_table_name( |
323 | 18 | YQL_DATABASE_CQL, "test", "testPauseAfterCreateTableIssuedIdx" + yb::ToString(i)); |
324 | 18 | LOG(INFO) << "Issuing CreateIndex for " << index_table_name.ToString(); |
325 | 18 | ASSERT_OK(CreateIndex(table_name, index_table_name, kNoWaitForCreate)); |
326 | | |
327 | 18 | if (i != 0) { |
328 | | // The first run (i == 0) measured how long an uninterrupted backfill |
329 | | // takes to complete; this second run pauses the master leader part-way |
330 | | // through that duration to force a master failover during the chosen |
331 | | // stage of index backfill. |
332 | 9 | MonoDelta sleep_time = total_time_taken_for_one_iteration * kPauseAfterStage / kNumMaxStages; |
333 | 9 | LOG(INFO) << "Sleeping for " << sleep_time << ", before master pause"; |
334 | 9 | SleepFor(sleep_time); |
335 | | |
336 | 9 | LOG(INFO) << "Pausing leader master 0-based: " << leader_idx << " i.e. m-" |
337 | 9 | << 1 + leader_idx; |
338 | 9 | ASSERT_OK(cluster_->master(leader_idx)->Pause()); |
339 | 9 | } |
340 | | |
341 | 18 | IndexInfoPB index_info_pb; |
342 | 18 | TableId index_table_id; |
343 | 18 | const auto deadline = CoarseMonoClock::Now() + 900s; |
344 | 18 | do { |
345 | 18 | ASSERT_OK(client_->data_->WaitForCreateTableToFinish( |
346 | 18 | client_.get(), index_table_name, "" /* table_id */, deadline)); |
347 | 18 | ASSERT_OK(client_->data_->WaitForCreateTableToFinish( |
348 | 18 | client_.get(), table_name, "" /* table_id */, deadline)); |
349 | | |
350 | 18 | Result<YBTableInfo> table_info = client_->GetYBTableInfo(table_name); |
351 | 18 | ASSERT_TRUE(table_info); |
352 | 18 | Result<YBTableInfo> index_table_info = client_->GetYBTableInfo(index_table_name); |
353 | 18 | ASSERT_TRUE(index_table_info); |
354 | | |
355 | 18 | index_table_id = index_table_info->table_id; |
356 | 18 | index_info_pb.Clear(); |
357 | 18 | table_info->index_map[index_table_id].ToPB(&index_info_pb); |
358 | 18 | YB_LOG_EVERY_N_SECS(INFO, 1) << "The index info for " |
359 | 18 | << index_table_name.ToString() << " is " |
360 | 18 | << yb::ToString(index_info_pb); |
361 | | |
362 | 18 | ASSERT_TRUE(index_info_pb.has_index_permissions()); |
363 | 18 | } while (index_info_pb.index_permissions() < |
364 | 18 | IndexPermissions::INDEX_PERM_READ_WRITE_AND_DELETE && |
365 | 0 | CoarseMonoClock::Now() < deadline); |
366 | | |
367 | 18 | EXPECT_EQ(index_info_pb.index_permissions(), |
368 | 18 | IndexPermissions::INDEX_PERM_READ_WRITE_AND_DELETE); |
369 | | |
370 | 18 | LOG(INFO) << "All good for iteration " << i; |
371 | 18 | ASSERT_OK(client_->DeleteIndexTable(index_table_name, nullptr, /* wait */ true)); |
372 | 18 | ASSERT_OK(client_->data_->WaitForDeleteTableToFinish( |
373 | 18 | client_.get(), index_table_id, deadline)); |
374 | | |
375 | | // In the first round we simply record how long one full iteration took. |
376 | 18 | if (i == 0) { |
377 | 9 | total_time_taken_for_one_iteration = ToSteady(CoarseMonoClock::Now()) - start; |
378 | 9 | } |
379 | 18 | } |
380 | 9 | } |
381 | | |
382 | | // Test the scenario where we create a table, pause the leader master, |
383 | | // and then issue the DeleteTable call: DeleteTable should go to the newly |
384 | | // elected leader master and succeed. |
385 | 1 | TEST_F(MasterFailoverTest, TestDeleteTableSync) { |
386 | 1 | if (!AllowSlowTests()) { |
387 | 1 | LOG(INFO) << "This test can only be run in slow mode."; |
388 | 1 | return; |
389 | 1 | } |
390 | | |
391 | 0 | auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
392 | |
|
393 | 0 | YBTableName table_name(YQL_DATABASE_CQL, "test", "testDeleteTableSync"); |
394 | 0 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
395 | |
|
396 | 0 | LOG(INFO) << "Pausing leader master"; |
397 | 0 | ASSERT_OK(cluster_->master(leader_idx)->Pause()); |
398 | 0 | ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx)); |
399 | |
|
400 | 0 | ASSERT_OK(client_->DeleteTable(table_name)); |
401 | 0 | shared_ptr<YBTable> table; |
402 | 0 | Status s = client_->OpenTable(table_name, &table); |
403 | 0 | ASSERT_TRUE(s.IsNotFound()); |
404 | 0 | } |
405 | | |
406 | | // Test the scenario where we create a table, pause the leader master, |
407 | | // and then issue the AlterTable call renaming a table: AlterTable |
408 | | // should go to the newly elected leader master and succeed, renaming |
409 | | // the table. |
410 | | // |
411 | | // TODO: Add an equivalent async test. Add a test for adding and/or |
412 | | // renaming a column in a table. |
413 | 1 | TEST_F(MasterFailoverTest, TestRenameTableSync) { |
414 | 1 | if (!AllowSlowTests()) { |
415 | 1 | LOG(INFO) << "This test can only be run in slow mode."; |
416 | 1 | return; |
417 | 1 | } |
418 | | |
419 | 0 | auto leader_idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
420 | |
|
421 | 0 | YBTableName table_name_orig(YQL_DATABASE_CQL, "test", "testAlterTableSync"); |
422 | 0 | ASSERT_OK(CreateTable(table_name_orig, kWaitForCreate)); |
423 | |
|
424 | 0 | LOG(INFO) << "Pausing leader master"; |
425 | 0 | ASSERT_OK(cluster_->master(leader_idx)->Pause()); |
426 | 0 | ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx)); |
427 | |
|
428 | 0 | YBTableName table_name_new(YQL_DATABASE_CQL, "test", "testAlterTableSyncRenamed"); |
429 | 0 | ASSERT_OK(RenameTable(table_name_orig, table_name_new)); |
430 | 0 | shared_ptr<YBTable> table; |
431 | 0 | ASSERT_OK(client_->OpenTable(table_name_new, &table)); |
432 | |
|
433 | 0 | Status s = client_->OpenTable(table_name_orig, &table); |
434 | 0 | ASSERT_TRUE(s.IsNotFound()); |
435 | 0 | } |
436 | | |
437 | 1 | TEST_F(MasterFailoverTest, TestFailoverAfterTsFailure) { |
438 | 3 | for (auto master : cluster_->master_daemons()) { |
439 | 3 | ASSERT_OK(cluster_->SetFlag(master, "enable_register_ts_from_raft", "true")); |
440 | 3 | } |
441 | 1 | YBTableName table_name(YQL_DATABASE_CQL, "test", "testFailoverAfterTsFailure"); |
442 | 1 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
443 | | |
444 | 1 | cluster_->tablet_server(0)->Shutdown(); |
445 | | |
446 | | // Roll over to a new master. |
447 | 1 | ASSERT_OK(cluster_->ChangeConfig(cluster_->GetLeaderMaster(), consensus::REMOVE_SERVER)); |
448 | | |
449 | | // Wait until the count of all tablet servers (primary_only = false) equals 3. |
450 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
451 | 1 | int tserver_count; |
452 | 1 | RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, false /* primary_only */)); |
453 | 1 | return tserver_count == 3; |
454 | 1 | }, MonoDelta::FromSeconds(30), "Wait for tablet server count")); |
455 | | |
456 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
457 | 1 | int tserver_count; |
458 | 1 | RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, true /* primary_only */)); |
459 | 1 | return tserver_count == 2; |
460 | 1 | }, MonoDelta::FromSeconds(30), "Wait for tablet server count")); |
461 | | |
462 | 1 | google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets; |
463 | 1 | ASSERT_OK(client_->GetTablets(table_name, 0, &tablets, /* partition_list_version =*/ nullptr)); |
464 | | |
465 | | // Assert master sees that all tablets have 3 replicas. |
466 | 3 | for (const auto& loc : tablets) { |
467 | 3 | ASSERT_EQ(loc.replicas_size(), 3); |
468 | 3 | } |
469 | | |
470 | | // Make sure we can issue a delete table that doesn't crash with the fake ts. Then, make sure |
471 | | // when we restart the server, we properly re-register and have no crashes. |
472 | 1 | ASSERT_OK(client_->DeleteTable(table_name, false /* wait */)); |
473 | 1 | ASSERT_OK(cluster_->tablet_server(0)->Start()); |
474 | | |
475 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
476 | 1 | int tserver_count; |
477 | 1 | RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, true /* primary_only */)); |
478 | 1 | bool is_idle = VERIFY_RESULT(client_->IsLoadBalancerIdle()); |
479 | | // We have registered the new tserver and the LB is idle. |
480 | 1 | return tserver_count == 3 && is_idle; |
481 | 1 | }, MonoDelta::FromSeconds(30), "Wait for LB idle")); |
482 | | |
483 | 1 | cluster_->AssertNoCrashes(); |
484 | 1 | } |
485 | | |
486 | 1 | TEST_F(MasterFailoverTest, TestLoadMoveCompletion) { |
487 | | // Original cluster is RF3 so add a TS. |
488 | 1 | LOG(INFO) << "Adding a T-Server."; |
489 | 1 | ASSERT_OK(cluster_->AddTabletServer()); |
490 | | |
491 | | // Create a table to introduce some workload. |
492 | 1 | YBTableName table_name(YQL_DATABASE_CQL, "test", "testLoadMoveCompletion"); |
493 | 1 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
494 | | |
495 | | // Give some time for the cluster balancer to balance tablets. |
496 | 1 | std::function<Result<bool> ()> is_idle = [&]() -> Result<bool> { |
497 | 1 | return client_->IsLoadBalancerIdle(); |
498 | 1 | }; |
499 | 1 | ASSERT_OK(WaitFor(is_idle, |
500 | 1 | MonoDelta::FromSeconds(60), |
501 | 1 | "Load Balancer Idle check failed")); |
502 | | |
503 | | // Disable TS heartbeats. |
504 | 1 | LOG(INFO) << "Disabled Heartbeats"; |
505 | 1 | ASSERT_OK(cluster_->SetFlagOnTServers("TEST_tserver_disable_heartbeat", "true")); |
506 | | |
507 | | // Blacklist a TS. |
508 | 1 | ExternalMaster *leader = cluster_->GetLeaderMaster(); |
509 | 1 | ExternalTabletServer *ts = cluster_->tablet_server(3); |
510 | 1 | ASSERT_OK(cluster_->AddTServerToBlacklist(leader, ts)); |
511 | 1 | LOG(INFO) << "Blacklisted tserver#3"; |
512 | | |
513 | | // Get the initial load. |
514 | 1 | auto idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
515 | | |
516 | 1 | auto proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(idx); |
517 | | |
518 | 1 | rpc::RpcController rpc; |
519 | 1 | master::GetLoadMovePercentRequestPB req; |
520 | 1 | master::GetLoadMovePercentResponsePB resp; |
521 | 1 | ASSERT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc)); |
522 | | |
523 | 1 | auto initial_total_load = resp.total(); |
524 | | |
525 | | // Failover the leader. |
526 | 1 | LOG(INFO) << "Failing over master leader."; |
527 | 1 | ASSERT_OK(cluster_->StepDownMasterLeaderAndWaitForNewLeader()); |
528 | | |
529 | | // Get the final load and validate. |
530 | 1 | req.Clear(); |
531 | 1 | resp.Clear(); |
532 | 1 | rpc.Reset(); |
533 | | |
534 | 1 | idx = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
535 | | |
536 | 1 | proxy = cluster_->GetMasterProxy<master::MasterClusterProxy>(idx); |
537 | 1 | ASSERT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc)); |
538 | 1 | LOG(INFO) << "Initial loads. Before master leader failover: " << initial_total_load |
539 | 1 | << " v/s after master leader failover: " << resp.total(); |
540 | | |
541 | 2 | EXPECT_EQ(resp.total(), initial_total_load) |
542 | 2 | << "Expected the initial blacklisted load to be propagated to new leader master."; |
543 | | |
544 | | // The progress should be reported as 0 until tservers heartbeat |
545 | | // their tablet reports. |
546 | 2 | EXPECT_EQ(resp.percent(), 0) << "Expected the initial progress" |
547 | 2 | " to be zero."; |
548 | | |
549 | | // Now enable heartbeats. |
550 | 1 | ASSERT_OK(cluster_->SetFlagOnTServers("TEST_tserver_disable_heartbeat", "false")); |
551 | 0 | ASSERT_OK(cluster_->SetFlagOnMasters("blacklist_progress_initial_delay_secs", |
552 | 0 | std::to_string((kHeartbeatIntervalMs * 20)/1000))); |
553 | 0 | LOG(INFO) << "Enabled heartbeats"; |
554 | |
|
555 | 0 | ASSERT_OK(LoggedWaitFor( |
556 | 0 | [&]() -> Result<bool> { |
557 | 0 | req.Clear(); |
558 | 0 | resp.Clear(); |
559 | 0 | rpc.Reset(); |
560 | 0 | RETURN_NOT_OK(proxy.GetLoadMoveCompletion(req, &resp, &rpc)); |
561 | 0 | return resp.percent() >= 100; |
562 | 0 | }, |
563 | 0 | MonoDelta::FromSeconds(300), |
564 | 0 | "Waiting for blacklist load transfer to complete" |
565 | 0 | )); |
566 | 0 | } |
567 | | |
568 | | class MasterFailoverTestWithPlacement : public MasterFailoverTest { |
569 | | public: |
570 | 1 | virtual void SetUp() override { |
571 | 1 | opts_.extra_tserver_flags.push_back("--placement_cloud=c"); |
572 | 1 | opts_.extra_tserver_flags.push_back("--placement_region=r"); |
573 | 1 | opts_.extra_tserver_flags.push_back("--placement_zone=z${index}"); |
574 | 1 | opts_.extra_tserver_flags.push_back("--placement_uuid=" + kLivePlacementUuid); |
575 | 1 | opts_.extra_master_flags.push_back("--enable_register_ts_from_raft=true"); |
576 | 1 | MasterFailoverTest::SetUp(); |
577 | 1 | yb_admin_client_ = std::make_unique<tools::enterprise::ClusterAdminClient>( |
578 | 1 | cluster_->GetMasterAddresses(), MonoDelta::FromSeconds(30)); |
579 | 1 | ASSERT_OK(yb_admin_client_->Init()); |
580 | 1 | ASSERT_OK(yb_admin_client_->ModifyPlacementInfo("c.r.z0,c.r.z1,c.r.z2", 3, kLivePlacementUuid)); |
581 | 1 | } |
582 | | |
583 | 0 | virtual void TearDown() override { |
584 | 0 | yb_admin_client_.reset(); |
585 | 0 | MasterFailoverTest::TearDown(); |
586 | 0 | } |
587 | | |
588 | | void AssertTserverHasPlacementUuid( |
589 | | const string& ts_uuid, const string& placement_uuid, |
590 | 0 | const std::vector<YBTabletServer>& tablet_servers) { |
591 | 0 | auto it = std::find_if(tablet_servers.begin(), tablet_servers.end(), [&](const auto& ts) { |
592 | 0 | return ts.uuid == ts_uuid; |
593 | 0 | }); |
594 | 0 | ASSERT_TRUE(it != tablet_servers.end()); |
595 | 0 | ASSERT_EQ(it->placement_uuid, placement_uuid); |
596 | 0 | } |
597 | | |
598 | | protected: |
599 | | const string kReadReplicaPlacementUuid = "read_replica"; |
600 | | const string kLivePlacementUuid = "live"; |
601 | | std::unique_ptr<tools::enterprise::ClusterAdminClient> yb_admin_client_; |
602 | | }; |
603 | | |
604 | 1 | TEST_F_EX(MasterFailoverTest, TestFailoverWithReadReplicas, MasterFailoverTestWithPlacement) { |
605 | 1 | ASSERT_OK(yb_admin_client_->AddReadReplicaPlacementInfo( |
606 | 1 | "c.r.z0:1", 1, kReadReplicaPlacementUuid)); |
607 | | |
608 | | // Add a new read replica tserver to the cluster with a matching cloud info to a live placement, |
609 | | // to test that we distinguish not just by cloud info but also by peer role. |
610 | 1 | std::vector<std::string> extra_opts; |
611 | 1 | extra_opts.push_back("--placement_cloud=c"); |
612 | 1 | extra_opts.push_back("--placement_region=r"); |
613 | 1 | extra_opts.push_back("--placement_zone=z0"); |
614 | 1 | extra_opts.push_back("--placement_uuid=" + kReadReplicaPlacementUuid); |
615 | 1 | ASSERT_OK(cluster_->AddTabletServer(true, extra_opts)); |
616 | | |
617 | 0 | YBTableName table_name(YQL_DATABASE_CQL, "test", "testFailoverWithReadReplicas"); |
618 | 0 | ASSERT_OK(CreateTable(table_name, kWaitForCreate)); |
619 | | |
620 | | // Shut down the live ts in c.r.z0. |
621 | 0 | auto live_ts_uuid = cluster_->tablet_server(0)->instance_id().permanent_uuid(); |
622 | 0 | cluster_->tablet_server(0)->Shutdown(); |
623 | | |
624 | | // Shut down the read replica (rr) ts in c.r.z0. |
625 | 0 | auto rr_ts_uuid = cluster_->tablet_server(3)->instance_id().permanent_uuid(); |
626 | 0 | cluster_->tablet_server(3)->Shutdown(); |
627 | | |
628 | | // Roll over to a new master. |
629 | 0 | ASSERT_OK(cluster_->ChangeConfig(cluster_->GetLeaderMaster(), consensus::REMOVE_SERVER)); |
630 | | |
631 | | // Wait until the count of all tablet servers (primary_only = false) equals 4. |
632 | 0 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
633 | 0 | int tserver_count; |
634 | 0 | RETURN_NOT_OK(client_->TabletServerCount(&tserver_count, false /* primary_only */)); |
635 | 0 | return tserver_count == 4; |
636 | 0 | }, MonoDelta::FromSeconds(30), "Wait for tablet server count")); |
637 | |
|
638 | 0 | const auto tablet_servers = ASSERT_RESULT(client_->ListTabletServers()); |
639 | |
|
640 | 0 | ASSERT_NO_FATALS(AssertTserverHasPlacementUuid(live_ts_uuid, kLivePlacementUuid, tablet_servers)); |
641 | 0 | ASSERT_NO_FATALS(AssertTserverHasPlacementUuid( |
642 | 0 | rr_ts_uuid, kReadReplicaPlacementUuid, tablet_servers)); |
643 | 0 | cluster_->AssertNoCrashes(); |
644 | 0 | } |
645 | | |
646 | | } // namespace client |
647 | | } // namespace yb |