/Users/deen/code/yugabyte-db/src/yb/integration-tests/master_config-itest.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include <algorithm> |
15 | | #include <functional> |
16 | | #include <memory> |
17 | | #include <string> |
18 | | #include <unordered_map> |
19 | | #include <unordered_set> |
20 | | #include <utility> |
21 | | #include <vector> |
22 | | |
23 | | #include <glog/logging.h> |
24 | | #include <gtest/gtest.h> |
25 | | |
26 | | #include "yb/common/common.pb.h" |
27 | | #include "yb/common/entity_ids_types.h" |
28 | | |
29 | | #include "yb/consensus/consensus.pb.h" |
30 | | #include "yb/consensus/consensus.proxy.h" |
31 | | |
32 | | #include "yb/gutil/algorithm.h" |
33 | | #include "yb/gutil/strings/substitute.h" |
34 | | |
35 | | #include "yb/integration-tests/external_mini_cluster.h" |
36 | | |
37 | | |
38 | | #include "yb/util/result.h" |
39 | | #include "yb/util/status.h" |
40 | | #include "yb/util/test_util.h" |
41 | | #include "yb/util/tsan_util.h" |
42 | | |
43 | | using std::shared_ptr; |
44 | | using std::string; |
45 | | using std::vector; |
46 | | using strings::Substitute; |
47 | | using yb::rpc::Messenger; |
48 | | using yb::rpc::MessengerBuilder; |
49 | | using yb::consensus::ChangeConfigRequestPB; |
50 | | using yb::consensus::ChangeConfigResponsePB; |
51 | | using yb::consensus::ConsensusServiceProxy; |
52 | | using yb::consensus::RaftPeerPB; |
53 | | using yb::tserver::TabletServerErrorPB; |
54 | | |
55 | | using namespace std::chrono_literals; |
56 | | |
57 | | namespace yb { |
58 | | namespace master { |
59 | | |
60 | | // Test master addition and removal config changes via an external mini cluster |
61 | | class MasterChangeConfigTest : public YBTest { |
62 | | public: |
63 | 12 | MasterChangeConfigTest() {} |
64 | | |
65 | 12 | ~MasterChangeConfigTest() {} |
66 | | |
67 | | protected: |
68 | 12 | void SetUp() override { |
69 | 12 | YBTest::SetUp(); |
70 | 12 | ExternalMiniClusterOptions opts; |
71 | 12 | opts.master_rpc_ports = { 0, 0, 0 }; // external mini-cluster Start() gets the free ports. |
72 | 12 | opts.num_masters = num_masters_ = opts.master_rpc_ports.size(); |
73 | 12 | opts.num_tablet_servers = 0; |
74 | 12 | opts.timeout = MonoDelta::FromSeconds(30); |
75 | | // Master failovers should not be happening concurrently with us trying to load an initial sys |
76 | | // catalog snapshot. At least this is not supported as of 05/27/2019. |
77 | 12 | opts.enable_ysql = false; |
78 | 12 | cluster_.reset(new ExternalMiniCluster(opts)); |
79 | 12 | ASSERT_OK(cluster_->Start()); |
80 | | |
81 | 12 | ASSERT_OK(cluster_->WaitForLeaderCommitTermAdvance()); |
82 | | |
83 | 12 | ASSERT_OK(CheckNumMastersWithCluster("Start")); |
84 | 12 | } |
85 | | |
86 | 12 | void TearDown() override { |
87 | 12 | if (cluster_) { |
88 | 12 | cluster_->Shutdown(); |
89 | 12 | cluster_.reset(); |
90 | 12 | } |
91 | | |
92 | 12 | YBTest::TearDown(); |
93 | 12 | } |
94 | | |
95 | 14 | Status CheckNumMastersWithCluster(string msg) { |
96 | 14 | if (num_masters_ != cluster_->num_masters()) { |
97 | 0 | return STATUS(IllegalState, Substitute( |
98 | 0 | "$0 : expected to have $1 masters but our cluster has $2 masters.", |
99 | 0 | msg, num_masters_, cluster_->num_masters())); |
100 | 0 | } |
101 | | |
102 | 14 | return Status::OK(); |
103 | 14 | } |
104 | | |
105 | 1 | Status RestartCluster() { |
106 | 1 | if (!cluster_) { |
107 | 0 | return STATUS(IllegalState, "Cluster was not initialized, cannot restart."); |
108 | 0 | } |
109 | | |
110 | 1 | RETURN_NOT_OK(CheckNumMastersWithCluster("Pre Restart")); |
111 | | |
112 | 1 | cluster_->Shutdown(); |
113 | 1 | RETURN_NOT_OK(cluster_->Restart()); |
114 | | |
115 | 1 | RETURN_NOT_OK(CheckNumMastersWithCluster("Post Restart")); |
116 | | |
117 | 1 | RETURN_NOT_OK(cluster_->WaitForLeaderCommitTermAdvance()); |
118 | | |
119 | 1 | return Status::OK(); |
120 | 1 | } |
121 | | |
122 | | // Ensure that the leader's in-memory state has the expected number of peers. |
123 | | void VerifyLeaderMasterPeerCount(); |
124 | | |
125 | | // Ensure that each non-leader's in-memory state has the expected number of peers. |
126 | | void VerifyNonLeaderMastersPeerCount(); |
127 | | |
128 | | // Waits till the master leader is ready - as deemed by the catalog manager. If the leader never |
129 | | // loads the sys catalog, this api will timeout. If 'master' is not the leader it will surely |
130 | | // timeout. Return status of OK() implies leader is ready. |
131 | | Status WaitForMasterLeaderToBeReady(ExternalMaster* master, int timeout_sec); |
132 | | |
133 | | // API to capture the latest commit index on the master leader. |
134 | | void SetCurLogIndex(); |
135 | | |
136 | | size_t num_masters_; |
137 | | int64_t cur_log_index_; |
138 | | std::unique_ptr<ExternalMiniCluster> cluster_; |
139 | | }; |
140 | | |
141 | 9 | void MasterChangeConfigTest::VerifyLeaderMasterPeerCount() { |
142 | 9 | int num_peers = 0; |
143 | 9 | ExternalMaster *leader_master = cluster_->GetLeaderMaster(); |
144 | 9 | LOG(INFO) << "Checking leader at port " << leader_master->bound_rpc_hostport().port(); |
145 | 9 | Status s = cluster_->GetNumMastersAsSeenBy(leader_master, &num_peers); |
146 | 9 | ASSERT_OK_PREPEND(s, "Leader master number of peers lookup returned error"); |
147 | 9 | EXPECT_EQ(num_peers, num_masters_); |
148 | 9 | } |
149 | | |
150 | 6 | void MasterChangeConfigTest::VerifyNonLeaderMastersPeerCount() { |
151 | 6 | int num_peers = 0; |
152 | 6 | auto leader_index = ASSERT_RESULT(cluster_->GetLeaderMasterIndex()); |
153 | | |
154 | 26 | for (size_t i = 0; i < num_masters_; i++) { |
155 | 20 | if (i == leader_index) { |
156 | 6 | continue; |
157 | 6 | } |
158 | | |
159 | 14 | ExternalMaster *non_leader_master = cluster_->master(i); |
160 | | |
161 | 14 | LOG(INFO) << "Checking non_leader " << i << " at port " |
162 | 14 | << non_leader_master->bound_rpc_hostport().port(); |
163 | 14 | num_peers = 0; |
164 | 14 | Status s; |
165 | 14 | ASSERT_OK_PREPEND( |
166 | 14 | WaitFor( |
167 | 14 | [&] { |
168 | 14 | s = cluster_->GetNumMastersAsSeenBy(non_leader_master, &num_peers); |
169 | 14 | return s.ok(); |
170 | 14 | }, |
171 | 14 | 5s * kTimeMultiplier, "Waiting master is initialized"), |
172 | 14 | Format("Non-leader master number of peers lookup returned error: $0", s)); |
173 | 14 | EXPECT_EQ(num_peers, num_masters_); |
174 | 14 | } |
175 | 6 | } |
176 | | |
177 | | Status MasterChangeConfigTest::WaitForMasterLeaderToBeReady( |
178 | | ExternalMaster* master, |
179 | 1 | int timeout_sec) { |
180 | 1 | MonoTime now = MonoTime::Now(); |
181 | 1 | MonoTime deadline = now; |
182 | 1 | deadline.AddDelta(MonoDelta::FromSeconds(timeout_sec)); |
183 | 1 | Status s; |
184 | | |
185 | 1 | for (int i = 1; now.ComesBefore(deadline); ++i) { |
186 | 1 | s = cluster_->GetIsMasterLeaderServiceReady(master); |
187 | 1 | if (!s.ok()) { |
188 | | // Spew out error info only if it is something other than not-the-leader. |
189 | 0 | if (s.ToString().find("NOT_THE_LEADER") == std::string::npos) { |
190 | 0 | LOG(WARNING) << "Hit error '" << s.ToString() << "', in iter " << i; |
191 | 0 | } |
192 | 1 | } else { |
193 | 1 | LOG(INFO) << "Got leader ready in iter " << i; |
194 | 1 | return Status::OK(); |
195 | 1 | } |
196 | 0 | SleepFor(MonoDelta::FromMilliseconds(min(i, 10))); |
197 | 0 | now = MonoTime::Now(); |
198 | 0 | } |
199 | | |
200 | 0 | return STATUS(TimedOut, Substitute("Timed out as master leader $0 term not ready.", |
201 | 1 | master->bound_rpc_hostport().ToString())); |
202 | 1 | } |
203 | | |
204 | 11 | void MasterChangeConfigTest::SetCurLogIndex() { |
205 | 11 | OpIdPB op_id; |
206 | 11 | ASSERT_OK(cluster_->GetLastOpIdForLeader(&op_id)); |
207 | 11 | cur_log_index_ = op_id.index(); |
208 | 11 | LOG(INFO) << "cur_log_index_ " << cur_log_index_; |
209 | 11 | } |
210 | | |
211 | 1 | TEST_F(MasterChangeConfigTest, TestAddMaster) { |
212 | | // NOTE: Not using smart pointer as ExternalMaster is derived from a RefCounted base class. |
213 | 1 | ExternalMaster* new_master = nullptr; |
214 | 1 | cluster_->StartShellMaster(&new_master); |
215 | | |
216 | 1 | SetCurLogIndex(); |
217 | | |
218 | 1 | Status s = cluster_->ChangeConfig(new_master, consensus::ADD_SERVER); |
219 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error : "); |
220 | | |
221 | | // Adding a server will generate two ChangeConfig calls. One to add a server as a learner, and one |
222 | | // to promote this server to a voter once bootstrapping is finished. |
223 | 1 | cur_log_index_ += 2; |
224 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(cur_log_index_)); |
225 | 1 | ++num_masters_; |
226 | | |
227 | 1 | VerifyLeaderMasterPeerCount(); |
228 | 1 | VerifyNonLeaderMastersPeerCount(); |
229 | 1 | } |
230 | | |
231 | 1 | TEST_F(MasterChangeConfigTest, TestSlowRemoteBootstrapDoesNotCrashMaster) { |
232 | 1 | ExternalMaster* new_master = nullptr; |
233 | 1 | cluster_->StartShellMaster(&new_master); |
234 | 1 | ASSERT_OK(cluster_->SetFlag(new_master, "TEST_inject_latency_during_remote_bootstrap_secs", "8")); |
235 | | |
236 | 1 | SetCurLogIndex(); |
237 | | |
238 | 1 | Status s = cluster_->ChangeConfig(new_master, consensus::ADD_SERVER); |
239 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error : "); |
240 | | |
241 | | // Adding a server will generate two ChangeConfig calls. One to add a server as a learner, and one |
242 | | // to promote this server to a voter once bootstrapping is finished. |
243 | 1 | cur_log_index_ += 2; |
244 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(cur_log_index_)); |
245 | 1 | ++num_masters_; |
246 | | |
247 | 1 | VerifyLeaderMasterPeerCount(); |
248 | 1 | VerifyNonLeaderMastersPeerCount(); |
249 | 1 | } |
250 | | |
251 | 1 | TEST_F(MasterChangeConfigTest, TestRemoveMaster) { |
252 | 1 | auto non_leader_index = ASSERT_RESULT(cluster_->GetFirstNonLeaderMasterIndex()); |
253 | 1 | ExternalMaster* remove_master = cluster_->master(non_leader_index); |
254 | | |
255 | 1 | LOG(INFO) << "Going to remove master at port " << remove_master->bound_rpc_hostport().port(); |
256 | | |
257 | 1 | SetCurLogIndex(); |
258 | | |
259 | 1 | auto s = cluster_->ChangeConfig(remove_master, consensus::REMOVE_SERVER); |
260 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error"); |
261 | | |
262 | | // REMOVE_SERVER causes the op index to increase by one. |
263 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(++cur_log_index_)); |
264 | | |
265 | 1 | --num_masters_; |
266 | | |
267 | 1 | VerifyLeaderMasterPeerCount(); |
268 | 1 | VerifyNonLeaderMastersPeerCount(); |
269 | 1 | } |
270 | | |
271 | 1 | TEST_F(MasterChangeConfigTest, TestRemoveDeadMaster) { |
272 | 1 | auto non_leader_index = ASSERT_RESULT(cluster_->GetFirstNonLeaderMasterIndex()); |
273 | 1 | ExternalMaster* remove_master = cluster_->master(non_leader_index); |
274 | 1 | remove_master->Shutdown(); |
275 | 1 | LOG(INFO) << "Stopped and removing master at " << remove_master->bound_rpc_hostport().port(); |
276 | | |
277 | 1 | SetCurLogIndex(); |
278 | | |
279 | 1 | auto s = cluster_->ChangeConfig(remove_master, consensus::REMOVE_SERVER, |
280 | 1 | consensus::PeerMemberType::PRE_VOTER, true /* use_hostport */); |
281 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error"); |
282 | | |
283 | | // REMOVE_SERVER causes the op index to increase by one. |
284 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(++cur_log_index_)); |
285 | | |
286 | 1 | --num_masters_; |
287 | | |
288 | 1 | VerifyLeaderMasterPeerCount(); |
289 | 1 | VerifyNonLeaderMastersPeerCount(); |
290 | 1 | } |
291 | | |
292 | 1 | TEST_F(MasterChangeConfigTest, TestRestartAfterConfigChange) { |
293 | 1 | ExternalMaster* new_master = nullptr; |
294 | 1 | cluster_->StartShellMaster(&new_master); |
295 | | |
296 | 1 | SetCurLogIndex(); |
297 | | |
298 | 1 | Status s = cluster_->ChangeConfig(new_master, consensus::ADD_SERVER); |
299 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error"); |
300 | | |
301 | 1 | ++num_masters_; |
302 | | |
303 | | // Adding a server will generate two ChangeConfig calls. One to add a server as a learner, and one |
304 | | // to promote this server to a voter once bootstrapping is finished. |
305 | 1 | cur_log_index_ += 2; |
306 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(cur_log_index_)); |
307 | | |
308 | 1 | VerifyLeaderMasterPeerCount(); |
309 | 1 | VerifyNonLeaderMastersPeerCount(); |
310 | | |
311 | | // Give time for cmeta to get flushed on all peers - TODO(Bharat) ENG-104 |
312 | 1 | SleepFor(MonoDelta::FromSeconds(5)); |
313 | | |
314 | 1 | s = RestartCluster(); |
315 | 1 | ASSERT_OK_PREPEND(s, "Restart Cluster failed"); |
316 | | |
317 | 1 | VerifyLeaderMasterPeerCount(); |
318 | 1 | VerifyNonLeaderMastersPeerCount(); |
319 | 1 | } |
320 | | |
321 | 1 | TEST_F(MasterChangeConfigTest, TestNewLeaderWithPendingConfigLoadsSysCatalog) { |
322 | 1 | ExternalMaster* new_master = nullptr; |
323 | 1 | cluster_->StartShellMaster(&new_master); |
324 | | |
325 | 1 | LOG(INFO) << "New master " << new_master->bound_rpc_hostport().ToString(); |
326 | | |
327 | 1 | SetCurLogIndex(); |
328 | | |
329 | | // This will disable new elections on the old masters. |
330 | 1 | vector<ExternalMaster*> masters = cluster_->master_daemons(); |
331 | 3 | for (auto master : masters) { |
332 | 3 | ASSERT_OK(cluster_->SetFlag(master, "TEST_do_not_start_election_test_only", "true")); |
333 | | // Do not let the followers commit change role - to keep their opid same as the new master, |
334 | | // and hence will vote for it. |
335 | 3 | ASSERT_OK(cluster_->SetFlag(master, "inject_delay_commit_pre_voter_to_voter_secs", "5")); |
336 | 3 | } |
337 | | |
338 | | // Wait for 5 seconds on new master to commit voter mode transition. Note that this should be |
339 | | // less than the timeout sent to WaitForMasterLeaderToBeReady() below. We want the pending |
340 | | // config to be preset when the new master is deemed as leader to start the sys catalog load, but |
341 | | // would need to get that pending config committed for load to progress. |
342 | 1 | ASSERT_OK(cluster_->SetFlag(new_master, "inject_delay_commit_pre_voter_to_voter_secs", "5")); |
343 | | // And don't let it start an election too soon. |
344 | 1 | ASSERT_OK(cluster_->SetFlag(new_master, "TEST_do_not_start_election_test_only", "true")); |
345 | | |
346 | 1 | Status s = cluster_->ChangeConfig(new_master, consensus::ADD_SERVER); |
347 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error"); |
348 | | |
349 | | // Wait for addition of the new master as a PRE_VOTER to commit on all peers. The CHANGE_ROLE |
350 | | // part is not committed on all the followers, as that might block the new master from becoming |
351 | | // the leader as others would have a opid higher than the new master and will not vote for it. |
352 | | // The new master will become FOLLOWER and can start an election once it has a pending change |
353 | | // that makes it a VOTER. |
354 | 1 | cur_log_index_ += 1; |
355 | 1 | ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(cur_log_index_)); |
356 | 1 | TabletServerErrorPB::Code dummy_err = TabletServerErrorPB::UNKNOWN_ERROR; |
357 | | // Leader step down. |
358 | 1 | s = cluster_->StepDownMasterLeader(&dummy_err); |
359 | | |
360 | | // Now the new master should start the election process. |
361 | 1 | ASSERT_OK(cluster_->SetFlag(new_master, "TEST_do_not_start_election_test_only", "false")); |
362 | | |
363 | | // Leader stepdown might not succeed as PRE_VOTER could still be uncommitted. Let it go through |
364 | | // as new master should get the other votes anyway once it starts the election. |
365 | 1 | if (!s.IsIllegalState()) { |
366 | 1 | ASSERT_OK_PREPEND(s, "Leader step down failed."); |
367 | 0 | } else { |
368 | 0 | LOG(INFO) << "Triggering election as step down failed."; |
369 | 0 | ASSERT_OK_PREPEND(cluster_->StartElection(new_master), "Start Election failed"); |
370 | 0 | SleepFor(MonoDelta::FromSeconds(2)); |
371 | 0 | } |
372 | | |
373 | | // Ensure that the new leader is the new master we spun up above. |
374 | 1 | ExternalMaster* new_leader = cluster_->GetLeaderMaster(); |
375 | 1 | LOG(INFO) << "New leader " << new_leader->bound_rpc_hostport().ToString(); |
376 | 1 | ASSERT_EQ(new_master->bound_rpc_addr().port(), new_leader->bound_rpc_addr().port()); |
377 | | |
378 | | // This check ensures that the sys catalog is loaded into new leader even when it has a |
379 | | // pending config change. |
380 | 1 | ASSERT_OK(WaitForMasterLeaderToBeReady(new_master, 8 /* timeout_sec */)); |
381 | 1 | } |
382 | | |
383 | 1 | TEST_F(MasterChangeConfigTest, TestChangeAllMasters) { |
384 | 1 | ExternalMaster* new_masters[3] = { nullptr, nullptr, nullptr }; |
385 | 1 | ExternalMaster* remove_master = nullptr; |
386 | | |
387 | | // Create all new masters before to avoid rpc port reuse. |
388 | 4 | for (int idx = 0; idx <= 2; idx++) { |
389 | 3 | cluster_->StartShellMaster(&new_masters[idx]); |
390 | 3 | } |
391 | | |
392 | 1 | SetCurLogIndex(); |
393 | | |
394 | 4 | for (int idx = 0; idx <= 2; idx++) { |
395 | 3 | LOG(INFO) << "LOOP " << idx << " start."; |
396 | 3 | LOG(INFO) << "ADD " << new_masters[idx]->bound_rpc_hostport().ToString(); |
397 | 3 | ASSERT_OK_PREPEND(cluster_->ChangeConfig(new_masters[idx], consensus::ADD_SERVER), |
398 | 3 | "Add Change Config returned error"); |
399 | 3 | ++num_masters_; |
400 | 3 | remove_master = cluster_->master(0); |
401 | 3 | LOG(INFO) << "REMOVE " << remove_master->bound_rpc_hostport().ToString(); |
402 | 3 | ASSERT_OK_PREPEND(cluster_->ChangeConfig(remove_master, consensus::REMOVE_SERVER), |
403 | 3 | "Remove Change Config returned error"); |
404 | 3 | --num_masters_; |
405 | 3 | LOG(INFO) << "LOOP " << idx << " end."; |
406 | 3 | } |
407 | | |
408 | | // Followers might not be up to speed as we did not wait, so just check leader. |
409 | 1 | VerifyLeaderMasterPeerCount(); |
410 | 1 | } |
411 | | |
412 | 1 | TEST_F(MasterChangeConfigTest, TestAddPreObserverMaster) { |
413 | 1 | ExternalMaster* new_master = nullptr; |
414 | 1 | cluster_->StartShellMaster(&new_master); |
415 | | |
416 | 1 | SetCurLogIndex(); |
417 | 1 | ASSERT_OK_PREPEND(cluster_->ChangeConfig(new_master, consensus::ADD_SERVER, |
418 | 1 | consensus::PeerMemberType::PRE_OBSERVER), |
419 | 1 | "Add Change Config returned error"); |
420 | 1 | ++num_masters_; |
421 | | |
422 | | // Followers might not be up to speed as we did not wait, so just check leader. |
423 | 1 | VerifyLeaderMasterPeerCount(); |
424 | 1 | } |
425 | | |
426 | 1 | TEST_F(MasterChangeConfigTest, TestWaitForChangeRoleCompletion) { |
427 | 1 | ExternalMaster* new_master = nullptr; |
428 | 1 | cluster_->StartShellMaster(&new_master); |
429 | 1 | ExternalMaster* leader = cluster_->GetLeaderMaster(); |
430 | | |
431 | | // Ensure leader does not change. |
432 | 4 | for (int idx = 0; idx <= 2; idx++) { |
433 | 3 | ExternalMaster* master = cluster_->master(idx); |
434 | 3 | if (master->bound_rpc_hostport().port() != leader->bound_rpc_hostport().port()) { |
435 | 2 | ASSERT_OK(cluster_->SetFlag(master, "TEST_do_not_start_election_test_only", "false")); |
436 | 2 | } |
437 | 3 | } |
438 | | |
439 | 1 | ASSERT_OK(cluster_->SetFlag(leader, |
440 | 1 | "TEST_inject_delay_leader_change_role_append_secs", "8")); |
441 | 1 | SetCurLogIndex(); |
442 | 1 | ASSERT_OK_PREPEND(cluster_->ChangeConfig(new_master, consensus::ADD_SERVER), |
443 | 1 | "Add Change Config returned error"); |
444 | | |
445 | | // Wait a bit for PRE_VOTER to be committed. This should be less than the value of 8 seconds |
446 | | // set in the injected delay above. |
447 | 1 | SleepFor(MonoDelta::FromSeconds(1)); |
448 | | |
449 | 1 | LOG(INFO) << "Remove Leader " << leader->bound_rpc_hostport().ToString(); |
450 | 1 | ASSERT_OK_PREPEND(cluster_->ChangeConfig(leader, consensus::REMOVE_SERVER), |
451 | 1 | "Remove Change Config returned error"); |
452 | | |
453 | 1 | VerifyLeaderMasterPeerCount(); |
454 | 1 | } |
455 | | |
456 | 1 | TEST_F(MasterChangeConfigTest, TestLeaderSteppedDownNotElected) { |
457 | 1 | SetCurLogIndex(); |
458 | 1 | ExternalMaster* old_leader = cluster_->GetLeaderMaster(); |
459 | | // Give the other peers few iterations to converge. |
460 | 1 | ASSERT_OK(cluster_->SetFlag(old_leader, "leader_failure_max_missed_heartbeat_periods", "24")); |
461 | 1 | LOG(INFO) << "Current leader bound to " << old_leader->bound_rpc_hostport().ToString(); |
462 | 1 | TabletServerErrorPB::Code dummy_err = TabletServerErrorPB::UNKNOWN_ERROR; |
463 | 1 | ASSERT_OK_PREPEND(cluster_->StepDownMasterLeader(&dummy_err), |
464 | 1 | "Leader step down failed."); |
465 | | // Ensure that the new leader is not the old leader. |
466 | 1 | ExternalMaster* new_leader = cluster_->GetLeaderMaster(); |
467 | 1 | LOG(INFO) << "New leader bound to " << new_leader->bound_rpc_hostport().ToString(); |
468 | 1 | ASSERT_NE(old_leader->bound_rpc_addr().port(), new_leader->bound_rpc_addr().port()); |
469 | 1 | } |
470 | | |
471 | 1 | TEST_F(MasterChangeConfigTest, TestMulitpleLeaderRestarts) { |
472 | 1 | ExternalMaster* first_leader = cluster_->GetLeaderMaster(); |
473 | 1 | first_leader->Shutdown(); |
474 | | // Ensure that the new leader is not the old leader. |
475 | 1 | ExternalMaster* second_leader = cluster_->GetLeaderMaster(); |
476 | 1 | ASSERT_NE(second_leader->bound_rpc_addr().port(), first_leader->bound_rpc_addr().port()); |
477 | | // Revive the first leader. |
478 | 1 | ASSERT_OK(first_leader->Restart()); |
479 | 1 | ExternalMaster* check_leader = cluster_->GetLeaderMaster(); |
480 | | // Leader should not be first leader. |
481 | 1 | ASSERT_NE(check_leader->bound_rpc_addr().port(), first_leader->bound_rpc_addr().port()); |
482 | 1 | second_leader->Shutdown(); |
483 | 1 | check_leader = cluster_->GetLeaderMaster(); |
484 | | // Leader should not be second one, it can be any one of the other masters. |
485 | 1 | ASSERT_NE(second_leader->bound_rpc_addr().port(), check_leader->bound_rpc_addr().port()); |
486 | 1 | } |
487 | | |
488 | 1 | TEST_F(MasterChangeConfigTest, TestPingShellMaster) { |
489 | 1 | string peers = ""; |
490 | | // Create a shell master as `peers` is empty (for master_addresses). |
491 | 1 | Result<ExternalMaster *> new_shell_master = cluster_->StartMasterWithPeers(peers); |
492 | 1 | ASSERT_OK(new_shell_master); |
493 | | // Add the new shell master to the quorum and ensure it is still running and pingable. |
494 | 1 | SetCurLogIndex(); |
495 | 1 | Status s = cluster_->ChangeConfig(*new_shell_master, consensus::ADD_SERVER); |
496 | 1 | LOG(INFO) << "Started shell " << (*new_shell_master)->bound_rpc_hostport().ToString(); |
497 | 1 | ASSERT_OK_PREPEND(s, "Change Config returned error : "); |
498 | 1 | ++num_masters_; |
499 | 1 | ASSERT_OK(cluster_->PingMaster(*new_shell_master)); |
500 | 1 | } |
501 | | |
502 | | // Process that stops/fails internal to external mini cluster is not allowing test to terminate. |
503 | 0 | TEST_F(MasterChangeConfigTest, DISABLED_TestIncorrectMasterStart) { |
504 | 0 | string peers = cluster_->GetMasterAddresses(); |
505 | | // Master process start with master_addresses not containing a new master host/port should fail |
506 | | // and become un-pingable. |
507 | 0 | Result<ExternalMaster *> new_master = cluster_->StartMasterWithPeers(peers); |
508 | 0 | ASSERT_OK(new_master); |
509 | 0 | LOG(INFO) << "Tried incorrect master " << (*new_master)->bound_rpc_hostport().ToString(); |
510 | 0 | ASSERT_NOK(cluster_->PingMaster(*new_master)); |
511 | 0 | (*new_master)->Shutdown(); |
512 | 0 | } |
513 | | |
514 | | } // namespace master |
515 | | } // namespace yb |