/Users/deen/code/yugabyte-db/src/yb/integration-tests/client_failover-itest.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include <memory> |
34 | | #include <set> |
35 | | #include <unordered_map> |
36 | | |
37 | | #include <boost/optional.hpp> |
38 | | |
39 | | #include "yb/client/client-test-util.h" |
40 | | #include "yb/client/table_handle.h" |
41 | | |
42 | | #include "yb/common/wire_protocol.h" |
43 | | |
44 | | #include "yb/gutil/map-util.h" |
45 | | |
46 | | #include "yb/integration-tests/external_mini_cluster-itest-base.h" |
47 | | #include "yb/integration-tests/test_workload.h" |
48 | | |
49 | | |
50 | | using yb::client::CountTableRows; |
51 | | using yb::client::YBTable; |
52 | | using yb::client::YBTableName; |
53 | | using std::shared_ptr; |
54 | | using yb::itest::TServerDetails; |
55 | | using yb::tablet::TABLET_DATA_TOMBSTONED; |
56 | | using std::set; |
57 | | using std::string; |
58 | | using std::vector; |
59 | | using std::unordered_map; |
60 | | |
61 | | namespace yb { |
62 | | |
63 | | namespace { |
64 | | const int kNumberOfRetries = 5; |
65 | | } |
66 | | |
67 | | // Integration test for client failover behavior. |
68 | | class ClientFailoverITest : public ExternalMiniClusterITestBase { |
69 | | }; |
70 | | |
71 | | // Test that we can delete the leader replica while scanning it and still get |
72 | | // results back. |
73 | 1 | TEST_F(ClientFailoverITest, TestDeleteLeaderWhileScanning) { |
74 | 1 | const MonoDelta kTimeout = MonoDelta::FromSeconds(30); |
75 | | |
76 | 1 | vector<string> ts_flags = { "--TEST_enable_remote_bootstrap=false" }; |
77 | 1 | vector<string> master_flags = {"--catalog_manager_wait_for_new_tablets_to_elect_leader=false"}; |
78 | | |
79 | | // Start up with 4 tablet servers. |
80 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, 4)); |
81 | | |
82 | | // Create the test table. |
83 | 1 | TestWorkload workload(cluster_.get()); |
84 | 1 | workload.set_write_timeout_millis(kTimeout.ToMilliseconds()); |
85 | 1 | workload.Setup(); |
86 | | |
87 | | // Figure out the tablet id. |
88 | 1 | ASSERT_OK(inspect_->WaitForReplicaCount(3)); |
89 | 0 | vector<string> tablets = inspect_->ListTablets(); |
90 | 0 | ASSERT_EQ(1, tablets.size()); |
91 | 0 | const string& tablet_id = tablets[0]; |
92 | | |
93 | | // Record the locations of the tablet replicas and the one TS that doesn't have a replica. |
94 | 0 | ssize_t missing_replica_index = -1; |
95 | 0 | std::set<ssize_t> replica_indexes; |
96 | 0 | unordered_map<string, itest::TServerDetails*> active_ts_map; |
97 | 0 | for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) { |
98 | 0 | if (inspect_->ListTabletsOnTS(i).empty()) { |
99 | 0 | missing_replica_index = i; |
100 | 0 | } else { |
101 | 0 | replica_indexes.insert(i); |
102 | 0 | TServerDetails* ts = ts_map_[cluster_->tablet_server(i)->uuid()].get(); |
103 | 0 | active_ts_map[ts->uuid()] = ts; |
104 | 0 | ASSERT_OK(WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(), |
105 | 0 | tablet_id, |
106 | 0 | kTimeout)); |
107 | 0 | } |
108 | 0 | } |
109 | 0 | auto leader_index = *replica_indexes.begin(); |
110 | 0 | TServerDetails* leader = ts_map_[cluster_->tablet_server(leader_index)->uuid()].get(); |
111 | 0 | for (auto retries_left = kNumberOfRetries; ;) { |
112 | 0 | TServerDetails *current_leader = nullptr; |
113 | 0 | ASSERT_OK(itest::FindTabletLeader(active_ts_map, tablet_id, kTimeout, &current_leader)); |
114 | 0 | if (current_leader->uuid() == leader->uuid()) { |
115 | 0 | break; |
116 | 0 | } else if (retries_left <= 0) { |
117 | 0 | FAIL() << "Failed to elect first server as leader"; |
118 | 0 | } |
119 | 0 | ASSERT_OK(itest::StartElection(leader, tablet_id, kTimeout)); |
120 | 0 | --retries_left; |
121 | 0 | SleepFor(MonoDelta::FromMilliseconds(150 * (kNumberOfRetries - retries_left))); |
122 | 0 | } |
123 | |
|
124 | 0 | int64_t expected_index = 0; |
125 | 0 | ASSERT_OK(WaitForServersToAgree(kTimeout, active_ts_map, tablet_id, 0, &expected_index)); |
126 | | |
127 | | // Write data to a tablet. |
128 | 0 | workload.Start(); |
129 | 0 | while (workload.rows_inserted() < 100) { |
130 | 0 | SleepFor(MonoDelta::FromMilliseconds(10)); |
131 | 0 | } |
132 | 0 | workload.StopAndJoin(); |
133 | |
|
134 | 0 | expected_index += workload.batches_completed(); |
135 | | |
136 | | // We don't want the leader that takes over after we kill the first leader to |
137 | | // be unsure whether the writes have been committed, so wait until all |
138 | | // replicas have all of the writes. |
139 | 0 | ASSERT_OK(WaitForServersToAgree(kTimeout, |
140 | 0 | active_ts_map, |
141 | 0 | tablet_id, |
142 | 0 | expected_index, |
143 | 0 | &expected_index)); |
144 | | |
145 | | // Open the scanner and count the rows. |
146 | 0 | client::TableHandle table; |
147 | 0 | ASSERT_OK(table.Open(TestWorkloadOptions::kDefaultTableName, client_.get())); |
148 | 0 | ASSERT_EQ(workload.rows_inserted(), CountTableRows(table)); |
149 | 0 | LOG(INFO) << "Number of rows: " << workload.rows_inserted() |
150 | 0 | << ", batches: " << workload.batches_completed() |
151 | 0 | << ", expected index: " << expected_index; |
152 | | |
153 | | // Delete the leader replica. This will cause the next scan to the same |
154 | | // leader to get a TABLET_NOT_FOUND error. |
155 | 0 | ASSERT_OK(itest::DeleteTablet(leader, tablet_id, TABLET_DATA_TOMBSTONED, |
156 | 0 | boost::none, kTimeout)); |
157 | |
|
158 | 0 | ssize_t old_leader_index = leader_index; |
159 | | // old_leader - node that was leader before we started to elect a new leader |
160 | 0 | TServerDetails* old_leader = leader; |
161 | |
|
162 | 0 | ASSERT_EQ(1, replica_indexes.erase(old_leader_index)); |
163 | 0 | ASSERT_EQ(1, active_ts_map.erase(old_leader->uuid())); |
164 | |
|
165 | 0 | for (auto retries_left = kNumberOfRetries; ; --retries_left) { |
166 | | // current_leader - node that currently leader |
167 | 0 | TServerDetails *current_leader = nullptr; |
168 | 0 | ASSERT_OK(itest::FindTabletLeader(active_ts_map, tablet_id, kTimeout, &current_leader)); |
169 | 0 | if (current_leader->uuid() != old_leader->uuid()) { |
170 | 0 | leader = current_leader; |
171 | | // Do a config change to remove the old replica and add a new one. |
172 | | // Cause the new replica to become leader, then do the scan again. |
173 | | // Since old_leader is not changed in loop we would not remove more than one node. |
174 | 0 | auto result = RemoveServer(leader, tablet_id, old_leader, boost::none, kTimeout, NULL, |
175 | 0 | false /* retry */); |
176 | 0 | if (result.ok()) { |
177 | 0 | break; |
178 | 0 | } else if (retries_left <= 0) { |
179 | 0 | FAIL() << "RemoveServer failed and out of retries: " << result.ToString(); |
180 | 0 | } else { |
181 | 0 | LOG(WARNING) << "RemoveServer failed: " << result.ToString() << ", after " |
182 | 0 | << kNumberOfRetries << " retries"; |
183 | 0 | } |
184 | 0 | SleepFor(MonoDelta::FromMilliseconds(100)); |
185 | 0 | } else if (retries_left <= 0) { |
186 | 0 | FAIL() << "Failed to elect new leader instead of " << old_leader->uuid(); |
187 | 0 | } |
188 | 0 | TServerDetails* desired_leader = nullptr; |
189 | 0 | for (auto index : replica_indexes) { |
190 | 0 | auto new_leader_uuid = cluster_->tablet_server(index)->uuid(); |
191 | 0 | if (new_leader_uuid != old_leader->uuid()) { |
192 | 0 | desired_leader = ts_map_[new_leader_uuid].get(); |
193 | 0 | break; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | ASSERT_NE(desired_leader, nullptr); |
197 | 0 | ASSERT_OK(itest::StartElection(desired_leader, tablet_id, kTimeout)); |
198 | 0 | ASSERT_OK(WaitUntilCommittedOpIdIndexIsGreaterThan(&expected_index, |
199 | 0 | desired_leader, |
200 | 0 | tablet_id, |
201 | 0 | kTimeout)); |
202 | 0 | } |
203 | | |
204 | | // Wait until the config is committed, otherwise AddServer() will fail. |
205 | 0 | ASSERT_OK(WaitUntilCommittedOpIdIndexIsGreaterThan(&expected_index, |
206 | 0 | leader, |
207 | 0 | tablet_id, |
208 | 0 | kTimeout, |
209 | 0 | itest::CommittedEntryType::CONFIG)); |
210 | |
|
211 | 0 | TServerDetails* to_add = ts_map_[cluster_->tablet_server(missing_replica_index)->uuid()].get(); |
212 | 0 | ASSERT_OK(AddServer(leader, tablet_id, to_add, consensus::PeerMemberType::PRE_VOTER, |
213 | 0 | boost::none, kTimeout)); |
214 | 0 | HostPort hp = HostPortFromPB(leader->registration->common().private_rpc_addresses(0)); |
215 | 0 | ASSERT_OK(StartRemoteBootstrap(to_add, tablet_id, leader->uuid(), hp, 1, kTimeout)); |
216 | |
|
217 | 0 | const string& new_ts_uuid = cluster_->tablet_server(missing_replica_index)->uuid(); |
218 | 0 | InsertOrDie(&replica_indexes, missing_replica_index); |
219 | 0 | InsertOrDie(&active_ts_map, new_ts_uuid, ts_map_[new_ts_uuid].get()); |
220 | | |
221 | | // Wait for remote bootstrap to complete. Then elect the new node. |
222 | 0 | ASSERT_OK(WaitForServersToAgree(kTimeout, |
223 | 0 | active_ts_map, |
224 | 0 | tablet_id, |
225 | 0 | ++expected_index, |
226 | 0 | &expected_index)); |
227 | 0 | leader_index = missing_replica_index; |
228 | 0 | leader = ts_map_[cluster_->tablet_server(leader_index)->uuid()].get(); |
229 | 0 | ASSERT_OK(itest::StartElection(leader, tablet_id, kTimeout)); |
230 | | // Wait for all tservers to agree and have all the updates including the config change. |
231 | 0 | ASSERT_OK(WaitForServersToAgree(kTimeout, |
232 | 0 | active_ts_map, |
233 | 0 | tablet_id, |
234 | 0 | ++expected_index, |
235 | 0 | &expected_index)); |
236 | |
|
237 | 0 | ASSERT_EQ(workload.rows_inserted(), CountTableRows(table)); |
238 | | |
239 | | // Rotate leaders among the replicas and verify the new leader is the designated one each time. |
240 | 0 | for (const auto& ts_map : active_ts_map) { |
241 | 0 | for (auto retries_left = kNumberOfRetries; ; --retries_left) { |
242 | 0 | TServerDetails* current_leader = nullptr; |
243 | 0 | TServerDetails* new_leader = ts_map.second; |
244 | 0 | ASSERT_OK(itest::FindTabletLeader(active_ts_map, tablet_id, kTimeout, &current_leader)); |
245 | 0 | ASSERT_OK(itest::LeaderStepDown(current_leader, tablet_id, new_leader, kTimeout)); |
246 | | // Wait for all tservers to agree and have all the updates including the config change. |
247 | 0 | ASSERT_OK(WaitForServersToAgree(kTimeout, |
248 | 0 | active_ts_map, |
249 | 0 | tablet_id, |
250 | 0 | ++expected_index, |
251 | 0 | &expected_index)); |
252 | 0 | current_leader = new_leader; |
253 | 0 | ASSERT_OK(itest::FindTabletLeader(active_ts_map, tablet_id, kTimeout, &new_leader)); |
254 | 0 | if (current_leader->uuid() == new_leader->uuid()) { |
255 | 0 | break; |
256 | 0 | } else if (retries_left <= 0) { |
257 | 0 | FAIL() << "Failed to elect new leader instead of " << old_leader->uuid() |
258 | 0 | << ", after " << kNumberOfRetries << " retries"; |
259 | 0 | } |
260 | 0 | } |
261 | 0 | } |
262 | |
|
263 | 0 | } |
264 | | |
265 | | } // namespace yb |