YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/integration-tests/tablet_replacement-itest.cc
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include <functional>
34
#include <memory>
35
#include <string>
36
#include <unordered_map>
37
38
#include <boost/optional.hpp>
39
#include <gtest/gtest.h>
40
41
#include "yb/common/wire_protocol-test-util.h"
42
#include "yb/common/wire_protocol.h"
43
44
#include "yb/gutil/strings/substitute.h"
45
46
#include "yb/integration-tests/cluster_verifier.h"
47
#include "yb/integration-tests/external_mini_cluster-itest-base.h"
48
#include "yb/integration-tests/test_workload.h"
49
50
#include "yb/rpc/rpc_controller.h"
51
52
#include "yb/tserver/tserver_service.proxy.h"
53
54
#include "yb/util/countdown_latch.h"
55
56
using yb::consensus::RaftPeerPB;
57
using yb::consensus::PeerMemberType;
58
using yb::itest::TServerDetails;
59
using yb::tablet::TABLET_DATA_READY;
60
using yb::tablet::TABLET_DATA_TOMBSTONED;
61
using yb::tserver::ListTabletsResponsePB;
62
using std::shared_ptr;
63
using std::string;
64
using std::unordered_map;
65
using std::vector;
66
using strings::Substitute;
67
68
namespace yb {
69
70
// Fixture shared by the tablet replica eviction/replacement tests in this file.
// It declares no members of its own; everything comes from ExternalMiniClusterITestBase.
class TabletReplacementITest : public ExternalMiniClusterITestBase {};
72
73
// Test that the Master will tombstone a newly-evicted replica.
74
// Then, test that the Master will NOT tombstone a newly-added replica that is
75
// not part of the committed config yet (only the pending config).
76
1
TEST_F(TabletReplacementITest, TestMasterTombstoneEvictedReplica) {
77
1
  MonoDelta timeout = MonoDelta::FromSeconds(30);
78
1
  vector<string> ts_flags = { "--enable_leader_failure_detection=false" };
79
1
  int num_tservers = 5;
80
1
  vector<string> master_flags = {
81
1
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
82
1
    "--replication_factor=5",
83
1
    "--use_create_table_leader_hint=false"s,
84
1
  };
85
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, num_tservers));
86
87
1
  TestWorkload workload(cluster_.get());
88
1
  workload.Setup(); // Easy way to create a new tablet.
89
90
1
  const int kLeaderIndex = 0;
91
1
  TServerDetails* leader_ts = ts_map_[cluster_->tablet_server(kLeaderIndex)->uuid()].get();
92
1
  const int kFollowerIndex = 4;
93
1
  TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()].get();
94
95
  // Figure out the tablet id of the created tablet.
96
1
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
97
1
  ASSERT_OK(itest::WaitForNumTabletsOnTS(leader_ts, 1, timeout, &tablets));
98
0
  string tablet_id = tablets[0].tablet_status().tablet_id();
99
100
  // Wait until all replicas are up and running.
101
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
102
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
103
0
                                            tablet_id, timeout));
104
0
  }
105
106
  // Elect a leader (TS 0)
107
0
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
108
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, ts_map_, tablet_id, 1)); // Wait for NO_OP.
109
0
  ASSERT_OK(itest::WaitUntilCommittedOpIdIndexIs(1, leader_ts, tablet_id, timeout));
110
111
  // Remove a follower from the config.
112
0
  ASSERT_OK(itest::RemoveServer(leader_ts, tablet_id, follower_ts, boost::none, timeout));
113
114
  // Wait for the Master to tombstone the replica.
115
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kFollowerIndex, tablet_id, TABLET_DATA_TOMBSTONED,
116
0
                                                 timeout));
117
118
0
  if (!AllowSlowTests()) {
119
    // The rest of this test has multi-second waits, so we do it in slow test mode.
120
0
    LOG(INFO) << "Not verifying that a newly-added replica won't be tombstoned in fast-test mode";
121
0
    return;
122
0
  }
123
124
  // Shut down a majority of followers (3 servers) and then try to add the
125
  // follower back to the config. This will cause the config change to end up
126
  // in a pending state.
127
0
  auto active_ts_map = CreateTabletServerMapUnowned(ts_map_);
128
0
  for (int i = 1; i <= 3; i++) {
129
0
    cluster_->tablet_server(i)->Shutdown();
130
0
    ASSERT_EQ(1, active_ts_map.erase(cluster_->tablet_server(i)->uuid()));
131
0
  }
132
  // This will time out, but should take effect.
133
0
  Status s = itest::AddServer(leader_ts, tablet_id, follower_ts, PeerMemberType::PRE_VOTER,
134
0
                              boost::none, MonoDelta::FromSeconds(5), NULL,
135
0
                              false /* retry */);
136
0
  ASSERT_TRUE(s.IsTimedOut());
137
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kFollowerIndex, tablet_id, TABLET_DATA_READY,
138
0
                                                 timeout));
139
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, active_ts_map, tablet_id, 3));
140
141
  // Sleep for a few more seconds and check again to ensure that the Master
142
  // didn't end up tombstoning the replica.
143
0
  SleepFor(MonoDelta::FromSeconds(3));
144
0
  ASSERT_OK(inspect_->CheckTabletDataStateOnTS(kFollowerIndex, tablet_id, TABLET_DATA_READY));
145
0
}
146
147
// Ensure that the Master will tombstone a replica if it reports in with an old
148
// config. This tests a slightly different code path in the catalog manager
149
// than TestMasterTombstoneEvictedReplica does.
150
1
TEST_F(TabletReplacementITest, TestMasterTombstoneOldReplicaOnReport) {
151
1
  MonoDelta timeout = MonoDelta::FromSeconds(30);
152
1
  vector<string> ts_flags = { "--enable_leader_failure_detection=false" };
153
1
  vector<string> master_flags = {
154
1
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
155
1
    "--use_create_table_leader_hint=false"s,
156
1
  };
157
1
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
158
159
1
  TestWorkload workload(cluster_.get());
160
1
  workload.Setup(); // Easy way to create a new tablet.
161
162
1
  const int kLeaderIndex = 0;
163
1
  TServerDetails* leader_ts = ts_map_[cluster_->tablet_server(kLeaderIndex)->uuid()].get();
164
1
  const int kFollowerIndex = 2;
165
1
  TServerDetails* follower_ts = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()].get();
166
167
  // Figure out the tablet id of the created tablet.
168
1
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
169
1
  ASSERT_OK(itest::WaitForNumTabletsOnTS(leader_ts, 1, timeout, &tablets));
170
0
  string tablet_id = tablets[0].tablet_status().tablet_id();
171
172
  // Wait until all replicas are up and running.
173
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
174
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
175
0
                                            tablet_id, timeout));
176
0
  }
177
178
  // Elect a leader (TS 0)
179
0
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
180
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, ts_map_, tablet_id, 1)); // Wait for NO_OP.
181
0
  ASSERT_OK(itest::WaitUntilCommittedOpIdIndexIs(1, leader_ts, tablet_id, timeout));
182
183
  // Shut down the follower to be removed, then remove it from the config.
184
  // We will wait for the Master to be notified of the config change, then shut
185
  // down the rest of the cluster and bring the follower back up. The follower
186
  // will heartbeat to the Master and then be tombstoned.
187
0
  cluster_->tablet_server(kFollowerIndex)->Shutdown();
188
189
  // Remove the follower from the config and wait for the Master to notice the
190
  // config change.
191
0
  ASSERT_OK(itest::RemoveServer(leader_ts, tablet_id, follower_ts, boost::none, timeout));
192
0
  ASSERT_OK(itest::WaitForNumVotersInConfigOnMaster(cluster_.get(), tablet_id, 2, timeout));
193
194
  // Shut down the remaining tablet servers and restart the dead one.
195
0
  cluster_->tablet_server(0)->Shutdown();
196
0
  cluster_->tablet_server(1)->Shutdown();
197
0
  ASSERT_OK(cluster_->tablet_server(kFollowerIndex)->Restart());
198
199
  // Wait for the Master to tombstone the revived follower.
200
0
  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(kFollowerIndex, tablet_id, TABLET_DATA_TOMBSTONED,
201
0
                                                 timeout));
202
0
}
203
204
// Test that unreachable followers are evicted and replaced.
205
1
TEST_F(TabletReplacementITest, TestEvictAndReplaceDeadFollower) {
206
1
  if (!AllowSlowTests()) {
207
1
    LOG(INFO) << "Skipping test in fast-test mode.";
208
1
    return;
209
1
  }
210
211
0
  MonoDelta timeout = MonoDelta::FromSeconds(30);
212
0
  vector<string> ts_flags = { "--enable_leader_failure_detection=false",
213
0
                              "--follower_unavailable_considered_failed_sec=5" };
214
0
  vector<string> master_flags = {
215
0
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
216
0
    "--use_create_table_leader_hint=false"s,
217
0
  };
218
0
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
219
220
0
  TestWorkload workload(cluster_.get());
221
0
  workload.Setup(); // Easy way to create a new tablet.
222
223
0
  const int kLeaderIndex = 0;
224
0
  TServerDetails* leader_ts = ts_map_[cluster_->tablet_server(kLeaderIndex)->uuid()].get();
225
0
  const int kFollowerIndex = 2;
226
227
  // Figure out the tablet id of the created tablet.
228
0
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
229
0
  ASSERT_OK(itest::WaitForNumTabletsOnTS(leader_ts, 1, timeout, &tablets));
230
0
  string tablet_id = tablets[0].tablet_status().tablet_id();
231
232
  // Wait until all replicas are up and running.
233
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
234
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
235
0
                                            tablet_id, timeout));
236
0
  }
237
238
  // Elect a leader (TS 0)
239
0
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
240
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, ts_map_, tablet_id, 1)); // Wait for NO_OP.
241
242
  // Shut down the follower to be removed. It should be evicted.
243
0
  cluster_->tablet_server(kFollowerIndex)->Shutdown();
244
245
  // With a RemoveServer and AddServer, the opid_index of the committed config will be 3.
246
0
  ASSERT_OK(itest::WaitUntilCommittedOpIdIndexIs(3,
247
0
                                                 leader_ts,
248
0
                                                 tablet_id,
249
0
                                                 timeout,
250
0
                                                 itest::CommittedEntryType::CONFIG));
251
0
  ASSERT_OK(cluster_->tablet_server(kFollowerIndex)->Restart());
252
0
}
253
254
// Regression test for KUDU-1233. This test creates a situation in which tablet
255
// bootstrap will attempt to replay committed (and applied) config change
256
// operations. This is achieved by delaying application of a write at the
257
// tablet level that precedes the config change operations in the WAL, then
258
// initiating a remote bootstrap to a follower. The follower will not have the
259
// COMMIT for the write operation, so will ignore COMMIT messages for the
260
// applied config change operations. At startup time, the newly
261
// remotely-bootstrapped tablet should detect that these config change
262
// operations have already been applied and skip them.
263
1
TEST_F(TabletReplacementITest, TestRemoteBoostrapWithPendingConfigChangeCommits) {
264
1
  if (!AllowSlowTests()) {
265
1
    LOG(INFO) << "Skipping test in fast-test mode.";
266
1
    return;
267
1
  }
268
269
0
  MonoDelta timeout = MonoDelta::FromSeconds(30);
270
0
  vector<string> ts_flags = {
271
0
    "--enable_leader_failure_detection=false"s,
272
0
  };
273
  // We will manage doing the AddServer() manually, in order to make this test
274
  // more deterministic.
275
0
  vector<string> master_flags = {
276
0
    "--master_tombstone_evicted_tablet_replicas=false"s,
277
0
    "--catalog_manager_wait_for_new_tablets_to_elect_leader=false"s,
278
0
    "--use_create_table_leader_hint=false"s,
279
0
  };
280
0
  ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags));
281
282
0
  TestWorkload workload(cluster_.get());
283
0
  workload.Setup(); // Convenient way to create a table.
284
285
0
  const int kLeaderIndex = 0;
286
0
  TServerDetails* leader_ts = ts_map_[cluster_->tablet_server(kLeaderIndex)->uuid()].get();
287
0
  const int kFollowerIndex = 2;
288
0
  TServerDetails* ts_to_remove = ts_map_[cluster_->tablet_server(kFollowerIndex)->uuid()].get();
289
290
  // Wait for tablet creation and then identify the tablet id.
291
0
  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
292
0
  ASSERT_OK(itest::WaitForNumTabletsOnTS(leader_ts, 1, timeout, &tablets));
293
0
  string tablet_id = tablets[0].tablet_status().tablet_id();
294
295
  // Wait until all replicas are up and running.
296
0
  for (size_t i = 0; i < cluster_->num_tablet_servers(); i++) {
297
0
    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()].get(),
298
0
                                            tablet_id, timeout));
299
0
  }
300
301
  // Elect a leader (TS 0)
302
0
  ASSERT_OK(itest::StartElection(leader_ts, tablet_id, timeout));
303
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, ts_map_, tablet_id, 1)); // Wait for NO_OP.
304
305
  // Write a single row.
306
0
  ASSERT_OK(WriteSimpleTestRow(leader_ts, tablet_id, 0, 0, "", timeout));
307
308
  // Delay tablet applies in order to delay COMMIT messages to trigger KUDU-1233.
309
  // Then insert another row.
310
0
  ASSERT_OK(cluster_->SetFlag(cluster_->tablet_server_by_uuid(leader_ts->uuid()),
311
0
                              "TEST_tablet_inject_latency_on_apply_write_txn_ms", "5000"));
312
313
  // Kick off an async insert, which will be delayed for 5 seconds. This is
314
  // normally enough time to evict a replica, tombstone it, add it back, and
315
  // remotely bootstrap it when the log is only a few entries.
316
0
  tserver::WriteRequestPB req;
317
0
  tserver::WriteResponsePB resp;
318
0
  CountDownLatch latch(1);
319
0
  rpc::RpcController rpc;
320
0
  rpc.set_timeout(timeout);
321
0
  req.set_tablet_id(tablet_id);
322
0
  AddTestRowInsert(1, 1, "", &req);
323
0
  leader_ts->tserver_proxy->WriteAsync(req, &resp, &rpc, [&latch]() { latch.CountDown(); });
324
325
  // Wait for the replicate to show up (this doesn't wait for COMMIT messages).
326
0
  ASSERT_OK(itest::WaitForServersToAgree(timeout, ts_map_, tablet_id, 3));
327
0
  ASSERT_OK(itest::WaitUntilCommittedOpIdIndexIs(3, leader_ts, tablet_id, timeout));
328
329
  // Manually evict the server from the cluster, tombstone the replica, then
330
  // add the replica back to the cluster. Without the fix for KUDU-1233, this
331
  // will cause the replica to fail to start up.
332
0
  ASSERT_OK(itest::RemoveServer(leader_ts, tablet_id, ts_to_remove, boost::none, timeout));
333
0
  ASSERT_OK(itest::DeleteTablet(ts_to_remove, tablet_id, TABLET_DATA_TOMBSTONED,
334
0
                                boost::none, timeout));
335
0
  ASSERT_OK(itest::AddServer(leader_ts, tablet_id, ts_to_remove, PeerMemberType::PRE_VOTER,
336
0
                             boost::none, timeout));
337
0
  ASSERT_OK(itest::WaitUntilTabletRunning(ts_to_remove, tablet_id, timeout));
338
339
0
  ClusterVerifier cluster_verifier(cluster_.get());
340
0
  ASSERT_NO_FATALS(cluster_verifier.CheckCluster());
341
0
  ASSERT_NO_FATALS(cluster_verifier.CheckRowCount(workload.table_name(),
342
0
                            ClusterVerifier::EXACTLY, 2));
343
344
0
  latch.Wait(); // Avoid use-after-free on the response from the delayed RPC callback.
345
0
}
346
347
} // namespace yb