YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/integration-tests/cluster_itest_util.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
// This header file contains generic helper utilities for writing tests against
33
// MiniClusters and ExternalMiniClusters. Ideally, the functions will be
34
// generic enough to use with either type of cluster, due to operating
35
// primarily through RPC-based APIs or through YBClient.
36
// However, it's also OK to include common operations against a particular
37
// cluster type if it's general enough to use from multiple tests while not
38
// belonging in the MiniCluster / ExternalMiniCluster classes themselves. But
39
// consider just putting stuff like that in those classes.
40
41
#ifndef YB_INTEGRATION_TESTS_CLUSTER_ITEST_UTIL_H_
42
#define YB_INTEGRATION_TESTS_CLUSTER_ITEST_UTIL_H_
43
44
#include <inttypes.h>
45
46
#include <cstdint>
47
#include <iosfwd>
48
#include <limits>
49
#include <memory>
50
#include <ostream>
51
#include <string>
52
#include <type_traits>
53
#include <unordered_map>
54
#include <vector>
55
56
#include <boost/optional/optional_fwd.hpp>
57
58
#include "yb/client/client_fwd.h"
59
60
#include "yb/common/entity_ids.h"
61
#include "yb/common/hybrid_time.h"
62
63
#include "yb/consensus/consensus_fwd.h"
64
#include "yb/consensus/consensus_types.pb.h"
65
#include "yb/consensus/leader_lease.h"
66
#include "yb/consensus/metadata.pb.h"
67
68
#include "yb/gutil/ref_counted.h"
69
70
#include "yb/master/master_fwd.h"
71
#include "yb/master/master_client.fwd.h"
72
73
#include "yb/rpc/rpc_controller.h"
74
75
#include "yb/server/server_fwd.h"
76
77
#include "yb/tablet/metadata.pb.h"
78
79
#include "yb/tserver/tserver_fwd.h"
80
#include "yb/tserver/tserver_types.pb.h"
81
82
#include "yb/util/format.h"
83
#include "yb/util/monotime.h"
84
#include "yb/util/result.h"
85
#include "yb/util/opid.h"
86
87
using namespace std::literals;
88
89
namespace yb {
90
91
class ExternalMiniCluster;
92
class HostPort;
93
class MonoDelta;
94
class Schema;
95
class Status;
96
97
using yb::OpId;
98
99
namespace itest {
100
101
// Bundles what a test holds for one tablet server: its instance id, its
// registration (master::TSRegistrationPB), and one proxy per RPC service
// listed below. Every proxy and the registration are uniquely owned by this
// struct through std::unique_ptr.
struct TServerDetails {
102
  NodeInstancePB instance_id;
103
  std::unique_ptr<master::TSRegistrationPB> registration;
104
  std::unique_ptr<tserver::TabletServerServiceProxy> tserver_proxy;
105
  std::unique_ptr<tserver::TabletServerAdminServiceProxy> tserver_admin_proxy;
106
  std::unique_ptr<consensus::ConsensusServiceProxy> consensus_proxy;
107
  std::unique_ptr<server::GenericServiceProxy> generic_proxy;
108
109
  // Constructor and destructor are only declared in this header.
  // NOTE(review): presumably defined in the .cc file because the pointee types
  // above are only forward-declared here -- confirm.
  TServerDetails();
110
  ~TServerDetails();
111
112
  // Convenience function to get the UUID from the instance_id struct.
113
  const std::string& uuid() const;
114
115
  // Returns a textual description of this server; the exact format is decided
  // by the implementation, which is not part of this header.
  std::string ToString() const;
116
};
117
118
// tablet_id -> replica map.
119
typedef std::unordered_multimap<std::string, TServerDetails*> TabletReplicaMap;
120
121
// uuid -> tablet server map.
122
typedef std::unordered_map<TabletServerId, std::unique_ptr<TServerDetails>> TabletServerMap;
123
typedef std::unordered_map<TabletServerId, TServerDetails*> TabletServerMapUnowned;
124
125
YB_STRONGLY_TYPED_BOOL(MustBeCommitted);
126
127
// Returns possibly the simplest imaginable schema, with a single int key column.
128
client::YBSchema SimpleIntKeyYBSchema();
129
130
// Create a populated TabletServerMap by interrogating the master.
131
// Note: The returned TabletServerMap owns its TServerDetails values (they are
132
// held by std::unique_ptr), so the caller does not have to delete them.
133
Result<TabletServerMap> CreateTabletServerMap(
134
    const master::MasterClusterProxy& proxy, rpc::ProxyCache* cache);
135
Result<TabletServerMap> CreateTabletServerMap(ExternalMiniCluster* cluster);
136
137
template <class Getter>
138
auto GetForEachReplica(const std::vector<TServerDetails*>& replicas,
139
                       const MonoDelta& timeout,
140
                       const Getter& getter)
141
188
    -> Result<std::vector<typename decltype(getter(nullptr, nullptr))::ValueType>> {
142
188
  std::vector<typename decltype(getter(nullptr, nullptr))::ValueType> result;
143
188
  auto deadline = CoarseMonoClock::now() + timeout;
144
188
  rpc::RpcController controller;
145
146
405
  for (TServerDetails* ts : replicas) {
147
405
    controller.Reset();
148
405
    controller.set_deadline(deadline);
149
405
    result.push_back(
VERIFY_RESULT_PREPEND379
(
150
379
        getter(ts, &controller),
151
379
        Format("Failed to fetch last op id from $0", ts->instance_id)));
152
379
  }
153
154
162
  return result;
155
188
}
156
157
// Gets a vector containing the latest OpId for each of the given replicas.
158
// Returns a bad Status if any replica cannot be reached.
159
Result<std::vector<OpId>> GetLastOpIdForEachReplica(
160
    const TabletId& tablet_id,
161
    const std::vector<TServerDetails*>& replicas,
162
    consensus::OpIdType opid_type,
163
    const MonoDelta& timeout,
164
    consensus::OperationType op_type = consensus::OperationType::UNKNOWN_OP);
165
166
// Like the above, but for a single replica.
167
Result<OpId> GetLastOpIdForReplica(
168
    const TabletId& tablet_id,
169
    TServerDetails* replica,
170
    consensus::OpIdType opid_type,
171
    const MonoDelta& timeout);
172
173
// Creates server vector from map.
174
vector<TServerDetails*> TServerDetailsVector(const TabletServerMap& tablet_servers);
175
vector<TServerDetails*> TServerDetailsVector(const TabletServerMapUnowned& tablet_servers);
176
177
// Creates copy of tablet server map, which does not own TServerDetails.
178
TabletServerMapUnowned CreateTabletServerMapUnowned(const TabletServerMap& tablet_servers,
179
                                                    const std::set<std::string>& exclude = {});
180
181
// Wait until the latest op on the target replica is from the current term.
182
Status WaitForOpFromCurrentTerm(TServerDetails* replica,
183
                                const std::string& tablet_id,
184
                                consensus::OpIdType opid_type,
185
                                const MonoDelta& timeout,
186
                                yb::OpId* opid = nullptr);
187
188
// Wait until all of the servers have converged on the same log index.
189
// The converged index must be at least equal to 'minimum_index'.
190
//
191
// Requires that all servers are running. Returns Status::TimedOut if the
192
// indexes do not converge within the given timeout.
193
//
194
// If actual_index is not nullptr, the index that the servers have agreed on is written to
195
// actual_index. If the servers fail to agree, it is set to zero.
196
//
197
// If must_be_committed is true, we require committed OpIds to also be the same across all servers
198
// and be the same as last received OpIds. This will make sure all followers know that all entries
199
// they received are committed, and we can actually read those entries from the followers.
200
// One place where this makes a difference is LinkedListTest.TestLoadWhileOneServerDownAndVerify.
201
Status WaitForServersToAgree(const MonoDelta& timeout,
202
                             const TabletServerMap& tablet_servers,
203
                             const TabletId& tablet_id,
204
                             int64_t minimum_index,
205
                             int64_t* actual_index = nullptr,
206
                             MustBeCommitted must_be_committed = MustBeCommitted::kFalse);
207
208
Status WaitForServersToAgree(const MonoDelta& timeout,
209
                             const TabletServerMapUnowned& tablet_servers,
210
                             const TabletId& tablet_id,
211
                             int64_t minimum_index,
212
                             int64_t* actual_index = nullptr,
213
                             MustBeCommitted must_be_committed = MustBeCommitted::kFalse);
214
215
// Overload of WaitForServersToAgree() that takes the servers as a vector of
// TServerDetails pointers instead of a map. The remaining parameters and their
// defaults mirror the map-based overloads.
// 'vector' and 'string' are spelled with std:: so the header does not rely on
// using-declarations provided by other headers.
Status WaitForServersToAgree(const MonoDelta& timeout,
                             const std::vector<TServerDetails*>& tablet_servers,
                             const std::string& tablet_id,
                             int64_t minimum_index,
                             int64_t* actual_index = nullptr,
                             MustBeCommitted must_be_committed = MustBeCommitted::kFalse);
221
222
// Wait until all specified replicas have logged at least the given index.
223
// Unlike WaitForServersToAgree(), the servers do not actually have to converge
224
// or quiesce. They only need to progress to or past the given index.
225
Status WaitUntilAllReplicasHaveOp(const int64_t log_index,
226
                                  const TabletId& tablet_id,
227
                                  const std::vector<TServerDetails*>& replicas,
228
                                  const MonoDelta& timeout,
229
                                  int64_t* actual_minimum_index = nullptr);
230
231
// Wait until the number of alive tservers is equal to n_tservers. An alive tserver is a tserver
232
// that has heartbeated the master at least once in the last FLAGS_raft_heartbeat_interval_ms
233
// milliseconds.
234
Status WaitUntilNumberOfAliveTServersEqual(int n_tservers,
235
                                           const master::MasterClusterProxy& master_proxy,
236
                                           const MonoDelta& timeout);
237
238
// Get the consensus state from the given replica.
239
Status GetConsensusState(const TServerDetails* replica,
240
                         const TabletId& tablet_id,
241
                         consensus::ConsensusConfigType type,
242
                         const MonoDelta& timeout,
243
                         consensus::ConsensusStatePB* consensus_state,
244
                         consensus::LeaderLeaseStatus* leader_lease_status = nullptr);
245
246
// Wait until the number of servers with the specified member type in the committed consensus
247
// configuration is equal to config_size.
248
Status WaitUntilCommittedConfigMemberTypeIs(size_t config_size,
249
                                            const TServerDetails* replica,
250
                                            const TabletId& tablet_id,
251
                                            const MonoDelta& timeout,
252
                                            consensus::PeerMemberType member_type);
253
254
// Wait until the number of voters in the committed consensus configuration is
255
// 'config_size', according to the specified replica.
256
Status WaitUntilCommittedConfigNumVotersIs(size_t config_size,
257
                                           const TServerDetails* replica,
258
                                           const TabletId& tablet_id,
259
                                           const MonoDelta& timeout);
260
261
// Type of the 'wait_for_leader' argument of WaitForReplicasReportedToMaster().
// NOTE(review): presumably selects whether that call also waits for the tablet
// to have a leader -- confirm against the implementation.
enum WaitForLeader {
262
  DONT_WAIT_FOR_LEADER = 0,
263
  WAIT_FOR_LEADER = 1
264
};
265
266
// Wait for the specified number of replicas to be reported by the master for
267
// the given tablet. Fails when leader is not found or number of replicas
268
// did not match up, or timeout waiting for leader.
269
Status WaitForReplicasReportedToMaster(
270
    ExternalMiniCluster* cluster,
271
    int num_replicas, const std::string& tablet_id,
272
    const MonoDelta& timeout,
273
    WaitForLeader wait_for_leader,
274
    bool* has_leader,
275
    master::TabletLocationsPB* tablet_locations);
276
277
// Used to specify committed entry type.
278
// Passed as the 'type' argument of the WaitUntilCommittedOpIdIndex* helpers.
enum class CommittedEntryType {
279
  // Default value of the 'type' argument in those helpers.
  // NOTE(review): presumably means "do not filter by entry type" -- confirm.
  ANY,
280
  // NOTE(review): presumably restricts the check to config entries -- confirm
  // against the implementation.
  CONFIG,
281
};
282
283
// Wait until the last committed OpId has index exactly 'opid_index'.
284
// 'type' - type of committed entry for check.
285
Status WaitUntilCommittedOpIdIndexIs(int64_t opid_index,
286
                                     TServerDetails* replica,
287
                                     const TabletId& tablet_id,
288
                                     const MonoDelta& timeout,
289
                                     CommittedEntryType type = CommittedEntryType::ANY);
290
291
// Wait until the last committed OpId index is greater than 'opid_index' and store the new index in
292
// the same variable.
293
// The value pointed by 'opid_index' should not change during the execution of this function.
294
// 'type' - type of committed entry for check.
295
Status WaitUntilCommittedOpIdIndexIsGreaterThan(int64_t* opid_index,
296
                                                TServerDetails* replica,
297
                                                const TabletId& tablet_id,
298
                                                const MonoDelta& timeout,
299
                                                CommittedEntryType type = CommittedEntryType::ANY);
300
301
// Wait until the last committed OpId index is at least equal to 'opid_index' and store the index
302
// in the same variable.
303
// The value pointed by 'opid_index' should not change during the execution of this function.
304
// 'type' - type of committed entry for check.
305
Status WaitUntilCommittedOpIdIndexIsAtLeast(int64_t* opid_index,
306
                                            TServerDetails* replica,
307
                                            const TabletId& tablet_id,
308
                                            const MonoDelta& timeout,
309
                                            CommittedEntryType type = CommittedEntryType::ANY);
310
311
// Returns:
312
// Status::OK() if the replica is alive and leader of the consensus configuration.
313
// STATUS(NotFound, "") if the replica is not part of the consensus configuration or is dead.
314
// Status::IllegalState() if the replica is live but not the leader.
315
Status GetReplicaStatusAndCheckIfLeader(
316
    const TServerDetails* replica,
317
    const TabletId& tablet_id,
318
    const MonoDelta& timeout,
319
    consensus::LeaderLeaseCheckMode lease_check_mode =
320
        consensus::LeaderLeaseCheckMode::NEED_LEASE);
321
322
// Wait until the specified replica is leader.
323
Status WaitUntilLeader(
324
    const TServerDetails* replica,
325
    const TabletId& tablet_id,
326
    const MonoDelta& timeout,
327
    consensus::LeaderLeaseCheckMode lease_check_mode =
328
        consensus::LeaderLeaseCheckMode::NEED_LEASE);
329
330
// Loops over the replicas, attempting to determine the leader, until it finds
331
// the first replica that believes it is the leader.
332
Status FindTabletLeader(const TabletServerMap& tablet_servers,
333
                        const TabletId& tablet_id,
334
                        const MonoDelta& timeout,
335
                        TServerDetails** leader);
336
337
// Overloads of FindTabletLeader() that take the candidate servers as a
// non-owning map or as a plain vector of TServerDetails pointers.
// 'leader' is a TServerDetails** -- presumably it receives the server that
// reported itself as leader; confirm against the implementation.
// 'string' and 'vector' are spelled with std:: so the header does not rely on
// using-declarations provided by other headers.
Status FindTabletLeader(const TabletServerMapUnowned& tablet_servers,
                        const std::string& tablet_id,
                        const MonoDelta& timeout,
                        TServerDetails** leader);

Status FindTabletLeader(const std::vector<TServerDetails*>& tservers,
                        const std::string& tablet_id,
                        const MonoDelta& timeout,
                        TServerDetails** leader);
346
347
// Grabs list of followers using FindTabletLeader() above.
348
// 'followers' is the output vector of TServerDetails pointers.
// NOTE(review): presumably filled with the servers of 'tablet_servers' that are
// not the leader of 'tablet_id' -- confirm against the implementation.
// 'string' and 'vector' are spelled with std:: so the header does not rely on
// using-declarations provided by other headers.
Status FindTabletFollowers(const TabletServerMapUnowned& tablet_servers,
                           const std::string& tablet_id,
                           const MonoDelta& timeout,
                           std::vector<TServerDetails*>* followers);
352
353
// Start an election on the specified tserver.
354
// 'timeout' only refers to the RPC asking the peer to start an election. The
355
// StartElection() RPC does not block waiting for the results of the election,
356
// and neither does this call.
357
Status StartElection(
358
    const TServerDetails* replica,
359
    const TabletId& tablet_id,
360
    const MonoDelta& timeout,
361
    consensus::TEST_SuppressVoteRequest suppress_vote_request =
362
        consensus::TEST_SuppressVoteRequest::kFalse);
363
364
// Request the given replica to vote. This is thin wrapper around
365
// RequestConsensusVote(). See the definition of VoteRequestPB in
366
// consensus.proto for parameter details.
367
Status RequestVote(const TServerDetails* replica,
368
                   const std::string& tablet_id,
369
                   const std::string& candidate_uuid,
370
                   int64_t candidate_term,
371
                   const OpIdPB& last_logged_opid,
372
                   boost::optional<bool> ignore_live_leader,
373
                   boost::optional<bool> is_pre_election,
374
                   const MonoDelta& timeout);
375
376
// Cause a leader to step down on the specified server.
377
// 'timeout' refers to the RPC timeout waiting synchronously for stepdown to
378
// complete on the leader side. Since that does not require communication with
379
// other nodes at this time, this call is rather quick.
380
// 'new_leader', if not null, is the replica that should start the election to
381
// become the new leader.
382
Status LeaderStepDown(
383
    const TServerDetails* replica,
384
    const TabletId& tablet_id,
385
    const TServerDetails* new_leader,
386
    const MonoDelta& timeout,
387
    const bool disable_graceful_transition = false,
388
    tserver::TabletServerErrorPB* error = nullptr);
389
390
// Write a "simple test schema" row to the specified tablet on the given
391
// replica. This schema is commonly used by tests and is defined in
392
// wire_protocol-test-util.h
393
// The caller must specify whether this is an INSERT or UPDATE call via
394
// write_type.
395
Status WriteSimpleTestRow(const TServerDetails* replica,
396
                          const TabletId& tablet_id,
397
                          int32_t key,
398
                          int32_t int_val,
399
                          const std::string& string_val,
400
                          const MonoDelta& timeout);
401
402
// Run a ConfigChange to ADD_SERVER on 'replica_to_add'.
403
// The RPC request is sent to 'leader'.
404
Status AddServer(const TServerDetails* leader,
405
                 const TabletId& tablet_id,
406
                 const TServerDetails* replica_to_add,
407
                 consensus::PeerMemberType member_type,
408
                 const boost::optional<int64_t>& cas_config_opid_index,
409
                 const MonoDelta& timeout,
410
                 tserver::TabletServerErrorPB::Code* error_code = nullptr,
411
                 bool retry = true);
412
413
// Run a ConfigChange to REMOVE_SERVER on 'replica_to_remove'.
414
// The RPC request is sent to 'leader'.
415
Status RemoveServer(const TServerDetails* leader,
416
                    const TabletId& tablet_id,
417
                    const TServerDetails* replica_to_remove,
418
                    const boost::optional<int64_t>& cas_config_opid_index,
419
                    const MonoDelta& timeout,
420
                    tserver::TabletServerErrorPB::Code* error_code = nullptr,
421
                    bool retry = true);
422
423
// Get the list of tablets from the remote server.
424
Status ListTablets(const TServerDetails* ts,
425
                   const MonoDelta& timeout,
426
                   std::vector<tserver::ListTabletsResponsePB_StatusAndSchemaPB>* tablets);
427
428
// Get the list of RUNNING tablet ids from the remote server.
429
Status ListRunningTabletIds(const TServerDetails* ts,
430
                            const MonoDelta& timeout,
431
                            std::vector<TabletId>* tablet_ids);
432
433
// Get the list of tablet locations for the specified tablet from the Master.
434
Status GetTabletLocations(ExternalMiniCluster* cluster,
435
                          const TabletId& tablet_id,
436
                          const MonoDelta& timeout,
437
                          master::TabletLocationsPB* tablet_locations);
438
439
// Get the list of tablet locations for all tablets in the specified table from the Master.
440
Status GetTableLocations(ExternalMiniCluster* cluster,
441
                         const client::YBTableName& table_name,
442
                         const MonoDelta& timeout,
443
                         RequireTabletsRunning require_tablets_running,
444
                         master::GetTableLocationsResponsePB* table_locations);
445
446
// Wait for the specified number of voters to be reported to the config on the
447
// master for the specified tablet.
448
Status WaitForNumVotersInConfigOnMaster(
449
    ExternalMiniCluster* cluster,
450
    const TabletId& tablet_id,
451
    int num_voters,
452
    const MonoDelta& timeout);
453
454
// Repeatedly invoke GetTablets(), waiting for up to 'timeout' time for the
455
// specified 'count' number of replicas.
456
Status WaitForNumTabletsOnTS(
457
    TServerDetails* ts,
458
    size_t count,
459
    const MonoDelta& timeout,
460
    std::vector<tserver::ListTabletsResponsePB_StatusAndSchemaPB>* tablets);
461
462
// Wait until the specified replica is in the specified state.
463
Status WaitUntilTabletInState(TServerDetails* ts,
464
                              const TabletId& tablet_id,
465
                              tablet::RaftGroupStatePB state,
466
                              const MonoDelta& timeout,
467
                              const MonoDelta& list_tablets_timeout = 10s);
468
469
// Wait until the specified tablet is in RUNNING state.
470
Status WaitUntilTabletRunning(TServerDetails* ts,
471
                              const TabletId& tablet_id,
472
                              const MonoDelta& timeout);
473
474
// Send a DeleteTablet() to the server at 'ts' of the specified 'delete_type'.
475
Status DeleteTablet(const TServerDetails* ts,
476
                    const TabletId& tablet_id,
477
                    const tablet::TabletDataState delete_type,
478
                    const boost::optional<int64_t>& cas_config_opid_index_less_or_equal,
479
                    const MonoDelta& timeout,
480
                    tserver::TabletServerErrorPB::Code* error_code = nullptr);
481
482
// Cause the remote to initiate remote bootstrap using the specified host as a
483
// source.
484
Status StartRemoteBootstrap(const TServerDetails* ts,
485
                            const TabletId& tablet_id,
486
                            const std::string& bootstrap_source_uuid,
487
                            const HostPort& bootstrap_source_addr,
488
                            int64_t caller_term,
489
                            const MonoDelta& timeout);
490
491
// Get the latest OpId for the given master replica proxy. Note that this works for tablet servers
492
// also, though GetLastOpIdForReplica is customized for tablet server for now.
493
Status GetLastOpIdForMasterReplica(
494
    const std::shared_ptr<consensus::ConsensusServiceProxy>& consensus_proxy,
495
    const TabletId& tablet_id,
496
    const std::string& dest_uuid,
497
    const consensus::OpIdType opid_type,
498
    const MonoDelta& timeout,
499
    OpIdPB* op_id);
500
501
} // namespace itest
502
} // namespace yb
503
504
#endif // YB_INTEGRATION_TESTS_CLUSTER_ITEST_UTIL_H_