YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/integration-tests/external_mini_cluster.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
#ifndef YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_
33
#define YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_
34
35
#include <string.h>
36
#include <sys/types.h>
37
38
#include <functional>
39
#include <memory>
40
#include <string>
41
#include <thread>
42
#include <vector>
43
44
#include <gtest/gtest_prod.h>
45
46
#include "yb/common/entity_ids_types.h"
47
48
#include "yb/consensus/consensus_fwd.h"
49
#include "yb/consensus/consensus_types.pb.h"
50
#include "yb/consensus/metadata.pb.h"
51
52
#include "yb/gutil/macros.h"
53
#include "yb/gutil/ref_counted.h"
54
#include "yb/gutil/stringprintf.h"
55
56
#include "yb/integration-tests/mini_cluster_base.h"
57
58
#include "yb/server/server_fwd.h"
59
60
#include "yb/tserver/tserver_fwd.h"
61
#include "yb/tserver/tserver_types.pb.h"
62
63
#include "yb/util/status_fwd.h"
64
#include "yb/util/monotime.h"
65
#include "yb/util/net/net_util.h"
66
#include "yb/util/status.h"
67
#include "yb/util/tsan_util.h"
68
69
namespace yb {
70
71
class ExternalDaemon;
72
class ExternalMaster;
73
class ExternalTabletServer;
74
class HostPort;
75
class MetricPrototype;
76
class MetricEntityPrototype;
77
class OpIdPB;
78
class NodeInstancePB;
79
class Subprocess;
80
81
namespace server {
82
class ServerStatusPB;
83
}  // namespace server
84
85
using yb::consensus::ChangeConfigType;
86
87
struct ExternalMiniClusterOptions {
88
89
  // Number of masters to start.
90
  size_t num_masters = 1;
91
92
  // Number of TS to start.
93
  size_t num_tablet_servers = 1;
94
95
  // Number of drives to use on TS.
96
  int num_drives = 1;
97
98
  // If more than one master is specified, list of ports for the masters in a consensus
99
  // configuration. Port at index 0 is used for the leader master.
100
  // Default: one entry as num_masters defaults to 1. Value 0 implies that a free port
101
  //          is picked at runtime.
102
  std::vector<uint16_t> master_rpc_ports = { 0 };
103
104
  static constexpr bool kDefaultStartCqlProxy = true;
105
#if defined(__APPLE__)
106
  static constexpr bool kBindToUniqueLoopbackAddress = false;
107
#else
108
  static constexpr bool kBindToUniqueLoopbackAddress = true;
109
#endif
110
111
  bool enable_ysql = false;
112
113
  // Directory in which to store data.
114
  // Default: "", which auto-generates a unique path for this cluster.
115
  std::string data_root{};
116
117
  // Set data_root_counter to a non-negative number if your test run needs to create a new, empty
118
  // cluster every time ExternalMiniCluster() is constructed.
119
  // - During a test run, data_root will stay the same until the run is finished, so recreating a
120
  //   brand new cluster from scratch is not possible because the database location stays the same.
121
  // - When data_root_counter is non-negative, a new "data_root" is generated every time
122
  //   ExternalMiniCluster() is constructed.
123
  int data_root_counter = -1;
124
125
  // If true, binds each tablet server to a different loopback address.  This affects the server's
126
  // RPC server, and also forces the server to only use this IP address for outgoing socket
127
  // connections as well.  This allows the use of iptables on the localhost to simulate network
128
  // partitions.
129
  //
130
  // The addresses used are 127.<A>.<B>.<C> where:
131
  // - <A,B> are the high and low bytes of the pid of the process running the minicluster (not the
132
  //   daemon itself).
133
  // - <C> is the index of the server within this minicluster.
134
  //
135
  // This requires that the system is set up such that processes may bind to any IP address in the
136
  // localhost netblock (127.0.0.0/8). This seems to be the case on common Linux distributions. You
137
  // can verify by running 'ip addr | grep 127.0.0.1' and checking that the address is listed as
138
  // '127.0.0.1/8'.
139
  //
140
  // This option is disabled by default on OS X.
141
  // Enabling of this option on OS X means usage of default IPs: 127.0.0.x.
142
  bool bind_to_unique_loopback_addresses = kBindToUniqueLoopbackAddress;
143
144
  // If true, second and other TSes will use the same ports as the first TS uses.
145
  // Else every TS will allocate unique ports for itself.
146
  // The option is applicable ONLY with bind_to_unique_loopback_addresses == true.
147
  bool use_same_ts_ports = false;
148
149
  // The path where the yb daemons should be run from.
150
  // Default: "../bin", which points to the path where non-test executables are located.
151
  // This works for unit tests, since they all end up in build/latest/test-<subproject_name>.
152
  std::string daemon_bin_path{};
153
154
  // Extra flags for tablet servers and masters respectively.
155
  //
156
  // In these flags, you may use the special string '${index}' which will
157
  // be substituted with the index of the tablet server or master.
158
  std::vector<std::string> extra_tserver_flags;
159
  std::vector<std::string> extra_master_flags;
160
161
  // Default timeout for operations involving RPCs, when none is provided in the API.
162
  // Default : 10sec
163
  MonoDelta timeout = MonoDelta::FromSeconds(10);
164
165
  // If true, logs will be written to both stderr and a file.
166
  bool log_to_file = false;
167
168
  // Use even IPs for cluster, like we have for MiniCluster.
169
  // So it could be used with test certificates.
170
  bool use_even_ips = false;
171
172
  // Cluster id used to create fs path when we create tests with multiple clusters.
173
  std::string cluster_id;
174
175
  CHECKED_STATUS RemovePort(const uint16_t port);
176
  CHECKED_STATUS AddPort(const uint16_t port);
177
178
  // Make sure we have the correct number of master RPC ports specified.
179
  void AdjustMasterRpcPorts();
180
};
181
182
// A mini-cluster made up of subprocesses running each of the daemons separately. This is useful for
183
// black-box or grey-box failure testing purposes -- it provides the ability to forcibly kill or
184
// stop particular cluster participants, which isn't feasible in the normal MiniCluster.  On the
185
// other hand, there is little access to inspect the internal state of the daemons.
186
class ExternalMiniCluster : public MiniClusterBase {
187
 public:
188
  typedef ExternalMiniClusterOptions Options;
189
190
  // Mode to which node types a certain action (like Shutdown()) should apply.
191
  enum NodeSelectionMode {
192
    TS_ONLY,
193
    ALL
194
  };
195
196
  // Threshold of the number of retries for master related rpc calls.
197
  static const int kMaxRetryIterations = 100;
198
199
  explicit ExternalMiniCluster(const ExternalMiniClusterOptions& opts);
200
  ~ExternalMiniCluster();
201
202
  // Start the cluster.
203
  CHECKED_STATUS Start(rpc::Messenger* messenger = nullptr);
204
205
  // Restarts the cluster. Requires that it has been Shutdown() first.
206
  CHECKED_STATUS Restart();
207
208
  // Like Start() but performs initialization synchronously, i.e. this will wait for
209
  // all TS's to be started and initialized. Tests should use this if they interact with tablets
210
  // immediately after Start();
211
  CHECKED_STATUS StartSync();
212
213
  // Add a new TS to the cluster. The new TS is started.  Requires that the master is already
214
  // running.
215
  CHECKED_STATUS AddTabletServer(
216
      bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy,
217
      const std::vector<std::string>& extra_flags = {},
218
      int num_drives = -1);
219
220
  // Shuts down the whole cluster or part of it, depending on the selected 'mode'.  Currently, this
221
  // uses SIGKILL on each daemon for a non-graceful shutdown.
222
  void Shutdown(NodeSelectionMode mode = ALL);
223
224
  // Waits for the master to finish running initdb.
225
  CHECKED_STATUS WaitForInitDb();
226
227
  // Return the IP address that the tablet server with the given index will bind to.  If
228
  // options.bind_to_unique_loopback_addresses is false, this will be 127.0.0.1. Otherwise, it is
229
  // another IP in the local netblock.
230
  std::string GetBindIpForTabletServer(size_t index) const;
231
232
  // Return a pointer to the running leader master. This may be NULL
233
  // if the cluster is not started.
234
  // WARNING: If leader master is not elected after kMaxRetryIterations, first available master
235
  // will be returned.
236
  ExternalMaster* GetLeaderMaster();
237
238
  // Perform an RPC to determine the leader of the external mini cluster.  Returns the leader
239
  // master's index (for calls to master() below).
240
  //
241
  // NOTE: if a leader election occurs after this method is executed, the last result may not be
242
  // valid.
243
  Result<size_t> GetLeaderMasterIndex();
244
245
  // Return a non-leader master index
246
  Result<size_t> GetFirstNonLeaderMasterIndex();
247
248
  Result<size_t> GetTabletLeaderIndex(const std::string& tablet_id);
249
250
  // The comma-separated string of the master addresses host/ports from current list of masters.
251
  string GetMasterAddresses() const;
252
253
  string GetTabletServerAddresses() const;
254
255
  string GetTabletServerHTTPAddresses() const;
256
257
  // Start a new master with `peer_addrs` as the master_addresses parameter.
258
  Result<ExternalMaster *> StartMasterWithPeers(const string& peer_addrs);
259
260
  // Send a ping request to the rpc port of the master. Return OK() only if it is reachable.
261
  CHECKED_STATUS PingMaster(ExternalMaster* master) const;
262
263
  // Add a Tablet Server to the blacklist.
264
  CHECKED_STATUS AddTServerToBlacklist(ExternalMaster* master, ExternalTabletServer* ts);
265
266
  // Returns the min_num_replicas corresponding to a PlacementBlockPB.
267
  CHECKED_STATUS GetMinReplicaCountForPlacementBlock(
268
    ExternalMaster* master,
269
    const string& cloud, const string& region, const string& zone,
270
    int* min_num_replicas);
271
  // Add a Tablet Server to the leader blacklist.
272
  CHECKED_STATUS AddTServerToLeaderBlacklist(ExternalMaster* master, ExternalTabletServer* ts);
273
274
  // Empty blacklist.
275
  CHECKED_STATUS ClearBlacklist(ExternalMaster* master);
276
277
  // Starts a new master and returns the handle of the new master object on success.  Not thread
278
  // safe for now. We could move this to a static function outside External Mini Cluster, but
279
  // keeping it here for now as it is currently used only in conjunction with EMC.  If there are any
280
  // errors and if a new master could not be spawned, it will crash internally.
281
  void StartShellMaster(ExternalMaster** new_master);
282
283
  // Performs an add or remove from the existing config of this EMC, of the given master.
284
  // When use_hostport is true, the master is deemed as dead and its UUID is not used.
285
  CHECKED_STATUS ChangeConfig(ExternalMaster* master,
286
      ChangeConfigType type,
287
      consensus::PeerMemberType member_type = consensus::PeerMemberType::PRE_VOTER,
288
      bool use_hostport = false);
289
290
  // Performs an RPC to the given master to get the number of masters it is tracking in-memory.
291
  CHECKED_STATUS GetNumMastersAsSeenBy(ExternalMaster* master, int* num_peers);
292
293
  // Get the last committed opid for the current leader master.
294
  CHECKED_STATUS GetLastOpIdForLeader(OpIdPB* opid);
295
296
  // The leader master sometimes does not commit the config in time on first setup, causing
297
  // CheckHasCommittedOpInCurrentTermUnlocked check - that the current term should have had at least
298
  // one commit - to fail. This API waits for the leader's commit term to move ahead by one.
299
  CHECKED_STATUS WaitForLeaderCommitTermAdvance();
300
301
  // This API waits for the commit indices of all the master peers to reach the target index.
302
  CHECKED_STATUS WaitForMastersToCommitUpTo(int64_t target_index);
303
304
  // If this cluster is configured for a single non-distributed master, return the single master or
305
  // NULL if the master is not started. Exits with a CHECK failure if there are multiple masters.
306
  ExternalMaster* master() const;
307
308
  // Return master at 'idx' or NULL if the master at 'idx' has not been started.
309
  ExternalMaster* master(size_t idx) const;
310
311
  ExternalTabletServer* tablet_server(size_t idx) const;
312
313
  // Return ExternalTabletServer given its UUID. If not found, returns NULL.
314
  ExternalTabletServer* tablet_server_by_uuid(const std::string& uuid) const;
315
316
  // Return the index of the ExternalTabletServer that has the given 'uuid', or -1 if no such UUID
317
  // can be found.
318
  int tablet_server_index_by_uuid(const std::string& uuid) const;
319
320
  // Return all masters.
321
  std::vector<ExternalMaster*> master_daemons() const;
322
323
  // Return all tablet servers and masters.
324
  std::vector<ExternalDaemon*> daemons() const;
325
326
  // Return all tablet servers.
327
  std::vector<ExternalTabletServer*> tserver_daemons() const;
328
329
  // Get tablet server host.
330
  HostPort pgsql_hostport(int node_index) const;
331
332
1.76k
  // Returns the number of entries currently held in tablet_servers_.
  size_t num_tablet_servers() const {
333
1.76k
    return tablet_servers_.size();
334
1.76k
  }
335
336
1.70k
  // Returns the number of entries currently held in masters_.
  size_t num_masters() const {
337
1.70k
    return masters_.size();
338
1.70k
  }
339
340
  // Return the client messenger used by the ExternalMiniCluster.
341
  rpc::Messenger* messenger();
342
343
395k
  // Returns a reference to the ProxyCache owned by proxy_cache_. The pointer is dereferenced
  // without a null check, so proxy_cache_ must already be set when this is called.
  rpc::ProxyCache& proxy_cache() {
344
395k
    return *proxy_cache_;
345
395k
  }
346
347
  // Get the master leader consensus proxy.
348
  consensus::ConsensusServiceProxy GetLeaderConsensusProxy();
349
350
  // Get the given master's consensus proxy.
351
  consensus::ConsensusServiceProxy GetConsensusProxy(ExternalDaemon* daemon);
352
353
  template <class T>
354
  T GetProxy(ExternalDaemon* daemon);
355
356
  // Returns a proxy of type T, built by GetProxy<T>(), for the tablet server at index 'i'
  // (as returned by tablet_server(i)).
  template <class T>
357
0
  T GetTServerProxy(size_t i) {
358
0
    return GetProxy<T>(tablet_server(i));
359
0
  }
360
361
  // Returns a proxy of type T for the only master in the cluster (index 0).
  // Fails a CHECK unless masters_ contains exactly one entry.
  template <class T>
362
550
  T GetMasterProxy() {
363
550
    CHECK_EQ(masters_.size(), 1);
364
550
    return GetMasterProxy<T>(0);
365
550
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master14MasterDdlProxyEEET_v
Line
Count
Source
362
22
  T GetMasterProxy() {
363
22
    CHECK_EQ(masters_.size(), 1);
364
22
    return GetMasterProxy<T>(0);
365
22
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master17MasterClientProxyEEET_v
Line
Count
Source
362
273
  T GetMasterProxy() {
363
273
    CHECK_EQ(masters_.size(), 1);
364
273
    return GetMasterProxy<T>(0);
365
273
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master18MasterClusterProxyEEET_v
Line
Count
Source
362
186
  T GetMasterProxy() {
363
186
    CHECK_EQ(masters_.size(), 1);
364
186
    return GetMasterProxy<T>(0);
365
186
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master16MasterAdminProxyEEET_v
Line
Count
Source
362
69
  T GetMasterProxy() {
363
69
    CHECK_EQ(masters_.size(), 1);
364
69
    return GetMasterProxy<T>(0);
365
69
  }
366
367
  // Returns a proxy of type T, built by GetProxy<T>(), for the master at 'idx'.
  // Fails a CHECK if 'idx' is not less than masters_.size().
  template <class T>
368
41.2k
  T GetMasterProxy(size_t idx) {
369
41.2k
    CHECK_LT(idx, masters_.size());
370
41.2k
    return GetProxy<T>(master(idx));
371
41.2k
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master14MasterDdlProxyEEET_m
Line
Count
Source
368
22
  T GetMasterProxy(size_t idx) {
369
22
    CHECK_LT(idx, masters_.size());
370
22
    return GetProxy<T>(master(idx));
371
22
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master18MasterClusterProxyEEET_m
Line
Count
Source
368
40.8k
  T GetMasterProxy(size_t idx) {
369
40.8k
    CHECK_LT(idx, masters_.size());
370
40.8k
    return GetProxy<T>(master(idx));
371
40.8k
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master17MasterClientProxyEEET_m
Line
Count
Source
368
273
  T GetMasterProxy(size_t idx) {
369
273
    CHECK_LT(idx, masters_.size());
370
273
    return GetProxy<T>(master(idx));
371
273
  }
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master16MasterAdminProxyEEET_m
Line
Count
Source
368
165
  T GetMasterProxy(size_t idx) {
369
165
    CHECK_LT(idx, masters_.size());
370
165
    return GetProxy<T>(master(idx));
371
165
  }
372
373
  // Returns a proxy of type T, built by GetProxy<T>(), for the master returned by
  // GetLeaderMaster(). The result of GetLeaderMaster() is passed through unchecked.
  template <class T>
374
236
  T GetLeaderMasterProxy() {
375
236
    return GetProxy<T>(GetLeaderMaster());
376
236
  }
_ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master18MasterClusterProxyEEET_v
Line
Count
Source
374
118
  T GetLeaderMasterProxy() {
375
118
    return GetProxy<T>(GetLeaderMaster());
376
118
  }
_ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master17MasterClientProxyEEET_v
Line
Count
Source
374
118
  T GetLeaderMasterProxy() {
375
118
    return GetProxy<T>(GetLeaderMaster());
376
118
  }
Unexecuted instantiation: _ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master14MasterDdlProxyEEET_v
377
378
  // Returns a generic proxy to the master at 'idx'. Requires that the master at 'idx' is running.
379
  std::shared_ptr<server::GenericServiceProxy> master_generic_proxy(int idx) const;
380
  // Same as above, using the bound rpc address.
381
  std::shared_ptr<server::GenericServiceProxy> master_generic_proxy(const HostPort& bound_addr)
382
      const;
383
384
  // Wait until the number of registered tablet servers reaches the given count on at least one of
385
  // the running masters.  Returns Status::TimedOut if the desired count is not achieved with the
386
  // given timeout.
387
  CHECKED_STATUS WaitForTabletServerCount(size_t count, const MonoDelta& timeout);
388
389
  // Runs gtest assertions that no servers have crashed.
390
  void AssertNoCrashes();
391
392
  // Wait until all tablets on the given tablet server are in 'RUNNING'
393
  // state.
394
  CHECKED_STATUS WaitForTabletsRunning(ExternalTabletServer* ts, const MonoDelta& timeout);
395
396
  Result<tserver::ListTabletsResponsePB> ListTablets(ExternalTabletServer* ts);
397
398
  Result<std::vector<tserver::ListTabletsForTabletServerResponsePB_Entry>> GetTablets(
399
      ExternalTabletServer* ts);
400
401
  Result<std::vector<TabletId>> GetTabletIds(ExternalTabletServer* ts);
402
403
  Result<tserver::GetSplitKeyResponsePB> GetSplitKey(const std::string& tablet_id);
404
405
  CHECKED_STATUS FlushTabletsOnSingleTServer(
406
      ExternalTabletServer* ts, const std::vector<yb::TabletId> tablet_ids,
407
      bool is_compaction);
408
409
  CHECKED_STATUS WaitForTSToCrash(const ExternalTabletServer* ts,
410
                          const MonoDelta& timeout = MonoDelta::FromSeconds(60));
411
412
  CHECKED_STATUS WaitForTSToCrash(
413
      size_t index, const MonoDelta& timeout = MonoDelta::FromSeconds(60));
414
415
  // Sets the given flag on the given daemon, which must be running.
416
  //
417
  // This uses the 'force' flag on the RPC so that, even if the flag is considered unsafe to change
418
  // at runtime, it is changed.
419
  CHECKED_STATUS SetFlag(ExternalDaemon* daemon,
420
                         const std::string& flag,
421
                         const std::string& value);
422
423
  // Sets the given flag on all masters.
424
  CHECKED_STATUS SetFlagOnMasters(const std::string& flag, const std::string& value);
425
  // Sets the given flag on all tablet servers.
426
  CHECKED_STATUS SetFlagOnTServers(const std::string& flag, const std::string& value);
427
428
  // Allocates a free port and stores a file lock guarding access to that port into an internal
429
  // array of file locks.
430
  uint16_t AllocateFreePort();
431
432
  // Step down the master leader. error_code tracks rpc error info that can be used by the caller.
433
  CHECKED_STATUS StepDownMasterLeader(tserver::TabletServerErrorPB::Code* error_code);
434
435
  // Step down the master leader and wait for a new leader to be elected.
436
  CHECKED_STATUS StepDownMasterLeaderAndWaitForNewLeader();
437
438
  // Find out if the master service considers itself ready. Return status OK() implies it is ready.
439
  CHECKED_STATUS GetIsMasterLeaderServiceReady(ExternalMaster* master);
440
441
  // Timeout to be used for rpc operations.
442
0
  // Returns, by value, the 'timeout' field of the options stored in opts_.
  MonoDelta timeout() {
443
0
    return opts_.timeout;
444
0
  }
445
446
  // Start a leader election on this master.
447
  CHECKED_STATUS StartElection(ExternalMaster* master);
448
449
14
  // Returns the current value of the atomic running_ flag.
  bool running() const { return running_; }
450
451
8
  // Returns a copy of the data_root_ string.
  string data_root() const { return data_root_; }
452
453
  // Return true if the tserver has been marked as DEAD by master leader.
454
  Result<bool> is_ts_stale(
455
      int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier);
456
457
  CHECKED_STATUS WaitForMasterToMarkTSAlive(
458
      int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier);
459
460
  CHECKED_STATUS WaitForMasterToMarkTSDead(
461
      int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier);
462
463
 protected:
464
  FRIEND_TEST(MasterFailoverTest, TestKillAnyMaster);
465
466
  void ConfigureClientBuilder(client::YBClientBuilder* builder) override;
467
468
  Result<HostPort> DoGetLeaderMasterBoundRpcAddr() override;
469
470
  CHECKED_STATUS StartMasters();
471
472
  std::string GetBinaryPath(const std::string& binary) const;
473
  std::string GetDataPath(const std::string& daemon_id) const;
474
475
  CHECKED_STATUS DeduceBinRoot(std::string* ret);
476
  CHECKED_STATUS HandleOptions();
477
478
  std::string GetClusterDataDirName() const;
479
480
  // Helper function to get a leader or (random) follower index
481
  Result<size_t> GetPeerMasterIndex(bool is_leader);
482
483
  // API to help update the cluster state (rpc ports)
484
  CHECKED_STATUS AddMaster(ExternalMaster* master);
485
  CHECKED_STATUS RemoveMaster(ExternalMaster* master);
486
487
  // Get the index of this master in the vector of masters. This might not be the insertion order as
488
  // we might have removed some masters within the vector.
489
  int GetIndexOfMaster(ExternalMaster* master) const;
490
491
  // Checks that the masters_ list and opts_ match in terms of the number of elements.
492
  CHECKED_STATUS CheckPortAndMasterSizes() const;
493
494
  // Return the list of opids for all masters in this cluster.
495
  CHECKED_STATUS GetLastOpIdForEachMasterPeer(
496
      const MonoDelta& timeout,
497
      consensus::OpIdType opid_type,
498
      std::vector<OpIdPB>* op_ids);
499
500
  // Ensure that the leader server is allowed to process a config change (by having at least one
501
  // commit in the current term as leader).
502
  CHECKED_STATUS WaitForLeaderToAllowChangeConfig(
503
      const string& uuid,
504
      consensus::ConsensusServiceProxy* leader_proxy);
505
506
  // Return master address for specified port.
507
  std::string MasterAddressForPort(uint16_t port) const;
508
509
  ExternalMiniClusterOptions opts_;
510
511
  // The root for binaries.
512
  std::string daemon_bin_path_;
513
514
  std::string data_root_;
515
516
  // This variable is incremented every time a new master is spawned (either in shell mode or create
517
  // mode). Avoids reusing an index of a killed/removed master. Useful for master side logging.
518
  size_t add_new_master_at_;
519
520
  std::vector<scoped_refptr<ExternalMaster> > masters_;
521
  std::vector<scoped_refptr<ExternalTabletServer> > tablet_servers_;
522
523
  rpc::Messenger* messenger_ = nullptr;
524
  std::unique_ptr<rpc::Messenger> messenger_holder_;
525
  std::unique_ptr<rpc::ProxyCache> proxy_cache_;
526
527
  std::vector<std::unique_ptr<FileLock>> free_port_file_locks_;
528
  std::atomic<bool> running_{false};
529
530
 private:
531
  DISALLOW_COPY_AND_ASSIGN(ExternalMiniCluster);
532
};
533
534
class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> {
535
 public:
536
  class StringListener {
537
   public:
538
    virtual void Handle(const GStringPiece& s) = 0;
539
4
    virtual ~StringListener() {}
540
  };
541
542
  ExternalDaemon(
543
      std::string daemon_id,
544
      rpc::Messenger* messenger,
545
      rpc::ProxyCache* proxy_cache,
546
      const std::string& exe,
547
      const std::string& root_dir,
548
      const std::vector<std::string>& data_dirs,
549
      const std::vector<std::string>& extra_flags);
550
551
  HostPort bound_rpc_hostport() const;
552
  HostPort bound_rpc_addr() const;
553
  HostPort bound_http_hostport() const;
554
  const NodeInstancePB& instance_id() const;
555
  const std::string& uuid() const;
556
557
  // Return the pid of the running process.  Causes a CHECK failure if the process is not running.
558
  pid_t pid() const;
559
560
3
  // Returns a reference to this daemon's immutable daemon_id_.
  const std::string& id() const { return daemon_id_; }
561
562
  // Sends a SIGSTOP signal to the daemon.
563
  CHECKED_STATUS Pause();
564
565
  // Sends a SIGCONT signal to the daemon.
566
  CHECKED_STATUS Resume();
567
568
  CHECKED_STATUS Kill(int signal);
569
570
  // Return true if we have explicitly shut down the process.
571
  bool IsShutdown() const;
572
573
  // Return true if the process is still running.  This may return false if the process crashed,
574
  // even if we didn't explicitly call Shutdown().
575
  bool IsProcessAlive() const;
576
577
  virtual void Shutdown();
578
579
32
  // Returns a copy of the data_dirs_ vector.
  std::vector<std::string> GetDataDirs() const { return data_dirs_; }
580
581
1
  // Returns a reference to the immutable exe_ string.
  // NOTE(review): presumably the path of the daemon executable -- confirm against the constructor.
  const std::string& exe() const { return exe_; }
582
583
100
  // Returns a reference to the immutable root_dir_ string.
  const std::string& GetRootDir() const { return root_dir_; }
584
585
  // Return a pointer to the flags used for this server on restart.  Modifying these flags will only
586
  // take effect on the next restart.
587
9
  std::vector<std::string>* mutable_flags() { return &extra_flags_; }
588
589
  // Retrieve the value of a given metric from this server. The metric must be of int64_t type.
590
  //
591
  // 'value_field' represents the particular field of the metric to be read.  For example, for a
592
  // counter or gauge, this should be 'value'. For a histogram, it might be 'total_count' or 'mean'.
593
  //
594
  // 'entity_id' may be NULL, in which case the first entity of the same type as 'entity_proto' will
595
  // be matched.
596
  Result<int64_t> GetInt64Metric(const MetricEntityPrototype* entity_proto,
597
                                 const char* entity_id,
598
                                 const MetricPrototype* metric_proto,
599
                                 const char* value_field) const;
600
601
  Result<int64_t> GetInt64Metric(const char* entity_proto_name,
602
                                 const char* entity_id,
603
                                 const char* metric_proto_name,
604
                                 const char* value_field) const;
605
606
  std::string LogPrefix();
607
608
  void SetLogListener(StringListener* listener);
609
610
  void RemoveLogListener(StringListener* listener);
611
612
  static Result<int64_t> GetInt64MetricFromHost(const HostPort& hostport,
613
                                                const MetricEntityPrototype* entity_proto,
614
                                                const char* entity_id,
615
                                                const MetricPrototype* metric_proto,
616
                                                const char* value_field);
617
618
  static Result<int64_t> GetInt64MetricFromHost(const HostPort& hostport,
619
                                                const char* entity_proto_name,
620
                                                const char* entity_id,
621
                                                const char* metric_proto_name,
622
                                                const char* value_field);
623
624
  // Get the current value of the flag for the given daemon.
625
  Result<std::string> GetFlag(const std::string& flag);
626
627
 protected:
628
  friend class RefCountedThreadSafe<ExternalDaemon>;
629
  virtual ~ExternalDaemon();
630
631
  CHECKED_STATUS StartProcess(const std::vector<std::string>& flags);
632
633
  virtual CHECKED_STATUS DeleteServerInfoPaths();
634
635
636
  virtual bool ServerInfoPathsExist();
637
638
  virtual CHECKED_STATUS BuildServerStateFromInfoPath();
639
640
  CHECKED_STATUS BuildServerStateFromInfoPath(
641
      const string& info_path, std::unique_ptr<server::ServerStatusPB>* server_status);
642
643
  string GetServerInfoPath();
644
645
  // In a code-coverage build, try to flush the coverage data to disk.
646
  // In a non-coverage build, this does nothing.
647
  void FlushCoverage();
648
649
  std::string ProcessNameAndPidStr();
650
651
  const std::string daemon_id_;
652
  rpc::Messenger* messenger_;
653
  rpc::ProxyCache* proxy_cache_;
654
  const std::string exe_;
655
  const std::string root_dir_;
656
  std::vector<std::string> data_dirs_;
657
  std::vector<std::string> extra_flags_;
658
659
  std::unique_ptr<Subprocess> process_;
660
661
  std::unique_ptr<server::ServerStatusPB> status_;
662
663
  // These capture the daemon's parameters and running ports and
664
  // are used to Restart() the daemon with the same parameters.
665
  HostPort bound_rpc_;
666
  HostPort bound_http_;
667
668
 private:
669
  class LogTailerThread;
670
671
  std::unique_ptr<LogTailerThread> stdout_tailer_thread_, stderr_tailer_thread_;
672
673
  DISALLOW_COPY_AND_ASSIGN(ExternalDaemon);
674
};
675
676
// Utility class for waiting for logging events.
677
class LogWaiter : public ExternalDaemon::StringListener {
678
 public:
679
  LogWaiter(ExternalDaemon* daemon, const std::string& string_to_wait);
680
681
  CHECKED_STATUS WaitFor(MonoDelta timeout);
682
0
  // Returns the current value of the atomic event_occurred_ flag.
  bool IsEventOccurred() { return event_occurred_; }
683
684
  ~LogWaiter();
685
686
 private:
687
  void Handle(const GStringPiece& s) override;
688
689
  ExternalDaemon* daemon_;
690
  std::atomic<bool> event_occurred_{false};
691
  std::string string_to_wait_;
692
};
693
694
// Resumes a daemon that was stopped with ExternalDaemon::Pause() upon
695
// exiting a scope.
696
class ScopedResumeExternalDaemon {
697
 public:
698
  // 'daemon' must remain valid for the lifetime of a
699
  // ScopedResumeExternalDaemon object.
700
  explicit ScopedResumeExternalDaemon(ExternalDaemon* daemon);
701
702
  // Resume 'daemon_'.
703
  ~ScopedResumeExternalDaemon();
704
705
 private:
706
  ExternalDaemon* daemon_;
707
708
  DISALLOW_COPY_AND_ASSIGN(ScopedResumeExternalDaemon);
709
};
710
711
712
class ExternalMaster : public ExternalDaemon {
713
 public:
714
  ExternalMaster(
715
    size_t master_index,
716
    rpc::Messenger* messenger,
717
    rpc::ProxyCache* proxy_cache,
718
    const std::string& exe,
719
    const std::string& data_dir,
720
    const std::vector<std::string>& extra_flags,
721
    const std::string& rpc_bind_address = "127.0.0.1:0",
722
    uint16_t http_port = 0,
723
    const std::string& master_addrs = "");
724
725
  CHECKED_STATUS Start(bool shell_mode = false);
726
727
  // Restarts the daemon. Requires that it has previously been shutdown.
728
  CHECKED_STATUS Restart();
729
730
 private:
731
  friend class RefCountedThreadSafe<ExternalMaster>;
732
  virtual ~ExternalMaster();
733
734
  // Used on start to create the cluster; must not be used on restart.
735
  const std::string rpc_bind_address_;
736
  const std::string master_addrs_;
737
  const uint16_t http_port_;
738
};
739
740
class ExternalTabletServer : public ExternalDaemon {
741
 public:
742
  ExternalTabletServer(
743
      size_t tablet_server_index, rpc::Messenger* messenger, rpc::ProxyCache* proxy_cache,
744
      const std::string& exe, const std::string& data_dir, uint16_t num_drives,
745
      std::string bind_host, uint16_t rpc_port, uint16_t http_port, uint16_t redis_rpc_port,
746
      uint16_t redis_http_port, uint16_t cql_rpc_port, uint16_t cql_http_port,
747
      uint16_t pgsql_rpc_port, uint16_t pgsql_http_port,
748
      const std::vector<HostPort>& master_addrs,
749
      const std::vector<std::string>& extra_flags);
750
751
  CHECKED_STATUS Start(
752
      bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy,
753
      bool set_proxy_addrs = true,
754
      std::vector<std::pair<string, string>> extra_flags = {});
755
756
  // Restarts the daemon. Requires that it has previously been shutdown.
757
  CHECKED_STATUS Restart(
758
      bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy,
759
      std::vector<std::pair<string, string>> flags = {});
760
761
  CHECKED_STATUS SetNumDrives(uint16_t num_drives);
762
763
  // IP addresses to bind to.
764
214
  const std::string& bind_host() const {
765
214
    return bind_host_;
766
214
  }
767
768
  // Assigned ports.
769
106
  uint16_t rpc_port() const {
770
106
    return rpc_port_;
771
106
  }
772
106
  uint16_t http_port() const {
773
106
    return http_port_;
774
106
  }
775
776
106
  uint16_t pgsql_rpc_port() const {
777
106
    return pgsql_rpc_port_;
778
106
  }
779
106
  uint16_t pgsql_http_port() const {
780
106
    return pgsql_http_port_;
781
106
  }
782
783
106
  uint16_t redis_rpc_port() const {
784
106
    return redis_rpc_port_;
785
106
  }
786
106
  uint16_t redis_http_port() const {
787
106
    return redis_http_port_;
788
106
  }
789
790
172
  uint16_t cql_rpc_port() const {
791
172
    return cql_rpc_port_;
792
172
  }
793
154
  uint16_t cql_http_port() const {
794
154
    return cql_http_port_;
795
154
  }
796
797
  Result<int64_t> GetInt64CQLMetric(const MetricEntityPrototype* entity_proto,
798
                                    const char* entity_id,
799
                                    const MetricPrototype* metric_proto,
800
                                    const char* value_field) const;
801
802
 protected:
803
  CHECKED_STATUS DeleteServerInfoPaths() override;
804
805
  bool ServerInfoPathsExist() override;
806
807
  CHECKED_STATUS BuildServerStateFromInfoPath() override;
808
809
 private:
810
  string GetCQLServerInfoPath();
811
  const std::string master_addrs_;
812
  const std::string bind_host_;
813
  const uint16_t rpc_port_;
814
  const uint16_t http_port_;
815
  const uint16_t redis_rpc_port_;
816
  const uint16_t redis_http_port_;
817
  const uint16_t pgsql_rpc_port_;
818
  const uint16_t pgsql_http_port_;
819
  const uint16_t cql_rpc_port_;
820
  const uint16_t cql_http_port_;
821
  uint16_t num_drives_;
822
  bool start_cql_proxy_ = true;
823
  std::unique_ptr<server::ServerStatusPB> cqlserver_status_;
824
825
  friend class RefCountedThreadSafe<ExternalTabletServer>;
826
  virtual ~ExternalTabletServer();
827
};
828
829
// Custom functor for predicate based comparison with the master list.
830
struct MasterComparator {
831
17
  explicit MasterComparator(ExternalMaster* master) : master_(master) { }
832
833
  // We look for the exact master match. Since it is possible to stop/restart master on a given
834
  // host/port, we do not want a stale master pointer input to match a newer master.
835
41
  bool operator()(const scoped_refptr<ExternalMaster>& other) const {
836
41
    return master_ == other.get();
837
41
  }
838
839
 private:
840
  const ExternalMaster* master_;
841
};
842
843
template <class T>
844
41.6k
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
41.6k
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
41.6k
}
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master14MasterDdlProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
22
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
22
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
22
}
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master18MasterClusterProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
40.9k
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
40.9k
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
40.9k
}
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master17MasterClientProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
391
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
391
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
391
}
_ZN2yb19ExternalMiniCluster8GetProxyINS_7tserver29TabletServerAdminServiceProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
1
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
1
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
1
}
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master16MasterAdminProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
165
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
165
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
165
}
Unexecuted instantiation: _ZN2yb19ExternalMiniCluster8GetProxyINS_7tserver24TabletServerServiceProxyEEET_PNS_14ExternalDaemonE
_ZN2yb19ExternalMiniCluster8GetProxyINS_9consensus21ConsensusServiceProxyEEET_PNS_14ExternalDaemonE
Line
Count
Source
844
155
T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) {
845
155
  return T(proxy_cache_.get(), daemon->bound_rpc_addr());
846
155
}
847
848
CHECKED_STATUS RestartAllMasters(ExternalMiniCluster* cluster);
849
850
}  // namespace yb
851
#endif  // YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_