/Users/deen/code/yugabyte-db/src/yb/integration-tests/external_mini_cluster.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #ifndef YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_ |
33 | | #define YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_ |
34 | | |
35 | | #include <string.h> |
36 | | #include <sys/types.h> |
37 | | |
38 | | #include <functional> |
39 | | #include <memory> |
40 | | #include <string> |
41 | | #include <thread> |
42 | | #include <vector> |
43 | | |
44 | | #include <gtest/gtest_prod.h> |
45 | | |
46 | | #include "yb/common/entity_ids_types.h" |
47 | | |
48 | | #include "yb/consensus/consensus_fwd.h" |
49 | | #include "yb/consensus/consensus_types.pb.h" |
50 | | #include "yb/consensus/metadata.pb.h" |
51 | | |
52 | | #include "yb/gutil/macros.h" |
53 | | #include "yb/gutil/ref_counted.h" |
54 | | #include "yb/gutil/stringprintf.h" |
55 | | |
56 | | #include "yb/integration-tests/mini_cluster_base.h" |
57 | | |
58 | | #include "yb/server/server_fwd.h" |
59 | | |
60 | | #include "yb/tserver/tserver_fwd.h" |
61 | | #include "yb/tserver/tserver_types.pb.h" |
62 | | |
63 | | #include "yb/util/status_fwd.h" |
64 | | #include "yb/util/monotime.h" |
65 | | #include "yb/util/net/net_util.h" |
66 | | #include "yb/util/status.h" |
67 | | #include "yb/util/tsan_util.h" |
68 | | |
69 | | namespace yb { |
70 | | |
71 | | class ExternalDaemon; |
72 | | class ExternalMaster; |
73 | | class ExternalTabletServer; |
74 | | class HostPort; |
75 | | class MetricPrototype; |
76 | | class MetricEntityPrototype; |
77 | | class OpIdPB; |
78 | | class NodeInstancePB; |
79 | | class Subprocess; |
80 | | |
81 | | namespace server { |
82 | | class ServerStatusPB; |
83 | | } // namespace server |
84 | | |
85 | | using yb::consensus::ChangeConfigType; |
86 | | |
87 | | struct ExternalMiniClusterOptions { |
88 | | |
89 | | // Number of masters to start. |
90 | | size_t num_masters = 1; |
91 | | |
92 | | // Number of TS to start. |
93 | | size_t num_tablet_servers = 1; |
94 | | |
95 | | // Number of drives to use on TS. |
96 | | int num_drives = 1; |
97 | | |
98 | | // If more than one master is specified, list of ports for the masters in a consensus |
99 | | // configuration. Port at index 0 is used for the leader master. |
100 | | // Default: one entry as num_masters defaults to 1. Value 0 implies that a free port |
101 | | // is picked at runtime. |
102 | | std::vector<uint16_t> master_rpc_ports = { 0 }; |
103 | | |
104 | | static constexpr bool kDefaultStartCqlProxy = true; |
105 | | #if defined(__APPLE__) |
106 | | static constexpr bool kBindToUniqueLoopbackAddress = false; |
107 | | #else |
108 | | static constexpr bool kBindToUniqueLoopbackAddress = true; |
109 | | #endif |
110 | | |
111 | | bool enable_ysql = false; |
112 | | |
113 | | // Directory in which to store data. |
114 | | // Default: "", which auto-generates a unique path for this cluster. |
115 | | std::string data_root{}; |
116 | | |
117 | | // Set data_root_counter to a non-negative number if your test run needs to create a new and empty |
118 | | // cluster every time ExternalMiniCluster() is constructed. |
119 | | // - During a test run, data_root will stay the same until the run is finished, so recreating a |
120 | | // brand new cluster from scratch is not possible because the database location stays the same. |
121 | | // - When data_root_counter is non-negative, a new "data_root" is generated every time |
122 | | // ExternalMiniCluster() is constructed. |
123 | | int data_root_counter = -1; |
124 | | |
125 | | // If true, binds each tablet server to a different loopback address. This affects the server's |
126 | | // RPC server, and also forces the server to only use this IP address for outgoing socket |
127 | | // connections as well. This allows the use of iptables on the localhost to simulate network |
128 | | // partitions. |
129 | | // |
130 | | // The addresses used are 127.<A>.<B>.<C> where: |
131 | | // - <A,B> are the high and low bytes of the pid of the process running the minicluster (not the |
132 | | // daemon itself). |
133 | | // - <C> is the index of the server within this minicluster. |
134 | | // |
135 | | // This requires that the system is set up such that processes may bind to any IP address in the |
136 | | // localhost netblock (127.0.0.0/8). This seems to be the case on common Linux distributions. You |
137 | | // can verify by running 'ip addr | grep 127.0.0.1' and checking that the address is listed as |
138 | | // '127.0.0.1/8'. |
139 | | // |
140 | | // This option is disabled by default on OS X. |
141 | | // Enabling of this option on OS X means usage of default IPs: 127.0.0.x. |
142 | | bool bind_to_unique_loopback_addresses = kBindToUniqueLoopbackAddress; |
143 | | |
144 | | // If true, second and other TSes will use the same ports as the first TS uses. |
145 | | // Else every TS will allocate unique ports for itself. |
146 | | // The option is applicable ONLY with bind_to_unique_loopback_addresses == true. |
147 | | bool use_same_ts_ports = false; |
148 | | |
149 | | // The path where the yb daemons should be run from. |
150 | | // Default: "../bin", which points to the path where non-test executables are located. |
151 | | // This works for unit tests, since they all end up in build/latest/test-<subproject_name>. |
152 | | std::string daemon_bin_path{}; |
153 | | |
154 | | // Extra flags for tablet servers and masters respectively. |
155 | | // |
156 | | // In these flags, you may use the special string '${index}' which will |
157 | | // be substituted with the index of the tablet server or master. |
158 | | std::vector<std::string> extra_tserver_flags; |
159 | | std::vector<std::string> extra_master_flags; |
160 | | |
161 | | // Default timeout for operations involving RPC's, when none provided in the API. |
162 | | // Default : 10sec |
163 | | MonoDelta timeout = MonoDelta::FromSeconds(10); |
164 | | |
165 | | // If true, logs will be written to both stderr and a file |
166 | | bool log_to_file = false; |
167 | | |
168 | | // Use even IPs for cluster, like we have for MiniCluster. |
169 | | // So it could be used with test certificates. |
170 | | bool use_even_ips = false; |
171 | | |
172 | | // Cluster id used to create fs path when we create tests with multiple clusters. |
173 | | std::string cluster_id; |
174 | | |
175 | | CHECKED_STATUS RemovePort(const uint16_t port); |
176 | | CHECKED_STATUS AddPort(const uint16_t port); |
177 | | |
178 | | // Make sure we have the correct number of master RPC ports specified. |
179 | | void AdjustMasterRpcPorts(); |
180 | | }; |
181 | | |
182 | | // A mini-cluster made up of subprocesses running each of the daemons separately. This is useful for |
183 | | // black-box or grey-box failure testing purposes -- it provides the ability to forcibly kill or |
184 | | // stop particular cluster participants, which isn't feasible in the normal MiniCluster. On the |
185 | | // other hand, there is little access to inspect the internal state of the daemons. |
186 | | class ExternalMiniCluster : public MiniClusterBase { |
187 | | public: |
188 | | typedef ExternalMiniClusterOptions Options; |
189 | | |
190 | | // Mode to which node types a certain action (like Shutdown()) should apply. |
191 | | enum NodeSelectionMode { |
192 | | TS_ONLY, |
193 | | ALL |
194 | | }; |
195 | | |
196 | | // Threshold of the number of retries for master related rpc calls. |
197 | | static const int kMaxRetryIterations = 100; |
198 | | |
199 | | explicit ExternalMiniCluster(const ExternalMiniClusterOptions& opts); |
200 | | ~ExternalMiniCluster(); |
201 | | |
202 | | // Start the cluster. |
203 | | CHECKED_STATUS Start(rpc::Messenger* messenger = nullptr); |
204 | | |
205 | | // Restarts the cluster. Requires that it has been Shutdown() first. |
206 | | CHECKED_STATUS Restart(); |
207 | | |
208 | | // Like Start(), but performs initialization synchronously, i.e. this will wait for |
209 | | // all TS's to be started and initialized. Tests should use this if they interact with tablets |
210 | | // immediately after Start(); |
211 | | CHECKED_STATUS StartSync(); |
212 | | |
213 | | // Add a new TS to the cluster. The new TS is started. Requires that the master is already |
214 | | // running. |
215 | | CHECKED_STATUS AddTabletServer( |
216 | | bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy, |
217 | | const std::vector<std::string>& extra_flags = {}, |
218 | | int num_drives = -1); |
219 | | |
220 | | // Shuts down the whole cluster or part of it, depending on the selected 'mode'. Currently, this |
221 | | // uses SIGKILL on each daemon for a non-graceful shutdown. |
222 | | void Shutdown(NodeSelectionMode mode = ALL); |
223 | | |
224 | | // Waits for the master to finish running initdb. |
225 | | CHECKED_STATUS WaitForInitDb(); |
226 | | |
227 | | // Return the IP address that the tablet server with the given index will bind to. If |
228 | | // options.bind_to_unique_loopback_addresses is false, this will be 127.0.0.1. Otherwise, it is |
229 | | // another IP in the local netblock. |
230 | | std::string GetBindIpForTabletServer(size_t index) const; |
231 | | |
232 | | // Return a pointer to the running leader master. This may be NULL |
233 | | // if the cluster is not started. |
234 | | // WARNING: If leader master is not elected after kMaxRetryIterations, first available master |
235 | | // will be returned. |
236 | | ExternalMaster* GetLeaderMaster(); |
237 | | |
238 | | // Perform an RPC to determine the leader of the external mini cluster. Set 'index' to the leader |
239 | | // master's index (for calls to master() below). |
240 | | // |
241 | | // NOTE: if a leader election occurs after this method is executed, the last result may not be |
242 | | // valid. |
243 | | Result<size_t> GetLeaderMasterIndex(); |
244 | | |
245 | | // Return a non-leader master index |
246 | | Result<size_t> GetFirstNonLeaderMasterIndex(); |
247 | | |
248 | | Result<size_t> GetTabletLeaderIndex(const std::string& tablet_id); |
249 | | |
250 | | // The comma separated string of the master addresses host/ports from current list of masters. |
251 | | string GetMasterAddresses() const; |
252 | | |
253 | | string GetTabletServerAddresses() const; |
254 | | |
255 | | string GetTabletServerHTTPAddresses() const; |
256 | | |
257 | | // Start a new master with `peer_addrs` as the master_addresses parameter. |
258 | | Result<ExternalMaster *> StartMasterWithPeers(const string& peer_addrs); |
259 | | |
260 | | // Send a ping request to the rpc port of the master. Return OK() only if it is reachable. |
261 | | CHECKED_STATUS PingMaster(ExternalMaster* master) const; |
262 | | |
263 | | // Add a Tablet Server to the blacklist. |
264 | | CHECKED_STATUS AddTServerToBlacklist(ExternalMaster* master, ExternalTabletServer* ts); |
265 | | |
266 | | // Returns the min_num_replicas corresponding to a PlacementBlockPB. |
267 | | CHECKED_STATUS GetMinReplicaCountForPlacementBlock( |
268 | | ExternalMaster* master, |
269 | | const string& cloud, const string& region, const string& zone, |
270 | | int* min_num_replicas); |
271 | | // Add a Tablet Server to the leader blacklist. |
272 | | CHECKED_STATUS AddTServerToLeaderBlacklist(ExternalMaster* master, ExternalTabletServer* ts); |
273 | | |
274 | | // Empty blacklist. |
275 | | CHECKED_STATUS ClearBlacklist(ExternalMaster* master); |
276 | | |
277 | | // Starts a new master and returns the handle of the new master object on success. Not thread |
278 | | // safe for now. We could move this to a static function outside External Mini Cluster, but |
279 | | // keeping it here for now as it is currently used only in conjunction with EMC. If there are any |
280 | | // errors and if a new master could not be spawned, it will crash internally. |
281 | | void StartShellMaster(ExternalMaster** new_master); |
282 | | |
283 | | // Performs an add or remove from the existing config of this EMC, of the given master. |
284 | | // When use_hostport is true, the master is deemed as dead and its UUID is not used. |
285 | | CHECKED_STATUS ChangeConfig(ExternalMaster* master, |
286 | | ChangeConfigType type, |
287 | | consensus::PeerMemberType member_type = consensus::PeerMemberType::PRE_VOTER, |
288 | | bool use_hostport = false); |
289 | | |
290 | | // Performs an RPC to the given master to get the number of masters it is tracking in-memory. |
291 | | CHECKED_STATUS GetNumMastersAsSeenBy(ExternalMaster* master, int* num_peers); |
292 | | |
293 | | // Get the last committed opid for the current leader master. |
294 | | CHECKED_STATUS GetLastOpIdForLeader(OpIdPB* opid); |
295 | | |
296 | | // The leader master sometimes does not commit the config in time on first setup, causing |
297 | | // CheckHasCommittedOpInCurrentTermUnlocked check - that the current term should have had at least |
298 | | // one commit - to fail. This API waits for the leader's commit term to move ahead by one. |
299 | | CHECKED_STATUS WaitForLeaderCommitTermAdvance(); |
300 | | |
301 | | // This API waits for the commit indices of all the master peers to reach the target index. |
302 | | CHECKED_STATUS WaitForMastersToCommitUpTo(int64_t target_index); |
303 | | |
304 | | // If this cluster is configured for a single non-distributed master, return the single master or |
305 | | // NULL if the master is not started. Exits with a CHECK failure if there are multiple masters. |
306 | | ExternalMaster* master() const; |
307 | | |
308 | | // Return master at 'idx' or NULL if the master at 'idx' has not been started. |
309 | | ExternalMaster* master(size_t idx) const; |
310 | | |
311 | | ExternalTabletServer* tablet_server(size_t idx) const; |
312 | | |
313 | | // Return ExternalTabletServer given its UUID. If not found, returns NULL. |
314 | | ExternalTabletServer* tablet_server_by_uuid(const std::string& uuid) const; |
315 | | |
316 | | // Return the index of the ExternalTabletServer that has the given 'uuid', or -1 if no such UUID |
317 | | // can be found. |
318 | | int tablet_server_index_by_uuid(const std::string& uuid) const; |
319 | | |
320 | | // Return all masters. |
321 | | std::vector<ExternalMaster*> master_daemons() const; |
322 | | |
323 | | // Return all tablet servers and masters. |
324 | | std::vector<ExternalDaemon*> daemons() const; |
325 | | |
326 | | // Return all tablet servers. |
327 | | std::vector<ExternalTabletServer*> tserver_daemons() const; |
328 | | |
329 | | // Get tablet server host. |
330 | | HostPort pgsql_hostport(int node_index) const; |
331 | | |
332 | 1.76k | size_t num_tablet_servers() const { |
333 | 1.76k | return tablet_servers_.size(); |
334 | 1.76k | } |
335 | | |
336 | 1.70k | size_t num_masters() const { |
337 | 1.70k | return masters_.size(); |
338 | 1.70k | } |
339 | | |
340 | | // Return the client messenger used by the ExternalMiniCluster. |
341 | | rpc::Messenger* messenger(); |
342 | | |
343 | 395k | rpc::ProxyCache& proxy_cache() { |
344 | 395k | return *proxy_cache_; |
345 | 395k | } |
346 | | |
347 | | // Get the master leader consensus proxy. |
348 | | consensus::ConsensusServiceProxy GetLeaderConsensusProxy(); |
349 | | |
350 | | // Get the given master's consensus proxy. |
351 | | consensus::ConsensusServiceProxy GetConsensusProxy(ExternalDaemon* daemon); |
352 | | |
353 | | template <class T> |
354 | | T GetProxy(ExternalDaemon* daemon); |
355 | | |
356 | | template <class T> |
357 | 0 | T GetTServerProxy(size_t i) { |
358 | 0 | return GetProxy<T>(tablet_server(i)); |
359 | 0 | } |
360 | | |
361 | | template <class T> |
362 | 550 | T GetMasterProxy() { |
363 | 550 | CHECK_EQ(masters_.size(), 1); |
364 | 550 | return GetMasterProxy<T>(0); |
365 | 550 | } _ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master14MasterDdlProxyEEET_v Line | Count | Source | 362 | 22 | T GetMasterProxy() { | 363 | 22 | CHECK_EQ(masters_.size(), 1); | 364 | 22 | return GetMasterProxy<T>(0); | 365 | 22 | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master17MasterClientProxyEEET_v Line | Count | Source | 362 | 273 | T GetMasterProxy() { | 363 | 273 | CHECK_EQ(masters_.size(), 1); | 364 | 273 | return GetMasterProxy<T>(0); | 365 | 273 | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master18MasterClusterProxyEEET_v Line | Count | Source | 362 | 186 | T GetMasterProxy() { | 363 | 186 | CHECK_EQ(masters_.size(), 1); | 364 | 186 | return GetMasterProxy<T>(0); | 365 | 186 | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master16MasterAdminProxyEEET_v Line | Count | Source | 362 | 69 | T GetMasterProxy() { | 363 | 69 | CHECK_EQ(masters_.size(), 1); | 364 | 69 | return GetMasterProxy<T>(0); | 365 | 69 | } |
|
366 | | |
367 | | template <class T> |
368 | 41.2k | T GetMasterProxy(size_t idx) { |
369 | 41.2k | CHECK_LT(idx, masters_.size()); |
370 | 41.2k | return GetProxy<T>(master(idx)); |
371 | 41.2k | } _ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master14MasterDdlProxyEEET_m Line | Count | Source | 368 | 22 | T GetMasterProxy(size_t idx) { | 369 | 22 | CHECK_LT(idx, masters_.size()); | 370 | 22 | return GetProxy<T>(master(idx)); | 371 | 22 | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master18MasterClusterProxyEEET_m Line | Count | Source | 368 | 40.8k | T GetMasterProxy(size_t idx) { | 369 | 40.8k | CHECK_LT(idx, masters_.size()); | 370 | 40.8k | return GetProxy<T>(master(idx)); | 371 | 40.8k | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master17MasterClientProxyEEET_m Line | Count | Source | 368 | 273 | T GetMasterProxy(size_t idx) { | 369 | 273 | CHECK_LT(idx, masters_.size()); | 370 | 273 | return GetProxy<T>(master(idx)); | 371 | 273 | } |
_ZN2yb19ExternalMiniCluster14GetMasterProxyINS_6master16MasterAdminProxyEEET_m Line | Count | Source | 368 | 165 | T GetMasterProxy(size_t idx) { | 369 | 165 | CHECK_LT(idx, masters_.size()); | 370 | 165 | return GetProxy<T>(master(idx)); | 371 | 165 | } |
|
372 | | |
373 | | template <class T> |
374 | 236 | T GetLeaderMasterProxy() { |
375 | 236 | return GetProxy<T>(GetLeaderMaster()); |
376 | 236 | } _ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master18MasterClusterProxyEEET_v Line | Count | Source | 374 | 118 | T GetLeaderMasterProxy() { | 375 | 118 | return GetProxy<T>(GetLeaderMaster()); | 376 | 118 | } |
_ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master17MasterClientProxyEEET_v Line | Count | Source | 374 | 118 | T GetLeaderMasterProxy() { | 375 | 118 | return GetProxy<T>(GetLeaderMaster()); | 376 | 118 | } |
Unexecuted instantiation: _ZN2yb19ExternalMiniCluster20GetLeaderMasterProxyINS_6master14MasterDdlProxyEEET_v |
377 | | |
378 | | // Returns a generic proxy to the master at 'idx'. Requires that the master at 'idx' is running. |
379 | | std::shared_ptr<server::GenericServiceProxy> master_generic_proxy(int idx) const; |
380 | | // Same as above, using the bound rpc address. |
381 | | std::shared_ptr<server::GenericServiceProxy> master_generic_proxy(const HostPort& bound_addr) |
382 | | const; |
383 | | |
384 | | // Wait until the number of registered tablet servers reaches the given count on at least one of |
385 | | // the running masters. Returns Status::TimedOut if the desired count is not achieved with the |
386 | | // given timeout. |
387 | | CHECKED_STATUS WaitForTabletServerCount(size_t count, const MonoDelta& timeout); |
388 | | |
389 | | // Runs gtest assertions that no servers have crashed. |
390 | | void AssertNoCrashes(); |
391 | | |
392 | | // Wait until all tablets on the given tablet server are in 'RUNNING' |
393 | | // state. |
394 | | CHECKED_STATUS WaitForTabletsRunning(ExternalTabletServer* ts, const MonoDelta& timeout); |
395 | | |
396 | | Result<tserver::ListTabletsResponsePB> ListTablets(ExternalTabletServer* ts); |
397 | | |
398 | | Result<std::vector<tserver::ListTabletsForTabletServerResponsePB_Entry>> GetTablets( |
399 | | ExternalTabletServer* ts); |
400 | | |
401 | | Result<std::vector<TabletId>> GetTabletIds(ExternalTabletServer* ts); |
402 | | |
403 | | Result<tserver::GetSplitKeyResponsePB> GetSplitKey(const std::string& tablet_id); |
404 | | |
405 | | CHECKED_STATUS FlushTabletsOnSingleTServer( |
406 | | ExternalTabletServer* ts, const std::vector<yb::TabletId> tablet_ids, |
407 | | bool is_compaction); |
408 | | |
409 | | CHECKED_STATUS WaitForTSToCrash(const ExternalTabletServer* ts, |
410 | | const MonoDelta& timeout = MonoDelta::FromSeconds(60)); |
411 | | |
412 | | CHECKED_STATUS WaitForTSToCrash( |
413 | | size_t index, const MonoDelta& timeout = MonoDelta::FromSeconds(60)); |
414 | | |
415 | | // Sets the given flag on the given daemon, which must be running. |
416 | | // |
417 | | // This uses the 'force' flag on the RPC so that, even if the flag is considered unsafe to change |
418 | | // at runtime, it is changed. |
419 | | CHECKED_STATUS SetFlag(ExternalDaemon* daemon, |
420 | | const std::string& flag, |
421 | | const std::string& value); |
422 | | |
423 | | // Sets the given flag on all masters. |
424 | | CHECKED_STATUS SetFlagOnMasters(const std::string& flag, const std::string& value); |
425 | | // Sets the given flag on all tablet servers. |
426 | | CHECKED_STATUS SetFlagOnTServers(const std::string& flag, const std::string& value); |
427 | | |
428 | | // Allocates a free port and stores a file lock guarding access to that port into an internal |
429 | | // array of file locks. |
430 | | uint16_t AllocateFreePort(); |
431 | | |
432 | | // Step down the master leader. error_code tracks rpc error info that can be used by the caller. |
433 | | CHECKED_STATUS StepDownMasterLeader(tserver::TabletServerErrorPB::Code* error_code); |
434 | | |
435 | | // Step down the master leader and wait for a new leader to be elected. |
436 | | CHECKED_STATUS StepDownMasterLeaderAndWaitForNewLeader(); |
437 | | |
438 | | // Find out if the master service considers itself ready. Return status OK() implies it is ready. |
439 | | CHECKED_STATUS GetIsMasterLeaderServiceReady(ExternalMaster* master); |
440 | | |
441 | | // Timeout to be used for rpc operations. |
442 | 0 | MonoDelta timeout() { |
443 | 0 | return opts_.timeout; |
444 | 0 | } |
445 | | |
446 | | // Start a leader election on this master. |
447 | | CHECKED_STATUS StartElection(ExternalMaster* master); |
448 | | |
449 | 14 | bool running() const { return running_; } |
450 | | |
451 | 8 | string data_root() const { return data_root_; } |
452 | | |
453 | | // Return true if the tserver has been marked as DEAD by master leader. |
454 | | Result<bool> is_ts_stale( |
455 | | int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier); |
456 | | |
457 | | CHECKED_STATUS WaitForMasterToMarkTSAlive( |
458 | | int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier); |
459 | | |
460 | | CHECKED_STATUS WaitForMasterToMarkTSDead( |
461 | | int ts_idx, MonoDelta deadline = MonoDelta::FromSeconds(120) * kTimeMultiplier); |
462 | | |
463 | | protected: |
464 | | FRIEND_TEST(MasterFailoverTest, TestKillAnyMaster); |
465 | | |
466 | | void ConfigureClientBuilder(client::YBClientBuilder* builder) override; |
467 | | |
468 | | Result<HostPort> DoGetLeaderMasterBoundRpcAddr() override; |
469 | | |
470 | | CHECKED_STATUS StartMasters(); |
471 | | |
472 | | std::string GetBinaryPath(const std::string& binary) const; |
473 | | std::string GetDataPath(const std::string& daemon_id) const; |
474 | | |
475 | | CHECKED_STATUS DeduceBinRoot(std::string* ret); |
476 | | CHECKED_STATUS HandleOptions(); |
477 | | |
478 | | std::string GetClusterDataDirName() const; |
479 | | |
480 | | // Helper function to get a leader or (random) follower index |
481 | | Result<size_t> GetPeerMasterIndex(bool is_leader); |
482 | | |
483 | | // API to help update the cluster state (rpc ports) |
484 | | CHECKED_STATUS AddMaster(ExternalMaster* master); |
485 | | CHECKED_STATUS RemoveMaster(ExternalMaster* master); |
486 | | |
487 | | // Get the index of this master in the vector of masters. This might not be the insertion order as |
488 | | // we might have removed some masters within the vector. |
489 | | int GetIndexOfMaster(ExternalMaster* master) const; |
490 | | |
491 | | // Checks that the masters_ list and opts_ match in terms of the number of elements. |
492 | | CHECKED_STATUS CheckPortAndMasterSizes() const; |
493 | | |
494 | | // Return the list of opids for all masters in this cluster. |
495 | | CHECKED_STATUS GetLastOpIdForEachMasterPeer( |
496 | | const MonoDelta& timeout, |
497 | | consensus::OpIdType opid_type, |
498 | | std::vector<OpIdPB>* op_ids); |
499 | | |
500 | | // Ensure that the leader server is allowed to process a config change (by having at least one |
501 | | // commit in the current term as leader). |
502 | | CHECKED_STATUS WaitForLeaderToAllowChangeConfig( |
503 | | const string& uuid, |
504 | | consensus::ConsensusServiceProxy* leader_proxy); |
505 | | |
506 | | // Return master address for specified port. |
507 | | std::string MasterAddressForPort(uint16_t port) const; |
508 | | |
509 | | ExternalMiniClusterOptions opts_; |
510 | | |
511 | | // The root for binaries. |
512 | | std::string daemon_bin_path_; |
513 | | |
514 | | std::string data_root_; |
515 | | |
516 | | // This variable is incremented every time a new master is spawned (either in shell mode or create |
517 | | // mode). Avoids reusing an index of a killed/removed master. Useful for master side logging. |
518 | | size_t add_new_master_at_; |
519 | | |
520 | | std::vector<scoped_refptr<ExternalMaster> > masters_; |
521 | | std::vector<scoped_refptr<ExternalTabletServer> > tablet_servers_; |
522 | | |
523 | | rpc::Messenger* messenger_ = nullptr; |
524 | | std::unique_ptr<rpc::Messenger> messenger_holder_; |
525 | | std::unique_ptr<rpc::ProxyCache> proxy_cache_; |
526 | | |
527 | | std::vector<std::unique_ptr<FileLock>> free_port_file_locks_; |
528 | | std::atomic<bool> running_{false}; |
529 | | |
530 | | private: |
531 | | DISALLOW_COPY_AND_ASSIGN(ExternalMiniCluster); |
532 | | }; |
533 | | |
534 | | class ExternalDaemon : public RefCountedThreadSafe<ExternalDaemon> { |
535 | | public: |
536 | | class StringListener { |
537 | | public: |
538 | | virtual void Handle(const GStringPiece& s) = 0; |
539 | 4 | virtual ~StringListener() {} |
540 | | }; |
541 | | |
542 | | ExternalDaemon( |
543 | | std::string daemon_id, |
544 | | rpc::Messenger* messenger, |
545 | | rpc::ProxyCache* proxy_cache, |
546 | | const std::string& exe, |
547 | | const std::string& root_dir, |
548 | | const std::vector<std::string>& data_dirs, |
549 | | const std::vector<std::string>& extra_flags); |
550 | | |
551 | | HostPort bound_rpc_hostport() const; |
552 | | HostPort bound_rpc_addr() const; |
553 | | HostPort bound_http_hostport() const; |
554 | | const NodeInstancePB& instance_id() const; |
555 | | const std::string& uuid() const; |
556 | | |
557 | | // Return the pid of the running process. Causes a CHECK failure if the process is not running. |
558 | | pid_t pid() const; |
559 | | |
560 | 3 | const std::string& id() const { return daemon_id_; } |
561 | | |
562 | | // Sends a SIGSTOP signal to the daemon. |
563 | | CHECKED_STATUS Pause(); |
564 | | |
565 | | // Sends a SIGCONT signal to the daemon. |
566 | | CHECKED_STATUS Resume(); |
567 | | |
568 | | CHECKED_STATUS Kill(int signal); |
569 | | |
570 | | // Return true if we have explicitly shut down the process. |
571 | | bool IsShutdown() const; |
572 | | |
573 | | // Return true if the process is still running. This may return false if the process crashed, |
574 | | // even if we didn't explicitly call Shutdown(). |
575 | | bool IsProcessAlive() const; |
576 | | |
577 | | virtual void Shutdown(); |
578 | | |
579 | 32 | std::vector<std::string> GetDataDirs() const { return data_dirs_; } |
580 | | |
581 | 1 | const std::string& exe() const { return exe_; } |
582 | | |
583 | 100 | const std::string& GetRootDir() const { return root_dir_; } |
584 | | |
585 | | // Return a pointer to the flags used for this server on restart. Modifying these flags will only |
586 | | // take effect on the next restart. |
587 | 9 | std::vector<std::string>* mutable_flags() { return &extra_flags_; } |
588 | | |
589 | | // Retrieve the value of a given metric from this server. The metric must be of int64_t type. |
590 | | // |
591 | | // 'value_field' represents the particular field of the metric to be read. For example, for a |
592 | | // counter or gauge, this should be 'value'. For a histogram, it might be 'total_count' or 'mean'. |
593 | | // |
594 | | // 'entity_id' may be NULL, in which case the first entity of the same type as 'entity_proto' will |
595 | | // be matched. |
596 | | Result<int64_t> GetInt64Metric(const MetricEntityPrototype* entity_proto, |
597 | | const char* entity_id, |
598 | | const MetricPrototype* metric_proto, |
599 | | const char* value_field) const; |
600 | | |
601 | | Result<int64_t> GetInt64Metric(const char* entity_proto_name, |
602 | | const char* entity_id, |
603 | | const char* metric_proto_name, |
604 | | const char* value_field) const; |
605 | | |
606 | | std::string LogPrefix(); |
607 | | |
608 | | void SetLogListener(StringListener* listener); |
609 | | |
610 | | void RemoveLogListener(StringListener* listener); |
611 | | |
612 | | static Result<int64_t> GetInt64MetricFromHost(const HostPort& hostport, |
613 | | const MetricEntityPrototype* entity_proto, |
614 | | const char* entity_id, |
615 | | const MetricPrototype* metric_proto, |
616 | | const char* value_field); |
617 | | |
618 | | static Result<int64_t> GetInt64MetricFromHost(const HostPort& hostport, |
619 | | const char* entity_proto_name, |
620 | | const char* entity_id, |
621 | | const char* metric_proto_name, |
622 | | const char* value_field); |
623 | | |
624 | | // Get the current value of the flag for the given daemon. |
625 | | Result<std::string> GetFlag(const std::string& flag); |
626 | | |
627 | | protected: |
628 | | friend class RefCountedThreadSafe<ExternalDaemon>; |
629 | | virtual ~ExternalDaemon(); |
630 | | |
631 | | CHECKED_STATUS StartProcess(const std::vector<std::string>& flags); |
632 | | |
633 | | virtual CHECKED_STATUS DeleteServerInfoPaths(); |
634 | | |
635 | | |
636 | | virtual bool ServerInfoPathsExist(); |
637 | | |
638 | | virtual CHECKED_STATUS BuildServerStateFromInfoPath(); |
639 | | |
640 | | CHECKED_STATUS BuildServerStateFromInfoPath( |
641 | | const string& info_path, std::unique_ptr<server::ServerStatusPB>* server_status); |
642 | | |
643 | | string GetServerInfoPath(); |
644 | | |
645 | | // In a code-coverage build, try to flush the coverage data to disk. |
646 | | // In a non-coverage build, this does nothing. |
647 | | void FlushCoverage(); |
648 | | |
649 | | std::string ProcessNameAndPidStr(); |
650 | | |
651 | | const std::string daemon_id_; |
652 | | rpc::Messenger* messenger_; |
653 | | rpc::ProxyCache* proxy_cache_; |
654 | | const std::string exe_; |
655 | | const std::string root_dir_; |
656 | | std::vector<std::string> data_dirs_; |
657 | | std::vector<std::string> extra_flags_; |
658 | | |
659 | | std::unique_ptr<Subprocess> process_; |
660 | | |
661 | | std::unique_ptr<server::ServerStatusPB> status_; |
662 | | |
663 | | // These capture the daemon's parameters and running ports and |
664 | | // are used to Restart() the daemon with the same parameters. |
665 | | HostPort bound_rpc_; |
666 | | HostPort bound_http_; |
667 | | |
668 | | private: |
669 | | class LogTailerThread; |
670 | | |
671 | | std::unique_ptr<LogTailerThread> stdout_tailer_thread_, stderr_tailer_thread_; |
672 | | |
673 | | DISALLOW_COPY_AND_ASSIGN(ExternalDaemon); |
674 | | }; |
675 | | |
676 | | // Utility class for waiting for logging events. |
677 | | class LogWaiter : public ExternalDaemon::StringListener { |
678 | | public: |
679 | | LogWaiter(ExternalDaemon* daemon, const std::string& string_to_wait); |
680 | | |
681 | | CHECKED_STATUS WaitFor(MonoDelta timeout); |
682 | 0 | bool IsEventOccurred() { return event_occurred_; } |
683 | | |
684 | | ~LogWaiter(); |
685 | | |
686 | | private: |
687 | | void Handle(const GStringPiece& s) override; |
688 | | |
689 | | ExternalDaemon* daemon_; |
690 | | std::atomic<bool> event_occurred_{false}; |
691 | | std::string string_to_wait_; |
692 | | }; |
693 | | |
694 | | // Resumes a daemon that was stopped with ExternalDaemon::Pause() upon |
695 | | // exiting a scope. |
696 | | class ScopedResumeExternalDaemon { |
697 | | public: |
698 | | // 'daemon' must remain valid for the lifetime of a |
699 | | // ScopedResumeExternalDaemon object. |
700 | | explicit ScopedResumeExternalDaemon(ExternalDaemon* daemon); |
701 | | |
702 | | // Resume 'daemon_'. |
703 | | ~ScopedResumeExternalDaemon(); |
704 | | |
705 | | private: |
706 | | ExternalDaemon* daemon_; |
707 | | |
708 | | DISALLOW_COPY_AND_ASSIGN(ScopedResumeExternalDaemon); |
709 | | }; |
710 | | |
711 | | |
712 | | class ExternalMaster : public ExternalDaemon { |
713 | | public: |
714 | | ExternalMaster( |
715 | | size_t master_index, |
716 | | rpc::Messenger* messenger, |
717 | | rpc::ProxyCache* proxy_cache, |
718 | | const std::string& exe, |
719 | | const std::string& data_dir, |
720 | | const std::vector<std::string>& extra_flags, |
721 | | const std::string& rpc_bind_address = "127.0.0.1:0", |
722 | | uint16_t http_port = 0, |
723 | | const std::string& master_addrs = ""); |
724 | | |
725 | | CHECKED_STATUS Start(bool shell_mode = false); |
726 | | |
727 | | // Restarts the daemon. Requires that it has previously been shutdown. |
728 | | CHECKED_STATUS Restart(); |
729 | | |
730 | | private: |
731 | | friend class RefCountedThreadSafe<ExternalMaster>; |
732 | | virtual ~ExternalMaster(); |
733 | | |
734 | | // Used on start to create the cluster; on restart, this should not be used! |
735 | | const std::string rpc_bind_address_; |
736 | | const std::string master_addrs_; |
737 | | const uint16_t http_port_; |
738 | | }; |
739 | | |
740 | | class ExternalTabletServer : public ExternalDaemon { |
741 | | public: |
742 | | ExternalTabletServer( |
743 | | size_t tablet_server_index, rpc::Messenger* messenger, rpc::ProxyCache* proxy_cache, |
744 | | const std::string& exe, const std::string& data_dir, uint16_t num_drives, |
745 | | std::string bind_host, uint16_t rpc_port, uint16_t http_port, uint16_t redis_rpc_port, |
746 | | uint16_t redis_http_port, uint16_t cql_rpc_port, uint16_t cql_http_port, |
747 | | uint16_t pgsql_rpc_port, uint16_t pgsql_http_port, |
748 | | const std::vector<HostPort>& master_addrs, |
749 | | const std::vector<std::string>& extra_flags); |
750 | | |
751 | | CHECKED_STATUS Start( |
752 | | bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy, |
753 | | bool set_proxy_addrs = true, |
754 | | std::vector<std::pair<string, string>> extra_flags = {}); |
755 | | |
756 | | // Restarts the daemon. Requires that it has previously been shutdown. |
757 | | CHECKED_STATUS Restart( |
758 | | bool start_cql_proxy = ExternalMiniClusterOptions::kDefaultStartCqlProxy, |
759 | | std::vector<std::pair<string, string>> flags = {}); |
760 | | |
761 | | CHECKED_STATUS SetNumDrives(uint16_t num_drives); |
762 | | |
763 | | // IP addresses to bind to. |
764 | 214 | const std::string& bind_host() const { |
765 | 214 | return bind_host_; |
766 | 214 | } |
767 | | |
768 | | // Assigned ports. |
769 | 106 | uint16_t rpc_port() const { |
770 | 106 | return rpc_port_; |
771 | 106 | } |
772 | 106 | uint16_t http_port() const { |
773 | 106 | return http_port_; |
774 | 106 | } |
775 | | |
776 | 106 | uint16_t pgsql_rpc_port() const { |
777 | 106 | return pgsql_rpc_port_; |
778 | 106 | } |
779 | 106 | uint16_t pgsql_http_port() const { |
780 | 106 | return pgsql_http_port_; |
781 | 106 | } |
782 | | |
783 | 106 | uint16_t redis_rpc_port() const { |
784 | 106 | return redis_rpc_port_; |
785 | 106 | } |
786 | 106 | uint16_t redis_http_port() const { |
787 | 106 | return redis_http_port_; |
788 | 106 | } |
789 | | |
790 | 172 | uint16_t cql_rpc_port() const { |
791 | 172 | return cql_rpc_port_; |
792 | 172 | } |
793 | 154 | uint16_t cql_http_port() const { |
794 | 154 | return cql_http_port_; |
795 | 154 | } |
796 | | |
797 | | Result<int64_t> GetInt64CQLMetric(const MetricEntityPrototype* entity_proto, |
798 | | const char* entity_id, |
799 | | const MetricPrototype* metric_proto, |
800 | | const char* value_field) const; |
801 | | |
802 | | protected: |
803 | | CHECKED_STATUS DeleteServerInfoPaths() override; |
804 | | |
805 | | bool ServerInfoPathsExist() override; |
806 | | |
807 | | CHECKED_STATUS BuildServerStateFromInfoPath() override; |
808 | | |
809 | | private: |
810 | | string GetCQLServerInfoPath(); |
811 | | const std::string master_addrs_; |
812 | | const std::string bind_host_; |
813 | | const uint16_t rpc_port_; |
814 | | const uint16_t http_port_; |
815 | | const uint16_t redis_rpc_port_; |
816 | | const uint16_t redis_http_port_; |
817 | | const uint16_t pgsql_rpc_port_; |
818 | | const uint16_t pgsql_http_port_; |
819 | | const uint16_t cql_rpc_port_; |
820 | | const uint16_t cql_http_port_; |
821 | | uint16_t num_drives_; |
822 | | bool start_cql_proxy_ = true; |
823 | | std::unique_ptr<server::ServerStatusPB> cqlserver_status_; |
824 | | |
825 | | friend class RefCountedThreadSafe<ExternalTabletServer>; |
826 | | virtual ~ExternalTabletServer(); |
827 | | }; |
828 | | |
829 | | // Custom functor for predicate based comparison with the master list. |
830 | | struct MasterComparator { |
831 | 17 | explicit MasterComparator(ExternalMaster* master) : master_(master) { } |
832 | | |
833 | | // We look for the exact master match. Since it is possible to stop/restart master on a given |
834 | | // host/port, we do not want a stale master pointer input to match a newer master. |
835 | 41 | bool operator()(const scoped_refptr<ExternalMaster>& other) const { |
836 | 41 | return master_ == other.get(); |
837 | 41 | } |
838 | | |
839 | | private: |
840 | | const ExternalMaster* master_; |
841 | | }; |
842 | | |
843 | | template <class T> |
844 | 41.6k | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { |
845 | 41.6k | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); |
846 | 41.6k | } _ZN2yb19ExternalMiniCluster8GetProxyINS_6master14MasterDdlProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 22 | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 22 | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 22 | } |
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master18MasterClusterProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 40.9k | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 40.9k | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 40.9k | } |
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master17MasterClientProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 391 | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 391 | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 391 | } |
_ZN2yb19ExternalMiniCluster8GetProxyINS_7tserver29TabletServerAdminServiceProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 1 | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 1 | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 1 | } |
_ZN2yb19ExternalMiniCluster8GetProxyINS_6master16MasterAdminProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 165 | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 165 | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 165 | } |
Unexecuted instantiation: _ZN2yb19ExternalMiniCluster8GetProxyINS_7tserver24TabletServerServiceProxyEEET_PNS_14ExternalDaemonE _ZN2yb19ExternalMiniCluster8GetProxyINS_9consensus21ConsensusServiceProxyEEET_PNS_14ExternalDaemonE Line | Count | Source | 844 | 155 | T ExternalMiniCluster::GetProxy(ExternalDaemon* daemon) { | 845 | 155 | return T(proxy_cache_.get(), daemon->bound_rpc_addr()); | 846 | 155 | } |
|
847 | | |
848 | | CHECKED_STATUS RestartAllMasters(ExternalMiniCluster* cluster); |
849 | | |
850 | | } // namespace yb |
851 | | #endif // YB_INTEGRATION_TESTS_EXTERNAL_MINI_CLUSTER_H_ |