/Users/deen/code/yugabyte-db/src/yb/master/master_cluster_service.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/gutil/casts.h" |
15 | | |
16 | | #include "yb/master/catalog_manager.h" |
17 | | #include "yb/master/master_cluster.service.h" |
18 | | #include "yb/master/master_heartbeat.pb.h" |
19 | | #include "yb/master/master_service_base.h" |
20 | | #include "yb/master/master_service_base-internal.h" |
21 | | #include "yb/master/ts_descriptor.h" |
22 | | #include "yb/master/ts_manager.h" |
23 | | |
24 | | #include "yb/util/service_util.h" |
25 | | |
26 | | DEFINE_double(master_slow_get_registration_probability, 0, |
27 | | "Probability of injecting delay in GetMasterRegistration."); |
28 | | |
29 | | using namespace std::literals; |
30 | | |
31 | | namespace yb { |
32 | | namespace master { |
33 | | |
34 | | namespace { |
35 | | |
36 | | class MasterClusterServiceImpl : public MasterServiceBase, public MasterClusterIf { |
37 | | public: |
38 | | explicit MasterClusterServiceImpl(Master* master) |
39 | 8.03k | : MasterServiceBase(master), MasterClusterIf(master->metric_entity()) {} |
40 | | |
41 | | void ListTabletServers(const ListTabletServersRequestPB* req, |
42 | | ListTabletServersResponsePB* resp, |
43 | 26.8k | rpc::RpcContext rpc) override { |
44 | 26.8k | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
45 | 26.8k | if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, &rpc)) { |
46 | 12.7k | return; |
47 | 12.7k | } |
48 | | |
49 | 14.0k | std::vector<std::shared_ptr<TSDescriptor> > descs; |
50 | 14.0k | if (!req->primary_only()) { |
51 | 7.52k | server_->ts_manager()->GetAllDescriptors(&descs); |
52 | 7.52k | } else { |
53 | 6.53k | auto uuid_result = server_->catalog_manager_impl()->placement_uuid(); |
54 | 6.53k | if (!uuid_result.ok()) { |
55 | 0 | return; |
56 | 0 | } |
57 | 6.53k | server_->ts_manager()->GetAllLiveDescriptorsInCluster(&descs, *uuid_result); |
58 | 6.53k | } |
59 | | |
60 | 34.5k | for (const std::shared_ptr<TSDescriptor>& desc : descs)14.0k { |
61 | 34.5k | ListTabletServersResponsePB::Entry* entry = resp->add_servers(); |
62 | 34.5k | auto ts_info = *desc->GetTSInformationPB(); |
63 | 34.5k | *entry->mutable_instance_id() = std::move(*ts_info.mutable_tserver_instance()); |
64 | 34.5k | *entry->mutable_registration() = std::move(*ts_info.mutable_registration()); |
65 | 34.5k | entry->set_millis_since_heartbeat( |
66 | 34.5k | narrow_cast<int>(desc->TimeSinceHeartbeat().ToMilliseconds())); |
67 | 34.5k | entry->set_alive(desc->IsLive()); |
68 | 34.5k | desc->GetMetrics(entry->mutable_metrics()); |
69 | 34.5k | } |
70 | 14.0k | rpc.RespondSuccess(); |
71 | 14.0k | } |
72 | | |
73 | | void ListLiveTabletServers(const ListLiveTabletServersRequestPB* req, |
74 | | ListLiveTabletServersResponsePB* resp, |
75 | 4 | rpc::RpcContext rpc) override { |
76 | 4 | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
77 | 4 | if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, &rpc)) { |
78 | 0 | return; |
79 | 0 | } |
80 | 4 | auto placement_uuid_result = server_->catalog_manager_impl()->placement_uuid(); |
81 | 4 | if (!placement_uuid_result.ok()) { |
82 | 0 | return; |
83 | 0 | } |
84 | 4 | string placement_uuid = *placement_uuid_result; |
85 | | |
86 | 4 | vector<std::shared_ptr<TSDescriptor> > descs; |
87 | 4 | server_->ts_manager()->GetAllLiveDescriptors(&descs); |
88 | | |
89 | 12 | for (const std::shared_ptr<TSDescriptor>& desc : descs) { |
90 | 12 | ListLiveTabletServersResponsePB::Entry* entry = resp->add_servers(); |
91 | 12 | auto ts_info = *desc->GetTSInformationPB(); |
92 | 12 | *entry->mutable_instance_id() = std::move(*ts_info.mutable_tserver_instance()); |
93 | 12 | *entry->mutable_registration() = std::move(*ts_info.mutable_registration()); |
94 | 12 | bool isPrimary = server_->ts_manager()->IsTsInCluster(desc, placement_uuid); |
95 | 12 | entry->set_isfromreadreplica(!isPrimary); |
96 | 12 | } |
97 | 4 | rpc.RespondSuccess(); |
98 | 4 | } |
99 | | |
100 | | void ListMasters( |
101 | | const ListMastersRequestPB* req, |
102 | | ListMastersResponsePB* resp, |
103 | 67 | rpc::RpcContext rpc) override { |
104 | 67 | std::vector<ServerEntryPB> masters; |
105 | 67 | Status s = server_->ListMasters(&masters); |
106 | 67 | if (s.ok()) { |
107 | 207 | for (const ServerEntryPB& master : masters) { |
108 | 207 | resp->add_masters()->CopyFrom(master); |
109 | 207 | } |
110 | 67 | rpc.RespondSuccess(); |
111 | 67 | } else { |
112 | 0 | SetupErrorAndRespond(resp->mutable_error(), s, MasterErrorPB_Code_UNKNOWN_ERROR, &rpc); |
113 | 0 | } |
114 | 67 | } |
115 | | |
116 | | void ListMasterRaftPeers( |
117 | | const ListMasterRaftPeersRequestPB* req, |
118 | | ListMasterRaftPeersResponsePB* resp, |
119 | 0 | rpc::RpcContext rpc) override { |
120 | 0 | std::vector<consensus::RaftPeerPB> masters; |
121 | 0 | Status s = server_->ListRaftConfigMasters(&masters); |
122 | 0 | if (s.ok()) { |
123 | 0 | for (const consensus::RaftPeerPB& master : masters) { |
124 | 0 | resp->add_masters()->CopyFrom(master); |
125 | 0 | } |
126 | 0 | rpc.RespondSuccess(); |
127 | 0 | } else { |
128 | 0 | SetupErrorAndRespond(resp->mutable_error(), s, MasterErrorPB_Code_UNKNOWN_ERROR, &rpc); |
129 | 0 | } |
130 | 0 | } |
131 | | |
132 | | void GetMasterRegistration(const GetMasterRegistrationRequestPB* req, |
133 | | GetMasterRegistrationResponsePB* resp, |
134 | 29.4M | rpc::RpcContext rpc) override { |
135 | | // instance_id must always be set in order for status pages to be useful. |
136 | 29.4M | if (RandomActWithProbability(FLAGS_master_slow_get_registration_probability)) { |
137 | 55 | std::this_thread::sleep_for(20s); |
138 | 55 | } |
139 | 29.4M | resp->mutable_instance_id()->CopyFrom(server_->instance_pb()); |
140 | 29.4M | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
141 | 29.4M | if (!l.CheckIsInitializedOrRespond(resp, &rpc)) { |
142 | 514k | return; |
143 | 514k | } |
144 | 28.8M | Status s = server_->GetMasterRegistration(resp->mutable_registration()); |
145 | 28.8M | CheckRespErrorOrSetUnknown(s, resp); |
146 | 28.8M | auto role = server_->catalog_manager_impl()->Role(); |
147 | 28.8M | if (role == PeerRole::LEADER) { |
148 | 1.00M | if (!l.leader_status().ok()) { |
149 | 527k | YB_LOG_EVERY_N_SECS(INFO, 1) |
150 | 5.47k | << "Patching role from leader to follower because of: " << l.leader_status() |
151 | 5.47k | << THROTTLE_MSG; |
152 | 527k | role = PeerRole::FOLLOWER; |
153 | 527k | } |
154 | 1.00M | } |
155 | 28.8M | resp->set_role(role); |
156 | 28.8M | rpc.RespondSuccess(); |
157 | 28.8M | } |
158 | | |
159 | | void IsMasterLeaderServiceReady( |
160 | | const IsMasterLeaderReadyRequestPB* req, IsMasterLeaderReadyResponsePB* resp, |
161 | 172 | rpc::RpcContext rpc) override { |
162 | 172 | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
163 | 172 | if (!l.CheckIsInitializedAndIsLeaderOrRespond(resp, &rpc)) { |
164 | 122 | return; |
165 | 122 | } |
166 | | |
167 | 50 | rpc.RespondSuccess(); |
168 | 50 | } |
169 | | |
170 | | void DumpState( |
171 | | const DumpMasterStateRequestPB* req, |
172 | | DumpMasterStateResponsePB* resp, |
173 | 0 | rpc::RpcContext rpc) override { |
174 | 0 | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
175 | 0 | if (!l.CheckIsInitializedOrRespond(resp, &rpc)) { |
176 | 0 | return; |
177 | 0 | } |
178 | | |
179 | 0 | const string role = (req->has_peers_also() && req->peers_also() ? "Leader" : "Follower"); |
180 | 0 | const string title = role + " Master " + server_->instance_pb().permanent_uuid(); |
181 | |
|
182 | 0 | if (req->return_dump_as_string()) { |
183 | 0 | std::ostringstream ss; |
184 | 0 | server_->catalog_manager_impl()->DumpState(&ss, req->on_disk()); |
185 | 0 | resp->set_dump(title + ":\n" + ss.str()); |
186 | 0 | } else { |
187 | 0 | LOG(INFO) << title; |
188 | 0 | server_->catalog_manager_impl()->DumpState(&LOG(INFO), req->on_disk()); |
189 | 0 | } |
190 | |
|
191 | 0 | if (req->has_peers_also() && req->peers_also()) { |
192 | 0 | std::vector<consensus::RaftPeerPB> masters_raft; |
193 | 0 | Status s = server_->ListRaftConfigMasters(&masters_raft); |
194 | 0 | CheckRespErrorOrSetUnknown(s, resp); |
195 | |
|
196 | 0 | if (!s.ok()) |
197 | 0 | return; |
198 | | |
199 | 0 | LOG(INFO) << "Sending dump command to " << masters_raft.size()-1 << " peers."; |
200 | | |
201 | | // Remove our entry before broadcasting to all peers. |
202 | 0 | bool found = false; |
203 | 0 | for (auto it = masters_raft.begin(); it != masters_raft.end(); it++) { |
204 | 0 | if (server_->instance_pb().permanent_uuid() == it->permanent_uuid()) { |
205 | 0 | masters_raft.erase(it); |
206 | 0 | found = true; |
207 | 0 | break; |
208 | 0 | } |
209 | 0 | } |
210 | |
|
211 | 0 | LOG_IF(DFATAL, !found) << "Did not find leader in Raft config: " |
212 | 0 | << server_->instance_pb().permanent_uuid(); |
213 | |
|
214 | 0 | s = server_->catalog_manager_impl()->PeerStateDump(masters_raft, req, resp); |
215 | 0 | CheckRespErrorOrSetUnknown(s, resp); |
216 | 0 | } |
217 | | |
218 | 0 | rpc.RespondSuccess(); |
219 | 0 | } |
220 | | |
221 | | void ChangeLoadBalancerState( |
222 | | const ChangeLoadBalancerStateRequestPB* req, ChangeLoadBalancerStateResponsePB* resp, |
223 | 0 | rpc::RpcContext rpc) override { |
224 | | // This should work on both followers and leaders, in order to cover leader failover! |
225 | 0 | if (req->has_is_enabled()) { |
226 | 0 | LOG(INFO) << "Changing balancer state to " << req->is_enabled(); |
227 | 0 | server_->catalog_manager_impl()->SetLoadBalancerEnabled(req->is_enabled()); |
228 | 0 | } |
229 | |
|
230 | 0 | rpc.RespondSuccess(); |
231 | 0 | } |
232 | | |
233 | | void GetLoadBalancerState( |
234 | | const GetLoadBalancerStateRequestPB* req, GetLoadBalancerStateResponsePB* resp, |
235 | 0 | rpc::RpcContext rpc) override { |
236 | 0 | resp->set_is_enabled(server_->catalog_manager_impl()->IsLoadBalancerEnabled()); |
237 | 0 | rpc.RespondSuccess(); |
238 | 0 | } |
239 | | |
240 | | void RemovedMasterUpdate(const RemovedMasterUpdateRequestPB* req, |
241 | | RemovedMasterUpdateResponsePB* resp, |
242 | 38 | rpc::RpcContext rpc) override { |
243 | 38 | SCOPED_LEADER_SHARED_LOCK(l, server_->catalog_manager_impl()); |
244 | 38 | if (!l.CheckIsInitializedOrRespond(resp, &rpc)) { |
245 | 0 | return; |
246 | 0 | } |
247 | | |
248 | 38 | Status s = server_->GoIntoShellMode(); |
249 | 38 | CheckRespErrorOrSetUnknown(s, resp); |
250 | 38 | rpc.RespondSuccess(); |
251 | 38 | } |
252 | | |
253 | | void ChangeMasterClusterConfig( |
254 | | const ChangeMasterClusterConfigRequestPB* req, ChangeMasterClusterConfigResponsePB* resp, |
255 | 170 | rpc::RpcContext rpc) override { |
256 | 170 | HANDLE_ON_LEADER_WITH_LOCK(CatalogManager, SetClusterConfig); |
257 | 170 | } |
258 | | |
259 | | void GetMasterClusterConfig( |
260 | | const GetMasterClusterConfigRequestPB* req, GetMasterClusterConfigResponsePB* resp, |
261 | 334 | rpc::RpcContext rpc) override { |
262 | 334 | HANDLE_ON_LEADER_WITH_LOCK(CatalogManager, GetClusterConfig); |
263 | 334 | } |
264 | | |
265 | | void GetLeaderBlacklistCompletion( |
266 | | const GetLeaderBlacklistPercentRequestPB* req, GetLoadMovePercentResponsePB* resp, |
267 | 388 | rpc::RpcContext rpc) override { |
268 | 388 | HANDLE_ON_LEADER_WITH_LOCK(CatalogManager, GetLeaderBlacklistCompletionPercent); |
269 | 388 | } |
270 | | |
271 | | void GetLoadMoveCompletion( |
272 | | const GetLoadMovePercentRequestPB* req, GetLoadMovePercentResponsePB* resp, |
273 | 2.06k | rpc::RpcContext rpc) override { |
274 | 2.06k | HANDLE_ON_LEADER_WITH_LOCK(CatalogManager, GetLoadMoveCompletionPercent); |
275 | 2.06k | } |
276 | | |
277 | | MASTER_SERVICE_IMPL_ON_LEADER_WITH_LOCK( |
278 | | CatalogManager, |
279 | | (AreLeadersOnPreferredOnly) |
280 | | (IsLoadBalanced) |
281 | | (IsLoadBalancerIdle) |
282 | | (SetPreferredZones) |
283 | | ) |
284 | | }; |
285 | | |
286 | | } // namespace |
287 | | |
288 | 8.03k | std::unique_ptr<rpc::ServiceIf> MakeMasterClusterService(Master* master) { |
289 | 8.03k | return std::make_unique<MasterClusterServiceImpl>(master); |
290 | 8.03k | } |
291 | | |
292 | | } // namespace master |
293 | | } // namespace yb |