/Users/deen/code/yugabyte-db/src/yb/consensus/consensus_meta.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include "yb/consensus/consensus_meta.h" |
34 | | |
35 | | #include "yb/common/entity_ids_types.h" |
36 | | #include "yb/common/wire_protocol.h" |
37 | | |
38 | | #include "yb/consensus/consensus_util.h" |
39 | | #include "yb/consensus/consensus.pb.h" |
40 | | #include "yb/consensus/metadata.pb.h" |
41 | | #include "yb/consensus/opid_util.h" |
42 | | #include "yb/consensus/quorum_util.h" |
43 | | |
44 | | #include "yb/fs/fs_manager.h" |
45 | | |
46 | | #include "yb/gutil/strings/substitute.h" |
47 | | #include "yb/util/fault_injection.h" |
48 | | #include "yb/util/flag_tags.h" |
49 | | #include "yb/util/logging.h" |
50 | | #include "yb/util/pb_util.h" |
51 | | #include "yb/util/result.h" |
52 | | #include "yb/util/stopwatch.h" |
53 | | |
// Test-only fault-injection flag (double, default 0.0). It is consumed by the MAYBE_FAULT call at
// the top of ConsensusMetadata::Flush(), where it is referenced as
// FLAGS_TEST_fault_crash_before_cmeta_flush.
54 | | DEFINE_test_flag(double, fault_crash_before_cmeta_flush, 0.0, |
55 | | "Fraction of the time when the server will crash just before flushing " |
56 | | "consensus metadata. (For testing only!)"); |
57 | | |
58 | | namespace yb { |
59 | | namespace consensus { |
60 | | |
61 | | using std::string; |
62 | | using strings::Substitute; |
63 | | |
64 | | namespace { |
65 | | |
// Number of low-order bits of a PackedRoleAndTerm that hold the PeerRole value; the bits above
// them hold the term (see PackRoleAndTerm / UnpackTerm / UnpackRole in this file).
// The static_asserts verify at compile time that every PeerRole value is non-negative and fits
// in that many bits. Their messages name the identifiers that are actually checked.
66 | | const int kBitsPerPackedRole = 3; |
67 | | static_assert(0 <= PeerRole_MIN, "PeerRole_MIN must be non-negative."); |
68 | | static_assert(PeerRole_MAX < (1 << kBitsPerPackedRole), |
69 | | "PeerRole_MAX must fit in kBitsPerPackedRole bits."); |
70 | | |
// Packs a peer role and a term into a single PackedRoleAndTerm value: the role goes into the low
// kBitsPerPackedRole bits and the term is shifted into the bits above them.
// CHECK-fails when the term is not below 2^(bit width of PackedRoleAndTerm - kBitsPerPackedRole).
// NOTE(review): the CHECK compares the signed term against an unsigned bound, so a negative term
// presumably also trips it after conversion -- confirm against CHECK_LT's comparison semantics.
71 | 2.52M | ConsensusMetadata::PackedRoleAndTerm PackRoleAndTerm(PeerRole role, int64_t term) { |
72 | | // Ensure we've had no more than 2305843009213693952 terms in this tablet. |
73 | 2.52M | CHECK_LT(term, 1ull << (8 * sizeof(ConsensusMetadata::PackedRoleAndTerm) - kBitsPerPackedRole)); |
74 | 2.52M | return to_underlying(role) | (term << kBitsPerPackedRole); |
75 | 2.52M | } |
76 | | |
// Extracts the term from a packed value by discarding the low kBitsPerPackedRole role bits.
// Inverse of the term half of PackRoleAndTerm.
77 | 9.49M | int64_t UnpackTerm(ConsensusMetadata::PackedRoleAndTerm role_and_term) { |
78 | 9.49M | return role_and_term >> kBitsPerPackedRole; |
79 | 9.49M | } |
80 | | |
// Extracts the role from a packed value by masking the low kBitsPerPackedRole bits and casting
// the result to PeerRole. Inverse of the role half of PackRoleAndTerm.
81 | 9.49M | PeerRole UnpackRole(ConsensusMetadata::PackedRoleAndTerm role_and_term) { |
82 | 9.49M | return static_cast<PeerRole>(role_and_term & ((1 << kBitsPerPackedRole) - 1)); |
83 | 9.49M | } |
84 | | |
85 | | } // anonymous namespace |
86 | | |
// Creates a new ConsensusMetadata for the given tablet and peer, seeds it with `config` as the
// committed config and `current_term` as the term, and flushes it to disk.
// On success the new object is swapped into *cmeta_out and OK is returned.
// If Flush() fails, its status is returned and *cmeta_out is not modified (the swap happens only
// after the flush succeeds).
87 | | Status ConsensusMetadata::Create(FsManager* fs_manager, |
88 | | const string& tablet_id, |
89 | | const std::string& peer_uuid, |
90 | | const RaftConfigPB& config, |
91 | | int64_t current_term, |
92 | 150k | std::unique_ptr<ConsensusMetadata>* cmeta_out) { |
93 | 150k | std::unique_ptr<ConsensusMetadata> cmeta(new ConsensusMetadata(fs_manager, tablet_id, peer_uuid)); |
94 | 150k | cmeta->set_committed_config(config); |
95 | 150k | cmeta->set_current_term(current_term); |
96 | 150k | RETURN_NOT_OK(cmeta->Flush()); |
97 | 150k | cmeta_out->swap(cmeta); |
98 | 150k | return Status::OK(); |
99 | 150k | } |
100 | | |
// Loads previously persisted consensus metadata for `tablet_id`.
// Reads the protobuf container at fs_manager->GetConsensusMetadataPath(tablet_id) directly into
// the new object's pb_, recomputes the active role, records the file's on-disk size, and swaps the
// object into *cmeta_out. Any read or size-lookup failure is returned and *cmeta_out is left
// unmodified.
101 | | Status ConsensusMetadata::Load(FsManager* fs_manager, |
102 | | const std::string& tablet_id, |
103 | | const std::string& peer_uuid, |
104 | 301k | std::unique_ptr<ConsensusMetadata>* cmeta_out) { |
105 | 301k | std::unique_ptr<ConsensusMetadata> cmeta(new ConsensusMetadata(fs_manager, tablet_id, peer_uuid)); |
106 | 301k | RETURN_NOT_OK(pb_util::ReadPBContainerFromPath(fs_manager->env(), |
107 | 301k | fs_manager->GetConsensusMetadataPath(tablet_id), |
108 | 301k | &cmeta->pb_)); |
109 | 301k | cmeta->UpdateActiveRole(); // Needs to happen here as we sidestep the accessor APIs. |
110 | 301k | RETURN_NOT_OK(cmeta->UpdateOnDiskSize()); |
111 | 301k | cmeta_out->swap(cmeta); |
112 | 301k | return Status::OK(); |
113 | 301k | } |
114 | | |
// Removes the consensus metadata file of `tablet_id` from disk.
// Returns OK without logging or deleting anything when the file does not exist. Otherwise logs
// the deletion at INFO and deletes the file; a deletion error is returned with the tablet id
// prepended to its message.
115 | 74.1k | Status ConsensusMetadata::DeleteOnDiskData(FsManager* fs_manager, const string& tablet_id) { |
116 | 74.1k | string cmeta_path = fs_manager->GetConsensusMetadataPath(tablet_id); |
117 | 74.1k | Env* env = fs_manager->env(); |
118 | 74.1k | if (!env->FileExists(cmeta_path)) { |
119 | 141 | return Status::OK(); |
120 | 141 | } |
121 | 73.9k | LOG(INFO) << "T " << tablet_id << " Deleting consensus metadata"; |
122 | 73.9k | RETURN_NOT_OK_PREPEND(env->DeleteFile(cmeta_path), |
123 | 73.9k | "Unable to delete consensus metadata file for tablet " + tablet_id); |
124 | 73.9k | return Status::OK(); |
125 | 73.9k | } |
126 | | |
// Returns the current term stored in the metadata protobuf.
// DCHECKs that the term field has been set.
127 | 248M | int64_t ConsensusMetadata::current_term() const { |
128 | 248M | DCHECK(pb_.has_current_term()); |
129 | 248M | return pb_.current_term(); |
130 | 248M | } |
131 | | |
// Stores `term` in the metadata protobuf and refreshes the packed role/term cache so that
// GetRoleAndTerm() observes the new term. DCHECKs that term >= kMinimumTerm.
// Only the in-memory protobuf is changed here; nothing in this function writes to disk.
132 | 907k | void ConsensusMetadata::set_current_term(int64_t term) { |
133 | 907k | DCHECK_GE(term, kMinimumTerm); |
134 | 907k | pb_.set_current_term(term); |
135 | 907k | UpdateRoleAndTermCache(); |
136 | 907k | } |
137 | | |
138 | | |
// Returns whether the metadata protobuf has its split_parent_tablet_id field set.
139 | 150k | bool ConsensusMetadata::has_split_parent_tablet_id() const { |
140 | 150k | return pb_.has_split_parent_tablet_id(); |
141 | 150k | } |
142 | | |
// Returns the split parent tablet id stored in the metadata protobuf.
// DCHECKs that the field is set; check has_split_parent_tablet_id() first.
143 | 138 | const TabletId& ConsensusMetadata::split_parent_tablet_id() const { |
144 | 138 | DCHECK(pb_.has_split_parent_tablet_id()); |
145 | 138 | return pb_.split_parent_tablet_id(); |
146 | 138 | } |
147 | | |
// Stores the split parent tablet id in the metadata protobuf.
// DCHECKs that the id is non-empty.
148 | 135 | void ConsensusMetadata::set_split_parent_tablet_id(const TabletId& split_parent_tablet_id) { |
149 | 135 | DCHECK(!split_parent_tablet_id.empty()); |
150 | 135 | pb_.set_split_parent_tablet_id(split_parent_tablet_id); |
151 | 135 | } |
152 | | |
// Returns whether the metadata protobuf has its voted_for field set.
153 | 64.7k | bool ConsensusMetadata::has_voted_for() const { |
154 | 64.7k | return pb_.has_voted_for(); |
155 | 64.7k | } |
156 | | |
// Returns the voted_for value stored in the metadata protobuf.
// DCHECKs that the field is set; check has_voted_for() first.
157 | 2.54k | const string& ConsensusMetadata::voted_for() const { |
158 | 2.54k | DCHECK(pb_.has_voted_for()); |
159 | 2.54k | return pb_.voted_for(); |
160 | 2.54k | } |
161 | | |
// Clears the voted_for field of the metadata protobuf, so has_voted_for() returns false afterwards.
162 | 757k | void ConsensusMetadata::clear_voted_for() { |
163 | 757k | pb_.clear_voted_for(); |
164 | 757k | } |
165 | | |
// Stores `uuid` in the voted_for field of the metadata protobuf.
// DCHECKs that the uuid is non-empty.
166 | 170k | void ConsensusMetadata::set_voted_for(const string& uuid) { |
167 | 170k | DCHECK(!uuid.empty()); |
168 | 170k | pb_.set_voted_for(uuid); |
169 | 170k | } |
170 | | |
// Returns the committed Raft config stored in the metadata protobuf.
// DCHECKs that a committed config has been set.
171 | 225M | const RaftConfigPB& ConsensusMetadata::committed_config() const { |
172 | 225M | DCHECK(pb_.has_committed_config()); |
173 | 225M | return pb_.committed_config(); |
174 | 225M | } |
175 | | |
// Replaces the committed Raft config in the metadata protobuf with a copy of `config`.
// The active role is recomputed only when no pending config exists: active_config() returns the
// pending config whenever one is set, so in that case the committed config does not feed into the
// active role.
176 | 169k | void ConsensusMetadata::set_committed_config(const RaftConfigPB& config) { |
177 | 169k | *pb_.mutable_committed_config() = config; |
178 | 169k | if (!has_pending_config_) { |
179 | 150k | UpdateActiveRole(); |
180 | 150k | } |
181 | 169k | } |
182 | | |
// Returns whether a pending config is currently set (see set_pending_config /
// clear_pending_config).
183 | 8.83M | bool ConsensusMetadata::has_pending_config() const { |
184 | 8.83M | return has_pending_config_; |
185 | 8.83M | } |
186 | | |
// Returns the pending config held in the pending_config_ member.
// DCHECKs that a pending config is set; check has_pending_config() first.
187 | 5.93M | const RaftConfigPB& ConsensusMetadata::pending_config() const { |
188 | 5.93M | DCHECK(has_pending_config_); |
189 | 5.93M | return pending_config_; |
190 | 5.93M | } |
191 | | |
// Drops the pending config and recomputes the active role, which from now on is derived from the
// committed config (see active_config()).
192 | 19.6k | void ConsensusMetadata::clear_pending_config() { |
193 | 19.6k | has_pending_config_ = false; |
194 | 19.6k | pending_config_.Clear(); |
195 | 19.6k | UpdateActiveRole(); |
196 | 19.6k | } |
197 | | |
// Installs a copy of `config` as the pending config and recomputes the active role, which from
// now on is derived from the pending config (see active_config()).
198 | 19.5k | void ConsensusMetadata::set_pending_config(const RaftConfigPB& config) { |
199 | 19.5k | has_pending_config_ = true; |
200 | 19.5k | pending_config_ = config; |
201 | 19.5k | UpdateActiveRole(); |
202 | 19.5k | } |
203 | | |
// Returns the config currently in effect: the pending config when one is set, otherwise the
// committed config.
204 | 101M | const RaftConfigPB& ConsensusMetadata::active_config() const { |
205 | 101M | if (has_pending_config_) { |
206 | 2.98M | return pending_config(); |
207 | 2.98M | } |
208 | 98.2M | return committed_config(); |
209 | 101M | } |
210 | | |
// Returns the leader uuid held in the leader_uuid_ member. clear_leader_uuid() represents
// "no leader" as an empty string.
211 | 104M | const string& ConsensusMetadata::leader_uuid() const { |
212 | 104M | return leader_uuid_; |
213 | 104M | } |
214 | | |
// Records `uuid` as the leader uuid and recomputes the active role, since the role computed in
// UpdateActiveRole() is derived from a consensus state that includes the leader uuid.
215 | 676k | void ConsensusMetadata::set_leader_uuid(const string& uuid) { |
216 | 676k | leader_uuid_ = uuid; |
217 | 676k | UpdateActiveRole(); |
218 | 676k | } |
219 | | |
// Resets the leader uuid to the empty string via set_leader_uuid(), which also recomputes the
// active role.
220 | 0 | void ConsensusMetadata::clear_leader_uuid() { |
221 | 0 | set_leader_uuid(""); |
222 | 0 | } |
223 | | |
// Returns the cached active role last computed by UpdateActiveRole().
224 | 266M | PeerRole ConsensusMetadata::active_role() const { |
225 | 266M | return active_role_; |
226 | 266M | } |
227 | | |
// Builds a ConsensusStatePB snapshot of this metadata.
//   type == CONSENSUS_CONFIG_ACTIVE:    config is active_config(); leader_uuid is copied as is.
//   type == CONSENSUS_CONFIG_COMMITTED: config is committed_config(); leader_uuid is set only
//                                       when the leader is a voter in that committed config.
// Any other `type` value CHECK-fails. The current term is always copied from the protobuf.
228 | 47.7M | ConsensusStatePB ConsensusMetadata::ToConsensusStatePB(ConsensusConfigType type) const { |
229 | 18.4E | CHECK(type == CONSENSUS_CONFIG_ACTIVE || type == CONSENSUS_CONFIG_COMMITTED) |
230 | 18.4E | << "Unsupported ConsensusConfigType: " << ConsensusConfigType_Name(type) << ": " << type; |
231 | 47.7M | ConsensusStatePB cstate; |
// Reads the protobuf field directly rather than via current_term(), so the has_current_term()
// DCHECK in that accessor is not applied here.
232 | 47.7M | cstate.set_current_term(pb_.current_term()); |
233 | 47.7M | if (type == CONSENSUS_CONFIG_ACTIVE) { |
234 | 1.20M | *cstate.mutable_config() = active_config(); |
235 | 1.20M | cstate.set_leader_uuid(leader_uuid_); |
236 | 46.5M | } else { |
237 | 46.5M | *cstate.mutable_config() = committed_config(); |
238 | | // It's possible, though unlikely, that a new node from a pending configuration |
239 | | // could be elected leader. Do not indicate a leader in this case. |
240 | 46.5M | if (PREDICT_TRUE(IsRaftConfigVoter(leader_uuid_, cstate.config()))) { |
241 | 35.9M | cstate.set_leader_uuid(leader_uuid_); |
242 | 35.9M | } |
243 | 46.5M | } |
244 | 47.7M | return cstate; |
245 | 47.7M | } |
246 | | |
// Merges an externally supplied committed consensus state into this metadata.
// If the incoming term is greater than the local one, the local term is advanced to it and the
// recorded vote is cleared. Regardless of the term, the leader uuid is reset to empty, the
// committed config is replaced by the incoming one, and any pending config is dropped.
// Nothing in this function writes to disk.
247 | 152 | void ConsensusMetadata::MergeCommittedConsensusStatePB(const ConsensusStatePB& committed_cstate) { |
248 | 152 | if (committed_cstate.current_term() > current_term()) { |
249 | 40 | set_current_term(committed_cstate.current_term()); |
250 | 40 | clear_voted_for(); |
251 | 40 | } |
252 | | |
253 | 152 | set_leader_uuid(""); |
// set_committed_config() skips the role recompute while a pending config exists;
// clear_pending_config() below always recomputes it, so the role ends up derived from the new
// committed config either way.
254 | 152 | set_committed_config(committed_cstate.config()); |
255 | 152 | clear_pending_config(); |
256 | 152 | } |
257 | | |
// Persists the metadata protobuf to the tablet's consensus metadata file and refreshes the
// recorded on-disk size.
// Steps, in order: test-only crash injection (MAYBE_FAULT), validation of the committed config,
// creation of the consensus metadata directory if it is missing (with an fsync of its parent when
// it was just created), a synced overwrite of the metadata file, and a file-size refresh.
// Returns the status of the first failing step, with context prepended where the code does so,
// or OK when every step succeeds.
258 | 1.10M | Status ConsensusMetadata::Flush() { |
259 | 1.10M | MAYBE_FAULT(FLAGS_TEST_fault_crash_before_cmeta_flush); |
260 | 1.10M | SCOPED_LOG_SLOW_EXECUTION_PREFIX(WARNING, 500, LogPrefix(), "flushing consensus metadata"); |
261 | | // Sanity test to ensure we never write out a bad configuration. |
262 | 1.10M | RETURN_NOT_OK_PREPEND(VerifyRaftConfig(pb_.committed_config(), COMMITTED_QUORUM), |
263 | 1.10M | "Invalid config in ConsensusMetadata, cannot flush to disk"); |
264 | | |
265 | | // Create directories if needed. |
266 | 1.10M | string dir = fs_manager_->GetConsensusMetadataDir(); |
267 | 1.10M | bool created_dir = false; |
268 | 1.10M | RETURN_NOT_OK_PREPEND(fs_manager_->CreateDirIfMissing(dir, &created_dir), |
269 | 1.10M | "Unable to create consensus metadata root dir"); |
270 | | // fsync() parent dir if we had to create the dir. |
271 | 526k | if (PREDICT_FALSE(created_dir)) { |
272 | 0 | string parent_dir = DirName(dir); |
// Sync through the FsManager's Env, the same Env used for the metadata file write below, rather
// than Env::Default(), which is not guaranteed to be the Env this FsManager operates on.
273 | 0 | RETURN_NOT_OK_PREPEND(fs_manager_->env()->SyncDir(parent_dir), |
274 | 0 | "Unable to fsync consensus parent dir " + parent_dir); |
275 | 0 | } |
276 | | |
277 | 526k | string meta_file_path = fs_manager_->GetConsensusMetadataPath(tablet_id_); |
278 | 526k | RETURN_NOT_OK_PREPEND(pb_util::WritePBContainerToPath( |
279 | 526k | fs_manager_->env(), meta_file_path, pb_, |
280 | 526k | pb_util::OVERWRITE, |
281 | | // Always fsync the consensus metadata. |
282 | 526k | pb_util::SYNC), |
283 | 526k | Substitute("Unable to write consensus meta file for tablet $0 to path $1", |
284 | 526k | tablet_id_, meta_file_path)); |
285 | 526k | RETURN_NOT_OK(UpdateOnDiskSize()); |
286 | 526k | return Status::OK(); |
287 | 526k | } |
288 | | |
// Constructs an in-memory ConsensusMetadata for the given tablet and peer. Nothing is read from
// or written to disk here; Create() and Load() do that after construction.
// CHECK-fails if fs_manager is null. tablet_id and peer_uuid are taken by value and moved into
// the members. The object starts with no pending config, role UNKNOWN_ROLE and an on-disk size
// of 0, and the packed role/term cache is initialised from that starting state.
289 | | ConsensusMetadata::ConsensusMetadata(FsManager* fs_manager, |
290 | | std::string tablet_id, |
291 | | std::string peer_uuid) |
292 | | : fs_manager_(CHECK_NOTNULL(fs_manager)), |
293 | | tablet_id_(std::move(tablet_id)), |
294 | | peer_uuid_(std::move(peer_uuid)), |
295 | | has_pending_config_(false), |
296 | | active_role_(PeerRole::UNKNOWN_ROLE), |
297 | 451k | on_disk_size_(0) { |
298 | 451k | UpdateRoleAndTermCache(); |
299 | 451k | } |
300 | | |
// Returns the log-line prefix for this object, built by MakeTabletLogPrefix() from the tablet id
// and the peer uuid.
301 | 2.26M | std::string ConsensusMetadata::LogPrefix() const { |
302 | 2.26M | return MakeTabletLogPrefix(tablet_id_, peer_uuid_); |
303 | 2.26M | } |
304 | | |
// Recomputes active_role_ from the ACTIVE consensus state (active config plus leader uuid) using
// GetConsensusRole() for this peer's uuid, refreshes the packed role/term cache, and logs the old
// and new role together with the consensus state at INFO level.
// The log line is emitted on every call, including calls where the role does not change.
305 | 1.16M | void ConsensusMetadata::UpdateActiveRole() { |
306 | 1.16M | ConsensusStatePB cstate = ToConsensusStatePB(CONSENSUS_CONFIG_ACTIVE); |
307 | 1.16M | PeerRole old_role = active_role_; |
308 | 1.16M | active_role_ = GetConsensusRole(peer_uuid_, cstate); |
309 | 1.16M | UpdateRoleAndTermCache(); |
310 | 1.16M | LOG_WITH_PREFIX(INFO) << "Updating active role from " << PeerRole_Name(old_role) |
311 | 1.16M | << " to " << PeerRole_Name(active_role_) |
312 | 1.16M | << ". Consensus state: " << cstate.ShortDebugString() |
313 | 1.16M | << ", has_pending_config = " << has_pending_config_; |
314 | 1.16M | } |
315 | | |
// Looks up the size of this tablet's consensus metadata file through the FsManager's Env and
// stores it in on_disk_size_.
// NOTE(review): VERIFY_RESULT presumably returns the error status from this function when
// GetFileSize() fails, leaving on_disk_size_ unchanged -- confirm against the macro definition.
316 | 827k | Status ConsensusMetadata::UpdateOnDiskSize() { |
317 | 827k | string path = fs_manager_->GetConsensusMetadataPath(tablet_id_); |
318 | 827k | on_disk_size_.store(VERIFY_RESULT(fs_manager_->env()->GetFileSize(path))); |
319 | 0 | return Status::OK(); |
320 | 827k | } |
321 | | |
// Recomputes the packed (role, term) value from active_role_ and the protobuf's current term and
// publishes it with a release store; GetRoleAndTerm() reads it back with an acquire load.
// A term of 0 is packed while the protobuf has no current_term set.
322 | 2.52M | void ConsensusMetadata::UpdateRoleAndTermCache() { |
// Per-branch hit counts reported for the ternary below: current_term() 1.92M, literal 0 601k.
323 | 2.52M | auto new_value = PackRoleAndTerm(active_role_, pb_.has_current_term() ? current_term() : 0); |
324 | 2.52M | role_and_term_cache_.store(new_value, std::memory_order_release); |
325 | 2.52M | } |
326 | | |
// Returns the (role, term) pair last published by UpdateRoleAndTermCache().
// Both values come from a single atomic acquire load of the packed cache, so they always belong
// to the same published snapshot.
327 | 9.49M | std::pair<PeerRole, int64_t> ConsensusMetadata::GetRoleAndTerm() const { |
328 | 9.49M | const auto packed_role_and_term = role_and_term_cache_.load(std::memory_order_acquire); |
329 | 9.49M | return std::make_pair(UnpackRole(packed_role_and_term), UnpackTerm(packed_role_and_term)); |
330 | 9.49M | } |
331 | | |
// Convenience overload for a Raft peer: forwards the peer's last known broadcast addresses,
// last known private addresses and cloud info, together with `from`, to the four-argument
// DesiredHostPort() overload and returns its result.
332 | 1.60M | const HostPortPB& DesiredHostPort(const RaftPeerPB& peer, const CloudInfoPB& from) { |
333 | 1.60M | return DesiredHostPort( |
334 | 1.60M | peer.last_known_broadcast_addr(), peer.last_known_private_addr(), peer.cloud_info(), from); |
335 | 1.60M | } |
336 | | |
// Transfers the private RPC addresses, broadcast addresses and cloud info from `source` into
// `dest` by swapping the corresponding fields. After the call `dest` holds the values `source`
// had, and `source` holds whatever `dest` held in those fields before.
337 | 180k | void TakeRegistration(ServerRegistrationPB* source, RaftPeerPB* dest) { |
338 | 180k | dest->mutable_last_known_private_addr()->Swap(source->mutable_private_rpc_addresses()); |
339 | 180k | dest->mutable_last_known_broadcast_addr()->Swap(source->mutable_broadcast_addresses()); |
340 | 180k | dest->mutable_cloud_info()->Swap(source->mutable_cloud_info()); |
341 | 180k | } |
342 | | |
// Non-destructive variant of TakeRegistration(): `source` is taken by value, so the field swap
// operates on this function's local copy and the caller's object is left unchanged.
343 | 20.8k | void CopyRegistration(ServerRegistrationPB source, RaftPeerPB* dest) { |
344 | 20.8k | TakeRegistration(&source, dest); |
345 | 20.8k | } |
346 | | |
347 | | } // namespace consensus |
348 | | } // namespace yb |