/Users/deen/code/yugabyte-db/src/yb/master/state_with_tablets.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/master/state_with_tablets.h" |
15 | | |
16 | | #include "yb/util/enums.h" |
17 | | #include "yb/util/flag_tags.h" |
18 | | #include "yb/util/logging.h" |
19 | | #include "yb/util/result.h" |
20 | | #include "yb/util/status_format.h" |
21 | | |
22 | | DEFINE_test_flag(bool, mark_snasphot_as_failed, false, |
23 | | "Whether we should skip sending RESTORE_FINISHED to tablets."); |
24 | | |
25 | | namespace yb { |
26 | | namespace master { |
27 | | |
28 | | namespace { |
29 | | |
30 | | const std::initializer_list<std::pair<SysSnapshotEntryPB::State, SysSnapshotEntryPB::State>> |
31 | | kStateTransitions = { |
32 | | { SysSnapshotEntryPB::CREATING, SysSnapshotEntryPB::COMPLETE }, |
33 | | { SysSnapshotEntryPB::DELETING, SysSnapshotEntryPB::DELETED }, |
34 | | { SysSnapshotEntryPB::RESTORING, SysSnapshotEntryPB::RESTORED }, |
35 | | }; |
36 | | |
37 | 0 | SysSnapshotEntryPB::State InitialStateToTerminalState(SysSnapshotEntryPB::State state) { |
38 | 0 | for (const auto& initial_and_terminal_states : kStateTransitions) { |
39 | 0 | if (state == initial_and_terminal_states.first) { |
40 | 0 | if (PREDICT_FALSE(FLAGS_TEST_mark_snasphot_as_failed) |
41 | 0 | && state == SysSnapshotEntryPB::RESTORING) { |
42 | 0 | LOG(INFO) << "TEST: Mark COMPETE snapshot as FAILED"; |
43 | 0 | return SysSnapshotEntryPB::FAILED; |
44 | 0 | } |
45 | 0 | return initial_and_terminal_states.second; |
46 | 0 | } |
47 | 0 | } |
48 | |
|
49 | 0 | FATAL_INVALID_PB_ENUM_VALUE(SysSnapshotEntryPB::State, state); |
50 | 0 | } |
51 | | |
52 | | } // namespace |
53 | | |
54 | | StateWithTablets::StateWithTablets( |
55 | | SnapshotCoordinatorContext* context, SysSnapshotEntryPB::State initial_state, |
56 | | std::string log_prefix) |
57 | 0 | : context_(*context), initial_state_(initial_state), log_prefix_(std::move(log_prefix)) { |
58 | 0 | } |
59 | | |
60 | | void StateWithTablets::InitTablets( |
61 | 0 | const google::protobuf::RepeatedPtrField<SysSnapshotEntryPB::TabletSnapshotPB>& tablets) { |
62 | 0 | for (const auto& tablet : tablets) { |
63 | 0 | tablets_.emplace(tablet.id(), tablet.state()); |
64 | 0 | if (tablet.state() == initial_state_) { |
65 | 0 | ++num_tablets_in_initial_state_; |
66 | 0 | } |
67 | 0 | } |
68 | 0 | CheckCompleteness(); |
69 | 0 | } |
70 | | |
71 | 0 | Result<SysSnapshotEntryPB::State> StateWithTablets::AggregatedState() const { |
72 | 0 | if (tablets_.empty()) { |
73 | 0 | return InitialStateToTerminalState(initial_state_); |
74 | 0 | } |
75 | 0 | SysSnapshotEntryPB::State result = initial_state_; |
76 | 0 | bool has_initial = false; |
77 | 0 | for (const auto& tablet : tablets_) { |
78 | 0 | if (tablet.state == SysSnapshotEntryPB::FAILED) { |
79 | 0 | return SysSnapshotEntryPB::FAILED; |
80 | 0 | } else if (tablet.state == initial_state_) { |
81 | 0 | has_initial = true; |
82 | 0 | } else if (result == initial_state_) { |
83 | 0 | result = tablet.state; |
84 | 0 | } else if (tablet.state != result) { |
85 | | // Should not happen. |
86 | 0 | return STATUS_FORMAT(IllegalState, "Tablets in different terminal states: $0 and $1", |
87 | 0 | SysSnapshotEntryPB::State_Name(result), |
88 | 0 | SysSnapshotEntryPB::State_Name(tablet.state)); |
89 | 0 | } |
90 | 0 | } |
91 | 0 | return has_initial ? initial_state_ : result; |
92 | 0 | } |
93 | | |
94 | 0 | Result<bool> StateWithTablets::Complete() const { |
95 | 0 | return VERIFY_RESULT(AggregatedState()) != initial_state_; |
96 | 0 | } |
97 | | |
98 | 0 | Status StateWithTablets::AnyFailure() const { |
99 | 0 | for (const auto& tablet : tablets_) { |
100 | 0 | if (tablet.state == SysSnapshotEntryPB::FAILED) { |
101 | 0 | return tablet.last_error; |
102 | 0 | } |
103 | 0 | } |
104 | 0 | return Status::OK(); |
105 | 0 | } |
106 | | |
107 | 0 | bool StateWithTablets::AllTabletsDone() const { |
108 | 0 | return num_tablets_in_initial_state_ == 0; |
109 | 0 | } |
110 | | |
111 | 0 | bool StateWithTablets::PassedSinceCompletion(const MonoDelta& duration) const { |
112 | 0 | if (!AllTabletsDone()) { |
113 | 0 | return false; |
114 | 0 | } |
115 | | |
116 | 0 | if (complete_at_ == CoarseTimePoint()) { |
117 | 0 | YB_LOG_EVERY_N_SECS(DFATAL, 30) |
118 | 0 | << LogPrefix() << "All tablets done but complete done was not set"; |
119 | 0 | return false; |
120 | 0 | } |
121 | | |
122 | 0 | return CoarseMonoClock::Now() > complete_at_ + duration; |
123 | 0 | } |
124 | | |
125 | 0 | std::vector<TabletId> StateWithTablets::TabletIdsInState(SysSnapshotEntryPB::State state) { |
126 | 0 | std::vector<TabletId> result; |
127 | 0 | result.reserve(tablets_.size()); |
128 | 0 | for (const auto& tablet : tablets_) { |
129 | 0 | if (tablet.state == state) { |
130 | 0 | result.push_back(tablet.id); |
131 | 0 | } |
132 | 0 | } |
133 | 0 | return result; |
134 | 0 | } |
135 | | |
136 | 0 | void StateWithTablets::Done(const TabletId& tablet_id, Status status) { |
137 | 0 | VLOG_WITH_PREFIX_AND_FUNC(4) << tablet_id << ", " << status; |
138 | |
|
139 | 0 | auto it = tablets_.find(tablet_id); |
140 | 0 | if (it == tablets_.end()) { |
141 | 0 | LOG_WITH_PREFIX(DFATAL) |
142 | 0 | << "Finished " << InitialStateName() << " at unknown tablet " |
143 | 0 | << tablet_id << ": " << status; |
144 | 0 | return; |
145 | 0 | } |
146 | 0 | if (!it->running) { |
147 | 0 | LOG_WITH_PREFIX(DFATAL) |
148 | 0 | << "Finished " << InitialStateName() << " at " << tablet_id |
149 | 0 | << " that is not running and in state " << SysSnapshotEntryPB::State_Name(it->state) |
150 | 0 | << ": " << status; |
151 | 0 | return; |
152 | 0 | } |
153 | 0 | tablets_.modify(it, [](TabletData& data) { data.running = false; }); |
154 | 0 | const auto& state = it->state; |
155 | 0 | if (state == initial_state_) { |
156 | 0 | status = CheckDoneStatus(status); |
157 | 0 | if (status.ok()) { |
158 | 0 | tablets_.modify( |
159 | 0 | it, [terminal_state = InitialStateToTerminalState(initial_state_)](TabletData& data) { |
160 | 0 | data.state = terminal_state; |
161 | 0 | }); |
162 | 0 | LOG_WITH_PREFIX(INFO) << "Finished " << InitialStateName() << " at " << tablet_id << ", " |
163 | 0 | << num_tablets_in_initial_state_ << " was running"; |
164 | 0 | } else { |
165 | 0 | auto full_status = status.CloneAndPrepend( |
166 | 0 | Format("Failed to $0 snapshot at $1", InitialStateName(), tablet_id)); |
167 | 0 | bool terminal = IsTerminalFailure(status); |
168 | 0 | tablets_.modify(it, [&full_status, terminal](TabletData& data) { |
169 | 0 | if (terminal) { |
170 | 0 | data.state = SysSnapshotEntryPB::FAILED; |
171 | 0 | } |
172 | 0 | data.last_error = full_status; |
173 | 0 | }); |
174 | 0 | LOG_WITH_PREFIX(WARNING) |
175 | 0 | << full_status << ", terminal: " << terminal << ", " << num_tablets_in_initial_state_ |
176 | 0 | << " was running"; |
177 | 0 | if (!terminal) { |
178 | 0 | return; |
179 | 0 | } |
180 | 0 | } |
181 | 0 | --num_tablets_in_initial_state_; |
182 | 0 | CheckCompleteness(); |
183 | 0 | } else { |
184 | 0 | LOG_WITH_PREFIX(DFATAL) |
185 | 0 | << "Finished " << InitialStateName() << " at tablet " << tablet_id << " in a wrong state " |
186 | 0 | << state << ": " << status; |
187 | 0 | } |
188 | 0 | } |
189 | | |
190 | 0 | bool StateWithTablets::AllInState(SysSnapshotEntryPB::State state) { |
191 | 0 | for (const auto& tablet : tablets_) { |
192 | 0 | if (tablet.state != state) { |
193 | 0 | return false; |
194 | 0 | } |
195 | 0 | } |
196 | |
|
197 | 0 | return true; |
198 | 0 | } |
199 | | |
200 | 0 | bool StateWithTablets::HasInState(SysSnapshotEntryPB::State state) { |
201 | 0 | for (const auto& tablet : tablets_) { |
202 | 0 | if (tablet.state == state) { |
203 | 0 | return true; |
204 | 0 | } |
205 | 0 | } |
206 | |
|
207 | 0 | return false; |
208 | 0 | } |
209 | | |
210 | 0 | void StateWithTablets::SetInitialTabletsState(SysSnapshotEntryPB::State state) { |
211 | 0 | initial_state_ = state; |
212 | 0 | for (auto it = tablets_.begin(); it != tablets_.end(); ++it) { |
213 | 0 | tablets_.modify(it, [state](TabletData& data) { |
214 | 0 | data.state = state; |
215 | 0 | }); |
216 | 0 | } |
217 | 0 | num_tablets_in_initial_state_ = tablets_.size(); |
218 | 0 | } |
219 | | |
220 | 0 | const std::string& StateWithTablets::InitialStateName() const { |
221 | 0 | return SysSnapshotEntryPB::State_Name(initial_state_); |
222 | 0 | } |
223 | | |
224 | 0 | void StateWithTablets::CheckCompleteness() { |
225 | 0 | if (num_tablets_in_initial_state_ == 0) { |
226 | 0 | complete_at_ = CoarseMonoClock::Now(); |
227 | 0 | } |
228 | 0 | } |
229 | | |
230 | 0 | void StateWithTablets::RemoveTablets(const std::vector<std::string>& tablet_ids) { |
231 | 0 | for (const auto& id : tablet_ids) { |
232 | 0 | tablets_.erase(id); |
233 | 0 | } |
234 | 0 | } |
235 | | |
236 | 0 | const std::string& StateWithTablets::LogPrefix() const { |
237 | 0 | return log_prefix_; |
238 | 0 | } |
239 | | |
240 | | } // namespace master |
241 | | } // namespace yb |