/Users/deen/code/yugabyte-db/src/yb/tools/ysck.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | // Ysck, a tool to run a YB System Check. |
33 | | |
34 | | #ifndef YB_TOOLS_YSCK_H |
35 | | #define YB_TOOLS_YSCK_H |
36 | | |
37 | | #include <memory> |
38 | | #include <string> |
39 | | #include <unordered_map> |
40 | | #include <utility> |
41 | | #include <vector> |
42 | | |
43 | | #include "yb/client/yb_table_name.h" |
44 | | |
45 | | #include "yb/common/entity_ids_types.h" |
46 | | #include "yb/common/schema.h" |
47 | | |
48 | | #include "yb/gutil/callback_forward.h" |
49 | | |
50 | | #include "yb/util/status_fwd.h" |
51 | | |
52 | | namespace yb { |
53 | | class MonoDelta; |
54 | | namespace tools { |
55 | | |
56 | | // Options for checksum scans. |
57 | | struct ChecksumOptions { |
58 | | public: |
59 | | |
60 | | ChecksumOptions(); |
61 | | |
62 | | ChecksumOptions(MonoDelta timeout, int scan_concurrency); |
63 | | |
64 | | // The maximum total time to wait for results to come back from all replicas. |
65 | | MonoDelta timeout; |
66 | | |
67 | | // The maximum number of concurrent checksum scans to run per tablet server. |
68 | | int scan_concurrency; |
69 | | }; |
70 | | |
71 | | // Representation of a tablet replica on a tablet server. |
72 | | class YsckTabletReplica { |
73 | | public: |
74 | | YsckTabletReplica(const std::string ts_uuid, const bool is_leader, const bool is_follower) |
75 | | : is_leader_(is_leader), |
76 | | is_follower_(is_follower), |
77 | 23.9k | ts_uuid_(ts_uuid) { |
78 | 23.9k | } |
79 | | |
80 | 16.9k | const bool& is_leader() const { |
81 | 16.9k | return is_leader_; |
82 | 16.9k | } |
83 | | |
84 | 3.73k | const bool& is_follower() const { |
85 | 3.73k | return is_follower_; |
86 | 3.73k | } |
87 | | |
88 | 4.37k | const std::string& ts_uuid() const { |
89 | 4.37k | return ts_uuid_; |
90 | 4.37k | } |
91 | | |
92 | | std::string ToString() const; |
93 | | |
94 | | private: |
95 | | const bool is_leader_; |
96 | | const bool is_follower_; |
97 | | const std::string ts_uuid_; |
98 | | DISALLOW_COPY_AND_ASSIGN(YsckTabletReplica); |
99 | | }; |
100 | | |
101 | | // Representation of a tablet belonging to a table. The tablet is composed of replicas. |
102 | | class YsckTablet { |
103 | | public: |
104 | | // TODO add start/end keys, stale. |
105 | 19.1k | explicit YsckTablet(std::string id) : id_(std::move(id)) {} |
106 | | |
107 | 36.4k | const std::string& id() const { |
108 | 36.4k | return id_; |
109 | 36.4k | } |
110 | | |
111 | 16.1k | const std::vector<std::shared_ptr<YsckTabletReplica> >& replicas() const { |
112 | 16.1k | return replicas_; |
113 | 16.1k | } |
114 | | |
115 | 19.1k | void set_replicas(const std::vector<std::shared_ptr<YsckTabletReplica> >& replicas) { |
116 | 19.1k | replicas_.assign(replicas.begin(), replicas.end()); |
117 | 19.1k | } |
118 | | private: |
119 | | const std::string id_; |
120 | | std::vector<std::shared_ptr<YsckTabletReplica>> replicas_; |
121 | | DISALLOW_COPY_AND_ASSIGN(YsckTablet); |
122 | | }; |
123 | | |
124 | | // Representation of a table. Composed of tablets. |
125 | | class YsckTable { |
126 | | public: |
127 | | YsckTable( |
128 | | const TableId& id, |
129 | | client::YBTableName name, |
130 | | const Schema& schema, |
131 | | int num_replicas, |
132 | | TableType table_type) |
133 | | : id_(id), |
134 | | name_(std::move(name)), |
135 | | schema_(schema), |
136 | | num_replicas_(num_replicas), |
137 | 18.2k | table_type_(table_type) {} |
138 | | |
139 | 17.9k | const TableId& id() const { |
140 | 17.9k | return id_; |
141 | 17.9k | } |
142 | | |
143 | 42.6k | const client::YBTableName& name() const { |
144 | 42.6k | return name_; |
145 | 42.6k | } |
146 | | |
147 | 4.37k | const Schema& schema() const { |
148 | 4.37k | return schema_; |
149 | 4.37k | } |
150 | | |
151 | 12.1k | int num_replicas() const { |
152 | 12.1k | return num_replicas_; |
153 | 12.1k | } |
154 | | |
155 | 1.31k | const TableType table_type() const { |
156 | 1.31k | return table_type_; |
157 | 1.31k | } |
158 | | |
159 | 17.9k | void set_tablets(const std::vector<std::shared_ptr<YsckTablet>>& tablets) { |
160 | 17.9k | tablets_.assign(tablets.begin(), tablets.end()); |
161 | 17.9k | } |
162 | | |
163 | 24.7k | std::vector<std::shared_ptr<YsckTablet> >& tablets() { |
164 | 24.7k | return tablets_; |
165 | 24.7k | } |
166 | | |
167 | | std::string ToString() const; |
168 | | |
169 | | private: |
170 | | TableId id_; |
171 | | const client::YBTableName name_; |
172 | | const Schema schema_; |
173 | | const int num_replicas_; |
174 | | const TableType table_type_; |
175 | | std::vector<std::shared_ptr<YsckTablet>> tablets_; |
176 | | DISALLOW_COPY_AND_ASSIGN(YsckTable); |
177 | | }; |
178 | | |
179 | | typedef Callback<void(const Status& status, uint64_t checksum)> ReportResultCallback; |
180 | | |
181 | | // The following two classes must be extended in order to communicate with their respective |
182 | | // components. The two main use cases envisioned for this are: |
183 | | // - To be able to mock a cluster to more easily test the Ysck checks. |
184 | | // - To be able to communicate with a real YB cluster. |
185 | | |
186 | | // Class that must be extended to represent a tablet server. |
187 | | class YsckTabletServer { |
188 | | public: |
189 | 2.97k | explicit YsckTabletServer(std::string uuid) : uuid_(std::move(uuid)) {} |
190 | 2.97k | virtual ~YsckTabletServer() { } |
191 | | |
192 | | // Connects to the configured Tablet Server. |
193 | | virtual CHECKED_STATUS Connect() const = 0; |
194 | | |
195 | | virtual CHECKED_STATUS CurrentHybridTime(uint64_t* hybrid_time) const = 0; |
196 | | |
197 | | // Executes a checksum scan on the associated tablet, and runs the callback |
198 | | // with the result. The callback must be threadsafe and non-blocking. |
199 | | virtual void RunTabletChecksumScanAsync( |
200 | | const std::string& tablet_id, |
201 | | const Schema& schema, |
202 | | const ChecksumOptions& options, |
203 | | const ReportResultCallback& callback) = 0; |
204 | | |
205 | 16.7k | virtual const std::string& uuid() const { |
206 | 16.7k | return uuid_; |
207 | 16.7k | } |
208 | | |
209 | | virtual const std::string& address() const = 0; |
210 | | |
211 | | private: |
212 | | const std::string uuid_; |
213 | | DISALLOW_COPY_AND_ASSIGN(YsckTabletServer); |
214 | | }; |
215 | | |
216 | | // Class that must be extended to represent a master. |
217 | | class YsckMaster { |
218 | | public: |
219 | | // Map of YsckTabletServer objects keyed by tablet server permanent_uuid. |
220 | | typedef std::unordered_map<std::string, std::shared_ptr<YsckTabletServer> > TSMap; |
221 | | |
222 | 1.19k | YsckMaster() { } |
223 | 1.19k | virtual ~YsckMaster() { } |
224 | | |
225 | | // Connects to the configured Master. |
226 | | virtual CHECKED_STATUS Connect() const = 0; |
227 | | |
228 | | // Gets the list of Tablet Servers from the Master and stores it in the passed |
229 | | // map, which is keyed on server permanent_uuid. |
230 | | // 'tablet_servers' is only modified if this method returns OK. |
231 | | virtual CHECKED_STATUS RetrieveTabletServers(TSMap* tablet_servers) = 0; |
232 | | |
233 | | // Gets the list of tables from the Master and stores it in the passed vector. |
234 | | // tables is only modified if this method returns OK. |
235 | | virtual CHECKED_STATUS RetrieveTablesList( |
236 | | std::vector<std::shared_ptr<YsckTable> >* tables) = 0; |
237 | | |
238 | | // Gets the list of tablets for the specified table and stores the list in it. |
239 | | // The table's tablet list is only modified if this method returns OK. |
240 | | virtual CHECKED_STATUS RetrieveTabletsList(const std::shared_ptr<YsckTable>& table) = 0; |
241 | | |
242 | | private: |
243 | | DISALLOW_COPY_AND_ASSIGN(YsckMaster); |
244 | | }; |
245 | | |
246 | | // Class used to communicate with the cluster. It bootstraps this by using the provided master. |
247 | | class YsckCluster { |
248 | | public: |
249 | | explicit YsckCluster(std::shared_ptr<YsckMaster> master) |
250 | 1.19k | : master_(std::move(master)) {} |
251 | | ~YsckCluster(); |
252 | | |
253 | | // Fetches list of tables, tablets, and tablet servers from the master and |
254 | | // populates the full list in cluster_->tables(). |
255 | | CHECKED_STATUS FetchTableAndTabletInfo(); |
256 | | |
257 | 1.19k | const std::shared_ptr<YsckMaster>& master() { |
258 | 1.19k | return master_; |
259 | 1.19k | } |
260 | | |
261 | | const std::unordered_map<std::string, |
262 | 10.7k | std::shared_ptr<YsckTabletServer> >& tablet_servers() { |
263 | 10.7k | return tablet_servers_; |
264 | 10.7k | } |
265 | | |
266 | 3.65k | const std::vector<std::shared_ptr<YsckTable> >& tables() { |
267 | 3.65k | return tables_; |
268 | 3.65k | } |
269 | | |
270 | | private: |
271 | | // Gets the list of tablet servers from the Master. |
272 | | CHECKED_STATUS RetrieveTabletServers(); |
273 | | |
274 | | // Gets the list of tables from the Master. |
275 | | CHECKED_STATUS RetrieveTablesList(); |
276 | | |
277 | | // Fetch the list of tablets for the given table from the Master. |
278 | | CHECKED_STATUS RetrieveTabletsList(const std::shared_ptr<YsckTable>& table); |
279 | | |
280 | | const std::shared_ptr<YsckMaster> master_; |
281 | | std::unordered_map<std::string, std::shared_ptr<YsckTabletServer> > tablet_servers_; |
282 | | std::vector<std::shared_ptr<YsckTable> > tables_; |
283 | | DISALLOW_COPY_AND_ASSIGN(YsckCluster); |
284 | | }; |
285 | | |
286 | | // Externally facing class to run checks against the provided cluster. |
287 | | class Ysck { |
288 | | public: |
289 | | explicit Ysck(std::shared_ptr<YsckCluster> cluster) |
290 | 1.19k | : cluster_(std::move(cluster)) {} |
291 | 1.19k | ~Ysck() {} |
292 | | |
293 | | // Verifies that it can connect to the Master. |
294 | | CHECKED_STATUS CheckMasterRunning(); |
295 | | |
296 | | // Populates all the cluster table and tablet info from the Master. |
297 | | CHECKED_STATUS FetchTableAndTabletInfo(); |
298 | | |
299 | | // Verifies that it can connect to all the Tablet Servers reported by the master. |
300 | | // Must first call FetchTableAndTabletInfo(). |
301 | | CHECKED_STATUS CheckTabletServersRunning(); |
302 | | |
303 | | // Establishes a connection with the specified Tablet Server. |
304 | | // Must first call FetchTableAndTabletInfo(). |
305 | | CHECKED_STATUS ConnectToTabletServer(const std::shared_ptr<YsckTabletServer>& ts); |
306 | | |
307 | | // Verifies that all the tables have contiguous tablets and that each tablet has enough replicas |
308 | | // and a leader. |
309 | | // Must first call FetchTableAndTabletInfo(). |
310 | | CHECKED_STATUS CheckTablesConsistency(); |
311 | | |
312 | | // Verifies data checksums on all tablets by doing a scan of the database on each replica. |
313 | | // If tables is not empty, checks only the named tables. |
314 | | // If tablets is not empty, checks only the specified tablets. |
315 | | // If both are specified, takes the intersection. |
316 | | // If both are empty, all tables and tablets are checked. |
317 | | // Must first call FetchTableAndTabletInfo(). |
318 | | CHECKED_STATUS ChecksumData(const std::vector<std::string>& tables, |
319 | | const std::vector<std::string>& tablets, |
320 | | const ChecksumOptions& options); |
321 | | |
322 | | // Verifies that the assignments reported by the master are the same reported by the |
323 | | // Tablet Servers. |
324 | | // Must first call FetchTableAndTabletInfo(). |
325 | | CHECKED_STATUS CheckAssignments(); |
326 | | |
327 | | private: |
328 | | bool VerifyTable(const std::shared_ptr<YsckTable>& table); |
329 | | bool VerifyTableWithTimeout(const std::shared_ptr<YsckTable>& table, |
330 | | const MonoDelta& timeout, |
331 | | const MonoDelta& retry_interval); |
332 | | bool VerifyTablet(const std::shared_ptr<YsckTablet>& tablet, size_t table_num_replicas); |
333 | | |
334 | | const std::shared_ptr<YsckCluster> cluster_; |
335 | | DISALLOW_COPY_AND_ASSIGN(Ysck); |
336 | | }; |
337 | | } // namespace tools |
338 | | } // namespace yb |
339 | | |
340 | | #endif // YB_TOOLS_YSCK_H |