/Users/deen/code/yugabyte-db/src/yb/tserver/ts_tablet_manager.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #ifndef YB_TSERVER_TS_TABLET_MANAGER_H |
33 | | #define YB_TSERVER_TS_TABLET_MANAGER_H |
34 | | |
35 | | #include <memory> |
36 | | #include <string> |
37 | | #include <unordered_map> |
38 | | #include <unordered_set> |
39 | | #include <vector> |
40 | | |
41 | | #include <boost/optional/optional_fwd.hpp> |
42 | | #include <gtest/gtest_prod.h> |
43 | | |
44 | | #include "yb/client/client_fwd.h" |
45 | | #include "yb/client/async_initializer.h" |
46 | | |
47 | | #include "yb/common/constants.h" |
48 | | #include "yb/common/snapshot.h" |
49 | | |
50 | | #include "yb/consensus/consensus_fwd.h" |
51 | | #include "yb/consensus/metadata.pb.h" |
52 | | |
53 | | #include "yb/gutil/macros.h" |
54 | | #include "yb/gutil/ref_counted.h" |
55 | | |
56 | | #include "yb/master/master_fwd.h" |
57 | | #include "yb/master/master_heartbeat.fwd.h" |
58 | | |
59 | | #include "yb/rocksdb/cache.h" |
60 | | #include "yb/rocksdb/options.h" |
61 | | |
62 | | #include "yb/rpc/rpc_fwd.h" |
63 | | |
64 | | #include "yb/tablet/tablet_fwd.h" |
65 | | #include "yb/tablet/metadata.pb.h" |
66 | | #include "yb/tablet/tablet_options.h" |
67 | | #include "yb/tablet/tablet_splitter.h" |
68 | | |
69 | | #include "yb/tserver/tserver_fwd.h" |
70 | | #include "yb/tserver/tablet_memory_manager.h" |
71 | | #include "yb/tserver/tablet_peer_lookup.h" |
72 | | #include "yb/tserver/tserver_types.pb.h" |
73 | | |
74 | | #include "yb/util/status_fwd.h" |
75 | | #include "yb/util/locks.h" |
76 | | #include "yb/util/rw_mutex.h" |
77 | | #include "yb/util/shared_lock.h" |
78 | | #include "yb/util/threadpool.h" |
79 | | |
80 | | namespace yb { |
81 | | |
82 | | class GarbageCollector; |
83 | | class PartitionSchema; |
84 | | class FsManager; |
85 | | class HostPort; |
86 | | class Partition; |
87 | | class Schema; |
88 | | class BackgroundTask; |
89 | | |
90 | | namespace consensus { |
91 | | class RaftConfigPB; |
92 | | } // namespace consensus |
93 | | |
94 | | namespace tserver { |
95 | | class TabletServer; |
96 | | |
97 | | using rocksdb::MemoryMonitor; |
98 | | |
99 | | // Map of tablet id -> transition reason string. |
100 | | typedef std::unordered_map<TabletId, std::string> TransitionInProgressMap; |
101 | | |
102 | | class TransitionInProgressDeleter; |
103 | | struct TabletCreationMetaData; |
104 | | typedef boost::container::static_vector<TabletCreationMetaData, kNumSplitParts> |
105 | | SplitTabletsCreationMetaData; |
106 | | |
107 | | // Evaluates 'expr' once; if the resulting Status is not OK, logs 'msg' and tombstones the tablet |
108 | | // described by 'meta' via LogAndTombstone(), then returns that Status from the enclosing function. |
109 | | #define TOMBSTONE_NOT_OK(expr, meta, uuid, msg, ts_manager_ptr) \ |
110 | 1.99k | do { \ |
111 | 1.99k | Status _s = (expr); \ |
112 | 1.99k | if (PREDICT_FALSE(!_s.ok())) { \ |
113 | 3 | tserver::LogAndTombstone((meta), (msg), (uuid), _s, ts_manager_ptr); \ |
114 | 3 | return _s; \ |
115 | 3 | } \ |
116 | 1.99k | } while (0) |
117 | | |
118 | | // Type of tablet directory: data (kData) or WAL (kWal). |
119 | | YB_DEFINE_ENUM(TabletDirType, (kData)(kWal)); |
120 | | |
121 | | // Keeps track of the tablets hosted on the tablet server side. |
122 | | // |
123 | | // TODO: will also be responsible for keeping the local metadata about |
124 | | // which tablets are hosted on this server persistent on disk, as well |
125 | | // as re-opening all the tablets at startup, etc. |
126 | | class TSTabletManager : public tserver::TabletPeerLookupIf, public tablet::TabletSplitter { |
127 | | public: |
128 | | typedef std::vector<std::shared_ptr<tablet::TabletPeer>> TabletPeers; |
129 | | typedef std::vector<tablet::TabletPtr> TabletPtrs; |
130 | | |
131 | | // Construct the tablet manager. |
132 | | // 'fs_manager' must remain valid until this object is destructed. |
133 | | TSTabletManager(FsManager* fs_manager, |
134 | | TabletServer* server, |
135 | | MetricRegistry* metric_registry); |
136 | | |
137 | | virtual ~TSTabletManager(); |
138 | | |
139 | | // Load all tablet metadata blocks from disk, and open their respective tablets. |
140 | | // Upon return of this method all existing tablets are registered, but |
141 | | // the bootstrap is performed asynchronously. |
142 | | CHECKED_STATUS Init(); |
143 | | CHECKED_STATUS Start(); |
144 | | |
145 | | // Waits for all the bootstraps to complete. |
146 | | // Returns Status::OK if all tablets bootstrapped successfully. If |
147 | | // the bootstrap of any tablet failed returns the failure reason for |
148 | | // the first tablet whose bootstrap failed. |
149 | | CHECKED_STATUS WaitForAllBootstrapsToFinish(); |
150 | | |
151 | | // Starts shutdown process. |
152 | | void StartShutdown(); |
153 | | // Completes shutdown process and waits for its completion. |
154 | | void CompleteShutdown(); |
155 | | |
156 | 83.3k | ThreadPool* tablet_prepare_pool() const { return tablet_prepare_pool_.get(); } |
157 | 83.4k | ThreadPool* raft_pool() const { return raft_pool_.get(); } |
158 | 0 | ThreadPool* read_pool() const { return read_pool_.get(); } |
159 | 83.3k | ThreadPool* append_pool() const { return append_pool_.get(); } |
160 | | |
161 | | // Create a new tablet and register it with the tablet manager. The new tablet |
162 | | // is persisted on disk and opened before this method returns. |
163 | | // |
164 | | // On success, returns the peer of the newly created tablet. |
165 | | // |
166 | | // If another tablet already exists with this ID, logs a DFATAL |
167 | | // and returns a bad Status. |
168 | | Result<tablet::TabletPeerPtr> CreateNewTablet( |
169 | | const tablet::TableInfoPtr& table_info, |
170 | | const string& tablet_id, |
171 | | const Partition& partition, |
172 | | consensus::RaftConfigPB config, |
173 | | const bool colocated = false, |
174 | | const std::vector<SnapshotScheduleId>& snapshot_schedules = {}); |
175 | | |
176 | | CHECKED_STATUS ApplyTabletSplit(tablet::SplitOperation* operation, log::Log* raft_log) override; |
177 | | |
178 | | // Delete the specified tablet. |
179 | | // 'delete_type' must be one of TABLET_DATA_DELETED or TABLET_DATA_TOMBSTONED |
180 | | // or else returns Status::IllegalArgument. |
181 | | // 'cas_config_opid_index_less_or_equal' is optionally specified to enable an |
182 | | // atomic DeleteTablet operation that only occurs if the latest committed |
183 | | // raft config change op has an opid_index equal to or less than the specified |
184 | | // value. If not, 'error_code' is set to CAS_FAILED and a non-OK Status is |
185 | | // returned. |
186 | | // If `hide_only` is true, then just hide tablet instead of deleting it. |
187 | | CHECKED_STATUS DeleteTablet( |
188 | | const TabletId& tablet_id, |
189 | | tablet::TabletDataState delete_type, |
190 | | const boost::optional<int64_t>& cas_config_opid_index_less_or_equal, |
191 | | bool hide_only, |
192 | | boost::optional<TabletServerErrorPB::Code>* error_code); |
193 | | |
194 | | // Lookup the given tablet peer by its ID. |
195 | | // Returns true if the tablet is found successfully. |
196 | | bool LookupTablet(const TabletId& tablet_id, |
197 | | std::shared_ptr<tablet::TabletPeer>* tablet_peer) const; |
198 | | |
199 | | // Lookup the given tablet peer by its ID. |
200 | | // Returns NotFound error if the tablet is not found. |
201 | | Result<std::shared_ptr<tablet::TabletPeer>> LookupTablet(const TabletId& tablet_id) const; |
202 | | |
203 | | // Same as LookupTablet but doesn't acquire the shared lock; the caller must already hold mutex_. |
204 | | bool LookupTabletUnlocked(const TabletId& tablet_id, |
205 | | std::shared_ptr<tablet::TabletPeer>* tablet_peer) const |
206 | | REQUIRES_SHARED(mutex_); |
207 | | |
208 | | CHECKED_STATUS GetTabletPeer( |
209 | | const TabletId& tablet_id, |
210 | | std::shared_ptr<tablet::TabletPeer>* tablet_peer) const override; |
211 | | |
212 | | const NodeInstancePB& NodeInstance() const override; |
213 | | |
214 | | CHECKED_STATUS GetRegistration(ServerRegistrationPB* reg) const override; |
215 | | |
216 | | // Initiate remote bootstrap of the specified tablet. |
217 | | // See the StartRemoteBootstrap() RPC declaration in consensus.proto for details. |
218 | | // Currently this runs the entire procedure synchronously. |
219 | | // TODO: KUDU-921: Run this procedure on a background thread. |
220 | | virtual CHECKED_STATUS |
221 | | StartRemoteBootstrap(const consensus::StartRemoteBootstrapRequestPB& req) override; |
222 | | |
223 | | // Generate a tablet report. |
224 | | // |
225 | | // This will report any tablets which have changed since the last acknowledged |
226 | | // tablet report. Once the report is successfully transferred, call |
227 | | // MarkTabletReportAcknowledged() to clear the incremental state. Otherwise, the |
228 | | // next tablet report will continue to include the same tablets until one |
229 | | // is acknowledged. |
230 | | // 'include_bootstrap' flag indicates whether to include bootstrapped tablets that have not |
231 | | // changed. Normal reports include bootstrap information on every HB, but full reports do not. |
232 | | // |
233 | | // This is thread-safe to call along with tablet modification, but not safe |
234 | | // to call from multiple threads at the same time. |
235 | | void GenerateTabletReport(master::TabletReportPB* report, bool include_bootstrap = true); |
236 | | |
237 | | // Start a full tablet report and reset any incremental state tracking. |
238 | | void StartFullTabletReport(master::TabletReportPB* report); |
239 | | |
240 | | // Mark that the master successfully received and processed the given tablet report. |
241 | | // 'seq_num' - only remove tablets unchanged since the acknowledged report sequence number. |
242 | | // 'updates' - explicitly ACK'd updates from the Master, may be a subset of request tablets. |
243 | | // 'dirty_check' - DEBUG. Confirm we've processed all dirty tablets after a full sweep. |
244 | | void MarkTabletReportAcknowledged(uint32_t seq_num, |
245 | | const master::TabletReportUpdatesPB& updates, |
246 | | bool dirty_check = false); |
247 | | |
248 | | // Adjust the max number of tablets that will be included in a single report. |
249 | | // This is normally controlled by a master-configured GFLAG. |
250 | 402k | void SetReportLimit(int32_t limit) { |
251 | 402k | std::lock_guard<RWMutex> write_lock(mutex_); |
252 | 402k | report_limit_ = limit; |
253 | 402k | } |
254 | 0 | int32_t GetReportLimit() { |
255 | 0 | SharedLock<RWMutex> read_lock(mutex_); |
256 | 0 | return report_limit_; |
257 | 0 | } |
258 | | |
259 | | // Get all of the tablets currently hosted on this server. |
260 | | TabletPeers GetTabletPeers(TabletPtrs* tablet_ptrs = nullptr) const; |
261 | | void GetTabletPeersUnlocked(TabletPeers* tablet_peers) const REQUIRES_SHARED(mutex_); |
262 | | void PreserveLocalLeadersOnly(std::vector<const TabletId*>* tablet_ids) const; |
263 | | |
264 | | // Callback used for state changes outside of the control of TSTabletManager, such as a consensus |
265 | | // role change. They are applied asynchronously internally. |
266 | | void ApplyChange(const TabletId& tablet_id, |
267 | | std::shared_ptr<consensus::StateChangeContext> context); |
268 | | |
269 | | // Marks tablet with 'tablet_id' dirty. |
270 | | // Used for state changes outside of the control of TSTabletManager, such as consensus role |
271 | | // changes. |
272 | | void MarkTabletDirty(const TabletId& tablet_id, |
273 | | std::shared_ptr<consensus::StateChangeContext> context); |
274 | | |
275 | | void MarkTabletBeingRemoteBootstrapped(const TabletId& tablet_id, const TableId& table_id); |
276 | | |
277 | | void UnmarkTabletBeingRemoteBootstrapped(const TabletId& tablet_id, const TableId& table_id); |
278 | | |
279 | | // Returns the number of tablets in the "dirty" map, for use by unit tests. |
280 | | size_t TEST_GetNumDirtyTablets() const; |
281 | | |
282 | | // Return the number of tablets in RUNNING or BOOTSTRAPPING state. |
283 | | int GetNumLiveTablets() const; |
284 | | |
285 | | // Return the number of tablets for which this ts is a leader. |
286 | | int GetLeaderCount() const; |
287 | | |
288 | | // Set the number of tablets which are waiting to be bootstrapped and can go to RUNNING |
289 | | // state in the response proto. Also set the total number of runnable tablets on this tserver. |
290 | | // If the tablet manager itself is not initialized, then INT_MAX is set for both. |
291 | | CHECKED_STATUS GetNumTabletsPendingBootstrap(IsTabletServerReadyResponsePB* resp) const; |
292 | | |
293 | | CHECKED_STATUS RunAllLogGC(); |
294 | | |
295 | | // Creates and updates the map of table to the set of tablets assigned per table per disk |
296 | | // for both data and wal directories. |
297 | | void GetAndRegisterDataAndWalDir(FsManager* fs_manager, |
298 | | const std::string& table_id, |
299 | | const TabletId& tablet_id, |
300 | | std::string* data_root_dir, |
301 | | std::string* wal_root_dir); |
302 | | // Updates the map of table to the set of tablets assigned per table per disk |
303 | | // for both of the given data and wal directories. |
304 | | void RegisterDataAndWalDir(FsManager* fs_manager, |
305 | | const std::string& table_id, |
306 | | const TabletId& tablet_id, |
307 | | const std::string& data_root_dir, |
308 | | const std::string& wal_root_dir); |
309 | | // Removes the tablet id assigned to the table and disk pair for both the data and WAL directory |
310 | | // as pointed by the data and wal directory map. |
311 | | void UnregisterDataWalDir(const std::string& table_id, |
312 | | const TabletId& tablet_id, |
313 | | const std::string& data_root_dir, |
314 | | const std::string& wal_root_dir); |
315 | | |
316 | | bool IsTabletInTransition(const TabletId& tablet_id) const; |
317 | | |
318 | 11.3k | TabletServer* server() { return server_; } |
319 | | |
320 | 0 | MemoryMonitor* memory_monitor() { return tablet_options_.memory_monitor.get(); } |
321 | | |
322 | 6 | TabletMemoryManager* tablet_memory_manager() { return mem_manager_.get(); } |
323 | | |
324 | | CHECKED_STATUS UpdateSnapshotsInfo(const master::TSSnapshotsInfoPB& info); |
325 | | |
326 | | // Background task that verifies the data on each tablet for consistency. |
327 | | void VerifyTabletData(); |
328 | | |
329 | | // Background task that retires old metrics. |
330 | | void CleanupOldMetrics(); |
331 | | |
332 | | client::YBClient& client(); |
333 | | |
334 | | const std::shared_future<client::YBClient*>& client_future(); |
335 | | |
336 | 25 | tablet::TabletOptions* TEST_tablet_options() { return &tablet_options_; } |
337 | | |
338 | | // Trigger asynchronous compactions concurrently on the provided tablets. |
339 | | CHECKED_STATUS TriggerCompactionAndWait(const TabletPtrs& tablets); |
340 | | |
341 | | private: |
342 | | FRIEND_TEST(TsTabletManagerTest, TestPersistBlocks); |
343 | | FRIEND_TEST(TsTabletManagerTest, TestTombstonedTabletsAreUnregistered); |
344 | | |
345 | | // Flag specified when registering a TabletPeer. |
346 | | enum RegisterTabletPeerMode { |
347 | | NEW_PEER, |
348 | | REPLACEMENT_PEER |
349 | | }; |
350 | | |
351 | | typedef std::unordered_set<TabletId> TabletIdUnorderedSet; |
352 | | |
353 | | // Maps directory to set of tablets (IDs) using that directory. |
354 | | typedef std::map<std::string, TabletIdUnorderedSet> TabletIdSetByDirectoryMap; |
355 | | |
356 | | // This is a map that takes a table id and maps it to a map of directory and |
357 | | // set of tablets using that directory. |
358 | | typedef std::unordered_map<TableId, TabletIdSetByDirectoryMap> TableDiskAssignmentMap; |
359 | | |
360 | | // Each tablet report is assigned a sequence number, so that subsequent |
361 | | // tablet reports only need to re-report those tablets which have |
362 | | // changed since the last report. Each tablet tracks the sequence |
363 | | // number at which it became dirty. |
364 | | struct TabletReportState { |
365 | | uint32_t change_seq; |
366 | | }; |
367 | | typedef std::unordered_map<std::string, TabletReportState> DirtyMap; |
368 | | |
369 | | // Returns Status::OK() iff state_ == MANAGER_RUNNING. |
370 | | CHECKED_STATUS CheckRunningUnlocked(boost::optional<TabletServerErrorPB::Code>* error_code) const |
371 | | REQUIRES_SHARED(mutex_); |
372 | | |
373 | | // Registers the start of a tablet state transition by inserting the tablet |
374 | | // id and reason string into the transition_in_progress_ map. |
375 | | // 'reason' is a string included in the Status return when there is |
376 | | // contention indicating why the tablet is currently already transitioning. |
377 | | // Returns IllegalState if the tablet is already "locked" for a state |
378 | | // transition by some other operation. |
379 | | // On success, returns OK and populates 'deleter' with an object that removes |
380 | | // the map entry on destruction. |
381 | | CHECKED_STATUS StartTabletStateTransition( |
382 | | const TabletId& tablet_id, const std::string& reason, |
383 | | scoped_refptr<TransitionInProgressDeleter>* deleter); |
384 | | |
385 | | // Registers the start of a tablet state transition with "creating tablet" reason. |
386 | | // See StartTabletStateTransition. |
387 | | Result<scoped_refptr<TransitionInProgressDeleter>> StartTabletStateTransitionForCreation( |
388 | | const TabletId& tablet_id); |
389 | | |
390 | | // Open a tablet meta from the local file system by loading its superblock. |
391 | | CHECKED_STATUS OpenTabletMeta(const TabletId& tablet_id, |
392 | | scoped_refptr<tablet::RaftGroupMetadata>* metadata); |
393 | | |
394 | | // Open a tablet whose metadata has already been loaded/created. |
395 | | // This method does not return anything as it can be run asynchronously. |
396 | | // Upon completion of this method the tablet should be initialized and running. |
397 | | // If something wrong happened on bootstrap/initialization the relevant error |
398 | | // will be set on TabletPeer along with the state set to FAILED. |
399 | | // |
400 | | // The tablet must be registered and an entry corresponding to this tablet |
401 | | // must be put into the transition_in_progress_ map before calling this |
402 | | // method. A TransitionInProgressDeleter must be passed as 'deleter' into |
403 | | // this method in order to remove that transition-in-progress entry when |
404 | | // opening the tablet is complete (in either a success or a failure case). |
405 | | void OpenTablet(const scoped_refptr<tablet::RaftGroupMetadata>& meta, |
406 | | const scoped_refptr<TransitionInProgressDeleter>& deleter); |
407 | | |
408 | | // Open a tablet whose metadata has already been loaded. |
409 | | void BootstrapAndInitTablet(const scoped_refptr<tablet::RaftGroupMetadata>& meta, |
410 | | std::shared_ptr<tablet::TabletPeer>* peer); |
411 | | |
412 | | // Add the tablet to the tablet map. |
413 | | // 'mode' specifies whether to expect an existing tablet to exist in the map. |
414 | | // If mode == NEW_PEER but a tablet with the same name is already registered, |
415 | | // or if mode == REPLACEMENT_PEER but a tablet with the same name is not |
416 | | // registered, a FATAL message is logged, causing a process crash. |
417 | | // Calls to this method are expected to be externally synchronized, typically |
418 | | // using the transition_in_progress_ map. |
419 | | CHECKED_STATUS RegisterTablet(const TabletId& tablet_id, |
420 | | const std::shared_ptr<tablet::TabletPeer>& tablet_peer, |
421 | | RegisterTabletPeerMode mode); |
422 | | |
423 | | // Create and register a new TabletPeer, given tablet metadata. |
424 | | // Calls RegisterTablet() with the given 'mode' parameter after constructing |
425 | | // the TabletPeer object. See RegisterTablet() for details about the |
426 | | // semantics of 'mode' and the locking requirements. |
427 | | Result<std::shared_ptr<tablet::TabletPeer>> CreateAndRegisterTabletPeer( |
428 | | const scoped_refptr<tablet::RaftGroupMetadata>& meta, |
429 | | RegisterTabletPeerMode mode); |
430 | | |
431 | | // Returns either table_data_assignment_map_ or table_wal_assignment_map_ depending on dir_type. |
432 | | TableDiskAssignmentMap* GetTableDiskAssignmentMapUnlocked(TabletDirType dir_type); |
433 | | |
434 | | // Returns assigned root dir of specified type for specified table and tablet. |
435 | | // If root dir is not registered for the specified table_id and tablet_id combination - returns |
436 | | // error. |
437 | | Result<const std::string&> GetAssignedRootDirForTablet( |
438 | | TabletDirType dir_type, const TableId& table_id, const TabletId& tablet_id); |
439 | | |
440 | | // Helper to generate the report for a single tablet. |
441 | | void CreateReportedTabletPB(const std::shared_ptr<tablet::TabletPeer>& tablet_peer, |
442 | | master::ReportedTabletPB* reported_tablet); |
443 | | |
444 | | // Mark that the provided TabletPeer's state has changed. That should be taken into |
445 | | // account in the next report. |
446 | | // |
447 | | // NOTE: requires that the caller holds mutex_. |
448 | | void MarkDirtyUnlocked(const TabletId& tablet_id, |
449 | | std::shared_ptr<consensus::StateChangeContext> context) REQUIRES(mutex_); |
450 | | |
451 | | // Handle the case on startup where we find a tablet that is not in ready state. Generally, we |
452 | | // tombstone the replica. |
453 | | CHECKED_STATUS HandleNonReadyTabletOnStartup( |
454 | | const scoped_refptr<tablet::RaftGroupMetadata>& meta); |
455 | | |
456 | | CHECKED_STATUS StartSubtabletsSplit( |
457 | | const tablet::RaftGroupMetadata& source_tablet_meta, SplitTabletsCreationMetaData* tcmetas); |
458 | | |
459 | | // Creates tablet peer and schedules opening the tablet. |
460 | | // See CreateAndRegisterTabletPeer and OpenTablet. |
461 | | void CreatePeerAndOpenTablet( |
462 | | const tablet::RaftGroupMetadataPtr& meta, |
463 | | const scoped_refptr<TransitionInProgressDeleter>& deleter); |
464 | | |
465 | 93.8k | TSTabletManagerStatePB state() const { |
466 | 93.8k | SharedLock<RWMutex> lock(mutex_); |
467 | 93.8k | return state_; |
468 | 93.8k | } |
469 | | |
470 | | bool ClosingUnlocked() const REQUIRES_SHARED(mutex_); |
471 | | |
472 | | // Initializes the RaftPeerPB for the local peer. |
473 | | // Guaranteed to include both uuid and last_seen_addr fields. |
474 | | // Crashes with an invariant check if the RPC server is not currently in a |
475 | | // running state. |
476 | | void InitLocalRaftPeerPB(); |
477 | | |
478 | | std::string LogPrefix() const; |
479 | | |
480 | | std::string TabletLogPrefix(const TabletId& tablet_id) const; |
481 | | |
482 | | void CleanupCheckpoints(); |
483 | | |
484 | | void LogCacheGC(MemTracker* log_cache_mem_tracker, size_t required); |
485 | | |
486 | | // Check that the global and per-table RBS limits are respected if flags |
487 | | // TEST_crash_if_remote_bootstrap_sessions_greater_than and |
488 | | // TEST_crash_if_remote_bootstrap_sessions_per_table_greater_than are non-zero. |
489 | | // Used only for tests. |
490 | | void MaybeDoChecksForTests(const TableId& table_id) REQUIRES_SHARED(mutex_); |
491 | | |
492 | | void CleanupSplitTablets(); |
493 | | |
494 | | HybridTime AllowedHistoryCutoff(tablet::RaftGroupMetadata* metadata); |
495 | | |
496 | | const CoarseTimePoint start_time_; |
497 | | |
498 | | FsManager* const fs_manager_; |
499 | | |
500 | | TabletServer* server_; |
501 | | |
502 | | consensus::RaftPeerPB local_peer_pb_; |
503 | | |
504 | | typedef std::unordered_map<TabletId, std::shared_ptr<tablet::TabletPeer>> TabletMap; |
505 | | |
506 | | // Lock protecting tablet_map_, dirty_tablets_, state_, tablets_blocked_from_lb_, |
507 | | // tablets_being_remote_bootstrapped_, next_report_seq_ and report_limit_. |
508 | | mutable RWMutex mutex_; |
509 | | |
510 | | // Map from tablet ID to tablet |
511 | | TabletMap tablet_map_ GUARDED_BY(mutex_); |
512 | | |
513 | | // Per-table maps from data/WAL root directory to the set of tablets assigned to that directory. |
514 | | TableDiskAssignmentMap table_data_assignment_map_ GUARDED_BY(dir_assignment_mutex_); |
515 | | TableDiskAssignmentMap table_wal_assignment_map_ GUARDED_BY(dir_assignment_mutex_); |
516 | | mutable std::mutex dir_assignment_mutex_; |
517 | | |
518 | | // Map of tablet ids -> reason strings where the keys are tablets whose |
519 | | // bootstrap, creation, or deletion is in-progress |
520 | | TransitionInProgressMap transition_in_progress_ GUARDED_BY(transition_in_progress_mutex_); |
521 | | mutable std::mutex transition_in_progress_mutex_; |
522 | | |
523 | | // Tablets to include in the next tablet report. When a tablet is added/removed/changed |
524 | | // locally and needs to be reported to the master, an entry is added to this map. |
525 | | // Tablets aren't removed from this Map until the Master acknowledges it in response. |
526 | | DirtyMap dirty_tablets_ GUARDED_BY(mutex_); |
527 | | |
528 | | typedef std::set<TabletId> TabletIdSet; |
529 | | |
530 | | TabletIdSet tablets_being_remote_bootstrapped_ GUARDED_BY(mutex_); |
531 | | |
532 | | TabletIdSet tablets_blocked_from_lb_ GUARDED_BY(mutex_); |
533 | | |
534 | | // Used to keep track of the number of concurrent remote bootstrap sessions per table. |
535 | | std::unordered_map<TableId, TabletIdSet> tablets_being_remote_bootstrapped_per_table_; |
536 | | |
537 | | // Next tablet report seqno. |
538 | | uint32_t next_report_seq_ GUARDED_BY(mutex_) = 0; |
539 | | |
540 | | // Limit on the number of tablets to send in a single report. |
541 | | int32_t report_limit_ GUARDED_BY(mutex_) = std::numeric_limits<int32_t>::max(); |
542 | | |
543 | | MetricRegistry* metric_registry_; |
544 | | |
545 | | TSTabletManagerStatePB state_ GUARDED_BY(mutex_); |
546 | | |
547 | | // Thread pool used to open the tablets async, whether bootstrap is required or not. |
548 | | std::unique_ptr<ThreadPool> open_tablet_pool_; |
549 | | |
550 | | // Thread pool for preparing transactions, shared between all tablets. |
551 | | std::unique_ptr<ThreadPool> tablet_prepare_pool_; |
552 | | |
553 | | // Thread pool for applying transactions, shared between all tablets. |
554 | | std::unique_ptr<ThreadPool> apply_pool_; |
555 | | |
556 | | // Thread pool for Raft-related operations, shared between all tablets. |
557 | | std::unique_ptr<ThreadPool> raft_pool_; |
558 | | |
559 | | // Thread pool for appender threads, shared between all tablets. |
560 | | std::unique_ptr<ThreadPool> append_pool_; |
561 | | |
562 | | // Thread pool for log allocation threads, shared between all tablets. |
563 | | std::unique_ptr<ThreadPool> allocation_pool_; |
564 | | |
565 | | // Thread pool for read ops, that are run in parallel, shared between all tablets. |
566 | | std::unique_ptr<ThreadPool> read_pool_; |
567 | | |
568 | | // Thread pool for manually triggering compactions for tablets created from a split. |
569 | | std::unique_ptr<ThreadPool> post_split_trigger_compaction_pool_; |
570 | | |
571 | | // Thread pool for admin triggered compactions for tablets. |
572 | | std::unique_ptr<ThreadPool> admin_triggered_compaction_pool_; |
573 | | |
574 | | std::unique_ptr<rpc::Poller> tablets_cleaner_; |
575 | | |
576 | | // Used for verifying tablet data integrity. |
577 | | std::unique_ptr<rpc::Poller> verify_tablet_data_poller_; |
578 | | |
579 | | // Used for cleaning up old metrics. |
580 | | std::unique_ptr<rpc::Poller> metrics_cleaner_; |
581 | | |
582 | | // For block cache and memory monitor shared across tablets |
583 | | tablet::TabletOptions tablet_options_; |
584 | | |
585 | | std::unique_ptr<consensus::MultiRaftManager> multi_raft_manager_; |
586 | | |
587 | | boost::optional<yb::client::AsyncClientInitialiser> async_client_init_; |
588 | | |
589 | | TabletPeers shutting_down_peers_; |
590 | | |
591 | | std::shared_ptr<TabletMemoryManager> mem_manager_; |
592 | | |
593 | | std::unordered_set<std::string> bootstrap_source_addresses_; |
594 | | |
595 | | std::atomic<int32_t> num_tablets_being_remote_bootstrapped_{0}; |
596 | | |
597 | | mutable simple_spinlock snapshot_schedule_allowed_history_cutoff_mutex_; |
598 | | std::unordered_map<SnapshotScheduleId, HybridTime, SnapshotScheduleIdHash> |
599 | | snapshot_schedule_allowed_history_cutoff_ |
600 | | GUARDED_BY(snapshot_schedule_allowed_history_cutoff_mutex_); |
601 | | // Store snapshot schedules that were missing on previous calls to AllowedHistoryCutoff. |
602 | | std::unordered_map<SnapshotScheduleId, int64_t, SnapshotScheduleIdHash> |
603 | | missing_snapshot_schedules_ |
604 | | GUARDED_BY(snapshot_schedule_allowed_history_cutoff_mutex_); |
605 | | int64_t snapshot_schedules_version_ = 0; |
606 | | HybridTime last_restorations_update_ht_; |
607 | | |
608 | | DISALLOW_COPY_AND_ASSIGN(TSTabletManager); |
609 | | }; |
610 | | |
611 | | // Ref-counted helper that removes the transition-in-progress entry from the corresponding map |
612 | | // when tablet bootstrap, create, and delete operations complete (i.e. when it is destroyed). |
613 | | class TransitionInProgressDeleter : public RefCountedThreadSafe<TransitionInProgressDeleter> { |
614 | | public: |
615 | | TransitionInProgressDeleter(TransitionInProgressMap* map, std::mutex* mutex, |
616 | | const TabletId& tablet_id); |
617 | | |
618 | | private: |
619 | | friend class RefCountedThreadSafe<TransitionInProgressDeleter>; |
620 | | ~TransitionInProgressDeleter(); |
621 | | |
622 | | TransitionInProgressMap* const in_progress_; |
623 | | std::mutex* const mutex_; |
624 | | const std::string tablet_id_; |
625 | | }; |
626 | | |
627 | | // Print a log message using the given info and tombstone the specified tablet. |
628 | | // If tombstoning the tablet fails, a FATAL error is logged, resulting in a crash. |
629 | | // If ts_manager pointer is passed in, it will unregister from the directory assignment map. |
630 | | void LogAndTombstone(const scoped_refptr<tablet::RaftGroupMetadata>& meta, |
631 | | const std::string& msg, |
632 | | const std::string& uuid, |
633 | | const Status& s, |
634 | | TSTabletManager* ts_manager = nullptr); |
635 | | |
636 | | // Delete the tablet using the specified delete_type as the final metadata |
637 | | // state. Deletes the on-disk data, as well as all WAL segments. |
638 | | // If ts_manager pointer is passed in, it will unregister from the directory assignment map. |
639 | | Status DeleteTabletData(const scoped_refptr<tablet::RaftGroupMetadata>& meta, |
640 | | tablet::TabletDataState delete_type, |
641 | | const std::string& uuid, |
642 | | const yb::OpId& last_logged_opid, |
643 | | TSTabletManager* ts_manager = nullptr); |
644 | | |
645 | | // Return Status::IllegalState if leader_term < last_logged_term. |
646 | | // Helper function for use with remote bootstrap. |
647 | | Status CheckLeaderTermNotLower(const TabletId& tablet_id, |
648 | | const std::string& uuid, |
649 | | int64_t leader_term, |
650 | | int64_t last_logged_term); |
651 | | |
652 | | // Helper function to replace a stale tablet found from earlier failed tries. |
653 | | Status HandleReplacingStaleTablet(scoped_refptr<tablet::RaftGroupMetadata> meta, |
654 | | std::shared_ptr<tablet::TabletPeer> old_tablet_peer, |
655 | | const TabletId& tablet_id, |
656 | | const std::string& uuid, |
657 | | const int64_t& leader_term); |
658 | | |
659 | | CHECKED_STATUS ShutdownAndTombstoneTabletPeerNotOk( |
660 | | const Status& status, const tablet::TabletPeerPtr& tablet_peer, |
661 | | const tablet::RaftGroupMetadataPtr& meta, const std::string& uuid, const char* msg, |
662 | | TSTabletManager* ts_tablet_manager = nullptr); |
663 | | |
664 | | } // namespace tserver |
665 | | } // namespace yb |
666 | | #endif /* YB_TSERVER_TS_TABLET_MANAGER_H */ |