/Users/deen/code/yugabyte-db/src/yb/client/meta_cache.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | // This module is internal to the client and not a public API. |
33 | | #ifndef YB_CLIENT_META_CACHE_H |
34 | | #define YB_CLIENT_META_CACHE_H |
35 | | |
36 | | #include <shared_mutex> |
37 | | #include <map> |
38 | | #include <string> |
39 | | #include <memory> |
40 | | #include <unordered_map> |
41 | | #include <vector> |
42 | | |
43 | | #include <boost/variant.hpp> |
44 | | |
45 | | #include <gtest/gtest_prod.h> |
46 | | |
47 | | #include "yb/client/client_fwd.h" |
48 | | |
49 | | #include "yb/common/partition.h" |
50 | | #include "yb/common/wire_protocol.h" |
51 | | #include "yb/consensus/metadata.pb.h" |
52 | | |
53 | | #include "yb/gutil/macros.h" |
54 | | #include "yb/gutil/ref_counted.h" |
55 | | #include "yb/gutil/thread_annotations.h" |
56 | | |
57 | | #include "yb/master/master_client.fwd.h" |
58 | | #include "yb/master/master_fwd.h" |
59 | | |
60 | | #include "yb/rpc/rpc_fwd.h" |
61 | | #include "yb/rpc/rpc.h" |
62 | | |
63 | | #include "yb/tablet/metadata.pb.h" |
64 | | |
65 | | #include "yb/tserver/tserver_fwd.h" |
66 | | |
67 | | #include "yb/util/capabilities.h" |
68 | | #include "yb/util/format.h" |
69 | | #include "yb/util/locks.h" |
70 | | #include "yb/util/lockfree.h" |
71 | | #include "yb/util/metrics.h" |
72 | | #include "yb/util/monotime.h" |
73 | | #include "yb/util/semaphore.h" |
74 | | #include "yb/util/status_fwd.h" |
75 | | #include "yb/util/memory/arena.h" |
76 | | #include "yb/util/net/net_util.h" |
77 | | |
78 | | namespace yb { |
79 | | |
80 | | class Histogram; |
81 | | class YBPartialRow; |
82 | | |
83 | | namespace client { |
84 | | |
85 | | class ClientTest_TestMasterLookupPermits_Test; |
86 | | class YBClient; |
87 | | class YBTable; |
88 | | |
89 | | namespace internal { |
90 | | |
91 | | class LookupRpc; |
92 | | class LookupByKeyRpc; |
93 | | class LookupByIdRpc; |
94 | | |
95 | | // The information cached about a given tablet server in the cluster. |
96 | | // |
97 | | // A RemoteTabletServer could be the local tablet server. |
98 | | // |
99 | | // This class is thread-safe. |
100 | | class RemoteTabletServer { |
101 | | public: |
102 | | RemoteTabletServer(const std::string& uuid, |
103 | | const std::shared_ptr<tserver::TabletServerServiceProxy>& proxy, |
104 | | const tserver::LocalTabletServer* local_tserver = nullptr); |
105 | | explicit RemoteTabletServer(const master::TSInfoPB& pb); |
106 | | ~RemoteTabletServer(); |
107 | | |
108 | | // Initialize the RPC proxy to this tablet server, if it is not already set up. |
109 | | // This will involve a DNS lookup if there is not already an active proxy. |
110 | | // If there is an active proxy, does nothing. |
111 | | CHECKED_STATUS InitProxy(YBClient* client); |
112 | | |
113 | | // Update information from the given pb. |
114 | | // Requires that 'pb''s UUID matches this server. |
115 | | void Update(const master::TSInfoPB& pb); |
116 | | |
117 | | // Is this tablet server local? |
118 | | bool IsLocal() const; |
119 | | |
120 | 1.34k | const tserver::LocalTabletServer* local_tserver() const { |
121 | 1.34k | return local_tserver_; |
122 | 1.34k | } |
123 | | |
124 | | // Return the current proxy to this tablet server. Requires that InitProxy() |
125 | | // be called prior to this. |
126 | | std::shared_ptr<tserver::TabletServerServiceProxy> proxy() const; |
127 | | ::yb::HostPort ProxyEndpoint() const; |
128 | | |
129 | | std::string ToString() const; |
130 | | |
131 | | bool HasHostFrom(const std::unordered_set<std::string>& hosts) const; |
132 | | |
133 | | // Returns the remote server's uuid. |
134 | | const std::string& permanent_uuid() const; |
135 | | |
136 | | const CloudInfoPB& cloud_info() const; |
137 | | |
138 | | const google::protobuf::RepeatedPtrField<HostPortPB>& public_rpc_hostports() const; |
139 | | |
140 | | const google::protobuf::RepeatedPtrField<HostPortPB>& private_rpc_hostports() const; |
141 | | |
142 | | bool HasCapability(CapabilityId capability) const; |
143 | | |
144 | | private: |
145 | | mutable rw_spinlock mutex_; |
146 | | const std::string uuid_; |
147 | | |
148 | | google::protobuf::RepeatedPtrField<HostPortPB> public_rpc_hostports_; |
149 | | google::protobuf::RepeatedPtrField<HostPortPB> private_rpc_hostports_; |
150 | | yb::CloudInfoPB cloud_info_pb_; |
151 | | std::shared_ptr<tserver::TabletServerServiceProxy> proxy_; |
152 | | ::yb::HostPort proxy_endpoint_; |
153 | | const tserver::LocalTabletServer* const local_tserver_ = nullptr; |
154 | | scoped_refptr<Histogram> dns_resolve_histogram_; |
155 | | std::vector<CapabilityId> capabilities_; |
156 | | |
157 | | DISALLOW_COPY_AND_ASSIGN(RemoteTabletServer); |
158 | | }; |
159 | | |
160 | | struct RemoteReplica { |
161 | | RemoteTabletServer* ts; |
162 | | PeerRole role; |
163 | | MonoTime last_failed_time = MonoTime::kUninitialized; |
164 | | // The state of this replica. Only updated after calling GetTabletStatus. |
165 | | tablet::RaftGroupStatePB state = tablet::RaftGroupStatePB::UNKNOWN; |
166 | | |
167 | | RemoteReplica(RemoteTabletServer* ts_, PeerRole role_) |
168 | 938k | : ts(ts_), role(role_) {} |
169 | | |
170 | 4.91k | void MarkFailed() { |
171 | 4.91k | last_failed_time = MonoTime::Now(); |
172 | 4.91k | } |
173 | | |
174 | 5 | void ClearFailed() { |
175 | 5 | last_failed_time = MonoTime::kUninitialized; |
176 | 5 | } |
177 | | |
178 | 37.7M | bool Failed() const { |
179 | 37.7M | return last_failed_time.Initialized(); |
180 | 37.7M | } |
181 | | |
182 | | std::string ToString() const; |
183 | | }; |
184 | | |
185 | | typedef std::unordered_map<std::string, std::unique_ptr<RemoteTabletServer>> TabletServerMap; |
186 | | |
187 | | YB_STRONGLY_TYPED_BOOL(UpdateLocalTsState); |
188 | | YB_STRONGLY_TYPED_BOOL(IncludeFailedReplicas); |
189 | | |
190 | | struct ReplicasCount { |
191 | 70.6k | ReplicasCount(int expected_live_replicas, int expected_read_replicas) { |
192 | 70.6k | SetExpectedReplicas(expected_live_replicas, expected_read_replicas); |
193 | 70.6k | } |
194 | | int expected_live_replicas = 0; |
195 | | |
196 | | int expected_read_replicas = 0; |
197 | | |
198 | | // Number of live replicas in replicas_. |
199 | | int num_alive_live_replicas = 0; |
200 | | |
201 | | // Number of read replicas in replicas_. |
202 | | int num_alive_read_replicas = 0; |
203 | | |
204 | 1.79k | bool IsReplicasCountConsistent() { |
205 | 1.79k | return (expected_live_replicas + expected_read_replicas) == |
206 | 1.79k | (num_alive_live_replicas + num_alive_read_replicas); |
207 | 1.79k | } |
208 | | |
209 | | // Set expected_live_replicas and expected_read_replicas. |
210 | 423k | void SetExpectedReplicas(int live_replicas, int read_replicas) { |
211 | 423k | expected_live_replicas = live_replicas; |
212 | 423k | expected_read_replicas = read_replicas; |
213 | 423k | } |
214 | | |
215 | 230k | void SetAliveReplicas(int live_replicas, int read_replicas) { |
216 | 230k | num_alive_live_replicas = live_replicas; |
217 | 230k | num_alive_read_replicas = read_replicas; |
218 | 230k | } |
219 | | |
220 | | std::string ToString(); |
221 | | }; |
222 | | |
223 | | // The client's view of a given tablet. This object manages lookups of |
224 | | // the tablet's locations, status, etc. |
225 | | // |
226 | | // This class is thread-safe. |
227 | | class RemoteTablet : public RefCountedThreadSafe<RemoteTablet> { |
228 | | public: |
229 | | RemoteTablet(std::string tablet_id, |
230 | | Partition partition, |
231 | | boost::optional<PartitionListVersion> partition_list_version, |
232 | | uint64 split_depth, |
233 | | const TabletId& split_parent_tablet_id); |
234 | | |
235 | | ~RemoteTablet(); |
236 | | |
237 | | // Updates this tablet's replica locations. |
238 | | void Refresh( |
239 | | const TabletServerMap& tservers, |
240 | | const google::protobuf::RepeatedPtrField<master::TabletLocationsPB_ReplicaPB>& replicas); |
241 | | |
242 | | // Mark this tablet as stale, indicating that the cached tablet metadata is |
243 | | // out of date. Staleness is checked by the MetaCache when |
244 | | // LookupTabletByKey() is called to determine whether the fast (non-network) |
245 | | // path can be used or whether the metadata must be refreshed from the Master. |
246 | | void MarkStale(); |
247 | | |
248 | | // Whether the tablet has been marked as stale. |
249 | | bool stale() const; |
250 | | |
251 | | // Mark this tablet as already split. |
252 | | void MarkAsSplit(); |
253 | | |
254 | | bool is_split() const; |
255 | | |
256 | | // Returns table partition list version last known to the client for which this tablet was |
257 | | // serving partition_ key range. |
258 | | // This could be `none` for RemoteTablet instances requested by ID, because in that case we don't |
259 | | // get table partition list version from master. |
260 | 0 | boost::optional<PartitionListVersion> partition_list_version() const { |
261 | 0 | return partition_list_version_; |
262 | 0 | } |
263 | | |
264 | | // Mark any replicas of this tablet hosted by 'ts' as failed. They will |
265 | | // not be returned in future cache lookups. |
266 | | // |
267 | | // The provided status is used for logging. |
268 | | // Returns true if 'ts' was found among this tablet's replicas, false if not. |
269 | | bool MarkReplicaFailed(RemoteTabletServer *ts, const Status& status); |
270 | | |
271 | | // Return the number of failed replicas for this tablet. |
272 | | int GetNumFailedReplicas() const; |
273 | | |
274 | | bool IsReplicasCountConsistent() const; |
275 | | |
276 | | std::string ReplicasCountToString() const; |
277 | | |
278 | | // Set expected_live_replicas and expected_read_replicas. |
279 | | void SetExpectedReplicas(int expected_live_replicas, int expected_read_replicas); |
280 | | |
281 | | void SetAliveReplicas(int alive_live_replicas, int alive_read_replicas); |
282 | | |
283 | | // Return the tablet server which is acting as the current LEADER for |
284 | | // this tablet, provided it hasn't failed. |
285 | | // |
286 | | // Returns NULL if there is currently no leader, or if the leader has |
287 | | // failed. Given that the replica list may change at any time, |
288 | | // callers should always check the result against NULL. |
289 | | RemoteTabletServer* LeaderTServer() const; |
290 | | |
291 | | // Writes this tablet's TSes (across all replicas) to 'servers' for all available replicas. If a |
292 | | // replica has failed recently, check if it is available now if it is local. For remote replica, |
293 | | // wait for some time (configurable) before retrying. |
294 | | void GetRemoteTabletServers( |
295 | | std::vector<RemoteTabletServer*>* servers, |
296 | | IncludeFailedReplicas include_failed_replicas = IncludeFailedReplicas::kFalse); |
297 | | |
298 | | std::vector<RemoteTabletServer*> GetRemoteTabletServers( |
299 | 182k | IncludeFailedReplicas include_failed_replicas = IncludeFailedReplicas::kFalse) { |
300 | 182k | std::vector<RemoteTabletServer*> result; |
301 | 182k | GetRemoteTabletServers(&result, include_failed_replicas); |
302 | 182k | return result; |
303 | 182k | } |
304 | | |
305 | | // Return true if the tablet currently has a known LEADER replica |
306 | | // (i.e. the next call to LeaderTServer() is likely to return non-NULL) |
307 | | bool HasLeader() const; |
308 | | |
309 | 21.4M | const std::string& tablet_id() const { return tablet_id_; } |
310 | | |
311 | 27.5M | const Partition& partition() const { |
312 | 27.5M | return partition_; |
313 | 27.5M | } |
314 | | |
315 | | // Mark the specified tablet server as the leader of the consensus configuration in the cache. |
316 | | // Returns whether server was found in replicas_. |
317 | | bool MarkTServerAsLeader(const RemoteTabletServer* server) WARN_UNUSED_RESULT; |
318 | | |
319 | | // Mark the specified tablet server as a follower in the cache. |
320 | | void MarkTServerAsFollower(const RemoteTabletServer* server); |
321 | | |
322 | | // Return stringified representation of the list of replicas for this tablet. |
323 | | std::string ReplicasAsString() const; |
324 | | |
325 | | std::string ToString() const; |
326 | | |
327 | 523 | const std::string& LogPrefix() const { return log_prefix_; } |
328 | | |
329 | 11.8k | MonoTime refresh_time() { return refresh_time_.load(std::memory_order_acquire); } |
330 | | |
331 | | // See TabletLocationsPB::split_depth. |
332 | 24 | uint64 split_depth() const { return split_depth_; } |
333 | | |
334 | 70.6k | const TabletId& split_parent_tablet_id() const { return split_parent_tablet_id_; } |
335 | | |
336 | 16.5k | int64_t lookups_without_new_replicas() const { return lookups_without_new_replicas_; } |
337 | | |
338 | | // The last version of the table's partition list that we know the tablet was serving data with. |
339 | | PartitionListVersion GetLastKnownPartitionListVersion() const; |
340 | | |
341 | | void MakeLastKnownPartitionListVersionAtLeast(PartitionListVersion partition_list_version); |
342 | | |
343 | | private: |
344 | | // Same as ReplicasAsString(), except that the caller must hold mutex_. |
345 | | std::string ReplicasAsStringUnlocked() const; |
346 | | |
347 | | const std::string tablet_id_; |
348 | | const std::string log_prefix_; |
349 | | const Partition partition_; |
350 | | const boost::optional<PartitionListVersion> partition_list_version_; |
351 | | const uint64 split_depth_; |
352 | | const TabletId split_parent_tablet_id_; |
353 | | |
354 | | // All non-const members are protected by 'mutex_'. |
355 | | mutable rw_spinlock mutex_; |
356 | | bool stale_; |
357 | | bool is_split_ = false; |
358 | | std::vector<RemoteReplica> replicas_; |
359 | | PartitionListVersion last_known_partition_list_version_ = 0; |
360 | | |
361 | | std::atomic<ReplicasCount> replicas_count_{{0, 0}}; |
362 | | |
363 | | // Last time this object was refreshed. Initialized to MonoTime::Min() so we don't have to |
364 | | // check whether it has been initialized every time we use this value. |
365 | | std::atomic<MonoTime> refresh_time_{MonoTime::Min()}; |
366 | | |
367 | | int64_t lookups_without_new_replicas_ = 0; |
368 | | |
369 | | DISALLOW_COPY_AND_ASSIGN(RemoteTablet); |
370 | | }; |
371 | | |
372 | | class ToStringable { |
373 | | public: |
374 | | virtual std::string ToString() const = 0; |
375 | 56.6k | virtual ~ToStringable() = default; |
376 | | }; |
377 | | |
378 | | class RequestCleanup { |
379 | | public: |
380 | | virtual void CleanupRequest() = 0; |
381 | 56.6k | virtual ~RequestCleanup() = default; |
382 | | }; |
383 | | |
384 | | // We store partition_list_version in addition to start_key to be able to uniquely identify exact |
385 | | // partition (as a result of split we can have new partition with same start key, but different |
386 | | // version). |
387 | | struct VersionedPartitionStartKey { |
388 | | PartitionKeyPtr key; |
389 | | PartitionListVersion partition_list_version; |
390 | | |
391 | | std::string ToString() const; |
392 | | }; |
393 | | |
394 | | typedef PartitionKey PartitionGroupStartKey; |
395 | | typedef PartitionKeyPtr PartitionGroupStartKeyPtr; |
396 | | typedef VersionedPartitionStartKey VersionedPartitionGroupStartKey; |
397 | | |
398 | | using LookupCallbackParam = boost::variant<RemoteTabletPtr, std::vector<RemoteTabletPtr>>; |
399 | | |
400 | | using LookupCallback = boost::variant<LookupTabletCallback, LookupTabletRangeCallback>; |
401 | | |
402 | | // Used to store callbacks for individual requests looking up tablet by partition key and those |
403 | | // requests' deadlines, so MetaCache can invoke those callbacks inside ProcessTabletLocations |
404 | | // after receiving group of tablet locations from master. |
405 | | struct LookupData : public MPSCQueueEntry<LookupData> { |
406 | 0 | LookupData() {} |
407 | | LookupData( |
408 | | const LookupCallback& callback_, CoarseTimePoint deadline_, |
409 | | const PartitionKeyPtr& partition_start_) |
410 | | : callback(callback_), deadline(deadline_), |
411 | 96.3k | partition_start(partition_start_) { |
412 | 96.3k | } |
413 | | |
414 | | LookupCallback callback; |
415 | | CoarseTimePoint deadline; |
416 | | // Suitable only when lookup is performed for partition, nullptr otherwise. |
417 | | PartitionKeyPtr partition_start; |
418 | | |
419 | 0 | std::string ToString() const { |
420 | 0 | return Format("{ deadline: $1 partition_start: $2 }", |
421 | 0 | deadline, partition_start ? Slice(*partition_start).ToDebugHexString() : ""); |
422 | 0 | } |
423 | | }; |
424 | | |
425 | | // Stores group of tablet lookups to be resolved by the same single RPC call. |
426 | | // For this purpose, lookups by tablet ID are grouped by tablet ID and lookups by key |
427 | | // are grouped by partitions group. |
428 | | struct LookupDataGroup { |
429 | | MPSCQueue<LookupData> lookups; |
430 | | // 0 if the request is not yet sent |
431 | | std::atomic<int64_t> running_request_number{0}; |
432 | | |
433 | | int64_t max_completed_request_number = 0; |
434 | | |
435 | | void Finished(int64_t request_no, const ToStringable& id, bool allow_absence = false); |
436 | | ~LookupDataGroup(); |
437 | | }; |
438 | | |
439 | | struct TableData { |
440 | | explicit TableData(const VersionedTablePartitionListPtr& partition_list_); |
441 | | |
442 | | VersionedTablePartitionListPtr partition_list; |
443 | | std::map<PartitionKey, RemoteTabletPtr> tablets_by_partition; |
444 | | std::unordered_map<PartitionGroupStartKey, LookupDataGroup> tablet_lookups_by_group; |
445 | | std::vector<RemoteTabletPtr> all_tablets; |
446 | | LookupDataGroup full_table_lookups; |
447 | | bool stale = false; |
448 | | // To resolve partition_key to tablet_id MetaCache uses client::FindPartitionStart with |
449 | | // TableData::partition_list and then translates partition_start to tablet_id based on |
450 | | // TableData::tablets_by_partition. |
451 | | // |
452 | | // We maintain the invariant that TableData::tablets_by_partition and |
453 | | // TableData::tablet_lookups_by_group always correspond to the version of |
454 | | // TableData::partition_list to avoid inconsistencies like the following: |
455 | | // |
456 | | // If TableData::tablets_by_partition was populated based on a newer version of Table partition |
457 | | // list than partition_list.version, for a key belonging to 2nd (with the higher key range) |
458 | | // post-split tablet we can get partition_start key of the parent partition as of the state before |
459 | | // the split and then translate it using newer TableData::tablets_by_partition into 1st |
460 | | // (with the lower key range) post-split tablet instead of 2nd post-split tablet. This way we will |
461 | | // miss the key, because it doesn't exist in 1st post-split tablet. |
462 | | }; |
463 | | |
464 | | class LookupCallbackVisitor : public boost::static_visitor<> { |
465 | | public: |
466 | 93.8k | explicit LookupCallbackVisitor(const LookupCallbackParam& param) : param_(param) { |
467 | 93.8k | } |
468 | | |
469 | 2.26k | explicit LookupCallbackVisitor(const Status& error_status) : error_status_(error_status) { |
470 | 2.26k | } |
471 | | |
472 | | void operator()(const LookupTabletCallback& tablet_callback) const; |
473 | | void operator()(const LookupTabletRangeCallback& tablet_range_callback) const; |
474 | | |
475 | | private: |
476 | | const LookupCallbackParam param_; |
477 | | const boost::optional<Status> error_status_; |
478 | | }; |
479 | | |
480 | | // Manager of RemoteTablets and RemoteTabletServers. The client consults |
481 | | // this class to look up a given tablet or server. |
482 | | // |
483 | | // This class will also be responsible for cache eviction policies, etc. |
484 | | class MetaCache : public RefCountedThreadSafe<MetaCache> { |
485 | | public: |
486 | | // The passed 'client' object must remain valid as long as MetaCache is alive. |
487 | | explicit MetaCache(YBClient* client); |
488 | | |
489 | | ~MetaCache(); |
490 | | |
491 | | // Add a tablet server's proxy, and optionally the tserver itself if it is local. |
492 | | void SetLocalTabletServer(const std::string& permanent_uuid, |
493 | | const std::shared_ptr<tserver::TabletServerServiceProxy>& proxy, |
494 | | const tserver::LocalTabletServer* local_tserver); |
495 | | |
496 | | // Look up which tablet hosts the given partition key for a table. When it is |
497 | | // available, the tablet is handed to 'callback'. Only tablets with non-failed |
498 | | // LEADERs are considered. |
499 | | // |
500 | | // NOTE: the callback may be called from an IO thread or inline with this |
501 | | // call if the cached data is already available. |
502 | | // |
503 | | // NOTE: the memory referenced by 'table' must remain valid until 'callback' |
504 | | // is invoked. |
505 | | void LookupTabletByKey(const std::shared_ptr<YBTable>& table, |
506 | | const PartitionKey& partition_key, |
507 | | CoarseTimePoint deadline, |
508 | | LookupTabletCallback callback); |
509 | | |
510 | | std::future<Result<internal::RemoteTabletPtr>> LookupTabletByKeyFuture( |
511 | | const std::shared_ptr<YBTable>& table, |
512 | | const PartitionKey& partition_key, |
513 | | CoarseTimePoint deadline); |
514 | | |
515 | | // Lookup all tablets corresponding to a table. |
516 | | void LookupAllTablets(const std::shared_ptr<const YBTable>& table, |
517 | | CoarseTimePoint deadline, |
518 | | LookupTabletRangeCallback callback); |
519 | | |
520 | | // If table is specified and cache is not used or has no tablet leader also checks whether table |
521 | | // partitions are stale and returns ClientErrorCode::kTablePartitionListIsStale in that case. |
522 | | void LookupTabletById(const TabletId& tablet_id, |
523 | | const std::shared_ptr<const YBTable>& table, |
524 | | master::IncludeInactive include_inactive, |
525 | | CoarseTimePoint deadline, |
526 | | LookupTabletCallback callback, |
527 | | UseCache use_cache); |
528 | | |
529 | | // Return the local tablet server if available. |
530 | 40.4k | RemoteTabletServer* local_tserver() const { |
531 | 40.4k | return local_tserver_; |
532 | 40.4k | } |
533 | | |
534 | | // Mark any replicas of any tablets hosted by 'ts' as failed. They will |
535 | | // not be returned in future cache lookups. |
536 | | void MarkTSFailed(RemoteTabletServer* ts, const Status& status); |
537 | | |
538 | | // Acquire or release a permit to perform a (slow) master lookup. |
539 | | // |
540 | | // If acquisition fails, caller may still do the lookup, but is first |
541 | | // blocked for a short time to prevent lookup storms. |
542 | | bool AcquireMasterLookupPermit(); |
543 | | void ReleaseMasterLookupPermit(); |
544 | | |
545 | | // Called on the slow LookupTablet path when the master responds. |
546 | | // Populates the tablet caches. |
547 | | // If table_partition_list_version is specified, the function checks for it to match |
548 | | // TableData::partition_list and returns Status with ClientErrorCode::kTablePartitionListIsStale if |
549 | | // versions do not match. |
550 | | // TableData::tablets_by_partition is only updated when these versions are defined and match. |
551 | | // Also notifies all callbacks that are waiting on received tablet ids. |
552 | | // REQUIRES locations to be in order of partitions and without overlaps. |
553 | | // There could be gaps due to post-split tablets not yet running; in this case, MetaCache will |
554 | | // just skip updating the cache for these tablets until they are running. |
555 | | CHECKED_STATUS ProcessTabletLocations( |
556 | | const google::protobuf::RepeatedPtrField<master::TabletLocationsPB>& locations, |
557 | | boost::optional<PartitionListVersion> table_partition_list_version, LookupRpc* lookup_rpc); |
558 | | |
559 | | void InvalidateTableCache(const YBTable& table); |
560 | | |
561 | 0 | const std::string& LogPrefix() const { return log_prefix_; } |
562 | | |
563 | | private: |
564 | | friend class LookupRpc; |
565 | | friend class LookupByKeyRpc; |
566 | | friend class LookupByIdRpc; |
567 | | friend class LookupFullTableRpc; |
568 | | |
569 | | FRIEND_TEST(client::ClientTest, TestMasterLookupPermits); |
570 | | |
571 | | // Lookup the given tablet by partition_start_key, only consulting local information. |
572 | | // Returns the cached tablet, if any, as a RemoteTabletPtr (presumably null on a miss). |
573 | | RemoteTabletPtr LookupTabletByKeyFastPathUnlocked( |
574 | | const TableId& table_id, |
575 | | const VersionedPartitionStartKey& partition_key) REQUIRES_SHARED(mutex_); |
576 | | |
577 | | RemoteTabletPtr LookupTabletByIdFastPathUnlocked(const TabletId& tablet_id) |
578 | | REQUIRES_SHARED(mutex_); |
579 | | |
580 | | // Update our information about the given tablet server. |
581 | | // |
582 | | // This is called when we get some response from the master which contains |
583 | | // the latest host/port info for a server. |
584 | | void UpdateTabletServerUnlocked(const master::TSInfoPB& pb) REQUIRES(mutex_); |
585 | | |
586 | | // Notify appropriate callbacks that lookup of specified partition group of specified table |
587 | | // failed because of specified status. |
588 | | void LookupByKeyFailed( |
589 | | const std::shared_ptr<const YBTable>& table, |
590 | | const VersionedPartitionGroupStartKey& partition_group_start, |
591 | | PartitionListVersion response_partition_list_version, |
592 | | int64_t request_no, const Status& status); |
593 | | |
594 | | void LookupByIdFailed( |
595 | | const TabletId& tablet_id, |
596 | | const std::shared_ptr<const YBTable>& table, |
597 | | master::IncludeInactive include_inactive, |
598 | | const boost::optional<PartitionListVersion>& response_partition_list_version, |
599 | | int64_t request_no, |
600 | | const Status& status); |
601 | | |
602 | | void LookupFullTableFailed(const std::shared_ptr<const YBTable>& table, |
603 | | int64_t request_no, const Status& status); |
604 | | |
605 | | class CallbackNotifier; |
606 | | |
607 | | // Processes lookup failure. |
608 | | // status - failure status. |
609 | | // map - map that contains lookup data. |
610 | | // lock - lock of mutex_. |
611 | | // Returns deadline, if lookup should be restarted. CoarseTimePoint() if not. |
612 | | CoarseTimePoint LookupFailed( |
613 | | const Status& status, int64_t request_no, const ToStringable& lookup_id, |
614 | | LookupDataGroup* lookup_data_group, |
615 | | CallbackNotifier* notifier) REQUIRES(mutex_); |
616 | | |
617 | | RemoteTabletPtr FastLookupTabletByKeyUnlocked( |
618 | | const TableId& table_id, |
619 | | const VersionedPartitionStartKey& partition_start) REQUIRES_SHARED(mutex_); |
620 | | |
621 | | // Lookup from cache the set of tablets corresponding to a given table. |
622 | | // Returns empty vector if the cache is invalid or a tablet is stale, |
623 | | // otherwise returns a list of tablets. |
624 | | boost::optional<std::vector<RemoteTabletPtr>> FastLookupAllTabletsUnlocked( |
625 | | const std::shared_ptr<const YBTable>& table) REQUIRES_SHARED(mutex_); |
626 | | |
627 | | // If `tablet` is a result of splitting of pre-split tablet for which we already have |
628 | | // TabletRequests structure inside YBClient - updates TabletRequests.request_id_seq for the |
629 | | // `tablet` based on value for pre-split tablet. |
630 | | // This is required for correct tracking of duplicate requests to post-split tablets, if we |
631 | | // start from scratch - tserver will treat these requests as duplicates/incorrect, because |
632 | | // on tserver side related structure for tracking duplicate requests is also copied from |
633 | | // pre-split tablet to post-split tablets. |
634 | | void MaybeUpdateClientRequests(const RemoteTablet& tablet); |
635 | | |
636 | | std::unordered_map<TableId, TableData>::iterator InitTableDataUnlocked( |
637 | | const TableId& table_id, const VersionedTablePartitionListPtr& partitions) |
638 | | REQUIRES_SHARED(mutex_); |
639 | | |
640 | | template <class Lock> |
641 | | bool DoLookupTabletByKey( |
642 | | const std::shared_ptr<const YBTable>& table, const VersionedTablePartitionListPtr& partitions, |
643 | | const PartitionKeyPtr& partition_start, CoarseTimePoint deadline, |
644 | | LookupTabletCallback* callback, PartitionGroupStartKeyPtr* partition_group_start); |
645 | | |
646 | | template <class Lock> |
647 | | bool DoLookupTabletById( |
648 | | const TabletId& tablet_id, |
649 | | const std::shared_ptr<const YBTable>& table, |
650 | | master::IncludeInactive include_inactive, |
651 | | CoarseTimePoint deadline, |
652 | | UseCache use_cache, |
653 | | LookupTabletCallback* callback); |
654 | | |
655 | | template <class Lock> |
656 | | bool DoLookupAllTablets(const std::shared_ptr<const YBTable>& table, |
657 | | CoarseTimePoint deadline, |
658 | | LookupTabletRangeCallback* callback); |
659 | | |
660 | | YBClient* const client_; |
661 | | |
662 | | std::shared_timed_mutex mutex_; |
663 | | |
664 | | // Cache of Tablet Server locations: TS UUID -> RemoteTabletServer*. |
665 | | // |
666 | | // Given that the set of tablet servers is bounded by physical machines, we never |
667 | | // evict entries from this map until the MetaCache is destructed. So, no need to use |
668 | | // shared_ptr, etc. |
669 | | // |
670 | | // Protected by mutex_. |
671 | | TabletServerMap ts_cache_; |
672 | | |
673 | | // Local tablet server. |
674 | | RemoteTabletServer* local_tserver_ = nullptr; |
675 | | |
676 | | // Cache of tablets, keyed by table ID, then by start partition key. |
677 | | |
678 | | std::unordered_map<TableId, TableData> tables_ GUARDED_BY(mutex_); |
679 | | |
680 | | // Cache of tablets, keyed by tablet ID. |
681 | | std::unordered_map<TabletId, RemoteTabletPtr> tablets_by_id_ GUARDED_BY(mutex_); |
682 | | |
683 | | std::unordered_map<TabletId, LookupDataGroup> tablet_lookups_by_id_ GUARDED_BY(mutex_); |
684 | | |
685 | | // Prevents master lookup "storms" by delaying master lookups when all |
686 | | // permits have been acquired. |
687 | | Semaphore master_lookup_sem_; |
688 | | |
689 | | const std::string log_prefix_; |
690 | | |
691 | | DISALLOW_COPY_AND_ASSIGN(MetaCache); |
692 | | }; |
693 | | |
694 | | int64_t TEST_GetLookupSerial(); |
695 | | |
696 | | } // namespace internal |
697 | | } // namespace client |
698 | | } // namespace yb |
699 | | |
700 | | #endif /* YB_CLIENT_META_CACHE_H */ |