/Users/deen/code/yugabyte-db/src/yb/client/meta_cache.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | // This module is internal to the client and not a public API. |
33 | | #ifndef YB_CLIENT_META_CACHE_H |
34 | | #define YB_CLIENT_META_CACHE_H |
35 | | |
36 | | #include <shared_mutex> |
37 | | #include <map> |
38 | | #include <string> |
39 | | #include <memory> |
40 | | #include <unordered_map> |
41 | | #include <vector> |
42 | | |
43 | | #include <boost/variant.hpp> |
44 | | |
45 | | #include <gtest/gtest_prod.h> |
46 | | |
47 | | #include "yb/client/client_fwd.h" |
48 | | |
49 | | #include "yb/common/partition.h" |
50 | | #include "yb/common/wire_protocol.h" |
51 | | #include "yb/consensus/metadata.pb.h" |
52 | | |
53 | | #include "yb/gutil/macros.h" |
54 | | #include "yb/gutil/ref_counted.h" |
55 | | #include "yb/gutil/thread_annotations.h" |
56 | | |
57 | | #include "yb/master/master_client.fwd.h" |
58 | | #include "yb/master/master_fwd.h" |
59 | | |
60 | | #include "yb/rpc/rpc_fwd.h" |
61 | | #include "yb/rpc/rpc.h" |
62 | | |
63 | | #include "yb/tablet/metadata.pb.h" |
64 | | |
65 | | #include "yb/tserver/tserver_fwd.h" |
66 | | |
67 | | #include "yb/util/capabilities.h" |
68 | | #include "yb/util/format.h" |
69 | | #include "yb/util/locks.h" |
70 | | #include "yb/util/lockfree.h" |
71 | | #include "yb/util/metrics.h" |
72 | | #include "yb/util/monotime.h" |
73 | | #include "yb/util/semaphore.h" |
74 | | #include "yb/util/status_fwd.h" |
75 | | #include "yb/util/memory/arena.h" |
76 | | #include "yb/util/net/net_util.h" |
77 | | |
78 | | namespace yb { |
79 | | |
80 | | class Histogram; |
81 | | |
82 | | namespace client { |
83 | | |
84 | | class ClientTest_TestMasterLookupPermits_Test; |
85 | | class YBClient; |
86 | | class YBTable; |
87 | | |
88 | | namespace internal { |
89 | | |
90 | | class LookupRpc; |
91 | | class LookupByKeyRpc; |
92 | | class LookupByIdRpc; |
93 | | |
94 | | YB_DEFINE_ENUM(LocalityLevel, (kNone)(kRegion)(kZone)); |
95 | | |
96 | | // The information cached about a given tablet server in the cluster. |
97 | | // |
98 | | // A RemoteTabletServer could be the local tablet server. |
99 | | // |
100 | | // This class is thread-safe. |
101 | | class RemoteTabletServer { |
102 | | public: |
103 | | RemoteTabletServer(const std::string& uuid, |
104 | | const std::shared_ptr<tserver::TabletServerServiceProxy>& proxy, |
105 | | const tserver::LocalTabletServer* local_tserver = nullptr); |
106 | | explicit RemoteTabletServer(const master::TSInfoPB& pb); |
107 | | ~RemoteTabletServer(); |
108 | | |
109 | | // Initialize the RPC proxy to this tablet server, if it is not already set up. |
110 | | // This will involve a DNS lookup if there is not already an active proxy. |
111 | | // If there is an active proxy, does nothing. |
112 | | CHECKED_STATUS InitProxy(YBClient* client); |
113 | | |
114 | | // Update information from the given pb. |
115 | | // Requires that 'pb''s UUID matches this server. |
116 | | void Update(const master::TSInfoPB& pb); |
117 | | |
118 | | // Is this tablet server local? |
119 | | bool IsLocal() const; |
120 | | |
121 | 1.55k | const tserver::LocalTabletServer* local_tserver() const { |
122 | 1.55k | return local_tserver_; |
123 | 1.55k | } |
124 | | |
125 | | // Return the current proxy to this tablet server. Requires that InitProxy() |
126 | | // be called prior to this. |
127 | | std::shared_ptr<tserver::TabletServerServiceProxy> proxy() const; |
128 | | ::yb::HostPort ProxyEndpoint() const; |
129 | | |
130 | | std::string ToString() const; |
131 | | |
132 | | bool HasHostFrom(const std::unordered_set<std::string>& hosts) const; |
133 | | |
134 | | // Returns the remote server's uuid. |
135 | | const std::string& permanent_uuid() const; |
136 | | |
137 | | bool HasCapability(CapabilityId capability) const; |
138 | | |
139 | | bool IsLocalRegion() const; |
140 | | |
141 | | LocalityLevel LocalityLevelWith(const CloudInfoPB& cloud_info) const; |
142 | | |
143 | | HostPortPB DesiredHostPort(const CloudInfoPB& cloud_info) const; |
144 | | |
145 | | std::string TEST_PlacementZone() const; |
146 | | |
147 | | private: |
148 | | mutable rw_spinlock mutex_; |
149 | | const std::string uuid_; |
150 | | |
151 | | google::protobuf::RepeatedPtrField<HostPortPB> public_rpc_hostports_ GUARDED_BY(mutex_); |
152 | | google::protobuf::RepeatedPtrField<HostPortPB> private_rpc_hostports_ GUARDED_BY(mutex_); |
153 | | yb::CloudInfoPB cloud_info_pb_ GUARDED_BY(mutex_); |
154 | | std::shared_ptr<tserver::TabletServerServiceProxy> proxy_; |
155 | | ::yb::HostPort proxy_endpoint_; |
156 | | const tserver::LocalTabletServer* const local_tserver_ = nullptr; |
157 | | scoped_refptr<Histogram> dns_resolve_histogram_; |
158 | | std::vector<CapabilityId> capabilities_ GUARDED_BY(mutex_); |
159 | | |
160 | | DISALLOW_COPY_AND_ASSIGN(RemoteTabletServer); |
161 | | }; |
162 | | |
163 | | struct RemoteReplica { |
164 | | RemoteTabletServer* ts; |
165 | | PeerRole role; |
166 | | MonoTime last_failed_time = MonoTime::kUninitialized; |
167 | | // The state of this replica. Only updated after calling GetTabletStatus. |
168 | | tablet::RaftGroupStatePB state = tablet::RaftGroupStatePB::UNKNOWN; |
169 | | |
170 | | RemoteReplica(RemoteTabletServer* ts_, PeerRole role_) |
171 | 1.95M | : ts(ts_), role(role_) {} |
172 | | |
173 | 4.22k | void MarkFailed() { |
174 | 4.22k | last_failed_time = MonoTime::Now(); |
175 | 4.22k | } |
176 | | |
177 | 20 | void ClearFailed() { |
178 | 20 | last_failed_time = MonoTime::kUninitialized; |
179 | 20 | } |
180 | | |
181 | 79.1M | bool Failed() const { |
182 | 79.1M | return last_failed_time.Initialized(); |
183 | 79.1M | } |
184 | | |
185 | | std::string ToString() const; |
186 | | }; |
187 | | |
188 | | typedef std::unordered_map<std::string, std::unique_ptr<RemoteTabletServer>> TabletServerMap; |
189 | | |
190 | | YB_STRONGLY_TYPED_BOOL(UpdateLocalTsState); |
191 | | YB_STRONGLY_TYPED_BOOL(IncludeFailedReplicas); |
192 | | |
193 | | struct ReplicasCount { |
194 | 90.3k | ReplicasCount(int expected_live_replicas, int expected_read_replicas) { |
195 | 90.3k | SetExpectedReplicas(expected_live_replicas, expected_read_replicas); |
196 | 90.3k | } |
197 | | int expected_live_replicas = 0; |
198 | | |
199 | | int expected_read_replicas = 0; |
200 | | |
201 | | // Number of live replicas in replicas_ (presumably RemoteTablet::replicas_); assigned by SetAliveReplicas(). |
202 | | int num_alive_live_replicas = 0; |
203 | | |
204 | | // Number of read replicas in replicas_ (presumably RemoteTablet::replicas_); assigned by SetAliveReplicas(). |
205 | | int num_alive_read_replicas = 0; |
206 | | |
207 | 3.59k | bool IsReplicasCountConsistent() { |
208 | 3.59k | return (expected_live_replicas + expected_read_replicas) == |
209 | 3.59k | (num_alive_live_replicas + num_alive_read_replicas); |
210 | 3.59k | } |
211 | | |
212 | | // Overwrite expected_live_replicas and expected_read_replicas with the given values. |
213 | 847k | void SetExpectedReplicas(int live_replicas, int read_replicas) { |
214 | 847k | expected_live_replicas = live_replicas; |
215 | 847k | expected_read_replicas = read_replicas; |
216 | 847k | } |
217 | | |
218 | 368k | void SetAliveReplicas(int live_replicas, int read_replicas) { |
219 | 368k | num_alive_live_replicas = live_replicas; |
220 | 368k | num_alive_read_replicas = read_replicas; |
221 | 368k | } |
222 | | |
223 | | std::string ToString(); |
224 | | }; |
225 | | |
226 | | // The client's view of a given tablet. This object manages lookups of |
227 | | // the tablet's locations, status, etc. |
228 | | // |
229 | | // This class is thread-safe. |
230 | | class RemoteTablet : public RefCountedThreadSafe<RemoteTablet> { |
231 | | public: |
232 | | RemoteTablet(std::string tablet_id, |
233 | | Partition partition, |
234 | | boost::optional<PartitionListVersion> partition_list_version, |
235 | | uint64 split_depth, |
236 | | const TabletId& split_parent_tablet_id); |
237 | | |
238 | | ~RemoteTablet(); |
239 | | |
240 | | // Updates this tablet's replica locations. |
241 | | void Refresh( |
242 | | const TabletServerMap& tservers, |
243 | | const google::protobuf::RepeatedPtrField<master::TabletLocationsPB_ReplicaPB>& replicas); |
244 | | |
245 | | // Mark this tablet as stale, indicating that the cached tablet metadata is |
246 | | // out of date. Staleness is checked by the MetaCache when |
247 | | // LookupTabletByKey() is called to determine whether the fast (non-network) |
248 | | // path can be used or whether the metadata must be refreshed from the Master. |
249 | | void MarkStale(); |
250 | | |
251 | | // Whether the tablet has been marked as stale. |
252 | | bool stale() const; |
253 | | |
254 | | // Mark this tablet as already split. |
255 | | void MarkAsSplit(); |
256 | | |
257 | | bool is_split() const; |
258 | | |
259 | | // Returns table partition list version last known to the client for which this tablet was |
260 | | // serving partition_ key range. |
261 | | // This could be `none` for RemoteTablet instances requested by ID, because in that case we don't |
262 | | // get table partition list version from master. |
263 | 0 | boost::optional<PartitionListVersion> partition_list_version() const { |
264 | 0 | return partition_list_version_; |
265 | 0 | } |
266 | | |
267 | | // Mark any replicas of this tablet hosted by 'ts' as failed. They will |
268 | | // not be returned in future cache lookups. |
269 | | // |
270 | | // The provided status is used for logging. |
271 | | // Returns true if 'ts' was found among this tablet's replicas, false if not. |
272 | | bool MarkReplicaFailed(RemoteTabletServer *ts, const Status& status); |
273 | | |
274 | | // Return the number of failed replicas for this tablet. |
275 | | int GetNumFailedReplicas() const; |
276 | | |
277 | | bool IsReplicasCountConsistent() const; |
278 | | |
279 | | std::string ReplicasCountToString() const; |
280 | | |
281 | | // Set expected_live_replicas and expected_read_replicas. |
282 | | void SetExpectedReplicas(int expected_live_replicas, int expected_read_replicas); |
283 | | |
284 | | void SetAliveReplicas(int alive_live_replicas, int alive_read_replicas); |
285 | | |
286 | | // Return the tablet server which is acting as the current LEADER for |
287 | | // this tablet, provided it hasn't failed. |
288 | | // |
289 | | // Returns NULL if there is currently no leader, or if the leader has |
290 | | // failed. Given that the replica list may change at any time, |
291 | | // callers should always check the result against NULL. |
292 | | RemoteTabletServer* LeaderTServer() const; |
293 | | |
294 | | // Writes this tablet's TSes (across all replicas) to 'servers' for all available replicas. If a |
295 | | // replica has failed recently, check if it is available now if it is local. For remote replica, |
296 | | // wait for some time (configurable) before retrying. |
297 | | void GetRemoteTabletServers( |
298 | | std::vector<RemoteTabletServer*>* servers, |
299 | | IncludeFailedReplicas include_failed_replicas = IncludeFailedReplicas::kFalse); |
300 | | |
301 | | std::vector<RemoteTabletServer*> GetRemoteTabletServers( |
302 | 309k | IncludeFailedReplicas include_failed_replicas = IncludeFailedReplicas::kFalse) { |
303 | 309k | std::vector<RemoteTabletServer*> result; |
304 | 309k | GetRemoteTabletServers(&result, include_failed_replicas); |
305 | 309k | return result; |
306 | 309k | } |
307 | | |
308 | | // Return true if the tablet currently has a known LEADER replica |
309 | | // (i.e the next call to LeaderTServer() is likely to return non-NULL) |
310 | | bool HasLeader() const; |
311 | | |
312 | 45.2M | const std::string& tablet_id() const { return tablet_id_; } |
313 | | |
314 | 57.5M | const Partition& partition() const { |
315 | 57.5M | return partition_; |
316 | 57.5M | } |
317 | | |
318 | | // Mark the specified tablet server as the leader of the consensus configuration in the cache. |
319 | | // Returns whether server was found in replicas_. |
320 | | bool MarkTServerAsLeader(const RemoteTabletServer* server) WARN_UNUSED_RESULT; |
321 | | |
322 | | // Mark the specified tablet server as a follower in the cache. |
323 | | void MarkTServerAsFollower(const RemoteTabletServer* server); |
324 | | |
325 | | // Return stringified representation of the list of replicas for this tablet. |
326 | | std::string ReplicasAsString() const; |
327 | | |
328 | | std::string ToString() const; |
329 | | |
330 | 557 | const std::string& LogPrefix() const { return log_prefix_; } |
331 | | |
332 | 22.0k | MonoTime refresh_time() { return refresh_time_.load(std::memory_order_acquire); } |
333 | | |
334 | | // See TabletLocationsPB::split_depth. |
335 | 62 | uint64 split_depth() const { return split_depth_; } |
336 | | |
337 | 90.4k | const TabletId& split_parent_tablet_id() const { return split_parent_tablet_id_; } |
338 | | |
339 | 15.7k | int64_t lookups_without_new_replicas() const { return lookups_without_new_replicas_; } |
340 | | |
341 | | // The last version of the table's partition list that we know the tablet was serving data with. |
342 | | PartitionListVersion GetLastKnownPartitionListVersion() const; |
343 | | |
344 | | void MakeLastKnownPartitionListVersionAtLeast(PartitionListVersion partition_list_version); |
345 | | |
346 | | private: |
347 | | // Same as ReplicasAsString(), except that the caller must hold mutex_. |
348 | | std::string ReplicasAsStringUnlocked() const; |
349 | | |
350 | | const std::string tablet_id_; |
351 | | const std::string log_prefix_; |
352 | | const Partition partition_; |
353 | | const boost::optional<PartitionListVersion> partition_list_version_; |
354 | | const uint64 split_depth_; |
355 | | const TabletId split_parent_tablet_id_; |
356 | | |
357 | | // All non-const members are protected by 'mutex_'. |
358 | | mutable rw_spinlock mutex_; |
359 | | bool stale_; |
360 | | bool is_split_ = false; |
361 | | std::vector<RemoteReplica> replicas_; |
362 | | PartitionListVersion last_known_partition_list_version_ = 0; |
363 | | |
364 | | std::atomic<ReplicasCount> replicas_count_{{0, 0}}; |
365 | | |
366 | | // Last time this object was refreshed. Initialized to MonoTime::Min() so we don't have to be |
367 | | // checking whether it has been initialized everytime we use this value. |
368 | | std::atomic<MonoTime> refresh_time_{MonoTime::Min()}; |
369 | | |
370 | | int64_t lookups_without_new_replicas_ = 0; |
371 | | |
372 | | DISALLOW_COPY_AND_ASSIGN(RemoteTablet); |
373 | | }; |
374 | | |
375 | | class ToStringable { |
376 | | public: |
377 | | virtual std::string ToString() const = 0; |
378 | 69.3k | virtual ~ToStringable() = default; |
379 | | }; |
380 | | |
381 | | class RequestCleanup { |
382 | | public: |
383 | | virtual void CleanupRequest() = 0; |
384 | 69.3k | virtual ~RequestCleanup() = default; |
385 | | }; |
386 | | |
387 | | // We store partition_list_version in addition to start_key to be able to uniquely identify exact |
388 | | // partition (as a result of split we can have new partition with same start key, but different |
389 | | // version). |
390 | | struct VersionedPartitionStartKey { |
391 | | PartitionKeyPtr key; |
392 | | PartitionListVersion partition_list_version; |
393 | | |
394 | | std::string ToString() const; |
395 | | }; |
396 | | |
397 | | typedef PartitionKey PartitionGroupStartKey; |
398 | | typedef PartitionKeyPtr PartitionGroupStartKeyPtr; |
399 | | typedef VersionedPartitionStartKey VersionedPartitionGroupStartKey; |
400 | | |
401 | | using LookupCallbackParam = boost::variant<RemoteTabletPtr, std::vector<RemoteTabletPtr>>; |
402 | | |
403 | | using LookupCallback = boost::variant<LookupTabletCallback, LookupTabletRangeCallback>; |
404 | | |
405 | | // Used to store callbacks for individual requests looking up a tablet by partition key, and those |
406 | | // requests' deadlines, so MetaCache can invoke those callbacks inside ProcessTabletLocations |
407 | | // after receiving a group of tablet locations from master. |
408 | | struct LookupData : public MPSCQueueEntry<LookupData> { |
409 | 0 | LookupData() {} |
410 | | LookupData( |
411 | | const LookupCallback& callback_, CoarseTimePoint deadline_, |
412 | | const PartitionKeyPtr& partition_start_) |
413 | | : callback(callback_), deadline(deadline_), |
414 | 159k | partition_start(partition_start_) { |
415 | 159k | } |
416 | | |
417 | | LookupCallback callback; |
418 | | CoarseTimePoint deadline; |
419 | | // Suitable only when lookup is performed for partition, nullptr otherwise. |
420 | | PartitionKeyPtr partition_start; |
421 | | // Debug representation. Format() placeholders are 0-based: $0 = deadline, $1 = partition_start. |
422 | 0 | std::string ToString() const { |
423 | 0 | return Format("{ deadline: $0 partition_start: $1 }", |
424 | 0 | deadline, partition_start ? Slice(*partition_start).ToDebugHexString() : ""); |
425 | 0 | } |
426 | | }; |
427 | | |
428 | | // Stores group of tablet lookups to be resolved by the same single RPC call. |
429 | | // For this purpose, lookups by tablet ID are grouped by tablet ID and lookups by key |
430 | | // are grouped by partitions group. |
431 | | struct LookupDataGroup { |
432 | | MPSCQueue<LookupData> lookups; |
433 | | // Number of the request currently running for this group (NOTE(review): inferred from the name); 0 if the request is not yet sent. |
434 | | std::atomic<int64_t> running_request_number{0}; |
435 | | |
436 | | int64_t max_completed_request_number = 0; |
437 | | |
438 | | void Finished(int64_t request_no, const ToStringable& id, bool allow_absence = false); |
439 | | ~LookupDataGroup(); |
440 | | }; |
441 | | |
442 | | struct TableData { |
443 | | explicit TableData(const VersionedTablePartitionListPtr& partition_list_); |
444 | | |
445 | | VersionedTablePartitionListPtr partition_list; |
446 | | std::map<PartitionKey, RemoteTabletPtr> tablets_by_partition; |
447 | | std::unordered_map<PartitionGroupStartKey, LookupDataGroup> tablet_lookups_by_group; |
448 | | std::vector<RemoteTabletPtr> all_tablets; |
449 | | LookupDataGroup full_table_lookups; |
450 | | bool stale = false; |
451 | | // To resolve partition_key to tablet_id MetaCache uses client::FindPartitionStart with |
452 | | // TableData::partition_list and then translates partition_start to tablet_id based on |
453 | | // TableData::tablets_by_partition. |
454 | | // |
455 | | // We maintain the invariant that TableData::tablets_by_partition and |
456 | | // TableData::tablet_lookups_by_group always correspond to the version of |
457 | | // TableData::partition_list to avoid inconsistencies like the following: |
458 | | // |
459 | | // If TableData::tablets_by_partition was populated based on a newer version of Table partition |
460 | | // list than partition_list.version, for a key belonging to 2nd (with the higher key range) |
461 | | // post-split tablet we can get partition_start key of the parent partition as of the state before |
462 | | // the split and then translate it using newer TableData::tablets_by_partition into 1st |
463 | | // (with the lower key range) post-split tablet instead of 2nd post-split tablet. This way we will |
464 | | // miss the key, because it doesn't exist in 1st post-split tablet. |
465 | | }; |
466 | | |
467 | | class LookupCallbackVisitor : public boost::static_visitor<> { |
468 | | public: |
469 | 158k | explicit LookupCallbackVisitor(const LookupCallbackParam& param) : param_(param) { |
470 | 158k | } |
471 | | |
472 | 1.32k | explicit LookupCallbackVisitor(const Status& error_status) : error_status_(error_status) { |
473 | 1.32k | } |
474 | | |
475 | | void operator()(const LookupTabletCallback& tablet_callback) const; |
476 | | void operator()(const LookupTabletRangeCallback& tablet_range_callback) const; |
477 | | |
478 | | private: |
479 | | const LookupCallbackParam param_; |
480 | | const boost::optional<Status> error_status_; |
481 | | }; |
482 | | |
483 | | // Manager of RemoteTablets and RemoteTabletServers. The client consults |
484 | | // this class to look up a given tablet or server. |
485 | | // |
486 | | // This class will also be responsible for cache eviction policies, etc. |
487 | | class MetaCache : public RefCountedThreadSafe<MetaCache> { |
488 | | public: |
489 | | // The passed 'client' object must remain valid as long as MetaCache is alive. |
490 | | explicit MetaCache(YBClient* client); |
491 | | |
492 | | ~MetaCache(); |
493 | | |
494 | | // Add a tablet server's proxy, and optionally the tserver itself if it is local. |
495 | | void SetLocalTabletServer(const std::string& permanent_uuid, |
496 | | const std::shared_ptr<tserver::TabletServerServiceProxy>& proxy, |
497 | | const tserver::LocalTabletServer* local_tserver); |
498 | | |
499 | | // Look up which tablet hosts the given partition key for a table. When it is |
500 | | // available, the result is delivered through 'callback' (there is no separate |
501 | | // output parameter). Only tablets with non-failed LEADERs are considered. |
502 | | // |
503 | | // NOTE: the callback may be called from an IO thread or inline with this |
504 | | // call if the cached data is already available. |
505 | | // |
506 | | // NOTE: the memory referenced by 'table' must remain valid until 'callback' |
507 | | // is invoked. |
508 | | void LookupTabletByKey(const std::shared_ptr<YBTable>& table, |
509 | | const PartitionKey& partition_key, |
510 | | CoarseTimePoint deadline, |
511 | | LookupTabletCallback callback); |
512 | | |
513 | | std::future<Result<internal::RemoteTabletPtr>> LookupTabletByKeyFuture( |
514 | | const std::shared_ptr<YBTable>& table, |
515 | | const PartitionKey& partition_key, |
516 | | CoarseTimePoint deadline); |
517 | | |
518 | | // Lookup all tablets corresponding to a table. |
519 | | void LookupAllTablets(const std::shared_ptr<const YBTable>& table, |
520 | | CoarseTimePoint deadline, |
521 | | LookupTabletRangeCallback callback); |
522 | | |
523 | | // If table is specified and cache is not used or has no tablet leader also checks whether table |
524 | | // partitions are stale and returns ClientErrorCode::kTablePartitionListIsStale in that case. |
525 | | void LookupTabletById(const TabletId& tablet_id, |
526 | | const std::shared_ptr<const YBTable>& table, |
527 | | master::IncludeInactive include_inactive, |
528 | | CoarseTimePoint deadline, |
529 | | LookupTabletCallback callback, |
530 | | UseCache use_cache); |
531 | | |
532 | | // Return the local tablet server if available. |
533 | 80.8k | RemoteTabletServer* local_tserver() const { |
534 | 80.8k | return local_tserver_; |
535 | 80.8k | } |
536 | | |
537 | | // Mark any replicas of any tablets hosted by 'ts' as failed. They will |
538 | | // not be returned in future cache lookups. |
539 | | void MarkTSFailed(RemoteTabletServer* ts, const Status& status); |
540 | | |
541 | | // Acquire or release a permit to perform a (slow) master lookup. |
542 | | // |
543 | | // If acquisition fails, caller may still do the lookup, but is first |
544 | | // blocked for a short time to prevent lookup storms. |
545 | | bool AcquireMasterLookupPermit(); |
546 | | void ReleaseMasterLookupPermit(); |
547 | | |
548 | | // Called on the slow LookupTablet path when the master responds. |
549 | | // Populates the tablet caches. |
550 | | // If table_partition_list_version is specified, the function checks for it to match |
551 | | // TableData::partition_list and returns Status with ClientErrorCode::kTablePartitionListIsStale if |
552 | | // versions do not match. |
553 | | // TableData::tablets_by_partition is only updated when these versions are defined and match. |
554 | | // Also notifies all callbacks that are waiting on received tablet ids. |
555 | | // REQUIRES locations to be in order of partitions and without overlaps. |
556 | | // There could be gaps due to post-split tablets not yet running; in this case, MetaCache will |
557 | | // just skip updating cache for these tablets until they become running. |
558 | | CHECKED_STATUS ProcessTabletLocations( |
559 | | const google::protobuf::RepeatedPtrField<master::TabletLocationsPB>& locations, |
560 | | boost::optional<PartitionListVersion> table_partition_list_version, LookupRpc* lookup_rpc); |
561 | | |
562 | | void InvalidateTableCache(const YBTable& table); |
563 | | |
564 | 0 | const std::string& LogPrefix() const { return log_prefix_; } |
565 | | |
566 | | private: |
567 | | friend class LookupRpc; |
568 | | friend class LookupByKeyRpc; |
569 | | friend class LookupByIdRpc; |
570 | | friend class LookupFullTableRpc; |
571 | | |
572 | | FRIEND_TEST(client::ClientTest, TestMasterLookupPermits); |
573 | | |
574 | | // Lookup the given tablet by partition_start_key, only consulting local information. |
575 | | // Returns the cached tablet. NOTE(review): presumably a null RemoteTabletPtr on a cache miss; confirm in meta_cache.cc. |
576 | | RemoteTabletPtr LookupTabletByKeyFastPathUnlocked( |
577 | | const TableId& table_id, |
578 | | const VersionedPartitionStartKey& partition_key) REQUIRES_SHARED(mutex_); |
579 | | |
580 | | RemoteTabletPtr LookupTabletByIdFastPathUnlocked(const TabletId& tablet_id) |
581 | | REQUIRES_SHARED(mutex_); |
582 | | |
583 | | // Update our information about the given tablet server. |
584 | | // |
585 | | // This is called when we get some response from the master which contains |
586 | | // the latest host/port info for a server. |
587 | | void UpdateTabletServerUnlocked(const master::TSInfoPB& pb) REQUIRES(mutex_); |
588 | | |
589 | | // Notify appropriate callbacks that lookup of specified partition group of specified table |
590 | | // failed because of the specified status. |
591 | | void LookupByKeyFailed( |
592 | | const std::shared_ptr<const YBTable>& table, |
593 | | const VersionedPartitionGroupStartKey& partition_group_start, |
594 | | PartitionListVersion response_partition_list_version, |
595 | | int64_t request_no, const Status& status); |
596 | | |
597 | | void LookupByIdFailed( |
598 | | const TabletId& tablet_id, |
599 | | const std::shared_ptr<const YBTable>& table, |
600 | | master::IncludeInactive include_inactive, |
601 | | const boost::optional<PartitionListVersion>& response_partition_list_version, |
602 | | int64_t request_no, |
603 | | const Status& status); |
604 | | |
605 | | void LookupFullTableFailed(const std::shared_ptr<const YBTable>& table, |
606 | | int64_t request_no, const Status& status); |
607 | | |
608 | | class CallbackNotifier; |
609 | | |
610 | | // Processes lookup failure. |
611 | | // status - failure status. |
612 | | // lookup_data_group - group that contains lookup data. |
613 | | // The caller must hold mutex_ (see REQUIRES(mutex_)). |
614 | | // Returns deadline, if lookup should be restarted. CoarseTimePoint() if not. |
615 | | CoarseTimePoint LookupFailed( |
616 | | const Status& status, int64_t request_no, const ToStringable& lookup_id, |
617 | | LookupDataGroup* lookup_data_group, |
618 | | CallbackNotifier* notifier) REQUIRES(mutex_); |
619 | | |
620 | | RemoteTabletPtr FastLookupTabletByKeyUnlocked( |
621 | | const TableId& table_id, |
622 | | const VersionedPartitionStartKey& partition_start) REQUIRES_SHARED(mutex_); |
623 | | |
624 | | // Lookup from cache the set of tablets corresponding to a given table. |
625 | | // Returns empty vector if the cache is invalid or a tablet is stale, |
626 | | // otherwise returns a list of tablets. |
627 | | boost::optional<std::vector<RemoteTabletPtr>> FastLookupAllTabletsUnlocked( |
628 | | const std::shared_ptr<const YBTable>& table) REQUIRES_SHARED(mutex_); |
629 | | |
630 | | // If `tablet` is a result of splitting of pre-split tablet for which we already have |
631 | | // TabletRequests structure inside YBClient - updates TabletRequests.request_id_seq for the |
632 | | // `tablet` based on value for pre-split tablet. |
633 | | // This is required for correct tracking of duplicate requests to post-split tablets, if we |
634 | | // start from scratch - tserver will treat these requests as duplicates/incorrect, because |
635 | | // on tserver side related structure for tracking duplicate requests is also copied from |
636 | | // pre-split tablet to post-split tablets. |
637 | | void MaybeUpdateClientRequests(const RemoteTablet& tablet); |
638 | | |
639 | | std::unordered_map<TableId, TableData>::iterator InitTableDataUnlocked( |
640 | | const TableId& table_id, const VersionedTablePartitionListPtr& partitions) |
641 | | REQUIRES_SHARED(mutex_); |
642 | | |
643 | | template <class Lock> |
644 | | bool DoLookupTabletByKey( |
645 | | const std::shared_ptr<const YBTable>& table, const VersionedTablePartitionListPtr& partitions, |
646 | | const PartitionKeyPtr& partition_start, CoarseTimePoint deadline, |
647 | | LookupTabletCallback* callback, PartitionGroupStartKeyPtr* partition_group_start); |
648 | | |
649 | | template <class Lock> |
650 | | bool DoLookupTabletById( |
651 | | const TabletId& tablet_id, |
652 | | const std::shared_ptr<const YBTable>& table, |
653 | | master::IncludeInactive include_inactive, |
654 | | CoarseTimePoint deadline, |
655 | | UseCache use_cache, |
656 | | LookupTabletCallback* callback); |
657 | | |
658 | | template <class Lock> |
659 | | bool DoLookupAllTablets(const std::shared_ptr<const YBTable>& table, |
660 | | CoarseTimePoint deadline, |
661 | | LookupTabletRangeCallback* callback); |
662 | | |
663 | | YBClient* const client_; |
664 | | |
665 | | std::shared_timed_mutex mutex_; |
666 | | |
667 | | // Cache of Tablet Server locations: TS UUID -> RemoteTabletServer*. |
668 | | // |
669 | | // Given that the set of tablet servers is bounded by physical machines, we never |
670 | | // evict entries from this map until the MetaCache is destructed. So, no need to use |
671 | | // shared_ptr, etc. |
672 | | // |
673 | | // Protected by mutex_. |
674 | | TabletServerMap ts_cache_; |
675 | | |
676 | | // Local tablet server. |
677 | | RemoteTabletServer* local_tserver_ = nullptr; |
678 | | |
679 | | // Cache of tablets, keyed by table ID, then by start partition key. |
680 | | |
681 | | std::unordered_map<TableId, TableData> tables_ GUARDED_BY(mutex_); |
682 | | |
683 | | // Cache of tablets, keyed by tablet ID. |
684 | | std::unordered_map<TabletId, RemoteTabletPtr> tablets_by_id_ GUARDED_BY(mutex_); |
685 | | |
686 | | std::unordered_map<TabletId, LookupDataGroup> tablet_lookups_by_id_ GUARDED_BY(mutex_); |
687 | | |
688 | | // Prevents master lookup "storms" by delaying master lookups when all |
689 | | // permits have been acquired. |
690 | | Semaphore master_lookup_sem_; |
691 | | |
692 | | const std::string log_prefix_; |
693 | | |
694 | | DISALLOW_COPY_AND_ASSIGN(MetaCache); |
695 | | }; |
696 | | |
697 | | int64_t TEST_GetLookupSerial(); |
698 | | |
699 | | } // namespace internal |
700 | | } // namespace client |
701 | | } // namespace yb |
702 | | |
703 | | #endif /* YB_CLIENT_META_CACHE_H */ |