YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/master/ts_descriptor.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
#ifndef YB_MASTER_TS_DESCRIPTOR_H
33
#define YB_MASTER_TS_DESCRIPTOR_H
34
35
#include <shared_mutex>
36
37
#include <atomic>
38
#include <memory>
39
#include <mutex>
40
#include <string>
41
42
#include <gtest/gtest_prod.h>
43
44
#include "yb/common/common_net.pb.h"
45
#include "yb/common/hybrid_time.h"
46
47
#include "yb/master/master_heartbeat.fwd.h"
48
#include "yb/master/master_fwd.h"
49
50
#include "yb/rpc/rpc_fwd.h"
51
52
#include "yb/util/capabilities.h"
53
#include "yb/util/locks.h"
54
#include "yb/util/monotime.h"
55
#include "yb/util/net/net_util.h"
56
#include "yb/util/physical_time.h"
57
#include "yb/util/result.h"
58
#include "yb/util/status_fwd.h"
59
#include "yb/util/shared_ptr_tuple.h"
60
#include "yb/util/shared_lock.h"
61
62
namespace yb {
63
64
class NodeInstancePB;
65
66
namespace consensus {
67
class ConsensusServiceProxy;
68
}
69
70
namespace tserver {
71
class TabletServerAdminServiceProxy;
72
class TabletServerServiceProxy;
73
}
74
75
namespace master {
76
77
class TSRegistrationPB;
78
class TSInformationPB;
79
class ReplicationInfoPB;
80
class TServerMetricsPB;
81
82
// util::SharedPtrTuple instantiated with the tablet server admin, tablet server and consensus
// service proxy classes.
using ProxyTuple = util::SharedPtrTuple<
    tserver::TabletServerAdminServiceProxy,
    tserver::TabletServerServiceProxy,
    consensus::ConsensusServiceProxy>;
85
86
// Master-side view of a single tablet server.
87
//
88
// Tracks the last heartbeat, status, instance identifier, etc.
89
// This class is thread-safe.
90
class TSDescriptor {
91
 public:
92
  static Result<TSDescriptorPtr> RegisterNew(
93
      const NodeInstancePB& instance,
94
      const TSRegistrationPB& registration,
95
      CloudInfoPB local_cloud_info,
96
      rpc::ProxyCache* proxy_cache,
97
      RegisteredThroughHeartbeat registered_through_heartbeat = RegisteredThroughHeartbeat::kTrue);
98
99
  static std::string generate_placement_id(const CloudInfoPB& ci);
100
101
  virtual ~TSDescriptor();
102
103
  // Set the last-heartbeat time to now.
104
  void UpdateHeartbeat(const TSHeartbeatRequestPB* req);
105
106
  // Return the amount of time since the last heartbeat received
107
  // from this TS.
108
  MonoDelta TimeSinceHeartbeat() const;
109
110
  // Register this tablet server.
111
  CHECKED_STATUS Register(const NodeInstancePB& instance,
112
                          const TSRegistrationPB& registration,
113
                          CloudInfoPB local_cloud_info,
114
                          rpc::ProxyCache* proxy_cache);
115
116
5.31M
  // Returns a reference to this descriptor's permanent UUID. The backing member is declared
  // const, so no locking is performed here.
  const std::string& permanent_uuid() const {
    return permanent_uuid_;
  }
117
  int64_t latest_seqno() const;
118
119
  bool has_tablet_report() const;
120
  void set_has_tablet_report(bool has_report);
121
122
  bool registered_through_heartbeat() const;
123
124
  // Returns TSRegistrationPB for this TSDescriptor.
125
  TSRegistrationPB GetRegistration() const;
126
127
  // Returns TSInformationPB for this TSDescriptor.
128
  const std::shared_ptr<TSInformationPB> GetTSInformationPB() const;
129
130
  // Helper function to tell if this TS matches the cloud information provided. For now, we have
131
  // no wildcard functionality, so it will have to explicitly match each individual component.
132
  // Later, this might be extended to say if this TS is part of some wildcard expression for cloud
133
  // information (e.g., aws.us-west.* will match any TS in aws.us-west.1a or aws.us-west.1b, etc.).
134
  bool MatchesCloudInfo(const CloudInfoPB& cloud_info) const;
135
136
  CloudInfoPB GetCloudInfo() const;
137
138
  // Return the pre-computed placement_id, comprised of the cloud_info data.
139
  std::string placement_id() const;
140
141
  std::string placement_uuid() const;
142
143
  template<typename Lambda>
144
  bool DoesRegistrationMatch(Lambda predicate) const;
145
  bool IsRunningOn(const HostPortPB& hp) const;
146
  bool IsBlacklisted(const BlacklistSet& blacklist) const;
147
148
  // Should this ts have any leader load on it.
149
  virtual bool IsAcceptingLeaderLoad(const ReplicationInfoPB& replication_info) const;
150
151
  // Return an RPC proxy to a service.
152
  template <class TProxy>
153
1.08M
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
1.08M
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
1.08M
  }
_ZN2yb6master12TSDescriptor8GetProxyINS_7tserver24TabletServerServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EE
Line
Count
Source
153
272k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
272k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
272k
  }
_ZN2yb6master12TSDescriptor8GetProxyINS_7tserver29TabletServerAdminServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EE
Line
Count
Source
153
272k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
272k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
272k
  }
_ZN2yb6master12TSDescriptor8GetProxyINS_9consensus21ConsensusServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EE
Line
Count
Source
153
272k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
272k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
272k
  }
_ZN2yb6master12TSDescriptor8GetProxyINS_7tserver30TabletServerBackupServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EE
Line
Count
Source
153
272k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
272k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
272k
  }
156
157
  // Increment the accounting of the number of replicas recently created on this
158
  // server. This value will automatically decay over time.
159
  void IncrementRecentReplicaCreations();
160
161
  // Return the number of replicas which have recently been created on this
162
  // TS. This number is incremented when replicas are placed on the TS, and
163
  // then decayed over time. This method is not 'const' because each call
164
  // actually performs the time-based decay.
165
  double RecentReplicaCreations();
166
167
  // Set the number of live replicas (i.e. running or bootstrapping).
168
28
  void set_num_live_replicas(int num_live_replicas) {
169
28
    DCHECK_GE(num_live_replicas, 0);
170
28
    std::lock_guard<decltype(lock_)> l(lock_);
171
28
    num_live_replicas_ = num_live_replicas;
172
28
  }
173
174
  // Return the number of live replicas (i.e running or bootstrapping).
175
29
  int num_live_replicas() const {
176
29
    SharedLock<decltype(lock_)> l(lock_);
177
29
    return num_live_replicas_;
178
29
  }
179
180
25
  void set_leader_count(int leader_count) {
181
25
    DCHECK_GE(leader_count, 0);
182
25
    std::lock_guard<decltype(lock_)> l(lock_);
183
25
    leader_count_ = leader_count;
184
25
  }
185
186
308
  int leader_count() const {
187
308
    SharedLock<decltype(lock_)> l(lock_);
188
308
    return leader_count_;
189
308
  }
190
191
0
  // Return the stored physical time of this node (physical_time_).
  // Reads the value under a SharedLock on lock_.
  MicrosTime physical_time() const {
    SharedLock<rw_spinlock> guard(lock_);
    return physical_time_;
  }
195
196
0
  // Store the given hybrid time in hybrid_time_ while holding lock_.
  void set_hybrid_time(HybridTime hybrid_time) {
    std::lock_guard<rw_spinlock> guard(lock_);
    hybrid_time_ = hybrid_time;
  }
200
201
0
  // Return the stored hybrid time of this node (hybrid_time_).
  // Reads the value under a SharedLock on lock_.
  HybridTime hybrid_time() const {
    SharedLock<rw_spinlock> guard(lock_);
    return hybrid_time_;
  }
205
206
0
  // Return the stored roundtrip time of the previous heartbeat (heartbeat_rtt_).
  // Reads the value under a SharedLock on lock_.
  MonoDelta heartbeat_rtt() const {
    SharedLock<rw_spinlock> guard(lock_);
    return heartbeat_rtt_;
  }
210
211
4
  uint64_t total_memory_usage() {
212
4
    SharedLock<decltype(lock_)> l(lock_);
213
4
    return ts_metrics_.total_memory_usage;
214
4
  }
215
216
4
  uint64_t total_sst_file_size() {
217
4
    SharedLock<decltype(lock_)> l(lock_);
218
4
    return ts_metrics_.total_sst_file_size;
219
4
  }
220
221
4
  uint64_t uncompressed_sst_file_size() {
222
4
    SharedLock<decltype(lock_)> l(lock_);
223
4
    return ts_metrics_.uncompressed_sst_file_size;
224
4
  }
225
226
4
  uint64_t num_sst_files() {
227
4
    SharedLock<decltype(lock_)> l(lock_);
228
4
    return ts_metrics_.num_sst_files;
229
4
  }
230
231
4
  double read_ops_per_sec() {
232
4
    SharedLock<decltype(lock_)> l(lock_);
233
4
    return ts_metrics_.read_ops_per_sec;
234
4
  }
235
236
4
  double write_ops_per_sec() {
237
4
    SharedLock<decltype(lock_)> l(lock_);
238
4
    return ts_metrics_.write_ops_per_sec;
239
4
  }
240
241
35
  uint64_t uptime_seconds() {
242
35
    SharedLock<decltype(lock_)> l(lock_);
243
35
    return ts_metrics_.uptime_seconds;
244
35
  }
245
246
  // Space accounting for a single path; both counters start at zero.
  // NOTE(review): units are not stated here — presumably bytes; confirm against the producer.
  struct TSPathMetrics {
    uint64_t used_space{0};
    uint64_t total_space{0};
  };
250
251
0
  std::unordered_map<std::string, TSPathMetrics> path_metrics() {
252
0
    SharedLock<decltype(lock_)> l(lock_);
253
0
    return ts_metrics_.path_metrics;
254
0
  }
255
256
  void UpdateMetrics(const TServerMetricsPB& metrics);
257
258
  void GetMetrics(TServerMetricsPB* metrics);
259
260
6.86k
  void ClearMetrics() {
261
6.86k
    std::lock_guard<decltype(lock_)> l(lock_);
262
6.86k
    ts_metrics_.ClearMetrics();
263
6.86k
  }
264
265
  // Set of methods to keep track of pending tablet deletes for a tablet server. We use them to
266
  // avoid assigning more tablets to a tserver that might be potentially unresponsive.
267
  bool HasTabletDeletePending() const;
268
  bool IsTabletDeletePending(const std::string& tablet_id) const;
269
  void AddPendingTabletDelete(const std::string& tablet_id);
270
  void ClearPendingTabletDelete(const std::string& tablet_id);
271
  std::string PendingTabletDeleteToString() const;
272
273
  std::string ToString() const;
274
275
  // Indicates that this descriptor was removed from the cluster and shouldn't be surfaced.
276
8.47M
  bool IsRemoved() const {
277
8.47M
    return removed_.load(std::memory_order_acquire);
278
8.47M
  }
279
280
1
  void SetRemoved(bool removed = true) {
281
1
    removed_.store(removed, std::memory_order_release);
282
1
  }
283
284
  explicit TSDescriptor(std::string perm_id);
285
286
  std::size_t NumTasks() const;
287
288
  bool IsLive() const;
289
290
643k
  // Return true if `capability` is present in this tablet server's capabilities_ set.
  // NOTE(review): capabilities_ is read here without taking lock_; presumably it is not
  // mutated concurrently with this call — confirm against the code that populates it.
  bool HasCapability(CapabilityId capability) const {
    return capabilities_.count(capability) != 0;
  }
293
294
  virtual bool IsLiveAndHasReported() const;
295
296
 protected:
297
  virtual CHECKED_STATUS RegisterUnlocked(const NodeInstancePB& instance,
298
                                          const TSRegistrationPB& registration,
299
                                          CloudInfoPB local_cloud_info,
300
                                          rpc::ProxyCache* proxy_cache);
301
302
  mutable rw_spinlock lock_;
303
 private:
304
  template <class TProxy>
305
  CHECKED_STATUS GetOrCreateProxy(std::shared_ptr<TProxy>* result,
306
                                  std::shared_ptr<TProxy>* result_cache);
307
308
  FRIEND_TEST(TestTSDescriptor, TestReplicaCreationsDecay);
309
  template<class ClusterLoadBalancerClass> friend class TestLoadBalancerBase;
310
311
  // Uses DNS to resolve registered hosts to a single endpoint.
312
  Result<HostPort> GetHostPortUnlocked() const;
313
314
  void DecayRecentReplicaCreationsUnlocked();
315
316
  struct TSMetrics {
317
318
    // Stores the total RAM usage of a tserver that is sent in every heartbeat.
319
    uint64_t total_memory_usage = 0;
320
321
    // Stores the total size of all the sst files in a tserver
322
    uint64_t total_sst_file_size = 0;
323
    uint64_t uncompressed_sst_file_size = 0;
324
    uint64_t num_sst_files = 0;
325
326
    double read_ops_per_sec = 0;
327
328
    double write_ops_per_sec = 0;
329
330
    uint64_t uptime_seconds = 0;
331
332
    std::unordered_map<std::string, TSPathMetrics> path_metrics;
333
334
6.86k
    void ClearMetrics() {
335
6.86k
      total_memory_usage = 0;
336
6.86k
      total_sst_file_size = 0;
337
6.86k
      uncompressed_sst_file_size = 0;
338
6.86k
      num_sst_files = 0;
339
6.86k
      read_ops_per_sec = 0;
340
6.86k
      write_ops_per_sec = 0;
341
6.86k
      uptime_seconds = 0;
342
6.86k
      path_metrics.clear();
343
6.86k
    }
344
  };
345
346
  struct TSMetrics ts_metrics_;
347
348
  const std::string permanent_uuid_;
349
  CloudInfoPB local_cloud_info_;
350
  rpc::ProxyCache* proxy_cache_;
351
  int64_t latest_seqno_;
352
353
  // The last time a heartbeat was received for this node.
354
  MonoTime last_heartbeat_;
355
356
  // The physical and hybrid times on this node at the time of heartbeat
357
  MicrosTime physical_time_;
358
  HybridTime hybrid_time_;
359
360
  // Roundtrip time of previous heartbeat.
361
  MonoDelta heartbeat_rtt_;
362
363
  // Set to true once this instance has reported all of its tablets.
364
  bool has_tablet_report_;
365
366
  // The number of times this tablet server has recently been selected to create a
367
  // tablet replica. This value decays back to 0 over time.
368
  double recent_replica_creations_;
369
  MonoTime last_replica_creations_decay_;
370
371
  // The number of live replicas on this host, from the last heartbeat.
372
  int num_live_replicas_;
373
374
  // The number of tablets for which this ts is a leader.
375
  int leader_count_;
376
377
  std::shared_ptr<TSInformationPB> ts_information_;
378
  std::string placement_id_;
379
380
  // The (read replica) cluster uuid to which this tserver belongs.
381
  std::string placement_uuid_;
382
383
  enterprise::ProxyTuple proxies_;
384
385
  // Set of tablet uuids for which a delete is pending on this tablet server.
386
  std::set<std::string> tablets_pending_delete_;
387
388
  // Capabilities of this tablet server.
389
  std::set<CapabilityId> capabilities_;
390
391
  // We don't remove TSDescriptors from the master's in-memory map since several classes hold
392
  // references to this object and those would be invalidated if we remove the descriptor from
393
  // the master's map. As a result, we just store a boolean indicating this entry is removed and
394
  // shouldn't be surfaced.
395
  std::atomic<bool> removed_{false};
396
397
  // Whether this tserver registered by heartbeating through master. If false, it registered through
398
  // peer's Raft config.
399
  RegisteredThroughHeartbeat registered_through_heartbeat_ = RegisteredThroughHeartbeat::kTrue;
400
401
  DISALLOW_COPY_AND_ASSIGN(TSDescriptor);
402
};
403
404
template <class TProxy>
405
Status TSDescriptor::GetOrCreateProxy(std::shared_ptr<TProxy>* result,
406
1.08M
                                      std::shared_ptr<TProxy>* result_cache) {
407
1.08M
  {
408
1.08M
    std::lock_guard<decltype(lock_)> l(lock_);
409
1.08M
    if (*result_cache) {
410
1.07M
      *result = *result_cache;
411
1.07M
      return Status::OK();
412
1.07M
    }
413
16.2k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
414
16.5k
    if (!(*result_cache)) {
415
16.5k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
416
16.5k
    }
417
16.2k
    *result = *result_cache;
418
16.2k
  }
419
16.2k
  return Status::OK();
420
16.2k
}
_ZN2yb6master12TSDescriptor16GetOrCreateProxyINS_7tserver24TabletServerServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EESA_
Line
Count
Source
406
271k
                                      std::shared_ptr<TProxy>* result_cache) {
407
271k
  {
408
271k
    std::lock_guard<decltype(lock_)> l(lock_);
409
271k
    if (*result_cache) {
410
267k
      *result = *result_cache;
411
267k
      return Status::OK();
412
267k
    }
413
4.19k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
414
4.19k
    if (!(*result_cache)) {
415
4.14k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
416
4.14k
    }
417
4.19k
    *result = *result_cache;
418
4.19k
  }
419
4.19k
  return Status::OK();
420
4.19k
}
_ZN2yb6master12TSDescriptor16GetOrCreateProxyINS_7tserver29TabletServerAdminServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EESA_
Line
Count
Source
406
271k
                                      std::shared_ptr<TProxy>* result_cache) {
407
271k
  {
408
271k
    std::lock_guard<decltype(lock_)> l(lock_);
409
271k
    if (*result_cache) {
410
267k
      *result = *result_cache;
411
267k
      return Status::OK();
412
267k
    }
413
3.89k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
414
4.14k
    if (!(*result_cache)) {
415
4.14k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
416
4.14k
    }
417
3.89k
    *result = *result_cache;
418
3.89k
  }
419
3.89k
  return Status::OK();
420
3.89k
}
_ZN2yb6master12TSDescriptor16GetOrCreateProxyINS_9consensus21ConsensusServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EESA_
Line
Count
Source
406
272k
                                      std::shared_ptr<TProxy>* result_cache) {
407
272k
  {
408
272k
    std::lock_guard<decltype(lock_)> l(lock_);
409
272k
    if (*result_cache) {
410
267k
      *result = *result_cache;
411
267k
      return Status::OK();
412
267k
    }
413
4.11k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
414
4.14k
    if (!(*result_cache)) {
415
4.14k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
416
4.14k
    }
417
4.11k
    *result = *result_cache;
418
4.11k
  }
419
4.11k
  return Status::OK();
420
4.11k
}
_ZN2yb6master12TSDescriptor16GetOrCreateProxyINS_7tserver30TabletServerBackupServiceProxyEEENS_6StatusEPNSt3__110shared_ptrIT_EESA_
Line
Count
Source
406
271k
                                      std::shared_ptr<TProxy>* result_cache) {
407
271k
  {
408
271k
    std::lock_guard<decltype(lock_)> l(lock_);
409
271k
    if (*result_cache) {
410
267k
      *result = *result_cache;
411
267k
      return Status::OK();
412
267k
    }
413
4.04k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
414
4.14k
    if (!(*result_cache)) {
415
4.14k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
416
4.14k
    }
417
4.04k
    *result = *result_cache;
418
4.04k
  }
419
4.04k
  return Status::OK();
420
4.04k
}
421
422
} // namespace master
423
} // namespace yb
424
425
#endif // YB_MASTER_TS_DESCRIPTOR_H