YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/master/ts_descriptor.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
#ifndef YB_MASTER_TS_DESCRIPTOR_H
33
#define YB_MASTER_TS_DESCRIPTOR_H
34
35
#include <shared_mutex>
36
37
#include <atomic>
38
#include <memory>
39
#include <mutex>
40
#include <string>
41
42
#include <gtest/gtest_prod.h>
43
44
#include "yb/common/common_net.pb.h"
45
#include "yb/common/hybrid_time.h"
46
47
#include "yb/master/master_heartbeat.fwd.h"
48
#include "yb/master/master_fwd.h"
49
50
#include "yb/rpc/rpc_fwd.h"
51
52
#include "yb/util/capabilities.h"
53
#include "yb/util/locks.h"
54
#include "yb/util/monotime.h"
55
#include "yb/util/net/net_util.h"
56
#include "yb/util/physical_time.h"
57
#include "yb/util/result.h"
58
#include "yb/util/status_fwd.h"
59
#include "yb/util/shared_ptr_tuple.h"
60
#include "yb/util/shared_lock.h"
61
62
namespace yb {
63
64
class NodeInstancePB;
65
66
namespace consensus {
67
class ConsensusServiceProxy;
68
}
69
70
namespace tserver {
71
class TabletServerAdminServiceProxy;
72
class TabletServerServiceProxy;
73
}
74
75
namespace master {
76
77
class TSRegistrationPB;
78
class TSInformationPB;
79
class ReplicationInfoPB;
80
class TServerMetricsPB;
81
82
typedef util::SharedPtrTuple<tserver::TabletServerAdminServiceProxy,
83
                             tserver::TabletServerServiceProxy,
84
                             consensus::ConsensusServiceProxy> ProxyTuple;
85
86
// Master-side view of a single tablet server.
87
//
88
// Tracks the last heartbeat, status, instance identifier, etc.
89
// This class is thread-safe.
90
class TSDescriptor {
91
 public:
92
  static Result<TSDescriptorPtr> RegisterNew(
93
      const NodeInstancePB& instance,
94
      const TSRegistrationPB& registration,
95
      CloudInfoPB local_cloud_info,
96
      rpc::ProxyCache* proxy_cache,
97
      RegisteredThroughHeartbeat registered_through_heartbeat = RegisteredThroughHeartbeat::kTrue);
98
99
  static std::string generate_placement_id(const CloudInfoPB& ci);
100
101
  virtual ~TSDescriptor();
102
103
  // Set the last-heartbeat time to now.
104
  void UpdateHeartbeat(const TSHeartbeatRequestPB* req);
105
106
  // Return the amount of time since the last heartbeat received
107
  // from this TS.
108
  MonoDelta TimeSinceHeartbeat() const;
109
110
  // Register this tablet server.
111
  CHECKED_STATUS Register(const NodeInstancePB& instance,
112
                          const TSRegistrationPB& registration,
113
                          CloudInfoPB local_cloud_info,
114
                          rpc::ProxyCache* proxy_cache);
115
116
12.5M
  // Returns a reference to this tablet server's permanent UUID. The backing member is declared
  // const, and no lock is taken here.
  const std::string& permanent_uuid() const {
    return permanent_uuid_;
  }
117
  int64_t latest_seqno() const;
118
119
  bool has_tablet_report() const;
120
  void set_has_tablet_report(bool has_report);
121
122
  bool registered_through_heartbeat() const;
123
124
  // Returns TSRegistrationPB for this TSDescriptor.
125
  TSRegistrationPB GetRegistration() const;
126
127
  // Returns TSInformationPB for this TSDescriptor.
128
  const std::shared_ptr<TSInformationPB> GetTSInformationPB() const;
129
130
  // Helper function to tell if this TS matches the cloud information provided. For now, we have
131
  // no wildcard functionality, so it will have to explicitly match each individual component.
132
  // Later, this might be extended to say if this TS is part of some wildcard expression for cloud
133
  // information (eg: aws.us-west.* will match any TS in aws.us-west.1a or aws.us-west.1b, etc.).
134
  bool MatchesCloudInfo(const CloudInfoPB& cloud_info) const;
135
136
  CloudInfoPB GetCloudInfo() const;
137
138
  // Return the pre-computed placement_id, comprised of the cloud_info data.
139
  std::string placement_id() const;
140
141
  std::string placement_uuid() const;
142
143
  template<typename Lambda>
144
  bool DoesRegistrationMatch(Lambda predicate) const;
145
  bool IsRunningOn(const HostPortPB& hp) const;
146
  bool IsBlacklisted(const BlacklistSet& blacklist) const;
147
148
  // Should this ts have any leader load on it.
149
  virtual bool IsAcceptingLeaderLoad(const ReplicationInfoPB& replication_info) const;
150
151
  // Return an RPC proxy to a service.
152
  template <class TProxy>
153
1.85M
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
1.85M
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
1.85M
  }
yb::Status yb::master::TSDescriptor::GetProxy<yb::tserver::TabletServerServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerServiceProxy>*)
Line
Count
Source
153
463k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
463k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
463k
  }
yb::Status yb::master::TSDescriptor::GetProxy<yb::tserver::TabletServerAdminServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerAdminServiceProxy>*)
Line
Count
Source
153
463k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
463k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
463k
  }
yb::Status yb::master::TSDescriptor::GetProxy<yb::consensus::ConsensusServiceProxy>(std::__1::shared_ptr<yb::consensus::ConsensusServiceProxy>*)
Line
Count
Source
153
463k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
463k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
463k
  }
yb::Status yb::master::TSDescriptor::GetProxy<yb::tserver::TabletServerBackupServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerBackupServiceProxy>*)
Line
Count
Source
153
463k
  CHECKED_STATUS GetProxy(std::shared_ptr<TProxy>* proxy) {
154
463k
    return GetOrCreateProxy(proxy, &proxies_.get<TProxy>());
155
463k
  }
156
157
  // Increment the accounting of the number of replicas recently created on this
158
  // server. This value will automatically decay over time.
159
  void IncrementRecentReplicaCreations();
160
161
  // Return the number of replicas which have recently been created on this
162
  // TS. This number is incremented when replicas are placed on the TS, and
163
  // then decayed over time. This method is not 'const' because each call
164
  // actually performs the time-based decay.
165
  double RecentReplicaCreations();
166
167
  // Set the number of live replicas (i.e. running or bootstrapping).
168
  void set_num_live_replicas(int num_live_replicas) {
169
    DCHECK_GE(num_live_replicas, 0);
170
    std::lock_guard<decltype(lock_)> l(lock_);
171
    num_live_replicas_ = num_live_replicas;
172
  }
173
174
  // Return the number of live replicas (i.e running or bootstrapping).
175
35
  int num_live_replicas() const {
176
35
    SharedLock<decltype(lock_)> l(lock_);
177
35
    return num_live_replicas_;
178
35
  }
179
180
  void set_leader_count(int leader_count) {
181
    DCHECK_GE(leader_count, 0);
182
    std::lock_guard<decltype(lock_)> l(lock_);
183
    leader_count_ = leader_count;
184
  }
185
186
346
  int leader_count() const {
187
346
    SharedLock<decltype(lock_)> l(lock_);
188
346
    return leader_count_;
189
346
  }
190
191
0
  // Returns the stored physical time of this node (physical_time_), read while holding a
  // SharedLock on lock_.
  MicrosTime physical_time() const {
    SharedLock<decltype(lock_)> guard(lock_);
    return physical_time_;
  }
195
196
0
  // Stores the given hybrid time into hybrid_time_ while holding lock_.
  void set_hybrid_time(HybridTime hybrid_time) {
    std::lock_guard<decltype(lock_)> guard(lock_);
    hybrid_time_ = hybrid_time;
  }
200
201
0
  // Returns the stored hybrid time of this node (hybrid_time_), read while holding a SharedLock
  // on lock_.
  HybridTime hybrid_time() const {
    SharedLock<decltype(lock_)> guard(lock_);
    return hybrid_time_;
  }
205
206
0
  // Returns the stored heartbeat round-trip time (heartbeat_rtt_), read while holding a
  // SharedLock on lock_.
  MonoDelta heartbeat_rtt() const {
    SharedLock<decltype(lock_)> guard(lock_);
    return heartbeat_rtt_;
  }
210
211
7
  uint64_t total_memory_usage() {
212
7
    SharedLock<decltype(lock_)> l(lock_);
213
7
    return ts_metrics_.total_memory_usage;
214
7
  }
215
216
7
  uint64_t total_sst_file_size() {
217
7
    SharedLock<decltype(lock_)> l(lock_);
218
7
    return ts_metrics_.total_sst_file_size;
219
7
  }
220
221
7
  uint64_t uncompressed_sst_file_size() {
222
7
    SharedLock<decltype(lock_)> l(lock_);
223
7
    return ts_metrics_.uncompressed_sst_file_size;
224
7
  }
225
226
7
  uint64_t num_sst_files() {
227
7
    SharedLock<decltype(lock_)> l(lock_);
228
7
    return ts_metrics_.num_sst_files;
229
7
  }
230
231
7
  double read_ops_per_sec() {
232
7
    SharedLock<decltype(lock_)> l(lock_);
233
7
    return ts_metrics_.read_ops_per_sec;
234
7
  }
235
236
7
  double write_ops_per_sec() {
237
7
    SharedLock<decltype(lock_)> l(lock_);
238
7
    return ts_metrics_.write_ops_per_sec;
239
7
  }
240
241
37
  uint64_t uptime_seconds() {
242
37
    SharedLock<decltype(lock_)> l(lock_);
243
37
    return ts_metrics_.uptime_seconds;
244
37
  }
245
246
  // Space accounting for a single path; both counters start at zero.
  // NOTE(review): the unit (presumably bytes) is not visible here — confirm against the producer.
  struct TSPathMetrics {
    uint64_t used_space{0};
    uint64_t total_space{0};
  };
250
251
0
  std::unordered_map<std::string, TSPathMetrics> path_metrics() {
252
0
    SharedLock<decltype(lock_)> l(lock_);
253
0
    return ts_metrics_.path_metrics;
254
0
  }
255
256
126k
  bool get_disable_tablet_split_if_default_ttl() {
257
126k
    SharedLock<decltype(lock_)> l(lock_);
258
126k
    return ts_metrics_.disable_tablet_split_if_default_ttl;
259
126k
  }
260
261
  void UpdateMetrics(const TServerMetricsPB& metrics);
262
263
  void GetMetrics(TServerMetricsPB* metrics);
264
265
348k
  void ClearMetrics() {
266
348k
    std::lock_guard<decltype(lock_)> l(lock_);
267
348k
    ts_metrics_.ClearMetrics();
268
348k
  }
269
270
  // Set of methods to keep track of pending tablet deletes for a tablet server. We use them to
271
  // avoid assigning more tablets to a tserver that might be potentially unresponsive.
272
  bool HasTabletDeletePending() const;
273
  bool IsTabletDeletePending(const std::string& tablet_id) const;
274
  void AddPendingTabletDelete(const std::string& tablet_id);
275
  void ClearPendingTabletDelete(const std::string& tablet_id);
276
  std::string PendingTabletDeleteToString() const;
277
278
  std::string ToString() const;
279
280
  // Indicates that this descriptor was removed from the cluster and shouldn't be surfaced.
281
70.7M
  bool IsRemoved() const {
282
70.7M
    return removed_.load(std::memory_order_acquire);
283
70.7M
  }
284
285
1
  void SetRemoved(bool removed = true) {
286
1
    removed_.store(removed, std::memory_order_release);
287
1
  }
288
289
  explicit TSDescriptor(std::string perm_id);
290
291
  std::size_t NumTasks() const;
292
293
  bool IsLive() const;
294
295
5.26M
  // Returns true if `capability` is present in this tablet server's capabilities_ set.
  // NOTE(review): unlike the other accessors of mutable state in this class, this reads
  // capabilities_ without taking lock_ — confirm the set is never modified concurrently.
  bool HasCapability(CapabilityId capability) const {
    return capabilities_.count(capability) != 0;
  }
298
299
  virtual bool IsLiveAndHasReported() const;
300
301
 protected:
302
  virtual CHECKED_STATUS RegisterUnlocked(const NodeInstancePB& instance,
303
                                          const TSRegistrationPB& registration,
304
                                          CloudInfoPB local_cloud_info,
305
                                          rpc::ProxyCache* proxy_cache);
306
307
  mutable rw_spinlock lock_;
308
 private:
309
  template <class TProxy>
310
  CHECKED_STATUS GetOrCreateProxy(std::shared_ptr<TProxy>* result,
311
                                  std::shared_ptr<TProxy>* result_cache);
312
313
  FRIEND_TEST(TestTSDescriptor, TestReplicaCreationsDecay);
314
  template<class ClusterLoadBalancerClass> friend class TestLoadBalancerBase;
315
316
  // Uses DNS to resolve registered hosts to a single endpoint.
317
  Result<HostPort> GetHostPortUnlocked() const;
318
319
  void DecayRecentReplicaCreationsUnlocked();
320
321
  struct TSMetrics {
322
323
    // Stores the total RAM usage of a tserver that is sent in every heartbeat.
324
    uint64_t total_memory_usage = 0;
325
326
    // Stores the total size of all the sst files in a tserver
327
    uint64_t total_sst_file_size = 0;
328
    uint64_t uncompressed_sst_file_size = 0;
329
    uint64_t num_sst_files = 0;
330
331
    double read_ops_per_sec = 0;
332
333
    double write_ops_per_sec = 0;
334
335
    uint64_t uptime_seconds = 0;
336
337
    std::unordered_map<std::string, TSPathMetrics> path_metrics;
338
339
    bool disable_tablet_split_if_default_ttl = false;
340
341
348k
    void ClearMetrics() {
342
348k
      total_memory_usage = 0;
343
348k
      total_sst_file_size = 0;
344
348k
      uncompressed_sst_file_size = 0;
345
348k
      num_sst_files = 0;
346
348k
      read_ops_per_sec = 0;
347
348k
      write_ops_per_sec = 0;
348
348k
      uptime_seconds = 0;
349
348k
      path_metrics.clear();
350
348k
      disable_tablet_split_if_default_ttl = false;
351
348k
    }
352
  };
353
354
  struct TSMetrics ts_metrics_;
355
356
  const std::string permanent_uuid_;
357
  CloudInfoPB local_cloud_info_;
358
  rpc::ProxyCache* proxy_cache_;
359
  int64_t latest_seqno_;
360
361
  // The last time a heartbeat was received for this node.
362
  MonoTime last_heartbeat_;
363
364
  // The physical and hybrid times on this node at the time of heartbeat.
365
  MicrosTime physical_time_;
366
  HybridTime hybrid_time_;
367
368
  // Roundtrip time of previous heartbeat.
369
  MonoDelta heartbeat_rtt_;
370
371
  // Set to true once this instance has reported all of its tablets.
372
  bool has_tablet_report_;
373
374
  // The number of times this tablet server has recently been selected to create a
375
  // tablet replica. This value decays back to 0 over time.
376
  double recent_replica_creations_;
377
  MonoTime last_replica_creations_decay_;
378
379
  // The number of live replicas on this host, from the last heartbeat.
380
  int num_live_replicas_;
381
382
  // The number of tablets for which this ts is a leader.
383
  int leader_count_;
384
385
  std::shared_ptr<TSInformationPB> ts_information_;
386
  std::string placement_id_;
387
388
  // The (read replica) cluster uuid to which this tserver belongs.
389
  std::string placement_uuid_;
390
391
  enterprise::ProxyTuple proxies_;
392
393
  // Set of tablet uuids for which a delete is pending on this tablet server.
394
  std::set<std::string> tablets_pending_delete_;
395
396
  // Capabilities of this tablet server.
397
  std::set<CapabilityId> capabilities_;
398
399
  // We don't remove TSDescriptors from the master's in-memory map since several classes hold
400
  // references to this object and those would be invalidated if we remove the descriptor from
401
  // the master's map. As a result, we just store a boolean indicating this entry is removed and
402
  // shouldn't be surfaced.
403
  std::atomic<bool> removed_{false};
404
405
  // Did this tserver register by heartbeating through master. If false, we registered through
406
  // peer's Raft config.
407
  RegisteredThroughHeartbeat registered_through_heartbeat_ = RegisteredThroughHeartbeat::kTrue;
408
409
  DISALLOW_COPY_AND_ASSIGN(TSDescriptor);
410
};
411
412
template <class TProxy>
413
Status TSDescriptor::GetOrCreateProxy(std::shared_ptr<TProxy>* result,
414
1.85M
                                      std::shared_ptr<TProxy>* result_cache) {
415
1.85M
  {
416
1.85M
    std::lock_guard<decltype(lock_)> l(lock_);
417
1.85M
    if (*result_cache) {
418
1.82M
      *result = *result_cache;
419
1.82M
      return Status::OK();
420
1.82M
    }
421
25.2k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
422
25.9k
    if (
!(*result_cache)25.2k
) {
423
25.9k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
424
25.9k
    }
425
25.2k
    *result = *result_cache;
426
25.2k
  }
427
0
  return Status::OK();
428
25.2k
}
yb::Status yb::master::TSDescriptor::GetOrCreateProxy<yb::tserver::TabletServerServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerServiceProxy>*, std::__1::shared_ptr<yb::tserver::TabletServerServiceProxy>*)
Line
Count
Source
414
463k
                                      std::shared_ptr<TProxy>* result_cache) {
415
463k
  {
416
463k
    std::lock_guard<decltype(lock_)> l(lock_);
417
463k
    if (*result_cache) {
418
456k
      *result = *result_cache;
419
456k
      return Status::OK();
420
456k
    }
421
6.49k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
422
6.50k
    if (
!(*result_cache)6.49k
) {
423
6.50k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
424
6.50k
    }
425
6.49k
    *result = *result_cache;
426
6.49k
  }
427
0
  return Status::OK();
428
6.49k
}
yb::Status yb::master::TSDescriptor::GetOrCreateProxy<yb::tserver::TabletServerAdminServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerAdminServiceProxy>*, std::__1::shared_ptr<yb::tserver::TabletServerAdminServiceProxy>*)
Line
Count
Source
414
463k
                                      std::shared_ptr<TProxy>* result_cache) {
415
463k
  {
416
463k
    std::lock_guard<decltype(lock_)> l(lock_);
417
463k
    if (*result_cache) {
418
456k
      *result = *result_cache;
419
456k
      return Status::OK();
420
456k
    }
421
6.10k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
422
6.49k
    if (
!(*result_cache)6.10k
) {
423
6.49k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
424
6.49k
    }
425
6.10k
    *result = *result_cache;
426
6.10k
  }
427
0
  return Status::OK();
428
6.10k
}
yb::Status yb::master::TSDescriptor::GetOrCreateProxy<yb::consensus::ConsensusServiceProxy>(std::__1::shared_ptr<yb::consensus::ConsensusServiceProxy>*, std::__1::shared_ptr<yb::consensus::ConsensusServiceProxy>*)
Line
Count
Source
414
463k
                                      std::shared_ptr<TProxy>* result_cache) {
415
463k
  {
416
463k
    std::lock_guard<decltype(lock_)> l(lock_);
417
463k
    if (*result_cache) {
418
456k
      *result = *result_cache;
419
456k
      return Status::OK();
420
456k
    }
421
6.32k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
422
6.49k
    if (
!(*result_cache)6.32k
) {
423
6.49k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
424
6.49k
    }
425
6.32k
    *result = *result_cache;
426
6.32k
  }
427
0
  return Status::OK();
428
6.32k
}
yb::Status yb::master::TSDescriptor::GetOrCreateProxy<yb::tserver::TabletServerBackupServiceProxy>(std::__1::shared_ptr<yb::tserver::TabletServerBackupServiceProxy>*, std::__1::shared_ptr<yb::tserver::TabletServerBackupServiceProxy>*)
Line
Count
Source
414
463k
                                      std::shared_ptr<TProxy>* result_cache) {
415
463k
  {
416
463k
    std::lock_guard<decltype(lock_)> l(lock_);
417
463k
    if (*result_cache) {
418
456k
      *result = *result_cache;
419
456k
      return Status::OK();
420
456k
    }
421
6.33k
    auto hostport = VERIFY_RESULT(GetHostPortUnlocked());
422
6.49k
    if (
!(*result_cache)6.33k
) {
423
6.49k
      *result_cache = std::make_shared<TProxy>(proxy_cache_, hostport);
424
6.49k
    }
425
6.33k
    *result = *result_cache;
426
6.33k
  }
427
0
  return Status::OK();
428
6.33k
}
429
430
} // namespace master
431
} // namespace yb
432
433
#endif // YB_MASTER_TS_DESCRIPTOR_H