YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/util/metric_entity.cc
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
#include "yb/util/metric_entity.h"
16
17
#include <regex>
18
19
#include "yb/gutil/map-util.h"
20
#include "yb/util/flag_tags.h"
21
#include "yb/util/jsonwriter.h"
22
#include "yb/util/metrics.h"
23
#include "yb/util/status_log.h"
24
25
DEFINE_int32(metrics_retirement_age_ms, 120 * 1000,
26
             "The minimum number of milliseconds a metric will be kept for after it is "
27
             "no longer active. (Advanced option)");
28
TAG_FLAG(metrics_retirement_age_ms, runtime);
29
TAG_FLAG(metrics_retirement_age_ms, advanced);
30
31
// TODO: change the default to an empty string and add logic to get this from cluster_uuid in case it is empty.
32
DEFINE_string(metric_node_name, "DEFAULT_NODE_NAME",
33
              "Value to use as node name for metrics reporting");
34
35
namespace yb {
36
37
namespace {
38
39
const std::regex prometheus_name_regex("[a-zA-Z_:][a-zA-Z0-9_:]*");
40
41
// Registry of all of the metric and entity prototypes that have been
42
// defined.
43
//
44
// Prototypes are typically defined as static variables in different compilation
45
// units, and their constructors register themselves here. The registry is then
46
// used in order to dump metrics metadata to generate a Cloudera Manager MDL
47
// file.
48
//
49
// This class is thread-safe.
50
class MetricPrototypeRegistry {
51
 public:
52
  // Get the singleton instance. The returned pointer refers to a function-local static
  // object defined in get().
53
  static MetricPrototypeRegistry* get();
54
55
  // Dump a JSON document including all of the registered entity and metric
56
  // prototypes. The document is a single object with a "metrics" array and an
  // "entities" array.
57
  void WriteAsJson(JsonWriter* writer) const;
58
59
  // Register a metric prototype in the registry. Only the pointer is stored.
60
  void AddMetric(const MetricPrototype* prototype);
61
62
  // Register a metric entity prototype in the registry. Only the pointer is stored.
63
  void AddEntity(const MetricEntityPrototype* prototype);
64
65
 private:
  // Construction and destruction are private: the only instance is the one created
  // inside get().
66
20.6k
  MetricPrototypeRegistry() {}
67
15.0k
  ~MetricPrototypeRegistry() {}
68
  // Taken by AddMetric(), AddEntity() and WriteAsJson() before they touch the vectors
  // below. Declared mutable so that the const WriteAsJson() can lock it.
69
  mutable simple_spinlock lock_;
70
  std::vector<const MetricPrototype*> metrics_;
71
  std::vector<const MetricEntityPrototype*> entities_;
72
73
  DISALLOW_COPY_AND_ASSIGN(MetricPrototypeRegistry);
74
};
75
76
//
77
// MetricPrototypeRegistry
78
//
79
22.0M
// Returns the single registry instance. The instance is a function-local static, so it
// is constructed the first time get() is called rather than at namespace-scope
// initialization time.
MetricPrototypeRegistry* MetricPrototypeRegistry::get() {
80
22.0M
  static MetricPrototypeRegistry instance;
81
22.0M
  return &instance;
82
22.0M
}
83
84
22.0M
// Appends 'prototype' to metrics_ while holding lock_. The raw pointer is stored as-is;
// nothing here copies the prototype or checks for duplicates.
void MetricPrototypeRegistry::AddMetric(const MetricPrototype* prototype) {
85
22.0M
  std::lock_guard<simple_spinlock> l(lock_);
86
22.0M
  metrics_.push_back(prototype);
87
22.0M
}
88
89
66.3k
// Appends 'prototype' to entities_ while holding lock_. The raw pointer is stored as-is;
// nothing here copies the prototype or checks for duplicates.
void MetricPrototypeRegistry::AddEntity(const MetricEntityPrototype* prototype) {
90
66.3k
  std::lock_guard<simple_spinlock> l(lock_);
91
66.3k
  entities_.push_back(prototype);
92
66.3k
}
93
94
1
// Writes every registered prototype to 'writer' as one JSON object:
//   { "metrics": [ {<prototype fields>, "entity_type": ...}, ... ],
//     "entities": [ {"name": ...}, ... ] }
// lock_ is held for the whole dump, so registrations block until it completes.
// Prototypes are emitted in registration order (the order of the vectors).
void MetricPrototypeRegistry::WriteAsJson(JsonWriter* writer) const {
95
1
  std::lock_guard<simple_spinlock> l(lock_);
96
1
  MetricJsonOptions opts;
  // Ask each prototype to include its schema information in the fields it writes.
97
1
  opts.include_schema_info = true;
98
1
  writer->StartObject();
99
100
  // Dump metric prototypes.
101
1
  writer->String("metrics");
102
1
  writer->StartArray();
103
35
  for (const MetricPrototype* p : metrics_) {
104
35
    writer->StartObject();
105
35
    p->WriteFields(writer, opts);
106
35
    writer->String("entity_type");
107
35
    writer->String(p->entity_type());
108
35
    writer->EndObject();
109
35
  }
110
1
  writer->EndArray();
111
112
  // Dump entity prototypes.
113
1
  writer->String("entities");
114
1
  writer->StartArray();
115
2
  for (const MetricEntityPrototype* p : entities_) {
116
2
    writer->StartObject();
117
2
    writer->String("name");
118
2
    writer->String(p->name());
119
2
    writer->EndObject();
120
2
  }
121
1
  writer->EndArray();
122
123
1
  writer->EndObject();
124
1
}
125
126
} // namespace
127
128
//
129
// MetricEntityPrototype
130
//
131
132
// Records 'name' in name_ and registers this prototype with the global
// MetricPrototypeRegistry. The registry keeps the raw 'this' pointer.
// NOTE(review): if name_ is a const char* (its declaration is not visible here), 'name'
// must outlive this object -- confirm against the header.
MetricEntityPrototype::MetricEntityPrototype(const char* name)
133
66.3k
  : name_(name) {
134
66.3k
  MetricPrototypeRegistry::get()->AddEntity(this);
135
66.3k
}
136
137
17.3k
// Empty destructor. Note that it does not remove this prototype from the
// MetricPrototypeRegistry that the constructor added it to.
MetricEntityPrototype::~MetricEntityPrototype() {
138
17.3k
}
139
140
// Returns the entity of this prototype's type with the given 'id' in 'registry'.
// Simply forwards to MetricRegistry::FindOrCreateEntity(), passing this prototype, the
// id and 'initial_attrs'.
scoped_refptr<MetricEntity> MetricEntityPrototype::Instantiate(
141
    MetricRegistry* registry,
142
    const std::string& id,
143
208k
    const MetricEntity::AttributeMap& initial_attrs) const {
144
208k
  return registry->FindOrCreateEntity(this, id, initial_attrs);
145
208k
}
146
147
// Convenience overload: same as the three-argument Instantiate() with an empty
// attribute map.
scoped_refptr<MetricEntity> MetricEntityPrototype::Instantiate(
148
30.5k
    MetricRegistry* registry, const std::string& id) const {
149
30.5k
  return Instantiate(registry, id, std::unordered_map<std::string, std::string>());
150
30.5k
}
151
152
//
153
// MetricEntity
154
//
155
156
// Creates an entity of the type described by 'prototype'. 'id' and 'attributes' are
// taken by value and moved into the corresponding members; the prototype pointer is
// stored without taking ownership.
MetricEntity::MetricEntity(const MetricEntityPrototype* prototype,
157
                           std::string id, AttributeMap attributes)
158
    : prototype_(prototype),
159
      id_(std::move(id)),
160
130k
      attributes_(std::move(attributes)) {
161
130k
}
162
163
4.66k
// Empty destructor; members are released by their own destructors.
MetricEntity::~MetricEntity() {
164
4.66k
}
165
166
32.2M
// Returns the file-level regex used to validate metric names for Prometheus:
// "[a-zA-Z_:][a-zA-Z0-9_:]*". The regex object is built once, at namespace scope, and
// returned by reference.
const std::regex& PrometheusNameRegex() {
167
32.2M
  return prometheus_name_regex;
168
32.2M
}
169
170
32.2M
// Sanity-checks that 'proto' may be instantiated on this entity:
//  - CHECK: the prototype's entity_type() must equal this entity's prototype name.
//  - DCHECK: the metric name must fully match PrometheusNameRegex(). As a DCHECK this is
//    presumably only evaluated in debug builds.
// Called by the FindOrCreate*() methods before they take lock_.
void MetricEntity::CheckInstantiation(const MetricPrototype* proto) const {
171
0
  CHECK_STREQ(prototype_->name(), proto->entity_type())
172
0
    << "Metric " << proto->name() << " may not be instantiated entity of type "
173
0
    << prototype_->name() << " (expected: " << proto->entity_type() << ")";
174
18.4E
  DCHECK(regex_match(proto->name(), PrometheusNameRegex()))
175
18.4E
      << "Metric name is not compatible with Prometheus: " << proto->name();
176
32.2M
}
177
178
0
// Looks up the metric registered under the address of 'prototype' in metric_map_ while
// holding lock_. The lookup is by prototype identity (pointer), not by metric name.
// Presumably returns a null scoped_refptr when there is no entry (per FindPtrOrNull).
scoped_refptr<Metric> MetricEntity::FindOrNull(const MetricPrototype& prototype) const {
179
0
  std::lock_guard<simple_spinlock> l(lock_);
180
0
  return FindPtrOrNull(metric_map_, &prototype);
181
0
}
182
183
namespace {
184
185
const string kWildCardString = "*";
186
187
// Returns true if 'metric_name' is selected by any entry of 'match_params':
//  - an entry equal to kWildCardString ("*") selects everything;
//  - otherwise an entry selects the name if it occurs anywhere in it (substring match).
// Returns false when no entry matches, which includes an empty 'match_params'.
// Note that an empty-string entry matches every name, since find("") always succeeds.
bool MatchMetricInList(const string& metric_name,
188
753k
                       const vector<string>& match_params) {
189
753k
  for (const string& param : match_params) {
190
    // Handle wildcard.
191
753k
    if (param == kWildCardString) return true;
192
    // The parameter is a substring match of the metric name.
193
107k
    if (metric_name.find(param) != std::string::npos) {
194
236
      return true;
195
236
    }
196
107k
  }
197
107k
  return false;
198
753k
}
199
200
} // anonymous namespace
201
202
203
// Writes this entity and its selected metrics to 'writer' as one JSON object:
//   { "type": <prototype name>, "id": <id>, "attributes": {...}, "metrics": [...] }
//
// 'requested_metrics' is a filter evaluated with MatchMetricInList():
//  - if the entity id matches, every metric is selected (select_all);
//  - otherwise each metric is selected when its prototype name matches;
//  - if the filter is non-empty and nothing matched, nothing is written at all;
//  - if the filter is empty, nothing matches, so the entity is written with no metrics
//    from metric_map_ (external callbacks still run).
//
// Always returns Status::OK(); failures from individual metrics are passed to
// WARN_NOT_OK and do not stop the loop.
Status MetricEntity::WriteAsJson(JsonWriter* writer,
204
                                 const vector<string>& requested_metrics,
205
645k
                                 const MetricJsonOptions& opts) const {
206
645k
  bool select_all = MatchMetricInList(id(), requested_metrics);
207
208
  // We want the keys to be in alphabetical order when printing, so we use an ordered map here.
  // NOTE(review): the key type is const char* with the default comparator, so entries are
  // ordered by pointer value, not by the characters of the name. Alphabetical order would
  // need a strcmp-based comparator or std::string keys -- confirm the intent.
209
645k
  typedef std::map<const char*, scoped_refptr<Metric> > OrderedMetricMap;
210
645k
  OrderedMetricMap metrics;
211
645k
  AttributeMap attrs;
212
645k
  std::vector<ExternalJsonMetricsCb> external_metrics_cbs;
213
645k
  {
214
    // Snapshot the metrics, attributes & external metrics callbacks in this metrics entity. (Note:
215
    // this is not guaranteed to be a consistent snapshot).
    // Copying under lock_ lets all the JSON writing below happen without holding the lock.
216
645k
    std::lock_guard<simple_spinlock> l(lock_);
217
645k
    attrs = attributes_;
218
645k
    external_metrics_cbs = external_json_metrics_cbs_;
219
80.8M
    for (const MetricMap::value_type& val : metric_map_) {
220
80.8M
      const MetricPrototype* prototype = val.first;
221
80.8M
      const scoped_refptr<Metric>& metric = val.second;
222
223
80.8M
      if (select_all || MatchMetricInList(prototype->name(), requested_metrics)) {
224
80.7M
        InsertOrDie(&metrics, prototype->name(), metric);
225
80.7M
      }
226
80.8M
    }
227
645k
  }
228
229
  // If we had a filter, and we didn't either match this entity or any metrics inside
230
  // it, don't print the entity at all.
231
645k
  if (!requested_metrics.empty() && !select_all && metrics.empty()) {
232
290
    return Status::OK();
233
290
  }
234
235
645k
  writer->StartObject();
236
237
645k
  writer->String("type");
238
645k
  writer->String(prototype_->name());
239
240
645k
  writer->String("id");
241
645k
  writer->String(id_);
242
243
645k
  writer->String("attributes");
244
645k
  writer->StartObject();
245
1.88M
  for (const AttributeMap::value_type& val : attrs) {
246
1.88M
    writer->String(val.first);
247
1.88M
    writer->String(val.second);
248
1.88M
  }
249
645k
  writer->EndObject();
250
251
645k
  writer->String("metrics");
252
645k
  writer->StartArray();
253
80.7M
  for (OrderedMetricMap::value_type& val : metrics) {
254
80.7M
    WARN_NOT_OK(val.second->WriteAsJson(writer, opts),
255
80.7M
                Format("Failed to write $0 as JSON", val.first));
256
257
80.7M
  }
258
  // Run the external metrics collection callback if there is one set.
  // The callbacks write into the still-open "metrics" array.
259
0
  for (const ExternalJsonMetricsCb& cb : external_metrics_cbs) {
260
0
    cb(writer, opts);
261
0
  }
262
645k
  writer->EndArray();
263
264
645k
  writer->EndObject();
265
266
645k
  return Status::OK();
267
645k
}
268
269
// Writes this entity's selected metrics to 'writer' in Prometheus form.
//
// 'requested_metrics' is a filter evaluated with MatchMetricInList(): an entity-id match
// selects every metric, otherwise metrics are selected by prototype name.
//
// The attributes attached to every metric depend on the entity type:
//  - "tablet" / "table": table_id, table_name, namespace_name;
//  - "server" / "cluster": all entity attributes plus metric_id (the entity id);
//  - "cdc": table_id, table_name, namespace_name, stream_id;
//  - any other type: nothing is written and OK is returned.
// "metric_type" and "exported_instance" (from --metric_node_name) are always added.
//
// Always returns Status::OK(); failures from individual metrics are passed to
// WARN_NOT_OK and do not stop the loop.
CHECKED_STATUS MetricEntity::WriteForPrometheus(PrometheusWriter* writer,
270
                                                const vector<string>& requested_metrics,
271
226
                                                const MetricPrometheusOptions& opts) const {
272
226
  bool select_all = MatchMetricInList(id(), requested_metrics);
273
274
  // We want the keys to be in alphabetical order when printing, so we use an ordered map here.
  // NOTE(review): the key type is const char* with the default comparator, so entries are
  // ordered by pointer value, not by the characters of the name. Alphabetical order would
  // need a strcmp-based comparator or std::string keys -- confirm the intent.
275
226
  typedef std::map<const char*, scoped_refptr<Metric> > OrderedMetricMap;
276
226
  OrderedMetricMap metrics;
277
226
  AttributeMap attrs;
278
226
  std::vector<ExternalPrometheusMetricsCb> external_metrics_cbs;
279
226
  {
280
    // Snapshot the metrics, attributes & external metrics callbacks in this metrics entity. (Note:
281
    // this is not guaranteed to be a consistent snapshot).
    // Copying under lock_ lets all the writing below happen without holding the lock.
282
226
    std::lock_guard<simple_spinlock> l(lock_);
283
226
    attrs = attributes_;
284
226
    external_metrics_cbs = external_prometheus_metrics_cbs_;
285
38.5k
    for (const MetricMap::value_type& val : metric_map_) {
286
38.5k
      const MetricPrototype* prototype = val.first;
287
38.5k
      const scoped_refptr<Metric>& metric = val.second;
288
289
38.5k
      if (select_all || MatchMetricInList(prototype->name(), requested_metrics)) {
290
38.5k
        InsertOrDie(&metrics, prototype->name(), metric);
291
38.5k
      }
292
38.5k
    }
293
226
  }
294
295
  // If we had a filter, and we didn't either match this entity or any metrics inside
296
  // it, don't print the entity at all.
297
  // If metrics is empty but the entity itself matched the filter (select_all is true),
298
  // we still fall through so that the external callbacks below are invoked.
299
226
  if (!requested_metrics.empty() && !select_all && metrics.empty()) {
300
0
    return Status::OK();
301
0
  }
302
303
226
  AttributeMap prometheus_attr;
304
  // Per tablet metrics come with tablet_id, as well as table_id and table_name attributes.
305
  // We ignore the tablet part to squash at the table level.
  // Note: attrs is a local copy and is read with operator[], so a key that is absent
  // yields an empty string (and is inserted into the local copy only).
306
226
  if (strcmp(prototype_->name(), "tablet") == 0 || strcmp(prototype_->name(), "table") == 0) {
307
194
    prometheus_attr["table_id"] = attrs["table_id"];
308
194
    prometheus_attr["table_name"] = attrs["table_name"];
309
194
    prometheus_attr["namespace_name"] = attrs["namespace_name"];
310
32
  } else if (
311
32
      strcmp(prototype_->name(), "server") == 0 || strcmp(prototype_->name(), "cluster") == 0) {
312
32
    prometheus_attr = attrs;
313
    // This is tablet_id in the case of tablet, but otherwise names the server type, eg: yb.master
314
32
    prometheus_attr["metric_id"] = id_;
315
0
  } else if (strcmp(prototype_->name(), "cdc") == 0) {
316
0
    prometheus_attr["table_id"] = attrs["table_id"];
317
0
    prometheus_attr["table_name"] = attrs["table_name"];
318
0
    prometheus_attr["namespace_name"] = attrs["namespace_name"];
319
0
    prometheus_attr["stream_id"] = attrs["stream_id"];
320
0
  } else {
    // Entity types not listed above are not exported to Prometheus.
321
0
    return Status::OK();
322
0
  }
323
  // This is currently tablet / table / server / cluster / cdc.
324
226
  prometheus_attr["metric_type"] = prototype_->name();
325
226
  prometheus_attr["exported_instance"] = FLAGS_metric_node_name;
326
327
38.5k
  for (OrderedMetricMap::value_type& val : metrics) {
328
38.5k
    WARN_NOT_OK(val.second->WriteForPrometheus(writer, prometheus_attr, opts),
329
38.5k
                Format("Failed to write $0 as Prometheus", val.first));
330
331
38.5k
  }
332
  // Run the external metrics collection callback if there is one set.
333
0
  for (const ExternalPrometheusMetricsCb& cb : external_metrics_cbs) {
334
0
    cb(writer, opts);
335
0
  }
336
337
226
  return Status::OK();
338
226
}
339
340
1.17M
// Removes the entry keyed by 'proto' from metric_map_ while holding lock_. Erasing a
// key that is not present is a no-op. Only the map's reference is dropped here; other
// holders of the metric are unaffected.
void MetricEntity::Remove(const MetricPrototype* proto) {
341
1.17M
  std::lock_guard<simple_spinlock> l(lock_);
342
1.17M
  metric_map_.erase(proto);
343
1.17M
}
344
345
681k
// Scans metric_map_ under lock_ and drops metrics that nobody but the map references.
// Each metric is handled in one of four ways per scan:
//  1. Still referenced elsewhere: clear any pending retire_time_ and keep it.
//  2. Unreferenced, seen for the first time: set retire_time_ to
//     now + --metrics_retirement_age_ms and keep it.
//  3. Unreferenced, retire_time_ not yet reached: keep it.
//  4. Unreferenced, retire_time_ reached: erase it from the map.
// A metric therefore needs at least two scans to be removed.
void MetricEntity::RetireOldMetrics() {
346
681k
  MonoTime now = MonoTime::Now();
347
348
681k
  std::lock_guard<simple_spinlock> l(lock_);
  // The iterator is advanced inside the body because one branch erases the current entry.
349
86.4M
  for (auto it = metric_map_.begin(); it != metric_map_.end();) {
350
85.7M
    const scoped_refptr<Metric>& metric = it->second;
351
352
85.7M
    if (PREDICT_TRUE(!metric->HasOneRef())) {
353
      // The metric is still in use. Note that, in the case of "NeverRetire()", the metric
354
      // will have a ref-count of 2 because it is reffed by the 'never_retire_metrics_'
355
      // collection.
356
357
      // Ensure that it is not marked for later retirement (this could happen in the case
358
      // that a metric is un-reffed and then re-reffed later by looking it up from the
359
      // registry).
360
83.4M
      metric->retire_time_ = MonoTime();
361
83.4M
      ++it;
362
83.4M
      continue;
363
83.4M
    }
364
365
2.34M
    if (!metric->retire_time_.Initialized()) {
      // NOTE(review): it->first is the map key, which appears to be the MetricPrototype
      // pointer, so this (and the VLOGs below) likely log an address rather than the
      // metric name -- confirm.
366
0
      VLOG(3) << "Metric " << it->first << " has become un-referenced. Will retire after "
367
0
              << "the retention interval";
368
      // This is the first time we've seen this metric as retirable.
369
1.18M
      metric->retire_time_ = now;
370
1.18M
      metric->retire_time_.AddDelta(MonoDelta::FromMilliseconds(
371
1.18M
                                      FLAGS_metrics_retirement_age_ms));
372
1.18M
      ++it;
373
1.18M
      continue;
374
1.18M
    }
375
376
    // If we've already seen this metric in a previous scan, check if it's
377
    // time to retire it yet.
378
1.16M
    if (now.ComesBefore(metric->retire_time_)) {
379
0
      VLOG(3) << "Metric " << it->first << " is un-referenced, but still within "
380
0
              << "the retention interval";
381
946k
      ++it;
382
946k
      continue;
383
946k
    }
384
385
386
0
    VLOG(2) << "Retiring metric " << it->first;
    // Post-increment hands erase() the current position while 'it' already points at the
    // next entry, so iteration can continue after the erase.
387
213k
    metric_map_.erase(it++);
388
213k
  }
389
681k
}
390
391
227k
// Pins 'metric' by storing an extra reference in never_retire_metrics_ (under lock_).
// With that reference held, the HasOneRef() test in RetireOldMetrics() is never true for
// the metric, so it is never retired. Calling this repeatedly for the same metric simply
// appends another reference.
void MetricEntity::NeverRetire(const scoped_refptr<Metric>& metric) {
392
227k
  std::lock_guard<simple_spinlock> l(lock_);
393
227k
  never_retire_metrics_.push_back(metric);
394
227k
}
395
396
77.7k
// Replaces the entire attribute map with a copy of 'attrs' while holding lock_. Any
// attribute not present in 'attrs' is dropped.
void MetricEntity::SetAttributes(const AttributeMap& attrs) {
397
77.7k
  std::lock_guard<simple_spinlock> l(lock_);
398
77.7k
  attributes_ = attrs;
399
77.7k
}
400
401
186k
// Sets a single attribute while holding lock_: inserts 'key' if it is absent, otherwise
// overwrites its value. Other attributes are left untouched.
void MetricEntity::SetAttribute(const string& key, const string& val) {
402
186k
  std::lock_guard<simple_spinlock> l(lock_);
403
186k
  attributes_[key] = val;
404
186k
}
405
406
// Returns the Counter registered on this entity for 'proto', creating and registering a
// new one if there is none. Lookup is by prototype pointer, under lock_.
// CheckInstantiation() runs first (before the lock is taken) to validate 'proto'.
// The down_cast assumes any metric already stored under 'proto' is a Counter.
scoped_refptr<Counter> MetricEntity::FindOrCreateCounter(
407
12.4M
    const CounterPrototype* proto) {
408
12.4M
  CheckInstantiation(proto);
409
12.4M
  std::lock_guard<simple_spinlock> l(lock_);
410
12.4M
  scoped_refptr<Counter> m = down_cast<Counter*>(FindPtrOrNull(metric_map_, proto).get());
411
12.4M
  if (!m) {
412
7.69M
    m = new Counter(proto);
413
7.69M
    InsertOrDie(&metric_map_, proto, m);
414
7.69M
  }
415
12.4M
  return m;
416
12.4M
}
417
418
// Returns the MillisLag registered on this entity for 'proto', creating and registering
// a new one if there is none. Lookup is by prototype pointer, under lock_.
// CheckInstantiation() runs first (before the lock is taken) to validate 'proto'.
// The down_cast assumes any metric already stored under 'proto' is a MillisLag.
scoped_refptr<MillisLag> MetricEntity::FindOrCreateMillisLag(
419
0
    const MillisLagPrototype* proto) {
420
0
  CheckInstantiation(proto);
421
0
  std::lock_guard<simple_spinlock> l(lock_);
422
0
  scoped_refptr<MillisLag> m = down_cast<MillisLag*>(FindPtrOrNull(metric_map_, proto).get());
423
0
  if (!m) {
424
0
    m = new MillisLag(proto);
425
0
    InsertOrDie(&metric_map_, proto, m);
426
0
  }
427
0
  return m;
428
0
}
429
430
// Returns the AtomicMillisLag registered on this entity for 'proto', creating and
// registering a new one if there is none. Lookup is by prototype pointer, under lock_.
// CheckInstantiation() runs first (before the lock is taken) to validate 'proto'.
// NOTE(review): this takes the same MillisLagPrototype type as FindOrCreateMillisLag().
// If both were called with the same prototype on one entity, the down_cast below would
// be applied to a metric of the other kind -- confirm callers never mix the two.
scoped_refptr<AtomicMillisLag> MetricEntity::FindOrCreateAtomicMillisLag(
431
105k
    const MillisLagPrototype* proto) {
432
105k
  CheckInstantiation(proto);
433
105k
  std::lock_guard<simple_spinlock> l(lock_);
434
105k
  scoped_refptr<AtomicMillisLag> m = down_cast<AtomicMillisLag*>(
435
105k
      FindPtrOrNull(metric_map_, proto).get());
436
105k
  if (!m) {
437
105k
    m = new AtomicMillisLag(proto);
438
105k
    InsertOrDie(&metric_map_, proto, m);
439
105k
  }
440
105k
  return m;
441
105k
}
442
443
// Returns the Histogram registered on this entity for 'proto', creating and registering
// a new one if there is none. Lookup is by prototype pointer, under lock_.
// CheckInstantiation() runs first (before the lock is taken) to validate 'proto'.
// This overload does not take ownership of 'proto'.
scoped_refptr<Histogram> MetricEntity::FindOrCreateHistogram(
444
7.50M
    const HistogramPrototype* proto) {
445
7.50M
  CheckInstantiation(proto);
446
7.50M
  std::lock_guard<simple_spinlock> l(lock_);
447
7.50M
  scoped_refptr<Histogram> m = down_cast<Histogram*>(FindPtrOrNull(metric_map_, proto).get());
448
7.50M
  if (!m) {
449
2.32M
    m = new Histogram(proto);
450
2.32M
    InsertOrDie(&metric_map_, proto, m);
451
2.32M
  }
452
7.50M
  return m;
453
7.50M
}
454
455
// Overload that takes ownership of the prototype. If no histogram is registered under
// this prototype's address, a new Histogram is created that takes over 'proto', and it
// is registered under the prototype pointer the histogram reports via prototype().
// If an entry already exists, it is returned and 'proto' is destroyed when this function
// returns.
// NOTE(review): the lookup key is the address of a prototype the caller uniquely owns,
// so a hit looks unlikely in practice -- confirm whether the find step is needed.
scoped_refptr<Histogram> MetricEntity::FindOrCreateHistogram(
456
43.7k
    std::unique_ptr<HistogramPrototype> proto) {
457
43.7k
  CheckInstantiation(proto.get());
458
43.7k
  std::lock_guard<simple_spinlock> l(lock_);
  // 'm' is a raw pointer here; on a hit the object stays alive through the reference
  // held by metric_map_.
459
43.7k
  auto m = down_cast<Histogram*>(FindPtrOrNull(metric_map_, proto.get()).get());
460
43.7k
  if (!m) {
    // Read everything needed from the prototype into locals first: 'proto' is moved from
    // in the constructor call below and must not be dereferenced afterwards.
461
43.7k
    uint64_t highest_trackable_value = proto->max_trackable_value();
462
43.7k
    int num_significant_digits = proto->num_sig_digits();
463
43.7k
    const ExportPercentiles export_percentile = proto->export_percentiles();
464
43.7k
    m = new Histogram(std::move(proto), highest_trackable_value, num_significant_digits,
465
43.7k
                      export_percentile);
466
43.7k
    InsertOrDie(&metric_map_, m->prototype(), m);
467
43.7k
  }
468
43.7k
  return m;
469
43.7k
}
470
471
1
// Public entry point for dumping the prototype registry: forwards to
// MetricPrototypeRegistry::WriteAsJson() on the singleton, which writes all registered
// metric and entity prototypes to 'writer'.
void WriteRegistryAsJson(JsonWriter* writer) {
472
1
  MetricPrototypeRegistry::get()->WriteAsJson(writer);
473
1
}
474
475
22.0M
// Public entry point for registering a metric prototype: forwards to
// MetricPrototypeRegistry::AddMetric() on the singleton. The registry (a file-local
// class) stores the raw pointer.
void RegisterMetricPrototype(const MetricPrototype* prototype) {
476
22.0M
  MetricPrototypeRegistry::get()->AddMetric(prototype);
477
22.0M
}
478
479
} // namespace yb