/Users/deen/code/yugabyte-db/src/yb/util/metrics.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include "yb/util/metrics.h" |
34 | | |
35 | | #include <map> |
36 | | #include <set> |
37 | | |
38 | | #include "yb/gutil/atomicops.h" |
39 | | #include "yb/gutil/casts.h" |
40 | | #include "yb/gutil/map-util.h" |
41 | | |
42 | | #include "yb/util/hdr_histogram.h" |
43 | | #include "yb/util/histogram.pb.h" |
44 | | #include "yb/util/jsonwriter.h" |
45 | | #include "yb/util/locks.h" |
46 | | #include "yb/util/status.h" |
47 | | #include "yb/util/status_log.h" |
48 | | |
49 | | DEFINE_bool(expose_metric_histogram_percentiles, true, |
50 | | "Should we expose the percentiles information for metrics histograms."); |
51 | | |
52 | | DEFINE_int32(max_tables_metrics_breakdowns, INT32_MAX, |
53 | | "The maxmimum number of tables to retrieve metrics for"); |
54 | | |
55 | | // Process/server-wide metrics should go into the 'server' entity. |
56 | | // More complex applications will define other entities. |
57 | | METRIC_DEFINE_entity(server); |
58 | | |
59 | | namespace yb { |
60 | | |
61 | | void RegisterMetricPrototype(const MetricPrototype* prototype); |
62 | | |
63 | | using std::string; |
64 | | using std::vector; |
65 | | using strings::Substitute; |
66 | | |
67 | | // |
68 | | // MetricUnit |
69 | | // |
70 | | |
71 | 35 | const char* MetricUnit::Name(Type unit) { |
72 | 35 | switch (unit) { |
73 | 0 | case kCacheHits: |
74 | 0 | return "hits"; |
75 | 0 | case kCacheQueries: |
76 | 0 | return "queries"; |
77 | 9 | case kBytes: |
78 | 9 | return "bytes"; |
79 | 1 | case kRequests: |
80 | 1 | return "requests"; |
81 | 0 | case kEntries: |
82 | 0 | return "entries"; |
83 | 0 | case kRows: |
84 | 0 | return "rows"; |
85 | 0 | case kCells: |
86 | 0 | return "cells"; |
87 | 0 | case kConnections: |
88 | 0 | return "connections"; |
89 | 0 | case kOperations: |
90 | 0 | return "operations"; |
91 | 0 | case kProbes: |
92 | 0 | return "probes"; |
93 | 0 | case kNanoseconds: |
94 | 0 | return "nanoseconds"; |
95 | 1 | case kMicroseconds: |
96 | 1 | return "microseconds"; |
97 | 7 | case kMilliseconds: |
98 | 7 | return "milliseconds"; |
99 | 0 | case kSeconds: |
100 | 0 | return "seconds"; |
101 | 2 | case kThreads: |
102 | 2 | return "threads"; |
103 | 0 | case kTransactions: |
104 | 0 | return "transactions"; |
105 | 0 | case kUnits: |
106 | 0 | return "units"; |
107 | 0 | case kMaintenanceOperations: |
108 | 0 | return "operations"; |
109 | 7 | case kBlocks: |
110 | 7 | return "blocks"; |
111 | 0 | case kLogBlockContainers: |
112 | 0 | return "log block containers"; |
113 | 3 | case kTasks: |
114 | 3 | return "tasks"; |
115 | 0 | case kMessages: |
116 | 0 | return "messages"; |
117 | 2 | case kContextSwitches: |
118 | 2 | return "context switches"; |
119 | 3 | case kFiles: |
120 | 3 | return "files"; |
121 | 0 | default: |
122 | 0 | return "UNKNOWN UNIT"; |
123 | 35 | } |
124 | 35 | } |
125 | | |
126 | | // |
127 | | // MetricType |
128 | | // |
129 | | |
130 | | const char* const MetricType::kGaugeType = "gauge"; |
131 | | const char* const MetricType::kCounterType = "counter"; |
132 | | const char* const MetricType::kHistogramType = "histogram"; |
133 | 35 | const char* MetricType::Name(MetricType::Type type) { |
134 | 35 | switch (type) { |
135 | 17 | case kGauge: |
136 | 17 | return kGaugeType; |
137 | 15 | case kCounter: |
138 | 15 | return kCounterType; |
139 | 1 | case kHistogram: |
140 | 1 | return kHistogramType; |
141 | 2 | default: |
142 | 2 | return "UNKNOWN TYPE"; |
143 | 35 | } |
144 | 35 | } |
145 | | |
146 | | namespace { |
147 | | |
148 | 35 | const char* MetricLevelName(MetricLevel level) { |
149 | 35 | switch (level) { |
150 | 0 | case MetricLevel::kDebug: |
151 | 0 | return "debug"; |
152 | 35 | case MetricLevel::kInfo: |
153 | 35 | return "info"; |
154 | 0 | case MetricLevel::kWarn: |
155 | 0 | return "warn"; |
156 | 0 | default: |
157 | 0 | return "UNKNOWN LEVEL"; |
158 | 35 | } |
159 | 35 | } |
160 | | |
161 | | } // anonymous namespace |
162 | | |
163 | | // |
164 | | // MetricRegistry |
165 | | // |
166 | | |
167 | 33.1k | MetricRegistry::MetricRegistry() { |
168 | 33.1k | } |
169 | | |
170 | 6.89k | MetricRegistry::~MetricRegistry() { |
171 | 6.89k | } |
172 | | |
173 | 695k | bool MetricRegistry::TabletHasBeenShutdown(const scoped_refptr<MetricEntity> entity) const { |
174 | 695k | if (strcmp(entity->prototype_->name(), "tablet") == 0 && tablets_shutdown_find(entity->id())543k ) { |
175 | 16.3k | DVLOG(5) << "Do not report metrics for shutdown tablet " << entity->id()0 ; |
176 | 16.3k | return true; |
177 | 16.3k | } |
178 | | |
179 | 678k | return false; |
180 | 695k | } |
181 | | |
182 | | Status MetricRegistry::WriteAsJson(JsonWriter* writer, |
183 | | const vector<string>& requested_metrics, |
184 | 15.8k | const MetricJsonOptions& opts) const { |
185 | 15.8k | EntityMap entities; |
186 | 15.8k | { |
187 | 15.8k | std::lock_guard<simple_spinlock> l(lock_); |
188 | 15.8k | entities = entities_; |
189 | 15.8k | } |
190 | | |
191 | 15.8k | writer->StartArray(); |
192 | 694k | for (const EntityMap::value_type& e : entities) { |
193 | 694k | if (TabletHasBeenShutdown(e.second)) { |
194 | 16.3k | continue; |
195 | 16.3k | } |
196 | | |
197 | 678k | WARN_NOT_OK(e.second->WriteAsJson(writer, requested_metrics, opts), |
198 | 678k | Substitute("Failed to write entity $0 as JSON", e.second->id())); |
199 | 678k | } |
200 | 15.8k | writer->EndArray(); |
201 | | |
202 | | // Rather than having a thread poll metrics periodically to retire old ones, |
203 | | // we'll just retire them here. The only downside is that, if no one is polling |
204 | | // metrics, we may end up leaving them around indefinitely; however, metrics are |
205 | | // small, and one might consider it a feature: if monitoring stops polling for |
206 | | // metrics, we should keep them around until the next poll. |
207 | 15.8k | entities.clear(); // necessary to deref metrics we just dumped before doing retirement scan. |
208 | 15.8k | const_cast<MetricRegistry*>(this)->RetireOldMetrics(); |
209 | 15.8k | return Status::OK(); |
210 | 15.8k | } |
211 | | |
212 | | CHECKED_STATUS MetricRegistry::WriteForPrometheus(PrometheusWriter* writer, |
213 | 0 | const MetricPrometheusOptions& opts) const { |
214 | 0 | return WriteForPrometheus(writer, {""}, opts); // Include all metrics. |
215 | 0 | } |
216 | | |
217 | | CHECKED_STATUS MetricRegistry::WriteForPrometheus(PrometheusWriter* writer, |
218 | | const vector<string>& requested_metrics, |
219 | 31 | const MetricPrometheusOptions& opts) const { |
220 | 31 | EntityMap entities; |
221 | 31 | { |
222 | 31 | std::lock_guard<simple_spinlock> l(lock_); |
223 | 31 | entities = entities_; |
224 | 31 | } |
225 | | |
226 | 274 | for (const EntityMap::value_type& e : entities) { |
227 | 274 | if (TabletHasBeenShutdown(e.second)) { |
228 | 0 | continue; |
229 | 0 | } |
230 | | |
231 | 274 | WARN_NOT_OK(e.second->WriteForPrometheus(writer, requested_metrics, opts), |
232 | 274 | Substitute("Failed to write entity $0 as Prometheus", e.second->id())); |
233 | 274 | } |
234 | 31 | RETURN_NOT_OK(writer->FlushAggregatedValues(opts.max_tables_metrics_breakdowns, |
235 | 31 | opts.priority_regex)); |
236 | | |
237 | | // Rather than having a thread poll metrics periodically to retire old ones, |
238 | | // we'll just retire them here. The only downside is that, if no one is polling |
239 | | // metrics, we may end up leaving them around indefinitely; however, metrics are |
240 | | // small, and one might consider it a feature: if monitoring stops polling for |
241 | | // metrics, we should keep them around until the next poll. |
242 | 31 | entities.clear(); // necessary to deref metrics we just dumped before doing retirement scan. |
243 | 31 | const_cast<MetricRegistry*>(this)->RetireOldMetrics(); |
244 | 31 | return Status::OK(); |
245 | 31 | } |
246 | | |
247 | 103k | void MetricRegistry::RetireOldMetrics() { |
248 | 103k | std::lock_guard<simple_spinlock> l(lock_); |
249 | 1.15M | for (auto it = entities_.begin(); it != entities_.end();) { |
250 | 1.05M | it->second->RetireOldMetrics(); |
251 | | |
252 | 1.05M | if (it->second->num_metrics() == 0 && it->second->HasOneRef()6.57k ) { |
253 | | // No metrics and no external references to this entity, so we can retire it. |
254 | | // Unlike retiring the metrics themselves, we don't wait for any timeout |
255 | | // to retire them -- we assume that that timed retention has been satisfied |
256 | | // by holding onto the metrics inside the entity. |
257 | | |
258 | | // For a tablet that has been shutdown, metrics are being deleted. So do not track |
259 | | // the tablet anymore. |
260 | 6.57k | if (strcmp(it->second->prototype_->name(), "tablet") == 0) { |
261 | 4.93k | DVLOG(3) << "T " << it->first << ": " |
262 | 0 | << "Remove from set of tablets that have been shutdown so as to be freed"; |
263 | 4.93k | tablets_shutdown_erase(it->first); |
264 | 4.93k | } |
265 | | |
266 | 6.57k | entities_.erase(it++); |
267 | 1.04M | } else { |
268 | 1.04M | ++it; |
269 | 1.04M | } |
270 | 1.05M | } |
271 | 103k | } |
272 | | |
273 | | // |
274 | | // MetricPrototype |
275 | | // |
276 | 36.9M | MetricPrototype::MetricPrototype(CtorArgs args) : args_(std::move(args)) { |
277 | 36.9M | RegisterMetricPrototype(this); |
278 | 36.9M | } |
279 | | |
280 | | void MetricPrototype::WriteFields(JsonWriter* writer, |
281 | 75.4M | const MetricJsonOptions& opts) const { |
282 | 75.4M | writer->String("name"); |
283 | 75.4M | writer->String(name()); |
284 | | |
285 | 75.4M | if (opts.include_schema_info) { |
286 | 35 | writer->String("label"); |
287 | 35 | writer->String(label()); |
288 | | |
289 | 35 | writer->String("type"); |
290 | 35 | writer->String(MetricType::Name(type())); |
291 | | |
292 | 35 | writer->String("unit"); |
293 | 35 | writer->String(MetricUnit::Name(unit())); |
294 | | |
295 | 35 | writer->String("description"); |
296 | 35 | writer->String(description()); |
297 | | |
298 | 35 | writer->String("level"); |
299 | 35 | writer->String(MetricLevelName(level())); |
300 | 35 | } |
301 | 75.4M | } |
302 | | |
//
// MetricRegistry (continued)
//
306 | | |
307 | | scoped_refptr<MetricEntity> MetricRegistry::FindOrCreateEntity( |
308 | | const MetricEntityPrototype* prototype, |
309 | | const std::string& id, |
310 | 350k | const MetricEntity::AttributeMap& initial_attributes) { |
311 | 350k | std::lock_guard<simple_spinlock> l(lock_); |
312 | 350k | scoped_refptr<MetricEntity> e = FindPtrOrNull(entities_, id); |
313 | 350k | if (!e) { |
314 | 223k | e = new MetricEntity(prototype, id, initial_attributes); |
315 | 223k | InsertOrDie(&entities_, id, e); |
316 | 223k | } else { |
317 | 127k | e->SetAttributes(initial_attributes); |
318 | 127k | } |
319 | 350k | return e; |
320 | 350k | } |
321 | | |
322 | | // |
323 | | // Metric |
324 | | // |
325 | | Metric::Metric(const MetricPrototype* prototype) |
326 | 32.7M | : prototype_(prototype) { |
327 | 32.7M | } |
328 | | |
329 | | Metric::Metric(std::unique_ptr<MetricPrototype> prototype) |
330 | 3.22M | : prototype_holder_(std::move(prototype)), prototype_(prototype_holder_.get()) { |
331 | 3.22M | } |
332 | | |
333 | 2.93M | Metric::~Metric() { |
334 | 2.93M | } |
335 | | |
336 | | // |
337 | | // Gauge |
338 | | // |
339 | | |
340 | | Status Gauge::WriteAsJson(JsonWriter* writer, |
341 | 58.9M | const MetricJsonOptions& opts) const { |
342 | 58.9M | if (prototype_->level() < opts.level) { |
343 | 0 | return Status::OK(); |
344 | 0 | } |
345 | | |
346 | 58.9M | writer->StartObject(); |
347 | | |
348 | 58.9M | prototype_->WriteFields(writer, opts); |
349 | | |
350 | 58.9M | writer->String("value"); |
351 | 58.9M | WriteValue(writer); |
352 | | |
353 | 58.9M | writer->EndObject(); |
354 | 58.9M | return Status::OK(); |
355 | 58.9M | } |
356 | | |
357 | | // |
358 | | // StringGauge |
359 | | // |
360 | | |
361 | | StringGauge::StringGauge(const GaugePrototype<string>* proto, |
362 | | string initial_value) |
363 | 0 | : Gauge(proto), value_(std::move(initial_value)) {} |
364 | | |
365 | 0 | std::string StringGauge::value() const { |
366 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
367 | 0 | return value_; |
368 | 0 | } |
369 | | |
370 | 0 | void StringGauge::set_value(const std::string& value) { |
371 | 0 | std::lock_guard<simple_spinlock> l(lock_); |
372 | 0 | value_ = value; |
373 | 0 | } |
374 | | |
375 | 0 | void StringGauge::WriteValue(JsonWriter* writer) const { |
376 | 0 | writer->String(value()); |
377 | 0 | } |
378 | | |
379 | | CHECKED_STATUS StringGauge::WriteForPrometheus( |
380 | | PrometheusWriter* writer, const MetricEntity::AttributeMap& attr, |
381 | 0 | const MetricPrometheusOptions& opts) const { |
382 | 0 | if (prototype_->level() < opts.level) { |
383 | 0 | return Status::OK(); |
384 | 0 | } |
385 | | |
386 | | // TODO(bogdan): don't think we need this? |
387 | | // return writer->WriteSingleEntry(attr, prototype_->name(), value()); |
388 | 0 | return Status::OK(); |
389 | 0 | } |
390 | | |
391 | | // |
392 | | // Counter |
393 | | // |
394 | | // This implementation is optimized by using a striped counter. See LongAdder for details. |
395 | | |
396 | | scoped_refptr<Counter> CounterPrototype::Instantiate( |
397 | 23.4M | const scoped_refptr<MetricEntity>& entity) const { |
398 | 23.4M | return entity->FindOrCreateCounter(this); |
399 | 23.4M | } |
400 | | |
401 | 11.9M | Counter::Counter(const CounterPrototype* proto) : Metric(proto) { |
402 | 11.9M | } |
403 | | |
404 | 2 | Counter::Counter(std::unique_ptr<CounterPrototype> proto) : Metric(std::move(proto)) { |
405 | 2 | } |
406 | | |
407 | 16.0M | int64_t Counter::value() const { |
408 | 16.0M | return value_.Value(); |
409 | 16.0M | } |
410 | | |
411 | 363M | void Counter::Increment() { |
412 | 363M | IncrementBy(1); |
413 | 363M | } |
414 | | |
415 | 998M | void Counter::IncrementBy(int64_t amount) { |
416 | 998M | value_.IncrementBy(amount); |
417 | 998M | } |
418 | | |
419 | | Status Counter::WriteAsJson(JsonWriter* writer, |
420 | 15.9M | const MetricJsonOptions& opts) const { |
421 | 15.9M | if (prototype_->level() < opts.level) { |
422 | 0 | return Status::OK(); |
423 | 0 | } |
424 | | |
425 | 15.9M | writer->StartObject(); |
426 | | |
427 | 15.9M | prototype_->WriteFields(writer, opts); |
428 | | |
429 | 15.9M | writer->String("value"); |
430 | 15.9M | writer->Int64(value()); |
431 | | |
432 | 15.9M | writer->EndObject(); |
433 | 15.9M | return Status::OK(); |
434 | 15.9M | } |
435 | | |
436 | | CHECKED_STATUS Counter::WriteForPrometheus( |
437 | | PrometheusWriter* writer, const MetricEntity::AttributeMap& attr, |
438 | 21.9k | const MetricPrometheusOptions& opts) const { |
439 | 21.9k | if (prototype_->level() < opts.level) { |
440 | 0 | return Status::OK(); |
441 | 0 | } |
442 | | |
443 | 21.9k | return writer->WriteSingleEntry(attr, prototype_->name(), value(), |
444 | 21.9k | prototype()->aggregation_function()); |
445 | 21.9k | } |
446 | | |
447 | | // |
448 | | // MillisLag |
449 | | // |
450 | | |
451 | | scoped_refptr<MillisLag> MillisLagPrototype::Instantiate( |
452 | 0 | const scoped_refptr<MetricEntity>& entity) const { |
453 | 0 | return entity->FindOrCreateMillisLag(this); |
454 | 0 | } |
455 | | |
456 | | MillisLag::MillisLag(const MillisLagPrototype* proto) |
457 | | : Metric(proto), |
458 | | timestamp_ms_(static_cast<int64_t>(std::chrono::duration_cast<std::chrono::milliseconds>( |
459 | 175k | std::chrono::system_clock::now().time_since_epoch()).count())) { |
460 | 175k | } |
461 | | |
462 | 0 | Status MillisLag::WriteAsJson(JsonWriter* writer, const MetricJsonOptions& opts) const { |
463 | 0 | if (prototype_->level() < opts.level) { |
464 | 0 | return Status::OK(); |
465 | 0 | } |
466 | | |
467 | 0 | writer->StartObject(); |
468 | |
|
469 | 0 | prototype_->WriteFields(writer, opts); |
470 | |
|
471 | 0 | writer->String("value"); |
472 | 0 | writer->Int64(lag_ms()); |
473 | |
|
474 | 0 | writer->EndObject(); |
475 | 0 | return Status::OK(); |
476 | 0 | } |
477 | | |
478 | | Status MillisLag::WriteForPrometheus( |
479 | | PrometheusWriter* writer, const MetricEntity::AttributeMap& attr, |
480 | 0 | const MetricPrometheusOptions& opts) const { |
481 | 0 | if (prototype_->level() < opts.level) { |
482 | 0 | return Status::OK(); |
483 | 0 | } |
484 | | |
485 | 0 | return writer->WriteSingleEntry(attr, prototype_->name(), lag_ms(), |
486 | 0 | prototype()->aggregation_function()); |
487 | 0 | } |
488 | | |
489 | | AtomicMillisLag::AtomicMillisLag(const MillisLagPrototype* proto) |
490 | | : MillisLag(proto), |
491 | | atomic_timestamp_ms_(static_cast<int64_t>(std::chrono::duration_cast<std::chrono::milliseconds>( |
492 | 175k | std::chrono::system_clock::now().time_since_epoch()).count())) { |
493 | 175k | } |
494 | | |
495 | 517k | Status AtomicMillisLag::WriteAsJson(JsonWriter* writer, const MetricJsonOptions& opts) const { |
496 | 517k | if (prototype_->level() < opts.level) { |
497 | 0 | return Status::OK(); |
498 | 0 | } |
499 | | |
500 | 517k | writer->StartObject(); |
501 | | |
502 | 517k | prototype_->WriteFields(writer, opts); |
503 | | |
504 | 517k | writer->String("value"); |
505 | 517k | writer->Int64(this->lag_ms()); |
506 | | |
507 | 517k | writer->EndObject(); |
508 | 517k | return Status::OK(); |
509 | 517k | } |
510 | | |
511 | | ///////////////////////////////////////////////// |
512 | | // HistogramPrototype |
513 | | ///////////////////////////////////////////////// |
514 | | |
515 | | HistogramPrototype::HistogramPrototype(const MetricPrototype::CtorArgs& args, |
516 | | uint64_t max_trackable_value, int num_sig_digits, |
517 | | ExportPercentiles export_percentiles) |
518 | | : MetricPrototype(args), |
519 | | max_trackable_value_(max_trackable_value), |
520 | | num_sig_digits_(num_sig_digits), |
521 | 7.36M | export_percentiles_(export_percentiles) { |
522 | | // Better to crash at definition time that at instantiation time. |
523 | 7.36M | CHECK(HdrHistogram::IsValidHighestTrackableValue(max_trackable_value)) |
524 | 0 | << Substitute("Invalid max trackable value on histogram $0: $1", |
525 | 0 | args.name_, max_trackable_value); |
526 | 7.36M | CHECK(HdrHistogram::IsValidNumSignificantDigits(num_sig_digits)) |
527 | 0 | << Substitute("Invalid number of significant digits on histogram $0: $1", |
528 | 0 | args.name_, num_sig_digits); |
529 | 7.36M | } |
530 | | |
531 | | scoped_refptr<Histogram> HistogramPrototype::Instantiate( |
532 | 14.2M | const scoped_refptr<MetricEntity>& entity) const { |
533 | 14.2M | return entity->FindOrCreateHistogram(this); |
534 | 14.2M | } |
535 | | |
536 | | ///////////////////////////////////////////////// |
537 | | // Histogram |
538 | | ///////////////////////////////////////////////// |
539 | | |
540 | | Histogram::Histogram(const HistogramPrototype* proto) |
541 | | : Metric(proto), |
542 | | histogram_(new HdrHistogram(proto->max_trackable_value(), proto->num_sig_digits())), |
543 | 3.82M | export_percentiles_(proto->export_percentiles()) { |
544 | 3.82M | } |
545 | | |
546 | | Histogram::Histogram( |
547 | | std::unique_ptr <HistogramPrototype> proto, uint64_t highest_trackable_value, |
548 | | int num_significant_digits, ExportPercentiles export_percentiles) |
549 | | : Metric(std::move(proto)), |
550 | | histogram_(new HdrHistogram(highest_trackable_value, num_significant_digits)), |
551 | 65.7k | export_percentiles_(export_percentiles) { |
552 | 65.7k | } |
553 | | |
554 | 688M | void Histogram::Increment(int64_t value) { |
555 | 688M | histogram_->Increment(value); |
556 | 688M | } |
557 | | |
558 | 1 | void Histogram::IncrementBy(int64_t value, int64_t amount) { |
559 | 1 | histogram_->IncrementBy(value, amount); |
560 | 1 | } |
561 | | |
562 | | Status Histogram::WriteAsJson(JsonWriter* writer, |
563 | 5.32M | const MetricJsonOptions& opts) const { |
564 | 5.32M | if (prototype_->level() < opts.level) { |
565 | 0 | return Status::OK(); |
566 | 0 | } |
567 | | |
568 | 5.32M | HistogramSnapshotPB snapshot; |
569 | 5.32M | RETURN_NOT_OK(GetAndResetHistogramSnapshotPB(&snapshot, opts)); |
570 | 5.32M | writer->Protobuf(snapshot); |
571 | 5.32M | return Status::OK(); |
572 | 5.32M | } |
573 | | |
574 | | CHECKED_STATUS Histogram::WriteForPrometheus( |
575 | | PrometheusWriter* writer, const MetricEntity::AttributeMap& attr, |
576 | 6.81k | const MetricPrometheusOptions& opts) const { |
577 | 6.81k | if (prototype_->level() < opts.level) { |
578 | 0 | return Status::OK(); |
579 | 0 | } |
580 | | |
581 | 6.81k | HdrHistogram snapshot(*histogram_); |
582 | | // HdrHistogram reports percentiles based on all the data points from the |
583 | | // begining of time. We are interested in the percentiles based on just |
584 | | // the "newly-arrived" data. So, we will reset the histogram's percentiles |
585 | | // between each invocation. |
586 | 6.81k | histogram_->ResetPercentiles(); |
587 | | |
588 | | // Representing the sum and count require suffixed names. |
589 | 6.81k | std::string hist_name = prototype_->name(); |
590 | 6.81k | auto copy_of_attr = attr; |
591 | 6.81k | RETURN_NOT_OK(writer->WriteSingleEntry( |
592 | 6.81k | copy_of_attr, hist_name + "_sum", snapshot.TotalSum(), |
593 | 6.81k | prototype()->aggregation_function())); |
594 | 6.81k | RETURN_NOT_OK(writer->WriteSingleEntry( |
595 | 6.81k | copy_of_attr, hist_name + "_count", snapshot.TotalCount(), |
596 | 6.81k | prototype()->aggregation_function())); |
597 | | |
598 | | // Copy the label map to add the quatiles. |
599 | 6.81k | if (export_percentiles_ && FLAGS_expose_metric_histogram_percentiles4.98k ) { |
600 | 4.98k | copy_of_attr["quantile"] = "p50"; |
601 | 4.98k | RETURN_NOT_OK(writer->WriteSingleEntry(copy_of_attr, hist_name, |
602 | 4.98k | snapshot.ValueAtPercentile(50), |
603 | 4.98k | prototype()->aggregation_function())); |
604 | 4.98k | copy_of_attr["quantile"] = "p95"; |
605 | 4.98k | RETURN_NOT_OK(writer->WriteSingleEntry(copy_of_attr, hist_name, |
606 | 4.98k | snapshot.ValueAtPercentile(95), |
607 | 4.98k | prototype()->aggregation_function())); |
608 | 4.98k | copy_of_attr["quantile"] = "p99"; |
609 | 4.98k | RETURN_NOT_OK(writer->WriteSingleEntry(copy_of_attr, hist_name, |
610 | 4.98k | snapshot.ValueAtPercentile(99), |
611 | 4.98k | prototype()->aggregation_function())); |
612 | 4.98k | copy_of_attr["quantile"] = "mean"; |
613 | 4.98k | RETURN_NOT_OK(writer->WriteSingleEntry(copy_of_attr, hist_name, |
614 | 4.98k | snapshot.MeanValue(), |
615 | 4.98k | prototype()->aggregation_function())); |
616 | 4.98k | copy_of_attr["quantile"] = "max"; |
617 | 4.98k | RETURN_NOT_OK(writer->WriteSingleEntry(copy_of_attr, hist_name, |
618 | 4.98k | snapshot.MaxValue(), |
619 | 4.98k | prototype()->aggregation_function())); |
620 | 4.98k | } |
621 | 6.81k | return Status::OK(); |
622 | 6.81k | } |
623 | | |
624 | | Status Histogram::GetAndResetHistogramSnapshotPB(HistogramSnapshotPB* snapshot_pb, |
625 | 5.32M | const MetricJsonOptions& opts) const { |
626 | 5.32M | HdrHistogram snapshot(*histogram_); |
627 | | // HdrHistogram reports percentiles based on all the data points from the |
628 | | // begining of time. We are interested in the percentiles based on just |
629 | | // the "newly-arrived" data. So, we will reset the histogram's percentiles |
630 | | // between each invocation. |
631 | 5.32M | histogram_->ResetPercentiles(); |
632 | | |
633 | 5.32M | snapshot_pb->set_name(prototype_->name()); |
634 | 5.32M | if (opts.include_schema_info) { |
635 | 0 | snapshot_pb->set_type(MetricType::Name(prototype_->type())); |
636 | 0 | snapshot_pb->set_label(prototype_->label()); |
637 | 0 | snapshot_pb->set_unit(MetricUnit::Name(prototype_->unit())); |
638 | 0 | snapshot_pb->set_description(prototype_->description()); |
639 | 0 | snapshot_pb->set_level(MetricLevelName(prototype_->level())); |
640 | 0 | snapshot_pb->set_max_trackable_value(snapshot.highest_trackable_value()); |
641 | 0 | snapshot_pb->set_num_significant_digits(snapshot.num_significant_digits()); |
642 | 0 | } |
643 | 5.32M | snapshot_pb->set_total_count(snapshot.TotalCount()); |
644 | 5.32M | snapshot_pb->set_total_sum(snapshot.TotalSum()); |
645 | 5.32M | snapshot_pb->set_min(snapshot.MinValue()); |
646 | 5.32M | snapshot_pb->set_mean(snapshot.MeanValue()); |
647 | 5.32M | snapshot_pb->set_percentile_75(snapshot.ValueAtPercentile(75)); |
648 | 5.32M | snapshot_pb->set_percentile_95(snapshot.ValueAtPercentile(95)); |
649 | 5.32M | snapshot_pb->set_percentile_99(snapshot.ValueAtPercentile(99)); |
650 | 5.32M | snapshot_pb->set_percentile_99_9(snapshot.ValueAtPercentile(99.9)); |
651 | 5.32M | snapshot_pb->set_percentile_99_99(snapshot.ValueAtPercentile(99.99)); |
652 | 5.32M | snapshot_pb->set_max(snapshot.MaxValue()); |
653 | | |
654 | 5.32M | if (opts.include_raw_histograms) { |
655 | 1 | RecordedValuesIterator iter(&snapshot); |
656 | 101 | while (iter.HasNext()) { |
657 | 100 | HistogramIterationValue value; |
658 | 100 | RETURN_NOT_OK(iter.Next(&value)); |
659 | 100 | snapshot_pb->add_values(value.value_iterated_to); |
660 | 100 | snapshot_pb->add_counts(value.count_at_value_iterated_to); |
661 | 100 | } |
662 | 1 | } |
663 | 5.32M | return Status::OK(); |
664 | 5.32M | } |
665 | | |
666 | 0 | uint64_t Histogram::CountInBucketForValueForTests(uint64_t value) const { |
667 | 0 | return histogram_->CountInBucketForValue(value); |
668 | 0 | } |
669 | | |
670 | 2.14M | uint64_t Histogram::TotalCount() const { |
671 | 2.14M | return histogram_->TotalCount(); |
672 | 2.14M | } |
673 | | |
674 | 2 | uint64_t Histogram::MinValueForTests() const { |
675 | 2 | return histogram_->MinValue(); |
676 | 2 | } |
677 | | |
678 | 3 | uint64_t Histogram::MaxValueForTests() const { |
679 | 3 | return histogram_->MaxValue(); |
680 | 3 | } |
681 | 1 | double Histogram::MeanValueForTests() const { |
682 | 1 | return histogram_->MeanValue(); |
683 | 1 | } |
684 | | |
685 | | ScopedLatencyMetric::ScopedLatencyMetric( |
686 | | const scoped_refptr<Histogram>& latency_hist, Auto automatic) |
687 | 50.2M | : latency_hist_(latency_hist), auto_(automatic) { |
688 | 50.2M | Restart(); |
689 | 50.2M | } |
690 | | |
691 | | ScopedLatencyMetric::ScopedLatencyMetric(ScopedLatencyMetric&& rhs) |
692 | | : latency_hist_(std::move(rhs.latency_hist_)), time_started_(rhs.time_started_), |
693 | 0 | auto_(rhs.auto_) { |
694 | 0 | } |
695 | | |
696 | 0 | void ScopedLatencyMetric::operator=(ScopedLatencyMetric&& rhs) { |
697 | 0 | if (auto_) { |
698 | 0 | Finish(); |
699 | 0 | } |
700 | |
|
701 | 0 | latency_hist_ = std::move(rhs.latency_hist_); |
702 | 0 | time_started_ = rhs.time_started_; |
703 | 0 | auto_ = rhs.auto_; |
704 | 0 | } |
705 | | |
706 | 50.2M | ScopedLatencyMetric::~ScopedLatencyMetric() { |
707 | 50.2M | if (auto_) { |
708 | 50.2M | Finish(); |
709 | 50.2M | } |
710 | 50.2M | } |
711 | | |
712 | 50.2M | void ScopedLatencyMetric::Restart() { |
713 | 50.2M | if (latency_hist_) { |
714 | 50.2M | time_started_ = MonoTime::Now(); |
715 | 50.2M | } |
716 | 50.2M | } |
717 | | |
718 | 50.2M | void ScopedLatencyMetric::Finish() { |
719 | 50.2M | if (latency_hist_ != nullptr) { |
720 | 50.2M | auto passed = (MonoTime::Now() - time_started_).ToMicroseconds(); |
721 | 50.2M | latency_hist_->Increment(passed); |
722 | 50.2M | } |
723 | 50.2M | } |
724 | | |
725 | | // Replace specific chars with underscore to pass PrometheusNameRegex(). |
726 | 4.29M | void EscapeMetricNameForPrometheus(std::string *id) { |
727 | 4.29M | std::replace(id->begin(), id->end(), ' ', '_'); |
728 | 4.29M | std::replace(id->begin(), id->end(), '.', '_'); |
729 | 4.29M | std::replace(id->begin(), id->end(), '-', '_'); |
730 | 4.29M | } |
731 | | |
732 | | } // namespace yb |