/Users/deen/code/yugabyte-db/src/yb/util/mem_tracker.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include "yb/util/mem_tracker.h" |
34 | | |
35 | | #include <algorithm> |
36 | | #include <limits> |
37 | | #include <list> |
38 | | #include <memory> |
39 | | #include <mutex> |
40 | | |
41 | | #ifdef TCMALLOC_ENABLED |
42 | | #include <gperftools/malloc_extension.h> |
43 | | #endif |
44 | | |
45 | | #include "yb/gutil/map-util.h" |
46 | | #include "yb/gutil/once.h" |
47 | | #include "yb/gutil/strings/human_readable.h" |
48 | | #include "yb/gutil/strings/substitute.h" |
49 | | |
50 | | #include "yb/util/debug-util.h" |
51 | | #include "yb/util/debug/trace_event.h" |
52 | | #include "yb/util/env.h" |
53 | | #include "yb/util/flag_tags.h" |
54 | | #include "yb/util/format.h" |
55 | | #include "yb/util/memory/memory.h" |
56 | | #include "yb/util/metrics.h" |
57 | | #include "yb/util/mutex.h" |
58 | | #include "yb/util/random_util.h" |
59 | | #include "yb/util/size_literals.h" |
60 | | #include "yb/util/status.h" |
61 | | #include "yb/util/status_log.h" |
62 | | #include "yb/util/logging.h" |
63 | | |
64 | | using namespace std::literals; |
65 | | |
66 | | DEFINE_int64(memory_limit_hard_bytes, 0, |
67 | | "Maximum amount of memory this daemon should use, in bytes. " |
68 | | "A value of 0 autosizes based on the total system memory. " |
69 | | "A value of -1 disables all memory limiting."); |
70 | | TAG_FLAG(memory_limit_hard_bytes, stable); |
71 | | DEFINE_double(default_memory_limit_to_ram_ratio, 0.85, |
72 | | "If memory_limit_hard_bytes is left unspecified, then it is " |
73 | | "set to default_memory_limit_to_ram_ratio * Available RAM."); |
74 | | TAG_FLAG(default_memory_limit_to_ram_ratio, advanced); |
75 | | TAG_FLAG(default_memory_limit_to_ram_ratio, hidden); |
76 | | |
77 | | DEFINE_int32(memory_limit_soft_percentage, 85, |
78 | | "Percentage of the hard memory limit that this daemon may " |
79 | | "consume before memory throttling of writes begins. The greater " |
80 | | "the excess, the higher the chance of throttling. In general, a " |
81 | | "lower soft limit leads to smoother write latencies but " |
82 | | "decreased throughput, and vice versa for a higher soft limit."); |
83 | | TAG_FLAG(memory_limit_soft_percentage, advanced); |
84 | | |
85 | | DEFINE_int32(memory_limit_warn_threshold_percentage, 98, |
86 | | "Percentage of the hard memory limit that this daemon may " |
87 | | "consume before WARNING level messages are periodically logged."); |
88 | | TAG_FLAG(memory_limit_warn_threshold_percentage, advanced); |
89 | | |
90 | | |
91 | | DEFINE_int64(server_tcmalloc_max_total_thread_cache_bytes, -1, "Total number of bytes to " |
92 | | "use for the thread cache for tcmalloc across all threads in the tserver/master."); |
93 | | DEFINE_int64(tserver_tcmalloc_max_total_thread_cache_bytes, -1, "Total number of bytes to " |
94 | | "use for the thread cache for tcmalloc across all threads in the tserver. " |
95 | | "This is being deprecated and is used to fallback/override the value set " |
96 | | "on the tserver by server_tcmalloc_max_total_thread_cache_bytes." ); |
97 | | |
98 | | #ifdef TCMALLOC_ENABLED |
99 | | DEFINE_int32(tcmalloc_max_free_bytes_percentage, 10, |
100 | | "Maximum percentage of the RSS that tcmalloc is allowed to use for " |
101 | | "reserved but unallocated memory."); |
102 | | TAG_FLAG(tcmalloc_max_free_bytes_percentage, advanced); |
103 | | #endif |
104 | | |
105 | | DEFINE_bool(mem_tracker_logging, false, |
106 | | "Enable logging of memory tracker consume/release operations"); |
107 | | |
108 | | DEFINE_bool(mem_tracker_log_stack_trace, false, |
109 | | "Enable logging of stack traces on memory tracker consume/release operations. " |
110 | | "Only takes effect if mem_tracker_logging is also enabled."); |
111 | | |
112 | | DEFINE_int64(mem_tracker_update_consumption_interval_us, 2000000, |
113 | | "Interval that is used to update memory consumption from external source. " |
114 | | "For instance from tcmalloc statistics."); |
115 | | |
116 | | DEFINE_int64(mem_tracker_tcmalloc_gc_release_bytes, -1, |
117 | | "When the total amount of memory from calls to Release() since the last GC exceeds " |
118 | | "this flag, a new tcmalloc GC will be triggered. This GC will clear the tcmalloc " |
119 | | "page heap freelist. A higher value implies less aggressive GC, i.e. higher memory " |
120 | | "overhead, but more efficient in terms of runtime."); |
121 | | TAG_FLAG(mem_tracker_tcmalloc_gc_release_bytes, runtime); |
122 | | |
123 | | namespace yb { |
124 | | |
125 | | // NOTE: this class has been adapted from Impala, so the code style varies |
126 | | // somewhat from yb. |
127 | | |
128 | | using std::deque; |
129 | | using std::list; |
130 | | using std::string; |
131 | | using std::stringstream; |
132 | | using std::shared_ptr; |
133 | | using std::vector; |
134 | | |
135 | | using strings::Substitute; |
136 | | |
137 | | namespace { |
138 | | |
139 | | // The ancestor for all trackers. Every tracker is visible from the root down. |
140 | | shared_ptr<MemTracker> root_tracker; |
141 | | GoogleOnceType root_tracker_once = GOOGLE_ONCE_INIT; |
142 | | |
143 | | // Total amount of memory from calls to Release() since the last GC. If this |
144 | | // is greater than mem_tracker_tcmalloc_gc_release_bytes, this will trigger a tcmalloc gc. |
145 | | Atomic64 released_memory_since_gc; |
146 | | |
147 | | // Validate that various flags are percentages. |
148 | 35.6k | bool ValidatePercentage(const char* flagname, int value) { |
149 | 35.6k | if (value >= 0 && value <= 100) { |
150 | 35.6k | return true; |
151 | 35.6k | } |
152 | 0 | LOG(ERROR) << Substitute("$0 must be a percentage, value $1 is invalid", |
153 | 0 | flagname, value); |
154 | 0 | return false; |
155 | 35.6k | } |
156 | | |
157 | | // Marked as unused because this is not referenced in release mode. |
158 | | bool dummy[] __attribute__((unused)) = { |
159 | | google::RegisterFlagValidator(&FLAGS_memory_limit_soft_percentage, &ValidatePercentage), |
160 | | google::RegisterFlagValidator(&FLAGS_memory_limit_warn_threshold_percentage, |
161 | | &ValidatePercentage) |
162 | | #ifdef TCMALLOC_ENABLED |
163 | | , google::RegisterFlagValidator(&FLAGS_tcmalloc_max_free_bytes_percentage, &ValidatePercentage) |
164 | | #endif |
165 | | }; |
166 | | |
167 | | template <class TrackerMetrics> |
168 | | bool TryIncrementBy(int64_t delta, int64_t max, HighWaterMark* consumption, |
169 | 36.5M | const std::unique_ptr<TrackerMetrics>& metrics) { |
170 | 36.5M | if (consumption->TryIncrementBy(delta, max)36.5M ) { |
171 | 36.5M | if (metrics) { |
172 | 19.5M | metrics->metric_->IncrementBy(delta); |
173 | 19.5M | } |
174 | 36.5M | return true; |
175 | 36.5M | } |
176 | 18.4E | return false; |
177 | 36.5M | } |
178 | | |
179 | | template <class TrackerMetrics> |
180 | | void IncrementBy(int64_t amount, HighWaterMark* consumption, |
181 | 3.45G | const std::unique_ptr<TrackerMetrics>& metrics) { |
182 | 3.45G | consumption->IncrementBy(amount); |
183 | 3.45G | if (metrics) { |
184 | 2.66G | metrics->metric_->IncrementBy(amount); |
185 | 2.66G | } |
186 | 3.45G | } |
187 | | |
188 | 5.07M | std::string CreateMetricName(const MemTracker& mem_tracker) { |
189 | 5.07M | if (mem_tracker.metric_entity() && |
190 | 5.07M | (!mem_tracker.parent() || |
191 | 5.07M | mem_tracker.parent()->metric_entity().get() != mem_tracker.metric_entity().get()5.07M )) { |
192 | 2.40M | return "mem_tracker"; |
193 | 2.40M | } |
194 | 2.67M | std::string id = mem_tracker.id(); |
195 | 2.67M | EscapeMetricNameForPrometheus(&id); |
196 | 2.67M | if (mem_tracker.parent()2.67M ) { |
197 | 2.67M | return CreateMetricName(*mem_tracker.parent()) + "_" + id; |
198 | 18.4E | } else { |
199 | 18.4E | return id; |
200 | 18.4E | } |
201 | 2.67M | } |
202 | | |
203 | 4.80M | std::string CreateMetricLabel(const MemTracker& mem_tracker) { |
204 | 4.80M | return Format("Memory consumed by $0", mem_tracker.ToString()); |
205 | 4.80M | } |
206 | | |
207 | 2.40M | std::string CreateMetricDescription(const MemTracker& mem_tracker) { |
208 | 2.40M | return CreateMetricLabel(mem_tracker); |
209 | 2.40M | } |
210 | | |
211 | | } // namespace |
212 | | |
213 | | class MemTracker::TrackerMetrics { |
214 | | public: |
215 | | explicit TrackerMetrics(const MetricEntityPtr& metric_entity) |
216 | 2.40M | : metric_entity_(metric_entity) { |
217 | 2.40M | } |
218 | | |
219 | 2.40M | void Init(const MemTracker& mem_tracker, const std::string& name_suffix) { |
220 | 2.40M | std::string name = CreateMetricName(mem_tracker); |
221 | 2.40M | if (!name_suffix.empty()) { |
222 | 571k | name += "_"; |
223 | 571k | name += name_suffix; |
224 | 571k | } |
225 | 2.40M | metric_ = metric_entity_->FindOrCreateGauge( |
226 | 2.40M | std::unique_ptr<GaugePrototype<int64_t>>(new OwningGaugePrototype<int64_t>( |
227 | 2.40M | metric_entity_->prototype().name(), std::move(name), |
228 | 2.40M | CreateMetricLabel(mem_tracker), MetricUnit::kBytes, |
229 | 2.40M | CreateMetricDescription(mem_tracker), yb::MetricLevel::kInfo)), |
230 | 2.40M | mem_tracker.consumption()); |
231 | 2.40M | } |
232 | | |
233 | | TrackerMetrics(TrackerMetrics&) = delete; |
234 | | void operator=(const TrackerMetrics&) = delete; |
235 | | |
236 | 1.60M | ~TrackerMetrics() { |
237 | 1.60M | metric_entity_->Remove(metric_->prototype()); |
238 | 1.60M | } |
239 | | |
240 | | MetricEntityPtr metric_entity_; |
241 | | scoped_refptr<AtomicGauge<int64_t>> metric_; |
242 | | }; |
243 | | |
244 | 14.6k | void MemTracker::SetTCMallocCacheMemory() { |
245 | | #ifdef TCMALLOC_ENABLED |
246 | | constexpr const char* const kTcMallocMaxThreadCacheBytes = |
247 | | "tcmalloc.max_total_thread_cache_bytes"; |
248 | | |
249 | | auto flag_value_to_use = |
250 | | (FLAGS_tserver_tcmalloc_max_total_thread_cache_bytes != -1 |
251 | | ? FLAGS_tserver_tcmalloc_max_total_thread_cache_bytes |
252 | | : FLAGS_server_tcmalloc_max_total_thread_cache_bytes); |
253 | | if (flag_value_to_use < 0) { |
254 | | const auto mem_limit = MemTracker::GetRootTracker()->limit(); |
255 | | FLAGS_server_tcmalloc_max_total_thread_cache_bytes = |
256 | | std::min(std::max(static_cast<size_t>(2.5 * mem_limit / 100), 32_MB), 2_GB); |
257 | | FLAGS_tserver_tcmalloc_max_total_thread_cache_bytes = |
258 | | FLAGS_server_tcmalloc_max_total_thread_cache_bytes; |
259 | | } |
260 | | LOG(INFO) << "Setting tcmalloc max thread cache bytes to: " |
261 | | << FLAGS_server_tcmalloc_max_total_thread_cache_bytes; |
262 | | if (!MallocExtension::instance()->SetNumericProperty( |
263 | | kTcMallocMaxThreadCacheBytes, FLAGS_server_tcmalloc_max_total_thread_cache_bytes)) { |
264 | | LOG(FATAL) << "Failed to set Tcmalloc property: " << kTcMallocMaxThreadCacheBytes; |
265 | | } |
266 | | #endif |
267 | 14.6k | } |
268 | | |
269 | 22.6k | void MemTracker::CreateRootTracker() { |
270 | 22.6k | DCHECK_ONLY_NOTNULL(dummy); |
271 | 22.6k | int64_t limit = FLAGS_memory_limit_hard_bytes; |
272 | 22.6k | if (limit == 0) { |
273 | | // If no limit is provided, we'll use |
274 | | // - 85% of the RAM for tservers. |
275 | | // - 10% of the RAM for masters. |
276 | 127 | int64_t total_ram; |
277 | 127 | CHECK_OK(Env::Default()->GetTotalRAMBytes(&total_ram)); |
278 | 127 | limit = total_ram * FLAGS_default_memory_limit_to_ram_ratio; |
279 | 127 | } |
280 | | |
281 | 22.6k | ConsumptionFunctor consumption_functor; |
282 | | |
283 | | #ifdef TCMALLOC_ENABLED |
284 | | consumption_functor = &MemTracker::GetTCMallocActualHeapSizeBytes; |
285 | | |
286 | | if (FLAGS_mem_tracker_tcmalloc_gc_release_bytes < 0) { |
287 | | // Allocate 1% of memory to the tcmallc page heap freelist. |
288 | | // On a 4GB RAM machine, the master gets 10%, so 400MB, so 1% is 4MB. |
289 | | // On a 16GB RAM machine, the tserver gets 85%, so 13.6GB, so 1% is 136MB, so cap at 128MB. |
290 | | FLAGS_mem_tracker_tcmalloc_gc_release_bytes = |
291 | | std::min(static_cast<size_t>(1.0 * limit / 100), 128_MB); |
292 | | } |
293 | | #endif |
294 | | |
295 | 22.6k | root_tracker = std::make_shared<MemTracker>( |
296 | 22.6k | limit, "root", std::move(consumption_functor), nullptr /* parent */, AddToParent::kTrue, |
297 | 22.6k | CreateMetrics::kFalse); |
298 | | |
299 | 22.6k | LOG(INFO) << StringPrintf("MemTracker: hard memory limit is %.6f GB", |
300 | 22.6k | (static_cast<float>(limit) / (1024.0 * 1024.0 * 1024.0))); |
301 | 22.6k | LOG(INFO) << StringPrintf("MemTracker: soft memory limit is %.6f GB", |
302 | 22.6k | (static_cast<float>(root_tracker->soft_limit_) / |
303 | 22.6k | (1024.0 * 1024.0 * 1024.0))); |
304 | 22.6k | } |
305 | | |
306 | | shared_ptr<MemTracker> MemTracker::CreateTracker(int64_t byte_limit, |
307 | | const string& id, |
308 | | ConsumptionFunctor consumption_functor, |
309 | | const shared_ptr<MemTracker>& parent, |
310 | | AddToParent add_to_parent, |
311 | 495k | CreateMetrics create_metrics) { |
312 | 495k | shared_ptr<MemTracker> real_parent = parent ? parent471k : GetRootTracker()24.2k ; |
313 | 495k | return real_parent->CreateChild( |
314 | 495k | byte_limit, id, std::move(consumption_functor), MayExist::kFalse, add_to_parent, |
315 | 495k | create_metrics); |
316 | 495k | } |
317 | | |
318 | | shared_ptr<MemTracker> MemTracker::CreateChild(int64_t byte_limit, |
319 | | const string& id, |
320 | | ConsumptionFunctor consumption_functor, |
321 | | MayExist may_exist, |
322 | | AddToParent add_to_parent, |
323 | 17.6M | CreateMetrics create_metrics) { |
324 | 17.6M | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
325 | 17.6M | if (may_exist) { |
326 | 17.2M | auto result = FindChildUnlocked(id); |
327 | 17.2M | if (result) { |
328 | 15.1M | return result; |
329 | 15.1M | } |
330 | 17.2M | } |
331 | 2.50M | auto result = std::make_shared<MemTracker>( |
332 | 2.50M | byte_limit, id, std::move(consumption_functor), shared_from_this(), add_to_parent, |
333 | 2.50M | create_metrics); |
334 | 2.50M | auto p = child_trackers_.emplace(id, result); |
335 | 2.50M | if (!p.second) { |
336 | 2 | auto existing = p.first->second.lock(); |
337 | 2 | if (existing) { |
338 | 0 | LOG(DFATAL) << Format("Duplicate memory tracker (id $0) on parent $1", id, ToString()); |
339 | 0 | return existing; |
340 | 0 | } |
341 | 2 | p.first->second = result; |
342 | 2 | } |
343 | | |
344 | 2.50M | return result; |
345 | 2.50M | } |
346 | | |
347 | | MemTracker::MemTracker(int64_t byte_limit, const string& id, |
348 | | ConsumptionFunctor consumption_functor, std::shared_ptr<MemTracker> parent, |
349 | | AddToParent add_to_parent, CreateMetrics create_metrics) |
350 | | : limit_(byte_limit), |
351 | | soft_limit_(limit_ == -1 ? -1 : (limit_ * FLAGS_memory_limit_soft_percentage) / 100), |
352 | | id_(id), |
353 | | consumption_functor_(std::move(consumption_functor)), |
354 | | descr_(Substitute("memory consumption for $0", id)), |
355 | | parent_(std::move(parent)), |
356 | | rand_(GetRandomSeed32()), |
357 | | enable_logging_(FLAGS_mem_tracker_logging), |
358 | | log_stack_(FLAGS_mem_tracker_log_stack_trace), |
359 | 2.59M | add_to_parent_(add_to_parent) { |
360 | 18.4E | VLOG(1) << "Creating tracker " << ToString(); |
361 | 2.59M | UpdateConsumption(); |
362 | | |
363 | 2.59M | all_trackers_.push_back(this); |
364 | 2.59M | if (has_limit()) { |
365 | 412k | limit_trackers_.push_back(this); |
366 | 412k | } |
367 | 2.59M | if (parent_ && add_to_parent2.57M ) { |
368 | 2.43M | all_trackers_.insert( |
369 | 2.43M | all_trackers_.end(), parent_->all_trackers_.begin(), parent_->all_trackers_.end()); |
370 | 2.43M | limit_trackers_.insert( |
371 | 2.43M | limit_trackers_.end(), parent_->limit_trackers_.begin(), parent_->limit_trackers_.end()); |
372 | 2.43M | } |
373 | | |
374 | 2.59M | if (create_metrics) { |
375 | 4.22M | for (MemTracker* tracker = this; tracker; tracker = tracker->parent().get()2.37M ) { |
376 | 4.02M | if (tracker->metric_entity()) { |
377 | 1.65M | metrics_ = std::make_unique<TrackerMetrics>(tracker->metric_entity()); |
378 | 1.65M | metrics_->Init(*this, std::string()); |
379 | 1.65M | break; |
380 | 1.65M | } |
381 | 4.02M | } |
382 | 1.84M | } |
383 | 2.59M | } |
384 | | |
385 | 1.78M | MemTracker::~MemTracker() { |
386 | 1.78M | VLOG(1) << "Destroying tracker " << ToString()503 ; |
387 | 1.78M | if (!consumption_functor_) { |
388 | 1.78M | DCHECK_EQ(consumption(), 0) << "Memory tracker " << ToString()0 ; |
389 | 1.78M | } |
390 | 1.78M | if (parent_) { |
391 | 1.78M | if (add_to_parent_) { |
392 | 1.68M | parent_->Release(consumption()); |
393 | 1.68M | } |
394 | 1.78M | } |
395 | 1.78M | } |
396 | | |
397 | 301k | void MemTracker::UnregisterFromParent() { |
398 | 301k | DCHECK(parent_); |
399 | 301k | parent_->UnregisterChild(id_); |
400 | 301k | } |
401 | | |
402 | 301k | void MemTracker::UnregisterChild(const std::string& id) { |
403 | 301k | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
404 | 301k | child_trackers_.erase(id); |
405 | 301k | } |
406 | | |
407 | 4.80M | string MemTracker::ToString() const { |
408 | 4.80M | string s; |
409 | 4.80M | const MemTracker* tracker = this; |
410 | 27.2M | while (tracker) { |
411 | 22.4M | if (s != "") { |
412 | 17.6M | s += "->"; |
413 | 17.6M | } |
414 | 22.4M | s += tracker->id(); |
415 | 22.4M | tracker = tracker->parent_.get(); |
416 | 22.4M | } |
417 | 4.80M | return s; |
418 | 4.80M | } |
419 | | |
420 | | MemTrackerPtr MemTracker::FindTracker(const std::string& id, |
421 | 5 | const MemTrackerPtr& parent) { |
422 | 5 | shared_ptr<MemTracker> real_parent = parent ? parent4 : GetRootTracker()1 ; |
423 | 5 | return real_parent->FindChild(id); |
424 | 5 | } |
425 | | |
426 | 6 | MemTrackerPtr MemTracker::FindChild(const std::string& id) { |
427 | 6 | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
428 | 6 | return FindChildUnlocked(id); |
429 | 6 | } |
430 | | |
431 | 17.2M | MemTrackerPtr MemTracker::FindChildUnlocked(const std::string& id) { |
432 | 17.2M | auto it = child_trackers_.find(id); |
433 | 17.2M | if (it != child_trackers_.end()) { |
434 | 15.9M | auto result = it->second.lock(); |
435 | 15.9M | if (!result) { |
436 | 757k | child_trackers_.erase(it); |
437 | 757k | } |
438 | 15.9M | return result; |
439 | 15.9M | } |
440 | 1.32M | return MemTrackerPtr(); |
441 | 17.2M | } |
442 | | |
443 | | shared_ptr<MemTracker> MemTracker::FindOrCreateTracker(int64_t byte_limit, |
444 | | const string& id, |
445 | | const shared_ptr<MemTracker>& parent, |
446 | | AddToParent add_to_parent, |
447 | 17.1M | CreateMetrics create_metrics) { |
448 | 17.1M | shared_ptr<MemTracker> real_parent = parent ? parent17.1M : GetRootTracker()37.3k ; |
449 | 17.1M | return real_parent->CreateChild( |
450 | 17.1M | byte_limit, id, ConsumptionFunctor(), MayExist::kTrue, add_to_parent, create_metrics); |
451 | 17.1M | } |
452 | | |
453 | 1.38k | std::vector<MemTrackerPtr> MemTracker::ListChildren() { |
454 | 1.38k | std::vector<MemTrackerPtr> result; |
455 | 1.38k | ListDescendantTrackers(&result, OnlyChildren::kTrue); |
456 | 1.38k | return result; |
457 | 1.38k | } |
458 | | |
459 | | void MemTracker::ListDescendantTrackers( |
460 | 1.39k | std::vector<MemTrackerPtr>* out, OnlyChildren only_children) { |
461 | 1.39k | size_t begin = out->size(); |
462 | 1.39k | { |
463 | 1.39k | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
464 | 2.72k | for (auto it = child_trackers_.begin(); it != child_trackers_.end();) { |
465 | 1.33k | auto child = it->second.lock(); |
466 | 1.33k | if (child) { |
467 | 1.33k | out->push_back(std::move(child)); |
468 | 1.33k | ++it; |
469 | 1.33k | } else { |
470 | 1 | it = child_trackers_.erase(it); |
471 | 1 | } |
472 | 1.33k | } |
473 | 1.39k | } |
474 | 1.39k | if (!only_children) { |
475 | 7 | size_t end = out->size(); |
476 | 11 | for (size_t i = begin; i != end; ++i4 ) { |
477 | 4 | (*out)[i]->ListDescendantTrackers(out); |
478 | 4 | } |
479 | 7 | } |
480 | 1.39k | } |
481 | | |
482 | 3 | std::vector<MemTrackerPtr> MemTracker::ListTrackers() { |
483 | 3 | std::vector<MemTrackerPtr> result; |
484 | 3 | auto root = GetRootTracker(); |
485 | 3 | result.push_back(root); |
486 | 3 | root->ListDescendantTrackers(&result); |
487 | 3 | return result; |
488 | 3 | } |
489 | | |
490 | 4.18G | bool MemTracker::UpdateConsumption(bool force) { |
491 | 4.18G | if (poll_children_consumption_functors_) { |
492 | 0 | poll_children_consumption_functors_(); |
493 | 0 | } |
494 | | |
495 | 4.18G | if (consumption_functor_) { |
496 | 0 | auto now = CoarseMonoClock::now(); |
497 | 0 | auto interval = std::chrono::microseconds( |
498 | 0 | GetAtomicFlag(&FLAGS_mem_tracker_update_consumption_interval_us)); |
499 | 0 | if (force || now > last_consumption_update_ + interval) { |
500 | 0 | last_consumption_update_ = now; |
501 | 0 | auto value = consumption_functor_(); |
502 | 0 | consumption_.set_value(value); |
503 | 0 | if (metrics_) { |
504 | 0 | metrics_->metric_->set_value(value); |
505 | 0 | } |
506 | 0 | } |
507 | 0 | return true; |
508 | 0 | } |
509 | | |
510 | 4.18G | return false; |
511 | 4.18G | } |
512 | | |
513 | 402M | void MemTracker::Consume(int64_t bytes) { |
514 | 402M | if (bytes < 0) { |
515 | 9 | Release(-bytes); |
516 | 9 | return; |
517 | 9 | } |
518 | | |
519 | 402M | if (UpdateConsumption()) { |
520 | 0 | return; |
521 | 0 | } |
522 | 402M | if (bytes == 0) { |
523 | 210k | return; |
524 | 210k | } |
525 | 402M | if (PREDICT_FALSE(enable_logging_)) { |
526 | 0 | LogUpdate(true, bytes); |
527 | 0 | } |
528 | 1.82G | for (auto& tracker : all_trackers_) { |
529 | 1.82G | if (!tracker->UpdateConsumption()1.82G ) { |
530 | 1.82G | IncrementBy(bytes, &tracker->consumption_, tracker->metrics_); |
531 | 1.82G | DCHECK_GE(tracker->consumption_.current_value(), 0); |
532 | 1.82G | } |
533 | 1.82G | } |
534 | 402M | } |
535 | | |
536 | 34.1M | bool MemTracker::TryConsume(int64_t bytes, MemTracker** blocking_mem_tracker) { |
537 | 34.1M | UpdateConsumption(); |
538 | 34.1M | if (bytes <= 0) { |
539 | 17.1M | return true; |
540 | 17.1M | } |
541 | 16.9M | if (PREDICT_FALSE(enable_logging_)) { |
542 | 0 | LogUpdate(true, bytes); |
543 | 0 | } |
544 | | |
545 | 16.9M | ssize_t i = 0; |
546 | | // Walk the tracker tree top-down, to avoid expanding a limit on a child whose parent |
547 | | // won't accommodate the change. |
548 | 96.9M | for (i = all_trackers_.size() - 1; i >= 0; --i79.9M ) { |
549 | 79.9M | MemTracker *tracker = all_trackers_[i]; |
550 | 79.9M | if (tracker->limit_ < 0) { |
551 | 43.4M | IncrementBy(bytes, &tracker->consumption_, tracker->metrics_); |
552 | 43.4M | } else { |
553 | 36.4M | if (!TryIncrementBy(bytes, tracker->limit_, &tracker->consumption_, tracker->metrics_)) { |
554 | | // One of the trackers failed, attempt to GC memory or expand our limit. If that |
555 | | // succeeds, TryUpdate() again. Bail if either fails. |
556 | 6.99k | if (!tracker->GcMemory(tracker->limit_ - bytes) || |
557 | 6.99k | tracker->ExpandLimit(bytes)6.92k ) { |
558 | 79 | if (!TryIncrementBy(bytes, tracker->limit_, &tracker->consumption_, tracker->metrics_)) { |
559 | 0 | break; |
560 | 0 | } |
561 | 6.91k | } else { |
562 | 6.91k | break; |
563 | 6.91k | } |
564 | 6.99k | } |
565 | 36.4M | } |
566 | 79.9M | } |
567 | | // Everyone succeeded, return. |
568 | 16.9M | if (i == -1) { |
569 | 16.9M | return true; |
570 | 16.9M | } |
571 | | |
572 | | // Someone failed, roll back the ones that succeeded. |
573 | | // TODO: this doesn't roll it back completely since the max values for |
574 | | // the updated trackers aren't decremented. The max values are only used |
575 | | // for error reporting so this is probably okay. Rolling those back is |
576 | | // pretty hard; we'd need something like 2PC. |
577 | | // |
578 | | // TODO: This might leave us with an allocated resource that we can't use. Do we need |
579 | | // to adjust the consumption of the query tracker to stop the resource from never |
580 | | // getting used by a subsequent TryConsume()? |
581 | 34.1k | for (ssize_t j = all_trackers_.size(); 18.5k --j > i;) { |
582 | 15.5k | IncrementBy(-bytes, &all_trackers_[j]->consumption_, all_trackers_[j]->metrics_); |
583 | 15.5k | } |
584 | 18.5k | if (blocking_mem_tracker) { |
585 | 1.78k | *blocking_mem_tracker = all_trackers_[i]; |
586 | 1.78k | } |
587 | | |
588 | 18.5k | return false; |
589 | 16.9M | } |
590 | | |
591 | 344M | void MemTracker::Release(int64_t bytes) { |
592 | 344M | if (bytes < 0) { |
593 | 0 | Consume(-bytes); |
594 | 0 | return; |
595 | 0 | } |
596 | | |
597 | 344M | if (PREDICT_FALSE(base::subtle::Barrier_AtomicIncrement(&released_memory_since_gc, bytes) > |
598 | 344M | GetAtomicFlag(&FLAGS_mem_tracker_tcmalloc_gc_release_bytes))) { |
599 | 344M | GcTcmalloc(); |
600 | 344M | } |
601 | | |
602 | 344M | if (UpdateConsumption()) { |
603 | 0 | return; |
604 | 0 | } |
605 | | |
606 | 344M | if (bytes == 0) { |
607 | 1.76M | return; |
608 | 1.76M | } |
609 | 342M | if (PREDICT_FALSE(enable_logging_)) { |
610 | 0 | LogUpdate(false, bytes); |
611 | 0 | } |
612 | | |
613 | 1.59G | for (auto& tracker : all_trackers_) { |
614 | 1.59G | if (!tracker->UpdateConsumption()1.59G ) { |
615 | 1.59G | IncrementBy(-bytes, &tracker->consumption_, tracker->metrics_); |
616 | | // If a UDF calls FunctionContext::TrackAllocation() but allocates less than the |
617 | | // reported amount, the subsequent call to FunctionContext::Free() may cause the |
618 | | // process mem tracker to go negative until it is synced back to the tcmalloc |
619 | | // metric. Don't blow up in this case. (Note that this doesn't affect non-process |
620 | | // trackers since we can enforce that the reported memory usage is internally |
621 | | // consistent.) |
622 | 1.59G | DCHECK_GE(tracker->consumption_.current_value(), 0) << "Tracker: " << tracker->ToString()0 ; |
623 | 1.59G | } |
624 | 1.59G | } |
625 | 342M | } |
626 | | |
627 | 7 | bool MemTracker::AnyLimitExceeded() { |
628 | 16 | for (const auto& tracker : limit_trackers_) { |
629 | 16 | if (tracker->LimitExceeded()) { |
630 | 3 | return true; |
631 | 3 | } |
632 | 16 | } |
633 | 4 | return false; |
634 | 7 | } |
635 | | |
636 | 41.9M | bool MemTracker::LimitExceeded() { |
637 | 41.9M | if (PREDICT_FALSE(CheckLimitExceeded())) { |
638 | 100k | return GcMemory(limit_); |
639 | 100k | } |
640 | 41.8M | return false; |
641 | 41.9M | } |
642 | | |
643 | 41.9M | SoftLimitExceededResult MemTracker::SoftLimitExceeded(double* score) { |
644 | | // Did we exceed the actual limit? |
645 | 41.9M | if (LimitExceeded()) { |
646 | 100k | return {true, consumption() * 100.0 / limit()}; |
647 | 100k | } |
648 | | |
649 | | // No soft limit defined. |
650 | 41.8M | if (!has_limit() || limit_ == soft_limit_41.8M ) { |
651 | 0 | return {false, 0.0}; |
652 | 0 | } |
653 | | |
654 | | // Are we under the soft limit threshold? |
655 | 41.8M | int64_t usage = consumption(); |
656 | 41.8M | if (usage < soft_limit_) { |
657 | 41.6M | return {false, 0.0}; |
658 | 41.6M | } |
659 | | |
660 | | // We're over the threshold; were we randomly chosen to be over the soft limit? |
661 | 218k | if (*score == 0.0) { |
662 | 201k | *score = RandomUniformReal<double>(); |
663 | 201k | } |
664 | 218k | if (usage + (limit_ - soft_limit_) * *score > limit_ && GcMemory(soft_limit_)50.0k ) { |
665 | 50.0k | return {true, usage * 100.0 / limit()}; |
666 | 50.0k | } |
667 | 168k | return {false, 0.0}; |
668 | 218k | } |
669 | | |
670 | 41.6M | SoftLimitExceededResult MemTracker::AnySoftLimitExceeded(double* score) { |
671 | 41.6M | for (MemTracker* t : limit_trackers_) { |
672 | 41.6M | auto result = t->SoftLimitExceeded(score); |
673 | 41.6M | if (result.exceeded) { |
674 | 116 | return result; |
675 | 116 | } |
676 | 41.6M | } |
677 | 41.6M | return {false, 0.0}; |
678 | 41.6M | } |
679 | | |
680 | 5.12k | int64_t MemTracker::SpareCapacity() const { |
681 | 5.12k | int64_t result = std::numeric_limits<int64_t>::max(); |
682 | 15.3k | for (const auto& tracker : limit_trackers_) { |
683 | 15.3k | int64_t mem_left = tracker->limit() - tracker->consumption(); |
684 | 15.3k | result = std::min(result, mem_left); |
685 | 15.3k | } |
686 | 5.12k | return result; |
687 | 5.12k | } |
688 | | |
689 | 164k | bool MemTracker::GcMemory(int64_t max_consumption) { |
690 | 164k | if (max_consumption < 0) { |
691 | | // Impossible to GC enough memory to reach the goal. |
692 | 5.48k | return true; |
693 | 5.48k | } |
694 | | |
695 | 159k | { |
696 | 159k | int64_t current_consumption = GetUpdatedConsumption(); |
697 | | // Check if someone gc'd before us |
698 | 159k | if (current_consumption <= max_consumption) { |
699 | 0 | return false; |
700 | 0 | } |
701 | | |
702 | | // Create vector of alive garbage collectors. Also remove stale garbage collectors. |
703 | 159k | std::vector<std::shared_ptr<GarbageCollector>> collectors; |
704 | 159k | { |
705 | 159k | std::lock_guard<simple_spinlock> l(gc_mutex_); |
706 | 159k | collectors.reserve(gcs_.size()); |
707 | 159k | auto w = gcs_.begin(); |
708 | 159k | for (auto i = gcs_.begin(); i != gcs_.end(); ++i82 ) { |
709 | 82 | auto gc = i->lock(); |
710 | 82 | if (!gc) { |
711 | 0 | continue; |
712 | 0 | } |
713 | 82 | collectors.push_back(gc); |
714 | 82 | if (w != i) { |
715 | 0 | *w = *i; |
716 | 0 | } |
717 | 82 | ++w; |
718 | 82 | } |
719 | 159k | gcs_.erase(w, gcs_.end()); |
720 | 159k | } |
721 | | |
722 | | // Try to free up some memory |
723 | 159k | for (const auto& gc : collectors) { |
724 | 80 | gc->CollectGarbage(current_consumption - max_consumption); |
725 | 80 | current_consumption = GetUpdatedConsumption(); |
726 | 80 | if (current_consumption <= max_consumption) { |
727 | 79 | break; |
728 | 79 | } |
729 | 80 | } |
730 | 159k | } |
731 | | |
732 | 1 | int64_t current_consumption = GetUpdatedConsumption(); |
733 | 159k | if (current_consumption > max_consumption) { |
734 | 159k | std::vector<MemTrackerPtr> children; |
735 | 159k | { |
736 | 159k | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
737 | 166k | for (auto it = child_trackers_.begin(); it != child_trackers_.end();) { |
738 | 7.71k | auto child = it->second.lock(); |
739 | 7.71k | if (child) { |
740 | 7.71k | children.push_back(std::move(child)); |
741 | 7.71k | ++it; |
742 | 7.71k | } else { |
743 | 2 | it = child_trackers_.erase(it); |
744 | 2 | } |
745 | 7.71k | } |
746 | 159k | } |
747 | | |
748 | 159k | for (const auto& child : children) { |
749 | 7.67k | bool did_gc = child->GcMemory(max_consumption - current_consumption + child->consumption()); |
750 | 7.67k | if (did_gc) { |
751 | 7.67k | current_consumption = GetUpdatedConsumption(); |
752 | 7.67k | if (current_consumption <= max_consumption) { |
753 | 17 | return true; |
754 | 17 | } |
755 | 7.67k | } |
756 | 7.67k | } |
757 | 159k | } |
758 | | |
759 | 159k | return consumption() > max_consumption; |
760 | 159k | } |
761 | | |
762 | 344M | void MemTracker::GcTcmalloc() { |
763 | | #ifdef TCMALLOC_ENABLED |
764 | | released_memory_since_gc = 0; |
765 | | TRACE_EVENT0("process", "MemTracker::GcTcmalloc"); |
766 | | |
767 | | // Number of bytes in the 'NORMAL' free list (i.e reserved by tcmalloc but |
768 | | // not in use). |
769 | | int64_t bytes_overhead = GetTCMallocProperty("tcmalloc.pageheap_free_bytes"); |
770 | | // Bytes allocated by the application. |
771 | | int64_t bytes_used = GetTCMallocCurrentAllocatedBytes(); |
772 | | |
773 | | int64_t max_overhead = bytes_used * FLAGS_tcmalloc_max_free_bytes_percentage / 100.0; |
774 | | if (bytes_overhead > max_overhead) { |
775 | | int64_t extra = bytes_overhead - max_overhead; |
776 | | while (extra > 0) { |
777 | | // Release 1MB at a time, so that tcmalloc releases its page heap lock |
778 | | // allowing other threads to make progress. This still disrupts the current |
779 | | // thread, but is better than disrupting all. |
780 | | MallocExtension::instance()->ReleaseToSystem(1024 * 1024); |
781 | | extra -= 1024 * 1024; |
782 | | } |
783 | | } |
784 | | |
785 | | #else |
786 | | // Nothing to do if not using tcmalloc. |
787 | 344M | #endif |
788 | 344M | } |
789 | | |
790 | 0 | string MemTracker::LogUsage(const string& prefix, int64_t usage_threshold, int indent) const { |
791 | 0 | stringstream ss; |
792 | 0 | ss << prefix << std::string(indent, ' ') << id_ << ":"; |
793 | 0 | if (CheckLimitExceeded()) { |
794 | 0 | ss << " memory limit exceeded."; |
795 | 0 | } |
796 | 0 | if (limit_ > 0) { |
797 | 0 | ss << " Limit=" << HumanReadableNumBytes::ToString(limit_); |
798 | 0 | } |
799 | 0 | ss << " Consumption=" << HumanReadableNumBytes::ToString(consumption()); |
800 | |
|
801 | 0 | stringstream prefix_ss; |
802 | 0 | prefix_ss << prefix << " "; |
803 | 0 | string new_prefix = prefix_ss.str(); |
804 | 0 | std::lock_guard<std::mutex> lock(child_trackers_mutex_); |
805 | 0 | for (const auto& p : child_trackers_) { |
806 | 0 | auto child = p.second.lock(); |
807 | 0 | if (child && child->consumption() >= usage_threshold) { |
808 | 0 | ss << std::endl; |
809 | 0 | ss << child->LogUsage(prefix, usage_threshold, indent + 2); |
810 | 0 | } |
811 | 0 | } |
812 | 0 | return ss.str(); |
813 | 0 | } |
814 | | |
815 | 0 | void MemTracker::LogUpdate(bool is_consume, int64_t bytes) const { |
816 | 0 | stringstream ss; |
817 | 0 | ss << this << " " << (is_consume ? "Consume: " : "Release: ") << bytes |
818 | 0 | << " Consumption: " << consumption() << " Limit: " << limit_; |
819 | 0 | if (log_stack_) { |
820 | 0 | ss << std::endl << GetStackTrace(); |
821 | 0 | } |
822 | 0 | LOG(ERROR) << ss.str(); |
823 | 0 | } |
824 | | |
825 | 1.42M | shared_ptr<MemTracker> MemTracker::GetRootTracker() { |
826 | 1.42M | GoogleOnceInit(&root_tracker_once, &MemTracker::CreateRootTracker); |
827 | 1.42M | return root_tracker; |
828 | 1.42M | } |
829 | | |
830 | | void MemTracker::SetMetricEntity( |
831 | 748k | const MetricEntityPtr& metric_entity, const std::string& name_suffix) { |
832 | 748k | if (metrics_) { |
833 | 48 | LOG_IF(DFATAL, metric_entity->id() != metrics_->metric_entity_->id()) |
834 | 0 | << "SetMetricEntity (" << metric_entity->id() << ") while " |
835 | 0 | << ToString() << " already has a different metric entity " |
836 | 0 | << metrics_->metric_entity_->id(); |
837 | 48 | return; |
838 | 48 | } |
839 | 748k | metrics_ = std::make_unique<TrackerMetrics>(metric_entity); |
840 | 748k | metrics_->Init(*this, name_suffix); |
841 | 748k | } |
842 | | |
843 | 20.9M | scoped_refptr<MetricEntity> MemTracker::metric_entity() const { |
844 | 20.9M | return metrics_ ? metrics_->metric_entity_18.0M : nullptr2.84M ; |
845 | 20.9M | } |
846 | | |
847 | | const MemTrackerData& CollectMemTrackerData(const MemTrackerPtr& tracker, int depth, |
848 | 1.38k | std::vector<MemTrackerData>* output) { |
849 | 1.38k | size_t idx = output->size(); |
850 | 1.38k | output->push_back({tracker, depth, 0}); |
851 | | |
852 | 1.38k | auto children = tracker->ListChildren(); |
853 | 1.38k | std::sort(children.begin(), children.end(), [](const auto& lhs, const auto& rhs) { |
854 | 1.16k | return lhs->id() < rhs->id(); |
855 | 1.16k | }); |
856 | | |
857 | 1.38k | for (const auto& child : children) { |
858 | 1.32k | const auto& child_data = CollectMemTrackerData(child, depth + 1, output); |
859 | 1.32k | (*output)[idx].consumption_excluded_from_ancestors += |
860 | 1.32k | child_data.consumption_excluded_from_ancestors; |
861 | 1.32k | if (!child_data.tracker->add_to_parent()) { |
862 | 163 | (*output)[idx].consumption_excluded_from_ancestors += child_data.tracker->consumption(); |
863 | 163 | } |
864 | 1.32k | } |
865 | | |
866 | 1.38k | return (*output)[idx]; |
867 | 1.38k | } |
868 | | |
869 | 54 | std::string DumpMemTrackers() { |
870 | 54 | std::ostringstream out; |
871 | 54 | std::vector<MemTrackerData> trackers; |
872 | 54 | CollectMemTrackerData(MemTracker::GetRootTracker(), 0, &trackers); |
873 | 1.29k | for (const auto& data : trackers) { |
874 | 1.29k | const auto& tracker = data.tracker; |
875 | 1.29k | const std::string current_consumption_str = |
876 | 1.29k | HumanReadableNumBytes::ToString(tracker->consumption()); |
877 | 1.29k | out << std::string(data.depth, ' ') << tracker->id() << ": "; |
878 | 1.29k | if (!data.consumption_excluded_from_ancestors || data.tracker->UpdateConsumption()336 ) { |
879 | 954 | out << current_consumption_str; |
880 | 954 | } else { |
881 | 336 | auto full_consumption_str = HumanReadableNumBytes::ToString( |
882 | 336 | tracker->consumption() + data.consumption_excluded_from_ancestors); |
883 | 336 | out << current_consumption_str << " (" << full_consumption_str << ")"; |
884 | 336 | } |
885 | 1.29k | out << std::endl; |
886 | 1.29k | } |
887 | 54 | return out.str(); |
888 | 54 | } |
889 | | |
890 | 54 | std::string DumpMemoryUsage() { |
891 | 54 | std::ostringstream out; |
892 | 54 | auto tcmalloc_stats = TcMallocStats(); |
893 | 54 | if (!tcmalloc_stats.empty()) { |
894 | 0 | out << "TCMalloc stats: \n" << tcmalloc_stats << "\n"; |
895 | 0 | } |
896 | 54 | out << "Memory usage: \n" << DumpMemTrackers(); |
897 | 54 | return out.str(); |
898 | 54 | } |
899 | | |
900 | | bool CheckMemoryPressureWithLogging( |
901 | 8.94M | const MemTrackerPtr& mem_tracker, double score, const char* error_prefix) { |
902 | 8.94M | const auto soft_limit_exceeded_result = mem_tracker->AnySoftLimitExceeded(&score); |
903 | 8.94M | if (!soft_limit_exceeded_result.exceeded) { |
904 | 8.93M | return true; |
905 | 8.93M | } |
906 | | |
907 | 11.4k | const std::string msg = StringPrintf( |
908 | 11.4k | "Soft memory limit exceeded (at %.2f%% of capacity), score: %.2f", |
909 | 11.4k | soft_limit_exceeded_result.current_capacity_pct, score); |
910 | 11.4k | if (soft_limit_exceeded_result.current_capacity_pct >= |
911 | 11.4k | FLAGS_memory_limit_warn_threshold_percentage) { |
912 | 2 | YB_LOG_EVERY_N_SECS(WARNING, 1) << error_prefix << msg << THROTTLE_MSG; |
913 | 11.4k | } else { |
914 | 11.4k | YB_LOG_EVERY_N_SECS(INFO, 1) << error_prefix << msg << THROTTLE_MSG5 ; |
915 | 11.4k | } |
916 | | |
917 | 11.4k | return false; |
918 | 8.94M | } |
919 | | |
920 | | } // namespace yb |