/Users/deen/code/yugabyte-db/src/yb/server/total_mem_watcher.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #if defined(__linux__) |
15 | | #include <unistd.h> |
16 | | #endif |
17 | | |
18 | | #include <fstream> |
19 | | #include <chrono> |
20 | | #include <thread> |
21 | | |
22 | | #include "yb/server/total_mem_watcher.h" |
23 | | #include "yb/util/format.h" |
24 | | #include "yb/util/logging.h" |
25 | | #include "yb/util/mem_tracker.h" |
26 | | #include "yb/util/memory/memory.h" |
27 | | #include "yb/util/scope_exit.h" |
28 | | #include "yb/util/status_format.h" |
29 | | |
30 | | #ifdef TCMALLOC_ENABLED |
31 | | #include <gperftools/malloc_extension.h> |
32 | | #endif |
33 | | |
34 | | using namespace std::literals; |
35 | | |
// Default value of --memory_limit_termination_threshold_pct, expressed as a percentage of the
// root memory tracker limit. ThreadSanitizer and AddressSanitizer builds use larger defaults
// than regular builds.
constexpr int kDefaultMemoryLimitTerminationPercent =
#if defined(THREAD_SANITIZER)
    500;
#elif defined(ADDRESS_SANITIZER)
    300;
#else
    200;
#endif
43 | | |
44 | | DEFINE_int32(memory_limit_termination_threshold_pct, kDefaultMemoryLimitTerminationPercent, |
45 | | "If the RSS (resident set size) of the program reaches this percentage of the " |
46 | | "root memory tracker limit, the program will exit. RSS is measured using operating " |
47 | | "system means, not the memory allocator. Set to 0 to disable this behavior."); |
48 | | |
49 | | DEFINE_int32(total_mem_watcher_interval_millis, 1000, |
50 | | "Interval in milliseconds between checking the total memory usage of the current " |
51 | | "process as seen by the operating system, and deciding whether to terminate in case " |
52 | | "of excessive memory consumption."); |
53 | | |
54 | | namespace yb { |
55 | | namespace server { |
56 | | |
57 | 4.93k | TotalMemWatcher::TotalMemWatcher() { |
58 | 4.93k | if (FLAGS_memory_limit_termination_threshold_pct <= 0) { |
59 | 0 | rss_termination_limit_bytes_ = 0; |
60 | 4.93k | } else { |
61 | 4.93k | rss_termination_limit_bytes_ = |
62 | 4.93k | (MemTracker::GetRootTracker()->limit() * |
63 | 4.93k | FLAGS_memory_limit_termination_threshold_pct) / 100; |
64 | 4.93k | } |
65 | 4.93k | } |
66 | | |
67 | 0 | TotalMemWatcher::~TotalMemWatcher() { |
68 | 0 | } |
69 | | |
70 | | void TotalMemWatcher::MemoryMonitoringLoop( |
71 | | std::function<void()> shutdown_fn, |
72 | 4.93k | std::function<bool()> is_shutdown_finished_fn) { |
73 | 4.93k | if (FLAGS_memory_limit_termination_threshold_pct > 0) { |
74 | 4.93k | int64_t root_tracker_limit = MemTracker::GetRootTracker()->limit(); |
75 | 4.93k | LOG(INFO) << "Root memtracker limit: " << root_tracker_limit << " (" |
76 | 4.93k | << (root_tracker_limit / 1024 / 1024) << " MiB); this server will stop if memory " |
77 | 4.93k | << "usage exceeds " << FLAGS_memory_limit_termination_threshold_pct << "% of that: " |
78 | 4.93k | << rss_termination_limit_bytes_ << " bytes (" |
79 | 4.93k | << (rss_termination_limit_bytes_ / 1024 / 1024) << " MiB)."; |
80 | 4.93k | } |
81 | 139k | while (true) { |
82 | 134k | std::this_thread::sleep_for(1ms * FLAGS_total_mem_watcher_interval_millis); |
83 | | |
84 | 134k | Status mem_check_status = Check(); |
85 | 134k | if (!mem_check_status.ok()) { |
86 | 0 | YB_LOG_EVERY_N_SECS(WARNING, 10) |
87 | 0 | << "Failed to check total memory usage: " << mem_check_status.ToString(); |
88 | 0 | continue; |
89 | 0 | } |
90 | 134k | std::string termination_explanation = GetTerminationExplanation(); |
91 | 134k | if (!termination_explanation.empty()) { |
92 | 0 | LOG(ERROR) << "Memory usage exceeded configured limit, terminating the process: " |
93 | 0 | << termination_explanation << "\nDetails:\n" |
94 | 0 | << GetMemoryUsageDetails(); |
95 | 0 | shutdown_fn(); |
96 | 0 | const int kMaxSecToWait = 10; |
97 | 0 | for (int secondsLeft = kMaxSecToWait; |
98 | 0 | secondsLeft > 0 && !is_shutdown_finished_fn(); |
99 | 0 | --secondsLeft) { |
100 | 0 | LOG(INFO) << "Waiting for server to shut down (will wait up to " << secondsLeft |
101 | 0 | << " seconds)"; |
102 | 0 | std::this_thread::sleep_for(1s); |
103 | 0 | } |
104 | 0 | LOG(WARNING) << "Server is exiting"; |
105 | 0 | return; |
106 | 0 | } |
107 | 134k | } |
108 | 4.93k | } |
109 | | |
110 | | #ifdef __linux__ |
111 | | |
// Snapshot of the fields of /proc/[pid]/statm (Linux) that this file uses.
// All values are page counts, not bytes. The "lib" (5th) and "dt" (7th) columns are unused
// since Linux 2.6 (always 0) and are therefore not stored.
//
// Reference: http://man7.org/linux/man-pages/man5/proc.5.html
struct StatM {
  // Column 1: total program size (VmSize in /proc/[pid]/status).
  int64_t size{0};

  // Column 2: resident set size (VmRSS in /proc/[pid]/status).
  int64_t resident{0};

  // Column 3: resident shared pages, i.e. backed by a file
  // (RssFile + RssShmem in /proc/[pid]/status).
  int64_t shared{0};

  // Column 4: text (code).
  int64_t text{0};

  // Column 6: data + stack.
  int64_t data{0};
};
139 | | |
140 | | class LinuxTotalMemWatcher : public TotalMemWatcher { |
141 | | public: |
142 | | LinuxTotalMemWatcher() |
143 | | : statm_path_(Format("/proc/$0/statm", getpid())), |
144 | | page_size_(sysconf(_SC_PAGESIZE)) { |
145 | | } |
146 | | ~LinuxTotalMemWatcher() {} |
147 | | |
148 | | Status Check() override { |
149 | | RETURN_NOT_OK(ReadStatM()); |
150 | | return Status::OK(); |
151 | | } |
152 | | |
153 | | std::string GetTerminationExplanation() override { |
154 | | if (rss_termination_limit_bytes_ == 0) { |
155 | | return std::string(); |
156 | | } |
157 | | size_t non_shared_rss_bytes = (statm_.resident - statm_.shared) * page_size_; |
158 | | if (non_shared_rss_bytes <= rss_termination_limit_bytes_) { |
159 | | return std::string(); |
160 | | } |
161 | | return Format("Non-shared RSS ($0 bytes, $1 MiB) has exceeded the configured limit " |
162 | | "($2 bytes, $3 MiB)", |
163 | | non_shared_rss_bytes, non_shared_rss_bytes / 1024 / 1024, |
164 | | rss_termination_limit_bytes_, rss_termination_limit_bytes_ / 1024 / 1024); |
165 | | } |
166 | | |
167 | | std::string GetMemoryUsageDetails() override { |
168 | | std::string result; |
169 | | #ifdef TCMALLOC_ENABLED |
170 | | result += TcMallocStats(); |
171 | | result += "\n"; |
172 | | #endif |
173 | | result += Format( |
174 | | "---------------------\n" |
175 | | "OS-level memory stats\n" |
176 | | "---------------------\n" |
177 | | "VmSize: $0 bytes ($1 MiB)\n" |
178 | | "VmRSS: $2 bytes ($3 MiB)\n" |
179 | | "RssFile + RssShmem: $4 bytes ($5 MiB)\n" |
180 | | "Text (code): $6 bytes ($7 MiB)\n" |
181 | | "Data + stack: $8 bytes ($9 MiB)", |
182 | | statm_.size * page_size_, statm_.size * page_size_ / 1024 / 1024, |
183 | | statm_.resident * page_size_, statm_.resident * page_size_ / 1024 / 1024, |
184 | | statm_.shared * page_size_, statm_.shared * page_size_ / 1024 / 1024, |
185 | | statm_.text * page_size_, statm_.text * page_size_ / 1024 / 1024, |
186 | | statm_.data * page_size_, statm_.data * page_size_ / 1024 / 1024); |
187 | | return result; |
188 | | } |
189 | | |
190 | | private: |
191 | | Status ReadStatM() { |
192 | | std::ifstream input_file(statm_path_.c_str()); |
193 | | if (!input_file) { |
194 | | return STATUS_FORMAT(IOError, "Could not open $0: $1", statm_path_, strerror(errno)); |
195 | | } |
196 | | |
197 | | input_file >> statm_.size; |
198 | | if (!input_file.good()) { |
199 | | return STATUS_FORMAT(IOError, "Error reading total program size from $0", statm_path_); |
200 | | } |
201 | | |
202 | | input_file >> statm_.resident; |
203 | | if (!input_file.good()) { |
204 | | return STATUS_FORMAT(IOError, "Error reading resident set size from $0", statm_path_); |
205 | | } |
206 | | |
207 | | input_file >> statm_.shared; |
208 | | if (!input_file.good()) { |
209 | | return STATUS_FORMAT(IOError, "Error reading number of resident shared pages from $0", |
210 | | statm_path_); |
211 | | } |
212 | | |
213 | | input_file >> statm_.text; |
214 | | if (!input_file.good()) { |
215 | | return STATUS_FORMAT(IOError, "Error reading text (code) size from $0", statm_path_); |
216 | | } |
217 | | |
218 | | size_t lib_ignored; |
219 | | input_file >> lib_ignored; |
220 | | if (!input_file.good()) { |
221 | | return STATUS_FORMAT(IOError, "Error reading library size (unused) from $0", statm_path_); |
222 | | } |
223 | | |
224 | | input_file >> statm_.data; |
225 | | if (!input_file.good()) { |
226 | | return STATUS_FORMAT(IOError, "Error reading data + stack size from $0", statm_path_); |
227 | | } |
228 | | |
229 | | // No need to read dt, which is always 0. |
230 | | |
231 | | return Status::OK(); |
232 | | } |
233 | | |
234 | | const std::string statm_path_; |
235 | | StatM statm_; |
236 | | size_t page_size_; |
237 | | }; |
238 | | #endif |
239 | | |
240 | | #ifdef __APPLE__ |
241 | | class DarwinTotalMemWatcher : public TotalMemWatcher { |
242 | | public: |
243 | 0 | ~DarwinTotalMemWatcher() {} |
244 | 130k | Status Check() override { |
245 | 130k | return Status::OK(); |
246 | 130k | } |
247 | | }; |
248 | | #endif |
249 | | |
250 | 4.93k | std::unique_ptr<TotalMemWatcher> TotalMemWatcher::Create() { |
251 | | #if defined(__linux__) |
252 | | return std::make_unique<LinuxTotalMemWatcher>(); |
253 | | #elif defined(__APPLE__) |
254 | 4.93k | return std::make_unique<DarwinTotalMemWatcher>(); |
255 | | #else |
256 | | #error Unknown platform |
257 | | #endif |
258 | 4.93k | } |
259 | | |
260 | | } // namespace server |
261 | | } // namespace yb |