YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/server/total_mem_watcher.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#if defined(__linux__)
15
#include <unistd.h>
16
#endif
17
18
#include <fstream>
19
#include <chrono>
20
#include <thread>
21
22
#include "yb/server/total_mem_watcher.h"
23
#include "yb/util/format.h"
24
#include "yb/util/logging.h"
25
#include "yb/util/mem_tracker.h"
26
#include "yb/util/memory/memory.h"
27
#include "yb/util/scope_exit.h"
28
#include "yb/util/status_format.h"
29
30
#ifdef TCMALLOC_ENABLED
31
#include <gperftools/malloc_extension.h>
32
#endif
33
34
using namespace std::literals;
35
36
// Default for --memory_limit_termination_threshold_pct. Sanitizer builds get a larger
// default (TSAN: 500, ASAN: 300) than regular builds (200).
#if defined(THREAD_SANITIZER)
constexpr int kDefaultMemoryLimitTerminationPercent = 500;
#elif defined(ADDRESS_SANITIZER)
constexpr int kDefaultMemoryLimitTerminationPercent = 300;
#else
constexpr int kDefaultMemoryLimitTerminationPercent = 200;
#endif
43
44
// Percentage of the root memory tracker limit at which the process terminates itself.
DEFINE_int32(
    memory_limit_termination_threshold_pct, kDefaultMemoryLimitTerminationPercent,
    "If the RSS (resident set size) of the program reaches this percentage of the root memory "
    "tracker limit, the program will exit. RSS is measured using operating system means, not "
    "the memory allocator. Set to 0 to disable this behavior.");
48
49
// Polling period of the memory monitoring loop, in milliseconds.
DEFINE_int32(
    total_mem_watcher_interval_millis, 1000,
    "Interval in milliseconds between checking the total memory usage of the current process as "
    "seen by the operating system, and deciding whether to terminate in case of excessive memory "
    "consumption.");
53
54
namespace yb {
55
namespace server {
56
57
6.93k
// Derives the RSS termination limit from the root memory tracker limit and
// --memory_limit_termination_threshold_pct. A non-positive percentage leaves the limit at 0.
TotalMemWatcher::TotalMemWatcher() {
  rss_termination_limit_bytes_ = 0;
  if (FLAGS_memory_limit_termination_threshold_pct > 0) {
    const auto root_limit_bytes = MemTracker::GetRootTracker()->limit();
    rss_termination_limit_bytes_ =
        (root_limit_bytes * FLAGS_memory_limit_termination_threshold_pct) / 100;
  }
}
66
67
0
// Nothing to release explicitly.
TotalMemWatcher::~TotalMemWatcher() = default;
69
70
void TotalMemWatcher::MemoryMonitoringLoop(
71
    std::function<void()> shutdown_fn,
72
6.93k
    std::function<bool()> is_shutdown_finished_fn) {
73
6.93k
  if (FLAGS_memory_limit_termination_threshold_pct > 0) {
74
6.93k
    int64_t root_tracker_limit = MemTracker::GetRootTracker()->limit();
75
6.93k
    LOG(INFO) << "Root memtracker limit: " << root_tracker_limit << " ("
76
6.93k
              << (root_tracker_limit / 1024 / 1024) << " MiB); this server will stop if memory "
77
6.93k
              << "usage exceeds " << FLAGS_memory_limit_termination_threshold_pct << "% of that: "
78
6.93k
              << rss_termination_limit_bytes_ << " bytes ("
79
6.93k
              << (rss_termination_limit_bytes_ / 1024 / 1024) << " MiB).";
80
6.93k
  }
81
2.41M
  while (true) {
82
2.40M
    std::this_thread::sleep_for(1ms * FLAGS_total_mem_watcher_interval_millis);
83
84
2.40M
    Status mem_check_status = Check();
85
2.40M
    if (!mem_check_status.ok()) {
86
0
      YB_LOG_EVERY_N_SECS(WARNING, 10)
87
0
          << "Failed to check total memory usage: " << mem_check_status.ToString();
88
0
      continue;
89
0
    }
90
2.40M
    std::string termination_explanation = GetTerminationExplanation();
91
2.40M
    if (!termination_explanation.empty()) {
92
0
      LOG(ERROR) << "Memory usage exceeded configured limit, terminating the process: "
93
0
                 << termination_explanation << "\nDetails:\n"
94
0
                 << GetMemoryUsageDetails();
95
0
      shutdown_fn();
96
0
      const int kMaxSecToWait = 10;
97
0
      for (int secondsLeft = kMaxSecToWait;
98
0
           secondsLeft > 0 && !is_shutdown_finished_fn();
99
0
           --secondsLeft) {
100
0
        LOG(INFO) << "Waiting for server to shut down (will wait up to " << secondsLeft
101
0
                  << " seconds)";
102
0
        std::this_thread::sleep_for(1s);
103
0
      }
104
0
      LOG(WARNING) << "Server is exiting";
105
0
      return;
106
0
    }
107
2.40M
  }
108
6.93k
}
109
110
#ifdef __linux__
111
112
// In-memory copy of the /proc/[pid]/statm fields (Linux) that this file reads.
// Every value is a number of pages, not a number of bytes.
//
// Field descriptions follow http://man7.org/linux/man-pages/man5/proc.5.html
struct StatM {
  // Field 1, "size": total program size (same as VmSize in /proc/[pid]/status).
  int64_t size{0};

  // Field 2, "resident": resident set size (same as VmRSS in /proc/[pid]/status).
  int64_t resident{0};

  // Field 3, "shared": number of resident shared pages, i.e. backed by a file
  // (same as RssFile+RssShmem in /proc/[pid]/status).
  int64_t shared{0};

  // Field 4, "text": text (code).
  int64_t text{0};

  // Field 5, "lib": library (unused since Linux 2.6; always 0), not stored.

  // Field 6, "data": data + stack.
  int64_t data{0};

  // Field 7, "dt": dirty pages (unused since Linux 2.6; always 0), not stored.
};
139
140
class LinuxTotalMemWatcher : public TotalMemWatcher {
141
 public:
142
  LinuxTotalMemWatcher()
143
      : statm_path_(Format("/proc/$0/statm", getpid())),
144
        page_size_(sysconf(_SC_PAGESIZE)) {
145
  }
146
  ~LinuxTotalMemWatcher() {}
147
148
  Status Check() override {
149
    RETURN_NOT_OK(ReadStatM());
150
    return Status::OK();
151
  }
152
153
  std::string GetTerminationExplanation() override {
154
    if (rss_termination_limit_bytes_ == 0) {
155
      return std::string();
156
    }
157
    size_t non_shared_rss_bytes = (statm_.resident - statm_.shared) * page_size_;
158
    if (non_shared_rss_bytes <= rss_termination_limit_bytes_) {
159
      return std::string();
160
    }
161
    return Format("Non-shared RSS ($0 bytes, $1 MiB) has exceeded the configured limit "
162
                  "($2 bytes, $3 MiB)",
163
                  non_shared_rss_bytes, non_shared_rss_bytes / 1024 / 1024,
164
                  rss_termination_limit_bytes_,  rss_termination_limit_bytes_ / 1024 / 1024);
165
  }
166
167
  std::string GetMemoryUsageDetails() override {
168
    std::string result;
169
#ifdef TCMALLOC_ENABLED
170
    result += TcMallocStats();
171
    result += "\n";
172
#endif
173
    result += Format(
174
        "---------------------\n"
175
        "OS-level memory stats\n"
176
        "---------------------\n"
177
        "VmSize:             $0 bytes ($1 MiB)\n"
178
        "VmRSS:              $2 bytes ($3 MiB)\n"
179
        "RssFile + RssShmem: $4 bytes ($5 MiB)\n"
180
        "Text (code):        $6 bytes ($7 MiB)\n"
181
        "Data + stack:       $8 bytes ($9 MiB)",
182
        statm_.size * page_size_,     statm_.size * page_size_ / 1024 / 1024,
183
        statm_.resident * page_size_, statm_.resident * page_size_ / 1024 / 1024,
184
        statm_.shared * page_size_,   statm_.shared * page_size_ / 1024 / 1024,
185
        statm_.text * page_size_,     statm_.text * page_size_ / 1024 / 1024,
186
        statm_.data * page_size_,     statm_.data * page_size_ / 1024 / 1024);
187
    return result;
188
  }
189
190
 private:
191
  Status ReadStatM() {
192
    std::ifstream input_file(statm_path_.c_str());
193
    if (!input_file) {
194
      return STATUS_FORMAT(IOError, "Could not open $0: $1", statm_path_, strerror(errno));
195
    }
196
197
    input_file >> statm_.size;
198
    if (!input_file.good()) {
199
      return STATUS_FORMAT(IOError, "Error reading total program size from $0", statm_path_);
200
    }
201
202
    input_file >> statm_.resident;
203
    if (!input_file.good()) {
204
      return STATUS_FORMAT(IOError, "Error reading resident set size from $0", statm_path_);
205
    }
206
207
    input_file >> statm_.shared;
208
    if (!input_file.good()) {
209
      return STATUS_FORMAT(IOError, "Error reading number of resident shared pages from $0",
210
                           statm_path_);
211
    }
212
213
    input_file >> statm_.text;
214
    if (!input_file.good()) {
215
      return STATUS_FORMAT(IOError, "Error reading text (code) size from $0", statm_path_);
216
    }
217
218
    size_t lib_ignored;
219
    input_file >> lib_ignored;
220
    if (!input_file.good()) {
221
      return STATUS_FORMAT(IOError, "Error reading library size (unused) from $0", statm_path_);
222
    }
223
224
    input_file >> statm_.data;
225
    if (!input_file.good()) {
226
      return STATUS_FORMAT(IOError, "Error reading data + stack size from $0", statm_path_);
227
    }
228
229
    // No need to read dt, which is always 0.
230
231
    return Status::OK();
232
  }
233
234
  const std::string statm_path_;
235
  StatM statm_;
236
  size_t page_size_;
237
};
238
#endif
239
240
#ifdef __APPLE__
241
class DarwinTotalMemWatcher : public TotalMemWatcher {
242
 public:
243
0
  ~DarwinTotalMemWatcher() {}
244
2.40M
  Status Check() override {
245
2.40M
    return Status::OK();
246
2.40M
  }
247
};
248
#endif
249
250
6.93k
std::unique_ptr<TotalMemWatcher> TotalMemWatcher::Create() {
251
#if defined(__linux__)
252
  return std::make_unique<LinuxTotalMemWatcher>();
253
#elif defined(__APPLE__)
254
6.93k
  return std::make_unique<DarwinTotalMemWatcher>();
255
#else
256
#error Unknown platform
257
#endif
258
6.93k
}
259
260
}  // namespace server
261
}  // namespace yb