/Users/deen/code/yugabyte-db/src/yb/tablet/maintenance_manager-test.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #include <memory> |
33 | | #include <mutex> |
34 | | #include <vector> |
35 | | |
36 | | #include <gtest/gtest.h> |
37 | | |
38 | | #include "yb/tablet/maintenance_manager.h" |
39 | | #include "yb/tablet/tablet.pb.h" |
40 | | #include "yb/util/mem_tracker.h" |
41 | | #include "yb/util/metrics.h" |
42 | | #include "yb/util/status_log.h" |
43 | | #include "yb/util/test_macros.h" |
44 | | #include "yb/util/test_util.h" |
45 | | #include "yb/util/thread.h" |
46 | | |
47 | | using yb::tablet::MaintenanceManagerStatusPB; |
48 | | using std::shared_ptr; |
49 | | using std::vector; |
50 | | using strings::Substitute; |
51 | | |
52 | | METRIC_DEFINE_entity(test); |
53 | | METRIC_DEFINE_gauge_uint32(test, maintenance_ops_running, |
54 | | "Number of Maintenance Operations Running", |
55 | | yb::MetricUnit::kMaintenanceOperations, |
56 | | "The number of background maintenance operations currently running."); |
57 | | METRIC_DEFINE_coarse_histogram(test, maintenance_op_duration, |
58 | | "Maintenance Operation Duration", |
59 | | yb::MetricUnit::kSeconds, ""); |
60 | | |
61 | | namespace yb { |
62 | | |
// Value assigned to MaintenanceManager::Options::history_size by the test
// fixture; the completed-ops history test checks against it.
constexpr int kHistorySize = 4;
64 | | |
65 | | class MaintenanceManagerTest : public YBTest { |
66 | | public: |
67 | 5 | MaintenanceManagerTest() { |
68 | 5 | test_tracker_ = MemTracker::CreateTracker(1000, "test"); |
69 | 5 | MaintenanceManager::Options options; |
70 | 5 | options.num_threads = 2; |
71 | 5 | options.polling_interval_ms = 1; |
72 | 5 | options.history_size = kHistorySize; |
73 | 5 | options.parent_mem_tracker = test_tracker_; |
74 | 5 | manager_.reset(new MaintenanceManager(options)); |
75 | 5 | CHECK_OK(manager_->Init()); |
76 | 5 | } |
77 | 5 | ~MaintenanceManagerTest() { |
78 | 5 | manager_->Shutdown(); |
79 | 5 | } |
80 | | |
81 | | protected: |
82 | | shared_ptr<MemTracker> test_tracker_; |
83 | | shared_ptr<MaintenanceManager> manager_; |
84 | | }; |
85 | | |
86 | | // Just create the MaintenanceManager and then shut it down, to make sure |
87 | | // there are no race conditions there. |
88 | 1 | TEST_F(MaintenanceManagerTest, TestCreateAndShutdown) { |
89 | 1 | } |
90 | | |
// Lifecycle states of a TestMaintenanceOp.
enum TestMaintenanceOpState {
  // Not runnable; Prepare() refuses the op until Enable() is called.
  OP_DISABLED = 0,
  // Reported as runnable by UpdateStats(); Prepare() will accept the op.
  OP_RUNNABLE = 1,
  // Prepare() succeeded; Perform() has not completed yet.
  OP_RUNNING = 2,
  // Perform() completed.
  OP_FINISHED = 3
};
97 | | |
98 | | class TestMaintenanceOp : public MaintenanceOp { |
99 | | public: |
100 | | TestMaintenanceOp(const std::string& name, |
101 | | IOUsage io_usage, |
102 | | TestMaintenanceOpState state, |
103 | | const shared_ptr<MemTracker>& tracker) |
104 | | : MaintenanceOp(name, io_usage), |
105 | | state_change_cond_(&lock_), |
106 | | state_(state), |
107 | | consumption_(tracker, 500), |
108 | | logs_retained_bytes_(0), |
109 | | perf_improvement_(0), |
110 | | metric_entity_(METRIC_ENTITY_test.Instantiate(&metric_registry_, "test")), |
111 | | maintenance_op_duration_(METRIC_maintenance_op_duration.Instantiate(metric_entity_)), |
112 | 10 | maintenance_ops_running_(METRIC_maintenance_ops_running.Instantiate(metric_entity_, 0)) { |
113 | 10 | } |
114 | | |
115 | 10 | virtual ~TestMaintenanceOp() {} |
116 | | |
117 | 7 | bool Prepare() override { |
118 | 7 | std::lock_guard<Mutex> guard(lock_); |
119 | 7 | if (state_ != OP_RUNNABLE) { |
120 | 0 | return false; |
121 | 0 | } |
122 | 7 | state_ = OP_RUNNING; |
123 | 7 | state_change_cond_.Broadcast(); |
124 | 7 | DLOG(INFO) << "Prepared op " << name(); |
125 | 7 | return true; |
126 | 7 | } |
127 | | |
128 | 7 | void Perform() override { |
129 | 7 | DLOG(INFO) << "Performing op " << name(); |
130 | 7 | std::lock_guard<Mutex> guard(lock_); |
131 | 7 | CHECK_EQ(OP_RUNNING, state_); |
132 | 7 | state_ = OP_FINISHED; |
133 | 7 | state_change_cond_.Broadcast(); |
134 | 7 | } |
135 | | |
136 | 31 | void UpdateStats(MaintenanceOpStats* stats) override { |
137 | 31 | std::lock_guard<Mutex> guard(lock_); |
138 | 31 | stats->set_runnable(state_ == OP_RUNNABLE); |
139 | 31 | stats->set_ram_anchored(consumption_.consumption()); |
140 | 31 | stats->set_logs_retained_bytes(logs_retained_bytes_); |
141 | 31 | stats->set_perf_improvement(perf_improvement_); |
142 | 31 | } |
143 | | |
144 | 1 | void Enable() { |
145 | 1 | std::lock_guard<Mutex> guard(lock_); |
146 | 1 | DCHECK((state_ == OP_DISABLED) || (state_ == OP_FINISHED)); |
147 | 1 | state_ = OP_RUNNABLE; |
148 | 1 | state_change_cond_.Broadcast(); |
149 | 1 | } |
150 | | |
151 | 2 | void WaitForState(TestMaintenanceOpState state) { |
152 | 2 | std::lock_guard<Mutex> guard(lock_); |
153 | 6 | while (true) { |
154 | 6 | if (state_ == state) { |
155 | 2 | return; |
156 | 2 | } |
157 | 4 | state_change_cond_.Wait(); |
158 | 4 | } |
159 | 2 | } |
160 | | |
161 | 6 | bool WaitForStateWithTimeout(TestMaintenanceOpState state, int ms) { |
162 | 6 | MonoDelta to_wait = MonoDelta::FromMilliseconds(ms); |
163 | 6 | std::lock_guard<Mutex> guard(lock_); |
164 | 16 | while (true) { |
165 | 16 | if (state_ == state) { |
166 | 5 | return true; |
167 | 5 | } |
168 | 11 | if (!state_change_cond_.TimedWait(to_wait)) { |
169 | 1 | return false; |
170 | 1 | } |
171 | 11 | } |
172 | 6 | } |
173 | | |
174 | 11 | void set_ram_anchored(uint64_t ram_anchored) { |
175 | 11 | std::lock_guard<Mutex> guard(lock_); |
176 | 11 | consumption_.Reset(ram_anchored); |
177 | 11 | } |
178 | | |
179 | 3 | void set_logs_retained_bytes(uint64_t logs_retained_bytes) { |
180 | 3 | std::lock_guard<Mutex> guard(lock_); |
181 | 3 | logs_retained_bytes_ = logs_retained_bytes; |
182 | 3 | } |
183 | | |
184 | 5 | void set_perf_improvement(uint64_t perf_improvement) { |
185 | 5 | std::lock_guard<Mutex> guard(lock_); |
186 | 5 | perf_improvement_ = perf_improvement; |
187 | 5 | } |
188 | | |
189 | 7 | scoped_refptr<Histogram> DurationHistogram() const override { |
190 | 7 | return maintenance_op_duration_; |
191 | 7 | } |
192 | | |
193 | 14 | scoped_refptr<AtomicGauge<uint32_t> > RunningGauge() const override { |
194 | 14 | return maintenance_ops_running_; |
195 | 14 | } |
196 | | |
197 | | private: |
198 | | Mutex lock_; |
199 | | ConditionVariable state_change_cond_; |
200 | | enum TestMaintenanceOpState state_; |
201 | | ScopedTrackedConsumption consumption_; |
202 | | uint64_t logs_retained_bytes_; |
203 | | uint64_t perf_improvement_; |
204 | | MetricRegistry metric_registry_; |
205 | | scoped_refptr<MetricEntity> metric_entity_; |
206 | | scoped_refptr<Histogram> maintenance_op_duration_; |
207 | | scoped_refptr<AtomicGauge<uint32_t> > maintenance_ops_running_; |
208 | | }; |
209 | | |
210 | | // Create an op and wait for it to start running. Unregister it while it is |
211 | | // running and verify that UnregisterOp waits for it to finish before |
212 | | // proceeding. |
213 | 1 | TEST_F(MaintenanceManagerTest, TestRegisterUnregister) { |
214 | 1 | TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE, OP_DISABLED, test_tracker_); |
215 | 1 | op1.set_ram_anchored(1001); |
216 | 1 | manager_->RegisterOp(&op1); |
217 | 1 | scoped_refptr<yb::Thread> thread; |
218 | 1 | CHECK_OK(Thread::Create( |
219 | 1 | "TestThread", "TestRegisterUnregister", std::bind(&TestMaintenanceOp::Enable, &op1), |
220 | 1 | &thread)); |
221 | 1 | op1.WaitForState(OP_FINISHED); |
222 | 1 | manager_->UnregisterOp(&op1); |
223 | 1 | CHECK_OK(ThreadJoiner(thread.get()).Join()); |
224 | 1 | } |
225 | | |
226 | | // Test that we'll run an operation that doesn't improve performance when memory |
227 | | // pressure gets high. |
228 | 1 | TEST_F(MaintenanceManagerTest, TestMemoryPressure) { |
229 | 1 | TestMaintenanceOp op("op", MaintenanceOp::HIGH_IO_USAGE, OP_RUNNABLE, test_tracker_); |
230 | 1 | op.set_ram_anchored(100); |
231 | 1 | manager_->RegisterOp(&op); |
232 | | |
233 | | // At first, we don't want to run this, since there is no perf_improvement. |
234 | 1 | CHECK_EQ(false, op.WaitForStateWithTimeout(OP_FINISHED, 20)); |
235 | | |
236 | | // set the ram_anchored by the high mem op so high that we'll have to run it. |
237 | 1 | scoped_refptr<yb::Thread> thread; |
238 | 1 | CHECK_OK(Thread::Create( |
239 | 1 | "TestThread", "MaintenanceManagerTest", |
240 | 1 | std::bind(&TestMaintenanceOp::set_ram_anchored, &op, 1100), &thread)); |
241 | 1 | op.WaitForState(OP_FINISHED); |
242 | 1 | manager_->UnregisterOp(&op); |
243 | 1 | CHECK_OK(ThreadJoiner(thread.get()).Join()); |
244 | 1 | } |
245 | | |
246 | | // Test that ops are prioritized correctly when we add log retention. |
247 | 1 | TEST_F(MaintenanceManagerTest, TestLogRetentionPrioritization) { |
248 | 1 | manager_->Shutdown(); |
249 | | |
250 | 1 | TestMaintenanceOp op1("op1", MaintenanceOp::LOW_IO_USAGE, OP_RUNNABLE, test_tracker_); |
251 | 1 | op1.set_ram_anchored(0); |
252 | 1 | op1.set_logs_retained_bytes(100); |
253 | | |
254 | 1 | TestMaintenanceOp op2("op2", MaintenanceOp::HIGH_IO_USAGE, OP_RUNNABLE, test_tracker_); |
255 | 1 | op2.set_ram_anchored(100); |
256 | 1 | op2.set_logs_retained_bytes(100); |
257 | | |
258 | 1 | TestMaintenanceOp op3("op3", MaintenanceOp::HIGH_IO_USAGE, OP_RUNNABLE, test_tracker_); |
259 | 1 | op3.set_ram_anchored(200); |
260 | 1 | op3.set_logs_retained_bytes(100); |
261 | | |
262 | 1 | manager_->RegisterOp(&op1); |
263 | 1 | manager_->RegisterOp(&op2); |
264 | 1 | manager_->RegisterOp(&op3); |
265 | | |
266 | | // We want to do the low IO op first since it clears up some log retention, i.e. - op1 |
267 | | // Then we find the op clears the most log retention and ram, i.e. - op3 |
268 | 3 | for (auto* op : { &op1, &op3, &op2 }) { |
269 | 3 | { |
270 | 3 | std::lock_guard<std::mutex> lock(manager_->mutex_); |
271 | | |
272 | 3 | ASSERT_EQ(op, manager_->FindBestOp()); |
273 | 3 | } |
274 | | |
275 | 3 | manager_->UnregisterOp(op); |
276 | 3 | } |
277 | 1 | } |
278 | | |
279 | | // Test adding operations and make sure that the history of recently completed operations |
280 | | // is correct in that it wraps around and doesn't grow. |
281 | 1 | TEST_F(MaintenanceManagerTest, TestCompletedOpsHistory) { |
282 | 6 | for (int i = 0; i < 5; i++) { |
283 | 5 | string name = Substitute("op$0", i); |
284 | 5 | TestMaintenanceOp op(name, MaintenanceOp::HIGH_IO_USAGE, OP_RUNNABLE, test_tracker_); |
285 | 5 | op.set_perf_improvement(1); |
286 | 5 | op.set_ram_anchored(100); |
287 | 5 | manager_->RegisterOp(&op); |
288 | | |
289 | 5 | ASSERT_TRUE(op.WaitForStateWithTimeout(OP_FINISHED, 200)); |
290 | 5 | manager_->UnregisterOp(&op); |
291 | | |
292 | 5 | MaintenanceManagerStatusPB status_pb; |
293 | 5 | manager_->GetMaintenanceManagerStatusDump(&status_pb); |
294 | | // The size should be at most the history_size. |
295 | 5 | ASSERT_GE(kHistorySize, status_pb.completed_operations_size()); |
296 | | // See that we have the right name, even if we wrap around. |
297 | 5 | ASSERT_EQ(name, status_pb.completed_operations(i % 4).name()); |
298 | 5 | } |
299 | 1 | } |
300 | | |
301 | | } // namespace yb |