/Users/deen/code/yugabyte-db/src/yb/util/failure_detector-test.cc
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include <string> |
34 | | |
35 | | #include <gtest/gtest.h> |
36 | | |
37 | | #include "yb/gutil/bind.h" |
38 | | #include "yb/util/countdown_latch.h" |
39 | | #include "yb/util/failure_detector.h" |
40 | | #include "yb/util/locks.h" |
41 | | #include "yb/util/monotime.h" |
42 | | #include "yb/util/status.h" |
43 | | #include "yb/util/test_macros.h" |
44 | | #include "yb/util/test_util.h" |
45 | | |
46 | | namespace yb { |
47 | | |
48 | | // Expected interval, in milliseconds, between heartbeats a node sends to assert its "aliveness". |
49 | | static const int kExpectedHeartbeatPeriodMillis = 100; |
50 | |
51 | | // Number of missed heartbeat periods after which the failure detector (FD) considers the node dead. |
52 | | static const int kMaxMissedHeartbeats = 2; |
53 | |
54 | | // The failure monitor checks for failures every 100ms on average, with a standard deviation of 10ms. |
55 | | static const int kFailureMonitorMeanMillis = 100; |
56 | | static const int kFailureMonitorStddevMillis = 10; |
57 | |
58 | | static const char* kNodeName = "node-1"; |
59 | | static const char* kTestTabletName = "test-tablet"; |
60 | | |
61 | | class FailureDetectorTest : public YBTest { | // Test fixture: owns a randomized failure monitor and a latch counted down by the failure callback.
62 | | public: |
63 | | FailureDetectorTest() |
64 | | : YBTest(), |
65 | | latch_(1), | // The latch starts at a count of 1; the test asserts this count until a failure is reported.
66 | | monitor_(new RandomizedFailureMonitor(SeedRandom(), |
67 | | kFailureMonitorMeanMillis, |
68 | 1 | kFailureMonitorStddevMillis)) {
69 | 1 | }
70 | |
71 | 1 | void FailureFunction(const std::string& name, const Status& status) { // Failure callback passed to Track(): logs `name` and counts down latch_; `status` is not used.
72 | 1 | LOG(INFO) << "Detected failure of " << name;
73 | 1 | latch_.CountDown();
74 | 1 | }
75 | |
76 | | protected: |
77 | 1 | void WaitForFailure() { // Waits on latch_, which FailureFunction() counts down.
78 | 1 | latch_.Wait();
79 | 1 | }
80 | |
81 | | CountDownLatch latch_; |
82 | | std::unique_ptr<RandomizedFailureMonitor> monitor_; |
83 | | }; |
84 | | |
85 | | // Tests that we can track a node, that while we keep notifying that we've received messages from |
86 | | // that node no failure is reported, and that once we stop doing so the failure detection function |
87 | | // gets called. |
88 | 1 | TEST_F(FailureDetectorTest, TestDetectsFailure) {
89 | 1 | ASSERT_OK(monitor_->Start());
90 | |
91 | 1 | scoped_refptr<FailureDetector> detector(new TimedFailureDetector(
92 | 1 | MonoDelta::FromMilliseconds(kExpectedHeartbeatPeriodMillis * kMaxMissedHeartbeats))); // Delta passed to the detector: 100ms * 2 = 200ms.
93 | |
94 | 1 | ASSERT_OK(monitor_->MonitorFailureDetector(kTestTabletName, detector));
95 | 1 | ASSERT_FALSE(detector->IsTracking(kNodeName));
96 | 1 | ASSERT_OK(detector->Track(kNodeName,
97 | 1 | MonoTime::Now(),
98 | 1 | Bind(&FailureDetectorTest::FailureFunction, Unretained(this))));
99 | 1 | ASSERT_TRUE(detector->IsTracking(kNodeName));
100 | |
101 | 1 | const int kNumPeriodsToWait = 4; // Num heartbeat periods during which we keep heartbeating and expect no failure.
102 | 1 | const int kUpdatesPerPeriod = 10; // Num updates we give per period to minimize test flakiness.
103 | |
104 | 41 | for (int i = 0; i < kNumPeriodsToWait * kUpdatesPerPeriod; i++) {
105 | | // Report in (heartbeat) to the detector. |
106 | 40 | ASSERT_OK(detector->MessageFrom(kNodeName, MonoTime::Now()));
107 | |
108 | | // We sleep for a fraction of the heartbeat period (100ms / 10 = 10ms), to minimize test flakiness. |
109 | 40 | SleepFor(MonoDelta::FromMilliseconds(kExpectedHeartbeatPeriodMillis / kUpdatesPerPeriod));
110 | |
111 | | // The latch shouldn't have counted down, since the node's been reporting that |
112 | | // it's still alive. |
113 | 40 | ASSERT_EQ(1, latch_.count());
114 | 40 | }
115 | |
116 | | // Once we stop reporting that the node is alive, the failure callback is eventually |
117 | | // triggered; it counts down the latch, which lets this wait return. |
118 | 1 | WaitForFailure();
119 | |
120 | 1 | ASSERT_OK(detector->UnTrack(kNodeName));
121 | 1 | ASSERT_FALSE(detector->IsTracking(kNodeName));
122 | |
123 | 1 | ASSERT_OK(monitor_->UnmonitorFailureDetector(kTestTabletName));
124 | 1 | monitor_->Shutdown();
125 | 1 | }
126 | | |
127 | | } // namespace yb |