YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/util/failure_detector-test.cc
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include <string>
34
35
#include <gtest/gtest.h>
36
37
#include "yb/gutil/bind.h"
38
#include "yb/util/countdown_latch.h"
39
#include "yb/util/failure_detector.h"
40
#include "yb/util/locks.h"
41
#include "yb/util/monotime.h"
42
#include "yb/util/status.h"
43
#include "yb/util/test_macros.h"
44
#include "yb/util/test_util.h"
45
46
namespace yb {
47
48
// How often (in milliseconds) we expect a node to heartbeat to assert its "aliveness".
49
static const int kExpectedHeartbeatPeriodMillis = 100;
50
51
// Number of missed heartbeats after which the failure detector (FD) considers the node dead.
52
static const int kMaxMissedHeartbeats = 2;
53
54
// RandomizedFailureMonitor parameters: check for failures every 100ms on average +/- 10ms.
55
static const int kFailureMonitorMeanMillis = 100;
56
static const int kFailureMonitorStddevMillis = 10;
57
58
static const char* kNodeName = "node-1";  // Name of the node the test tracks in the detector.
59
static const char* kTestTabletName = "test-tablet";  // Name passed to (Un)MonitorFailureDetector().
60
61
class FailureDetectorTest : public YBTest {  // Fixture: owns the failure monitor and a latch.
62
 public:
63
  FailureDetectorTest()
64
    : YBTest(),
65
      latch_(1),  // A single CountDown(), i.e. one reported failure, brings the count to zero.
66
      monitor_(new RandomizedFailureMonitor(SeedRandom(),
67
                                            kFailureMonitorMeanMillis,
68
1
                                            kFailureMonitorStddevMillis)) {
69
1
  }
70
71
1
  void FailureFunction(const std::string& name, const Status& status) {  // Failure callback.
72
1
    LOG(INFO) << "Detected failure of " << name;  // `status` is not inspected.
73
1
    latch_.CountDown();  // Signals WaitForFailure().
74
1
  }
75
76
 protected:
77
1
  void WaitForFailure() {  // Waits on the latch that FailureFunction() counts down.
78
1
    latch_.Wait();
79
1
  }
80
81
  CountDownLatch latch_;  // Initialized to 1; counted down by FailureFunction().
82
  std::unique_ptr<RandomizedFailureMonitor> monitor_;  // Created in the constructor.
83
};
84
85
// Tests that we can track a node, that while we notify that we've received messages from
86
// that node everything is ok and that once we stop doing so the failure detection function
87
// gets called.
88
1
TEST_F(FailureDetectorTest, TestDetectsFailure) {
89
1
  ASSERT_OK(monitor_->Start());
90
91
1
  scoped_refptr<FailureDetector> detector(new TimedFailureDetector(
92
1
      MonoDelta::FromMilliseconds(kExpectedHeartbeatPeriodMillis * kMaxMissedHeartbeats)));
93
94
1
  ASSERT_OK(monitor_->MonitorFailureDetector(kTestTabletName, detector));
95
1
  ASSERT_FALSE(detector->IsTracking(kNodeName));  // Not tracked until Track() is called.
96
1
  ASSERT_OK(detector->Track(kNodeName,
97
1
                            MonoTime::Now(),
98
1
                            Bind(&FailureDetectorTest::FailureFunction, Unretained(this))));
99
1
  ASSERT_TRUE(detector->IsTracking(kNodeName));
100
101
1
  const int kNumPeriodsToWait = 4;  // Num heartbeat periods during which we keep reporting in.
102
1
  const int kUpdatesPerPeriod = 10; // Num updates we give per period to minimize test flakiness.
103
104
41
  for (int i = 0; i < kNumPeriodsToWait * kUpdatesPerPeriod; i++) {
105
    // Report in (heartbeat) to the detector.
106
40
    ASSERT_OK(detector->MessageFrom(kNodeName, MonoTime::Now()));
107
108
    // Sleep for only a fraction (1/kUpdatesPerPeriod) of the heartbeat period, to minimize flakiness.
109
40
    SleepFor(MonoDelta::FromMilliseconds(kExpectedHeartbeatPeriodMillis / kUpdatesPerPeriod));
110
111
    // The latch shouldn't have counted down, since the node's been reporting that
112
    // it's still alive.
113
40
    ASSERT_EQ(1, latch_.count());
114
40
  }
115
116
  // Once we stop reporting that the node is alive, the failure callback is eventually
117
  // triggered and we exit.
118
1
  WaitForFailure();
119
120
1
  ASSERT_OK(detector->UnTrack(kNodeName));
121
1
  ASSERT_FALSE(detector->IsTracking(kNodeName));
122
123
1
  ASSERT_OK(monitor_->UnmonitorFailureDetector(kTestTabletName));
124
1
  monitor_->Shutdown();
125
1
}
126
127
}  // namespace yb