YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/randomized_docdb-test.cc
// Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#include <algorithm>
#include <utility>
#include <vector>

#include "yb/docdb/docdb.h"
#include "yb/docdb/docdb_test_base.h"
#include "yb/docdb/docdb_test_util.h"
#include "yb/util/scope_exit.h"

// Use a lower default number of tests when running on ASAN/TSAN so as not to exceed the test time
// limit.
#if defined(THREAD_SANITIZER) || defined(ADDRESS_SANITIZER)
static constexpr int kDefaultTestNumIter = 2000;
static constexpr int kDefaultSnapshotVerificationTestNumIter = 2000;
#else
static constexpr int kDefaultTestNumIter = 20000;
static constexpr int kDefaultSnapshotVerificationTestNumIter = 15000;
#endif

DEFINE_int32(snapshot_verification_test_num_iter, kDefaultSnapshotVerificationTestNumIter,
             "Number of iterations for randomized history cleanup DocDB tests.");

DEFINE_int32(test_num_iter, kDefaultTestNumIter,
             "Number of iterations for randomized DocDB tests, except those involving logical "
             "DocDB snapshots.");

constexpr int kNumDocKeys = 50;
constexpr int kNumUniqueSubKeys = 500;

using std::vector;
using std::pair;
using std::sort;

namespace yb {
namespace docdb {

namespace {

void RemoveEntriesWithSecondComponentHigherThan(vector<pair<int, int>> *v,
                                                int max_second_component) {
  // See https://en.wikipedia.org/wiki/Erase-remove_idiom.
  v->erase(
      std::remove_if(v->begin(), v->end(), [&](const pair<int, int>& p) {
          return p.second > max_second_component;
      }),
      v->end());
}

}  // anonymous namespace
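
// Test fixture for randomized DocDB tests driven by DocDBLoadGenerator. It is parameterized on
// whether intents are resolved during reads (ResolveIntentsDuringRead); some test variants also
// capture logical snapshots and verify the effects of history cleanup.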
class RandomizedDocDBTest : public DocDBTestBase,
    public ::testing::WithParamInterface<ResolveIntentsDuringRead> {
 protected:
  RandomizedDocDBTest() : verify_history_cleanup_(true) {
  }

  void Init(const UseHash use_hash) {
    // This test was created when this was the only supported init marker behavior.
    SetInitMarkerBehavior(InitMarkerBehavior::kRequired);
    if (load_gen_.get() != nullptr) {
      ClearLogicalSnapshots();
      ASSERT_OK(DestroyRocksDB());
      ASSERT_OK(ReopenRocksDB());
    }
    load_gen_.reset(new DocDBLoadGenerator(this, kNumDocKeys, kNumUniqueSubKeys, use_hash,
        resolve_intents_));
    SeedRandom();
  }

  ~RandomizedDocDBTest() override {}
  void RunWorkloadWithSnaphots(bool enable_history_cleanup);

  int num_iterations_divider() {
    // The read path is slower when trying to resolve intents, so we reduce the number of
    // iterations in order to respect the timeout.
    return resolve_intents_ ? 2 : 1;
  }
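
  // Performs a full compaction that cleans up history before cleanup_ht, logging how long the
  // compaction took and accumulating it into total_compaction_time_ms_.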
  void CompactionWithCleanup(HybridTime cleanup_ht) {
    const auto start_time = MonoTime::Now();
    ASSERT_NO_FATALS(FullyCompactHistoryBefore(cleanup_ht));
    const auto elapsed_time_ms = (MonoTime::Now() - start_time).ToMilliseconds();
    total_compaction_time_ms_ += elapsed_time_ms;
    LOG(INFO) << "Compaction with cleanup_ht=" << cleanup_ht << " took "
              << elapsed_time_ms << " ms, all compactions so far: "
              << total_compaction_time_ms_ << " ms";
  }

  ResolveIntentsDuringRead resolve_intents_ = ResolveIntentsDuringRead::kTrue;
  bool verify_history_cleanup_;
  std::unique_ptr<DocDBLoadGenerator> load_gen_;
  int64_t total_compaction_time_ms_ = 0;
};
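
// Runs the randomized workload, periodically flushing RocksDB, capturing logical DocDB snapshots,
// and verifying randomly chosen snapshots against the current state. When history cleanup is
// enabled, compactions are performed at random history cutoff hybrid_times and we check which
// snapshots remain recoverable afterwards.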
void RandomizedDocDBTest::RunWorkloadWithSnaphots(bool enable_history_cleanup) {
  auto scope_exit = ScopeExit([this]() {
    LOG(INFO) << "Total compaction time: " << total_compaction_time_ms_ << " ms";
  });
  // We start doing snapshots every other iteration, but make it less frequent after a number of
  // iterations (kIterationToSwitchToInfrequentSnapshots to be precise, see the loop below).
  int snapshot_frequency = 2;
  int verification_frequency = 1;

  constexpr int kEventualSnapshotFrequency = 1000;
  constexpr int kEventualVerificationFrequency = 250;
  constexpr int kFlushFrequency = 100;
  constexpr int kIterationToSwitchToInfrequentSnapshots = 300;

  constexpr int kHistoryCleanupChance = 500;

  vector<pair<int, int>> cleanup_ht_and_iteration;

  HybridTime max_history_cleanup_ht(0);

  const int kNumIter = FLAGS_snapshot_verification_test_num_iter / num_iterations_divider();

  while (load_gen_->next_iteration() <= kNumIter) {
    const int current_iteration = load_gen_->next_iteration();
    if (current_iteration == kIterationToSwitchToInfrequentSnapshots) {
      // This is where we make snapshot/verification less frequent so the test can actually finish.
      snapshot_frequency = kEventualSnapshotFrequency;
      verification_frequency = kEventualVerificationFrequency;
    }
    ASSERT_NO_FATALS(load_gen_->PerformOperation()) << "at iteration " << current_iteration;
    if (current_iteration % kFlushFrequency == 0) {
      ASSERT_OK(FlushRocksDbAndWait());
    }
    if (current_iteration % snapshot_frequency == 0) {
      load_gen_->CaptureDocDbSnapshot();
    }
    if (current_iteration % verification_frequency == 0) {
      ASSERT_NO_FATALS(load_gen_->VerifyRandomDocDbSnapshot());
    }

    if (enable_history_cleanup && load_gen_->NextRandomInt(kHistoryCleanupChance) == 0) {
      // Pick a random cleanup hybrid_time from 0 to the last operation hybrid_time, inclusive.
      const HybridTime cleanup_ht = HybridTime(
          load_gen_->NextRandom() % (load_gen_->last_operation_ht().value() + 1));
      if (cleanup_ht.CompareTo(max_history_cleanup_ht) <= 0) {
        // We are performing cleanup at an old hybrid_time, and don't expect it to have any effect.
        InMemDocDbState snapshot_before_cleanup;
        snapshot_before_cleanup.CaptureAt(doc_db(), HybridTime::kMax);
        ASSERT_NO_FATALS(CompactionWithCleanup(cleanup_ht));

        InMemDocDbState snapshot_after_cleanup;
        snapshot_after_cleanup.CaptureAt(doc_db(), HybridTime::kMax);
        ASSERT_TRUE(snapshot_after_cleanup.EqualsAndLogDiff(snapshot_before_cleanup));
      } else {
        max_history_cleanup_ht = cleanup_ht;
        cleanup_ht_and_iteration.emplace_back(cleanup_ht.value(),
                                              load_gen_->last_operation_ht().value());
        ASSERT_NO_FATALS(CompactionWithCleanup(cleanup_ht));

        // We expect some snapshots at hybrid_times earlier than cleanup_ht to no longer be
        // recoverable.
        ASSERT_NO_FATALS(load_gen_->CheckIfOldestSnapshotIsStillValid(cleanup_ht));

        load_gen_->RemoveSnapshotsBefore(cleanup_ht);

        // Now that we've removed all snapshots that could have been affected by history cleanup,
        // we expect the oldest remaining snapshot to match the RocksDB-backed DocDB state.
        ASSERT_NO_FATALS(load_gen_->VerifyOldestSnapshot());
      }
    }
  }

  LOG(INFO) << "Finished the primary part of the randomized DocDB test.\n"
            << "  enable_history_cleanup: " << enable_history_cleanup << "\n"
            << "  last_operation_ht: " << load_gen_->last_operation_ht() << "\n"
            << "  max_history_cleanup_ht: " << max_history_cleanup_ht.value();

  if (!enable_history_cleanup || !verify_history_cleanup_) return;

  if (FLAGS_snapshot_verification_test_num_iter > kDefaultSnapshotVerificationTestNumIter) {
    LOG(WARNING)
        << "Number of iterations specified for the history cleanup test is greater than "
        << kDefaultSnapshotVerificationTestNumIter << ", and therefore this test is "
        << "NOT CHECKING THAT OLD SNAPSHOTS ARE INVALIDATED BY HISTORY CLEANUP.";
    return;
  }

  // Verify that some old snapshots got invalidated by history cleanup at a higher hybrid_time.

  // First we verify that history cleanup is happening at expected times, so that we can validate
  // that the maximum history cleanup hybrid_time (max_history_cleanup_ht) is as expected.

  // An entry (t, i) here says that after iteration i there was a history cleanup with a history
  // cutoff hybrid_time of t. The iteration here corresponds one to one to the operation
  // hybrid_time. We always have t < i because we perform cleanup at a past hybrid_time,
  // not a future one.
  //                                                        cleanup_ht | iteration (last op. ts.)
  //
  // These numbers depend on DocDB load generator parameters (random seed, frequencies of various
  // events) and will need to be replaced if those change. Ideally, we should come up with a way to
  // either re-generate those quickly, or not rely on hard-coded expected results for validation.
  // However, we do handle variations in the number of iterations here, up to a certain limit.
  vector<pair<int, int>> expected_cleanup_ht_and_iteration{{1,           85},
                                                           {40,          121},
                                                           {46,          255},
                                                           {245,         484},
                                                           {774,         2246},
                                                           {2341,        3417},
                                                           {2741,        5248},
                                                           {4762,        5652},
                                                           {5049,        6377},
                                                           {6027,        7573},
                                                           {8423,        9531},
                                                           {8829,        10413},
                                                           {10061,       10610},
                                                           {13137,       13920}};

  // Remove expected (cleanup_hybrid_time, iteration) entries that don't apply to our test run in
  // case we did fewer than 15000 iterations.
  RemoveEntriesWithSecondComponentHigherThan(
      &expected_cleanup_ht_and_iteration,
      narrow_cast<int>(load_gen_->last_operation_ht().value()));

  ASSERT_FALSE(expected_cleanup_ht_and_iteration.empty());
  ASSERT_EQ(expected_cleanup_ht_and_iteration, cleanup_ht_and_iteration);

  if (kNumIter > 2000) {
    ASSERT_GT(load_gen_->num_divergent_old_snapshot(), 0);
  } else {
    ASSERT_EQ(0, load_gen_->num_divergent_old_snapshot());
  }

  // Expected hybrid_times of snapshots invalidated by history cleanup, and actual history cutoff
  // hybrid_times at which that happened. This is deterministic, but highly dependent on the
  // parameters at the top of this test.
  vector<pair<int, int>> expected_divergent_snapshot_and_cleanup_ht{
      {298,   774},
      {2000,  2341},
      {4000,  4762},
      {5000,  5049},
      {6000,  6027},
      {8000,  8423},
      {10000, 10061},
      {13000, 13137}
  };

  // Remove entries that don't apply to us because we did not get to do a cleanup at that
  // hybrid_time.
  RemoveEntriesWithSecondComponentHigherThan(&expected_divergent_snapshot_and_cleanup_ht,
                                             narrow_cast<int>(max_history_cleanup_ht.value()));

  ASSERT_EQ(expected_divergent_snapshot_and_cleanup_ht,
            load_gen_->divergent_snapshot_ht_and_cleanup_ht());
}

TEST_P(RandomizedDocDBTest, TestNoFlush) {
  resolve_intents_ = GetParam();
  const int num_iter = FLAGS_test_num_iter / num_iterations_divider();
  for (auto use_hash : UseHash::kValues) {
    Init(use_hash);
    while (load_gen_->next_iteration() <= num_iter) {
      ASSERT_NO_FATALS(load_gen_->PerformOperation()) << "at iteration "
                                                      << load_gen_->next_iteration();
    }
  }
}

TEST_P(RandomizedDocDBTest, TestWithFlush) {
  resolve_intents_ = GetParam();
  const int num_iter = FLAGS_test_num_iter / num_iterations_divider();
  for (auto use_hash : UseHash::kValues) {
    Init(use_hash);
    while (load_gen_->next_iteration() <= num_iter) {
      ASSERT_NO_FATALS(load_gen_->PerformOperation()) << "at iteration "
                                                      << load_gen_->next_iteration();
      if (load_gen_->next_iteration() % 250 == 0) {
        ASSERT_NO_FATALS(load_gen_->FlushRocksDB());
      }
    }
  }
}

TEST_P(RandomizedDocDBTest, Snapshots) {
  resolve_intents_ = GetParam();
  for (auto use_hash : UseHash::kValues) {
    Init(use_hash);
    RunWorkloadWithSnaphots(/* enable_history_cleanup = */ false);
  }
}

TEST_P(RandomizedDocDBTest, SnapshotsWithHistoryCleanup) {
  resolve_intents_ = GetParam();
  for (auto use_hash : UseHash::kValues) {
    Init(use_hash);
    // Don't verify history cleanup when we use hashed components, since the hardcoded expected
    // values don't work for that use case.
    // TODO: update the expected values or find a better way to test it.
    verify_history_cleanup_ = !use_hash;
    RunWorkloadWithSnaphots(/* enable_history_cleanup = */ true);
  }
}
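
// Run each TEST_P above both with and without resolving intents during reads.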
INSTANTIATE_TEST_CASE_P(bool, RandomizedDocDBTest, ::testing::Values(
    ResolveIntentsDuringRead::kFalse, ResolveIntentsDuringRead::kTrue));

// This is a bit different from SnapshotsWithHistoryCleanup. Here, we perform history cleanup within
// DocDBLoadGenerator::PerformOperation itself, reading the document being modified both before
// and after the history cleanup.
TEST_F(RandomizedDocDBTest, ImmediateHistoryCleanup) {
  for (auto use_hash : UseHash::kValues) {
    Init(use_hash);
    while (load_gen_->next_iteration() <= FLAGS_test_num_iter) {
      if (load_gen_->next_iteration() % 250 == 0) {
        ASSERT_NO_FATALS(load_gen_->FlushRocksDB());
        ASSERT_NO_FATALS(load_gen_->PerformOperation(/* history_cleanup = */ true));
      } else {
        ASSERT_NO_FATALS(load_gen_->PerformOperation());
      }
    }
  }
}

}  // namespace docdb
}  // namespace yb