YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/db_test.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file. See the AUTHORS file for names of contributors.
23
24
// Introduction of SyncPoint effectively disabled building and running this test
25
// in Release build.
26
// which is a pity, it is a good test
27
#include <fcntl.h>
28
#include <algorithm>
29
#include <thread>
30
#include <unordered_set>
31
#include <utility>
32
#ifndef OS_WIN
33
#include <unistd.h>
34
#endif
35
#ifdef OS_SOLARIS
36
#include <alloca.h>
37
#endif
38
39
#include "yb/rocksdb/db/db_test_util.h"
40
#include "yb/rocksdb/port/stack_trace.h"
41
#include "yb/rocksdb/cache.h"
42
#include "yb/rocksdb/db.h"
43
#include "yb/rocksdb/db/version_set.h"
44
#include "yb/rocksdb/env.h"
45
#include "yb/rocksdb/experimental.h"
46
#include "yb/rocksdb/options.h"
47
#include "yb/rocksdb/perf_context.h"
48
#include "yb/rocksdb/perf_level.h"
49
#include "yb/rocksdb/snapshot.h"
50
#include "yb/rocksdb/sst_file_writer.h"
51
#include "yb/rocksdb/table_properties.h"
52
#include "yb/rocksdb/wal_filter.h"
53
#include "yb/rocksdb/utilities/write_batch_with_index.h"
54
#include "yb/rocksdb/util/file_reader_writer.h"
55
#include "yb/rocksdb/util/file_util.h"
56
#include "yb/rocksdb/util/logging.h"
57
#include "yb/rocksdb/util/mutexlock.h"
58
#include "yb/rocksdb/util/rate_limiter.h"
59
#include "yb/rocksdb/util/sync_point.h"
60
61
#include "yb/rocksutil/yb_rocksdb_logger.h"
62
63
#include "yb/util/countdown_latch.h"
64
#include "yb/util/format.h"
65
#include "yb/util/priority_thread_pool.h"
66
#include "yb/util/random_util.h"
67
#include "yb/util/slice.h"
68
#include "yb/util/string_util.h"
69
#include "yb/util/test_macros.h"
70
#include "yb/util/test_thread_holder.h"
71
#include "yb/util/tsan_util.h"
72
73
DECLARE_bool(use_priority_thread_pool_for_compactions);
74
DECLARE_bool(use_priority_thread_pool_for_flushes);
75
76
namespace rocksdb {
77
78
#ifndef ROCKSDB_LITE
79
// A helper function that ensures the table properties returned in
80
// `GetPropertiesOfAllTablesTest` is correct.
81
// This test assumes entries size is different for each of the tables.
82
namespace {
83
84
uint64_t GetNumberOfSstFilesForColumnFamily(DB* db,
85
37
                                            std::string column_family_name) {
86
37
  std::vector<LiveFileMetaData> metadata;
87
37
  db->GetLiveFilesMetaData(&metadata);
88
37
  uint64_t result = 0;
89
168
  for (auto& fileMetadata : metadata) {
90
168
    result += (fileMetadata.column_family_name == column_family_name);
91
168
  }
92
37
  return result;
93
37
}
94
95
}  // namespace
96
#endif  // ROCKSDB_LITE
97
98
class DBTest : public DBTestBase {
99
 public:
100
231
  DBTest() : DBTestBase("/db_test") {}
101
};
102
103
class DBTestWithParam
104
    : public DBTest,
105
      public testing::WithParamInterface<std::tuple<uint32_t, bool>> {
106
 public:
107
12
  DBTestWithParam() {
108
12
    max_subcompactions_ = std::get<0>(GetParam());
109
12
    exclusive_manual_compaction_ = std::get<1>(GetParam());
110
12
  }
111
112
  // Required if inheriting from testing::WithParamInterface<>
113
12
  static void SetUpTestCase() {}
114
12
  static void TearDownTestCase() {}
115
116
  uint32_t max_subcompactions_;
117
  bool exclusive_manual_compaction_;
118
};
119
120
1
TEST_F(DBTest, MockEnvTest) {
121
1
  unique_ptr<MockEnv> env{new MockEnv(Env::Default())};
122
1
  Options options;
123
1
  options.create_if_missing = true;
124
1
  options.env = env.get();
125
1
  DB* db;
126
127
1
  const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
128
1
  const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
129
130
1
  ASSERT_OK(DB::Open(options, "/dir/db", &db));
131
4
  for (size_t i = 0; i < 3; ++i) {
132
3
    ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
133
3
  }
134
135
4
  for (size_t i = 0; i < 3; ++i) {
136
3
    std::string res;
137
3
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
138
3
    ASSERT_TRUE(res == vals[i]);
139
3
  }
140
141
1
  Iterator* iterator = db->NewIterator(ReadOptions());
142
1
  iterator->SeekToFirst();
143
4
  for (size_t i = 0; i < 3; ++i) {
144
3
    ASSERT_TRUE(iterator->Valid());
145
3
    ASSERT_TRUE(keys[i] == iterator->key());
146
3
    ASSERT_TRUE(vals[i] == iterator->value());
147
3
    iterator->Next();
148
3
  }
149
1
  ASSERT_TRUE(!iterator->Valid());
150
1
  delete iterator;
151
152
  // TEST_FlushMemTable() is not supported in ROCKSDB_LITE
153
1
  #ifndef ROCKSDB_LITE
154
1
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
155
1
  ASSERT_OK(dbi->TEST_FlushMemTable());
156
157
4
  for (size_t i = 0; i < 3; ++i) {
158
3
    std::string res;
159
3
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
160
3
    ASSERT_TRUE(res == vals[i]);
161
3
  }
162
1
  #endif  // ROCKSDB_LITE
163
164
1
  delete db;
165
1
}
166
167
// NewMemEnv returns nullptr in ROCKSDB_LITE since class InMemoryEnv isn't
168
// defined.
169
#ifndef ROCKSDB_LITE
170
1
TEST_F(DBTest, MemEnvTest) {
171
1
  unique_ptr<Env> env{NewMemEnv(Env::Default())};
172
1
  Options options;
173
1
  options.create_if_missing = true;
174
1
  options.env = env.get();
175
1
  DB* db;
176
177
1
  const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")};
178
1
  const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")};
179
180
1
  ASSERT_OK(DB::Open(options, "/dir/db", &db));
181
4
  for (size_t i = 0; i < 3; ++i) {
182
3
    ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i]));
183
3
  }
184
185
4
  for (size_t i = 0; i < 3; ++i) {
186
3
    std::string res;
187
3
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
188
3
    ASSERT_TRUE(res == vals[i]);
189
3
  }
190
191
1
  Iterator* iterator = db->NewIterator(ReadOptions());
192
1
  iterator->SeekToFirst();
193
4
  for (size_t i = 0; i < 3; ++i) {
194
3
    ASSERT_TRUE(iterator->Valid());
195
3
    ASSERT_TRUE(keys[i] == iterator->key());
196
3
    ASSERT_TRUE(vals[i] == iterator->value());
197
3
    iterator->Next();
198
3
  }
199
1
  ASSERT_TRUE(!iterator->Valid());
200
1
  delete iterator;
201
202
1
  DBImpl* dbi = reinterpret_cast<DBImpl*>(db);
203
1
  ASSERT_OK(dbi->TEST_FlushMemTable());
204
205
4
  for (size_t i = 0; i < 3; ++i) {
206
3
    std::string res;
207
3
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
208
3
    ASSERT_TRUE(res == vals[i]);
209
3
  }
210
211
1
  delete db;
212
213
1
  options.create_if_missing = false;
214
1
  ASSERT_OK(DB::Open(options, "/dir/db", &db));
215
4
  for (size_t i = 0; i < 3; ++i) {
216
3
    std::string res;
217
3
    ASSERT_OK(db->Get(ReadOptions(), keys[i], &res));
218
3
    ASSERT_TRUE(res == vals[i]);
219
3
  }
220
1
  delete db;
221
1
}
222
#endif  // ROCKSDB_LITE
223
224
1
TEST_F(DBTest, WriteEmptyBatch) {
225
1
  Options options;
226
1
  options.env = env_;
227
1
  options.write_buffer_size = 100000;
228
1
  options = CurrentOptions(options);
229
1
  CreateAndReopenWithCF({"pikachu"}, options);
230
231
1
  ASSERT_OK(Put(1, "foo", "bar"));
232
1
  WriteOptions wo;
233
1
  wo.sync = true;
234
1
  wo.disableWAL = false;
235
1
  WriteBatch empty_batch;
236
1
  ASSERT_OK(dbfull()->Write(wo, &empty_batch));
237
238
  // make sure we can re-open it.
239
1
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
240
1
  ASSERT_EQ("bar", Get(1, "foo"));
241
1
}
242
243
#ifndef ROCKSDB_LITE
244
1
TEST_F(DBTest, ReadOnlyDB) {
245
1
  ASSERT_OK(Put("foo", "v1"));
246
1
  ASSERT_OK(Put("bar", "v2"));
247
1
  ASSERT_OK(Put("foo", "v3"));
248
1
  Close();
249
250
1
  auto options = CurrentOptions();
251
1
  assert(options.env = env_);
252
1
  ASSERT_OK(ReadOnlyReopen(options));
253
1
  ASSERT_EQ("v3", Get("foo"));
254
1
  ASSERT_EQ("v2", Get("bar"));
255
1
  Iterator* iter = db_->NewIterator(ReadOptions());
256
1
  int count = 0;
257
3
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
258
2
    ASSERT_OK(iter->status());
259
2
    ++count;
260
2
  }
261
1
  ASSERT_EQ(count, 2);
262
1
  delete iter;
263
1
  Close();
264
265
  // Reopen and flush memtable.
266
1
  Reopen(options);
267
1
  ASSERT_OK(Flush());
268
1
  Close();
269
  // Now check keys in read only mode.
270
1
  ASSERT_OK(ReadOnlyReopen(options));
271
1
  ASSERT_EQ("v3", Get("foo"));
272
1
  ASSERT_EQ("v2", Get("bar"));
273
1
  ASSERT_TRUE(db_->SyncWAL().IsNotSupported());
274
1
}
275
276
1
TEST_F(DBTest, CompactedDB) {
277
1
  const uint64_t kFileSize = 1 << 20;
278
1
  Options options;
279
1
  options.disable_auto_compactions = true;
280
1
  options.write_buffer_size = kFileSize;
281
1
  options.target_file_size_base = kFileSize;
282
1
  options.max_bytes_for_level_base = 1 << 30;
283
1
  options.compression = kNoCompression;
284
1
  options = CurrentOptions(options);
285
1
  Reopen(options);
286
  // 1 L0 file, use CompactedDB if max_open_files = -1
287
1
  ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, '1')));
288
1
  ASSERT_OK(Flush());
289
1
  Close();
290
1
  ASSERT_OK(ReadOnlyReopen(options));
291
1
  Status s = Put("new", "value");
292
1
  ASSERT_EQ(s.ToString(false),
293
1
            "Not implemented: Not supported operation in read only mode.");
294
1
  ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa"));
295
1
  Close();
296
1
  options.max_open_files = -1;
297
1
  ASSERT_OK(ReadOnlyReopen(options));
298
1
  s = Put("new", "value");
299
1
  ASSERT_EQ(s.ToString(false),
300
1
            "Not implemented: Not supported in compacted db mode.");
301
1
  ASSERT_EQ(DummyString(kFileSize / 2, '1'), Get("aaa"));
302
1
  Close();
303
1
  Reopen(options);
304
  // Add more L0 files
305
1
  ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, '2')));
306
1
  ASSERT_OK(Flush());
307
1
  ASSERT_OK(Put("aaa", DummyString(kFileSize / 2, 'a')));
308
1
  ASSERT_OK(Flush());
309
1
  ASSERT_OK(Put("bbb", DummyString(kFileSize / 2, 'b')));
310
1
  ASSERT_OK(Put("eee", DummyString(kFileSize / 2, 'e')));
311
1
  ASSERT_OK(Flush());
312
1
  Close();
313
314
1
  ASSERT_OK(ReadOnlyReopen(options));
315
  // Fallback to read-only DB
316
1
  s = Put("new", "value");
317
1
  ASSERT_EQ(s.ToString(false),
318
1
            "Not implemented: Not supported operation in read only mode.");
319
1
  Close();
320
321
  // Full compaction
322
1
  Reopen(options);
323
  // Add more keys
324
1
  ASSERT_OK(Put("fff", DummyString(kFileSize / 2, 'f')));
325
1
  ASSERT_OK(Put("hhh", DummyString(kFileSize / 2, 'h')));
326
1
  ASSERT_OK(Put("iii", DummyString(kFileSize / 2, 'i')));
327
1
  ASSERT_OK(Put("jjj", DummyString(kFileSize / 2, 'j')));
328
1
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
329
1
  ASSERT_EQ(3, NumTableFilesAtLevel(1));
330
1
  Close();
331
332
  // CompactedDB
333
1
  ASSERT_OK(ReadOnlyReopen(options));
334
1
  s = Put("new", "value");
335
1
  ASSERT_EQ(s.ToString(false),
336
1
            "Not implemented: Not supported in compacted db mode.");
337
1
  ASSERT_EQ("NOT_FOUND", Get("abc"));
338
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'a'), Get("aaa"));
339
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'b'), Get("bbb"));
340
1
  ASSERT_EQ("NOT_FOUND", Get("ccc"));
341
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'e'), Get("eee"));
342
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'f'), Get("fff"));
343
1
  ASSERT_EQ("NOT_FOUND", Get("ggg"));
344
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'h'), Get("hhh"));
345
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'i'), Get("iii"));
346
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'j'), Get("jjj"));
347
1
  ASSERT_EQ("NOT_FOUND", Get("kkk"));
348
349
  // MultiGet
350
1
  std::vector<std::string> values;
351
1
  std::vector<Status> status_list = dbfull()->MultiGet(ReadOptions(),
352
1
      std::vector<Slice>({Slice("aaa"), Slice("ccc"), Slice("eee"),
353
1
                          Slice("ggg"), Slice("iii"), Slice("kkk")}),
354
1
      &values);
355
1
  ASSERT_EQ(status_list.size(), static_cast<uint64_t>(6));
356
1
  ASSERT_EQ(values.size(), static_cast<uint64_t>(6));
357
1
  ASSERT_OK(status_list[0]);
358
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'a'), values[0]);
359
1
  ASSERT_TRUE(status_list[1].IsNotFound());
360
1
  ASSERT_OK(status_list[2]);
361
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'e'), values[2]);
362
1
  ASSERT_TRUE(status_list[3].IsNotFound());
363
1
  ASSERT_OK(status_list[4]);
364
1
  ASSERT_EQ(DummyString(kFileSize / 2, 'i'), values[4]);
365
1
  ASSERT_TRUE(status_list[5].IsNotFound());
366
1
}
367
368
// Make sure that when options.block_cache is set, after a new table is
369
// created its index/filter blocks are added to block cache.
370
1
TEST_F(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
371
1
  Options options = CurrentOptions();
372
1
  options.create_if_missing = true;
373
1
  options.statistics = rocksdb::CreateDBStatisticsForTests();
374
1
  BlockBasedTableOptions table_options;
375
1
  table_options.cache_index_and_filter_blocks = true;
376
1
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
377
1
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
378
1
  CreateAndReopenWithCF({"pikachu"}, options);
379
380
1
  ASSERT_OK(Put(1, "key", "val"));
381
  // Create a new table.
382
1
  ASSERT_OK(Flush(1));
383
384
  // index/filter blocks added to block cache right after table creation.
385
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
386
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
387
1
  ASSERT_EQ(2, /* only index/filter were added */
388
1
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
389
1
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
390
1
  uint64_t int_num;
391
1
  ASSERT_TRUE(
392
1
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
393
1
  ASSERT_EQ(int_num, 0U);
394
395
  // Make sure filter block is in cache.
396
1
  std::string value;
397
1
  ReadOptions ropt;
398
1
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
399
400
  // Miss count should remain the same.
401
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
402
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
403
404
1
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
405
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
406
1
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
407
408
  // Make sure index block is in cache.
409
1
  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT);
410
1
  value = Get(1, "key");
411
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
412
1
  ASSERT_EQ(index_block_hit + 1,
413
1
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
414
415
1
  value = Get(1, "key");
416
1
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
417
1
  ASSERT_EQ(index_block_hit + 2,
418
1
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
419
1
}
420
421
1
TEST_F(DBTest, ParanoidFileChecks) {
422
1
  Options options = CurrentOptions();
423
1
  options.create_if_missing = true;
424
1
  options.statistics = rocksdb::CreateDBStatisticsForTests();
425
1
  options.level0_file_num_compaction_trigger = 2;
426
1
  options.paranoid_file_checks = true;
427
1
  BlockBasedTableOptions table_options;
428
1
  table_options.cache_index_and_filter_blocks = false;
429
1
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
430
1
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
431
1
  CreateAndReopenWithCF({"pikachu"}, options);
432
433
1
  ASSERT_OK(Put(1, "1_key", "val"));
434
1
  ASSERT_OK(Put(1, "9_key", "val"));
435
  // Create a new table.
436
1
  ASSERT_OK(Flush(1));
437
1
  ASSERT_EQ(1, /* read and cache data block */
438
1
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
439
440
1
  ASSERT_OK(Put(1, "1_key2", "val2"));
441
1
  ASSERT_OK(Put(1, "9_key2", "val2"));
442
  // Create a new SST file. This will further trigger a compaction
443
  // and generate another file.
444
1
  ASSERT_OK(Flush(1));
445
1
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
446
1
  ASSERT_EQ(3, /* Totally 3 files created up to now */
447
1
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
448
449
  // After disabling options.paranoid_file_checks. NO further block
450
  // is added after generating a new file.
451
1
  ASSERT_OK(
452
1
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));
453
454
1
  ASSERT_OK(Put(1, "1_key3", "val3"));
455
1
  ASSERT_OK(Put(1, "9_key3", "val3"));
456
1
  ASSERT_OK(Flush(1));
457
1
  ASSERT_OK(Put(1, "1_key4", "val4"));
458
1
  ASSERT_OK(Put(1, "9_key4", "val4"));
459
1
  ASSERT_OK(Flush(1));
460
1
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
461
1
  ASSERT_EQ(3, /* Totally 3 files created up to now */
462
1
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
463
1
}
464
465
1
TEST_F(DBTest, LevelLimitReopen) {
466
1
  Options options = CurrentOptions();
467
1
  CreateAndReopenWithCF({"pikachu"}, options);
468
469
1
  const std::string value(1024 * 1024, ' ');
470
1
  int i = 0;
471
276
  while (NumTableFilesAtLevel(2, 1) == 0) {
472
275
    ASSERT_OK(Put(1, Key(i++), value));
473
275
  }
474
475
1
  options.num_levels = 1;
476
1
  options.max_bytes_for_level_multiplier_additional.resize(1, 1);
477
1
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
478
1
  ASSERT_EQ(s.IsInvalidArgument(), true);
479
1
  ASSERT_EQ(s.ToString(false),
480
1
            "Invalid argument: db has more levels than options.num_levels");
481
482
1
  options.num_levels = 10;
483
1
  options.max_bytes_for_level_multiplier_additional.resize(10, 1);
484
1
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
485
1
}
486
#endif  // ROCKSDB_LITE
487
488
1
TEST_F(DBTest, PutDeleteGet) {
489
30
  do {
490
30
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
491
30
    ASSERT_OK(Put(1, "foo", "v1"));
492
30
    ASSERT_EQ("v1", Get(1, "foo"));
493
30
    ASSERT_OK(Put(1, "foo", "v2"));
494
30
    ASSERT_EQ("v2", Get(1, "foo"));
495
30
    ASSERT_OK(Delete(1, "foo"));
496
30
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
497
30
  } while (ChangeOptions());
498
1
}
499
500
1
TEST_F(DBTest, PutSingleDeleteGet) {
501
26
  do {
502
26
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
503
26
    ASSERT_OK(Put(1, "foo", "v1"));
504
26
    ASSERT_EQ("v1", Get(1, "foo"));
505
26
    ASSERT_OK(Put(1, "foo2", "v2"));
506
26
    ASSERT_EQ("v2", Get(1, "foo2"));
507
26
    ASSERT_OK(SingleDelete(1, "foo"));
508
26
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
509
    // FIFO and universal compaction do not apply to the test case.
510
    // Skip MergePut because single delete does not get removed when it encounters a merge.
511
26
  } while (ChangeOptions(kSkipFIFOCompaction |
512
26
                         kSkipUniversalCompaction | kSkipMergePut));
513
1
}
514
515
1
TEST_F(DBTest, ReadFromPersistedTier) {
516
30
  do {
517
30
    Random rnd(301);
518
30
    Options options = CurrentOptions();
519
90
    for (int disableWAL = 0; disableWAL <= 1; ++disableWAL) {
520
60
      CreateAndReopenWithCF({"pikachu"}, options);
521
60
      WriteOptions wopt;
522
60
      wopt.disableWAL = (disableWAL == 1);
523
      // 1st round: put but not flush
524
60
      ASSERT_OK(db_->Put(wopt, handles_[1], "foo", "first"));
525
60
      ASSERT_OK(db_->Put(wopt, handles_[1], "bar", "one"));
526
60
      ASSERT_EQ("first", Get(1, "foo"));
527
60
      ASSERT_EQ("one", Get(1, "bar"));
528
529
      // Read directly from persited data.
530
60
      ReadOptions ropt;
531
60
      ropt.read_tier = kPersistedTier;
532
60
      std::string value;
533
60
      if (wopt.disableWAL) {
534
        // as data has not yet being flushed, we expect not found.
535
30
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
536
30
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
537
30
      } else {
538
30
        ASSERT_OK(db_->Get(ropt, handles_[1], "foo", &value));
539
30
        ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value));
540
30
      }
541
542
      // Multiget
543
60
      std::vector<ColumnFamilyHandle*> multiget_cfs;
544
60
      multiget_cfs.push_back(handles_[1]);
545
60
      multiget_cfs.push_back(handles_[1]);
546
60
      std::vector<Slice> multiget_keys;
547
60
      multiget_keys.push_back("foo");
548
60
      multiget_keys.push_back("bar");
549
60
      std::vector<std::string> multiget_values;
550
60
      auto statuses =
551
60
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
552
60
      if (wopt.disableWAL) {
553
30
        ASSERT_TRUE(statuses[0].IsNotFound());
554
30
        ASSERT_TRUE(statuses[1].IsNotFound());
555
30
      } else {
556
30
        ASSERT_OK(statuses[0]);
557
30
        ASSERT_OK(statuses[1]);
558
30
      }
559
560
      // 2nd round: flush and put a new value in memtable.
561
60
      ASSERT_OK(Flush(1));
562
60
      ASSERT_OK(db_->Put(wopt, handles_[1], "rocksdb", "hello"));
563
564
      // once the data has been flushed, we are able to get the
565
      // data when kPersistedTier is used.
566
60
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).ok());
567
60
      ASSERT_EQ(value, "first");
568
60
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
569
60
      ASSERT_EQ(value, "one");
570
60
      if (wopt.disableWAL) {
571
30
        ASSERT_TRUE(
572
30
            db_->Get(ropt, handles_[1], "rocksdb", &value).IsNotFound());
573
30
      } else {
574
30
        ASSERT_OK(db_->Get(ropt, handles_[1], "rocksdb", &value));
575
30
        ASSERT_EQ(value, "hello");
576
30
      }
577
578
      // Expect same result in multiget
579
60
      multiget_cfs.push_back(handles_[1]);
580
60
      multiget_keys.push_back("rocksdb");
581
60
      statuses =
582
60
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
583
60
      ASSERT_TRUE(statuses[0].ok());
584
60
      ASSERT_EQ("first", multiget_values[0]);
585
60
      ASSERT_TRUE(statuses[1].ok());
586
60
      ASSERT_EQ("one", multiget_values[1]);
587
60
      if (wopt.disableWAL) {
588
30
        ASSERT_TRUE(statuses[2].IsNotFound());
589
30
      } else {
590
30
        ASSERT_OK(statuses[2]);
591
30
      }
592
593
      // 3rd round: delete and flush
594
60
      ASSERT_OK(db_->Delete(wopt, handles_[1], "foo"));
595
60
      ASSERT_OK(Flush(1));
596
60
      ASSERT_OK(db_->Delete(wopt, handles_[1], "bar"));
597
598
60
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound());
599
60
      if (wopt.disableWAL) {
600
        // Still expect finding the value as its delete has not yet being
601
        // flushed.
602
30
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).ok());
603
30
        ASSERT_EQ(value, "one");
604
30
      } else {
605
30
        ASSERT_TRUE(db_->Get(ropt, handles_[1], "bar", &value).IsNotFound());
606
30
      }
607
60
      ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok());
608
60
      ASSERT_EQ(value, "hello");
609
610
60
      statuses =
611
60
          db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values);
612
60
      ASSERT_TRUE(statuses[0].IsNotFound());
613
60
      if (wopt.disableWAL) {
614
30
        ASSERT_TRUE(statuses[1].ok());
615
30
        ASSERT_EQ("one", multiget_values[1]);
616
30
      } else {
617
30
        ASSERT_TRUE(statuses[1].IsNotFound());
618
30
      }
619
60
      ASSERT_TRUE(statuses[2].ok());
620
60
      ASSERT_EQ("hello", multiget_values[2]);
621
60
      if (wopt.disableWAL == 0) {
622
30
        DestroyAndReopen(options);
623
30
      }
624
60
    }
625
30
  } while (ChangeOptions());
626
1
}
627
628
1
TEST_F(DBTest, IteratorProperty) {
629
  // The test needs to be changed if kPersistedTier is supported in iterator.
630
1
  Options options = CurrentOptions();
631
1
  CreateAndReopenWithCF({"pikachu"}, options);
632
1
  ASSERT_OK(Put(1, "1", "2"));
633
1
  ReadOptions ropt;
634
1
  ropt.pin_data = false;
635
1
  {
636
1
    unique_ptr<Iterator> iter(db_->NewIterator(ropt, handles_[1]));
637
1
    iter->SeekToFirst();
638
1
    std::string prop_value;
639
1
    ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
640
1
    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
641
1
    ASSERT_EQ("0", prop_value);
642
1
    iter->Next();
643
1
    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
644
1
    ASSERT_EQ("Iterator is not valid.", prop_value);
645
1
  }
646
1
  Close();
647
1
}
648
649
1
TEST_F(DBTest, PersistedTierOnIterator) {
650
  // The test needs to be changed if kPersistedTier is supported in iterator.
651
1
  Options options = CurrentOptions();
652
1
  CreateAndReopenWithCF({"pikachu"}, options);
653
1
  ReadOptions ropt;
654
1
  ropt.read_tier = kPersistedTier;
655
656
1
  auto* iter = db_->NewIterator(ropt, handles_[1]);
657
1
  ASSERT_TRUE(iter->status().IsNotSupported());
658
1
  delete iter;
659
660
1
  std::vector<Iterator*> iters;
661
1
  ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
662
1
  Close();
663
1
}
664
665
1
TEST_F(DBTest, SingleDeleteFlush) {
666
  // Test to check whether flushing preserves a single delete hidden
667
  // behind a put.
668
26
  do {
669
26
    Random rnd(301);
670
671
26
    Options options = CurrentOptions();
672
26
    options.disable_auto_compactions = true;
673
26
    CreateAndReopenWithCF({"pikachu"}, options);
674
675
    // Put values on second level (so that they will not be in the same
676
    // compaction as the other operations.
677
26
    ASSERT_OK(Put(1, "foo", "first"));
678
26
    ASSERT_OK(Put(1, "bar", "one"));
679
26
    ASSERT_OK(Flush(1));
680
26
    MoveFilesToLevel(2, 1);
681
682
    // (Single) delete hidden by a put
683
26
    ASSERT_OK(SingleDelete(1, "foo"));
684
26
    ASSERT_OK(Put(1, "foo", "second"));
685
26
    ASSERT_OK(Delete(1, "bar"));
686
26
    ASSERT_OK(Put(1, "bar", "two"));
687
26
    ASSERT_OK(Flush(1));
688
689
26
    ASSERT_OK(SingleDelete(1, "foo"));
690
26
    ASSERT_OK(Delete(1, "bar"));
691
26
    ASSERT_OK(Flush(1));
692
693
26
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
694
695
26
    ASSERT_EQ("NOT_FOUND", Get(1, "bar"));
696
26
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
697
    // FIFO and universal compaction do not apply to the test case.
698
    // Skip MergePut because merges cannot be combined with single deletions.
699
26
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut));
700
1
}
701
702
1
TEST_F(DBTest, SingleDeletePutFlush) {
703
  // Single deletes that encounter the matching put in a flush should get
704
  // removed.
705
26
  do {
706
26
    Random rnd(301);
707
708
26
    Options options = CurrentOptions();
709
26
    options.disable_auto_compactions = true;
710
26
    CreateAndReopenWithCF({"pikachu"}, options);
711
712
26
    ASSERT_OK(Put(1, "foo", Slice()));
713
26
    ASSERT_OK(Put(1, "a", Slice()));
714
26
    ASSERT_OK(SingleDelete(1, "a"));
715
26
    ASSERT_OK(Flush(1));
716
717
26
    ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
718
    // FIFO and universal compaction do not apply to the test case.
719
    // Skip MergePut because merges cannot be combined with single deletions.
720
26
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut));
721
1
}
722
723
1
TEST_F(DBTest, EmptyFlush) {
724
  // It is possible to produce empty flushes when using single deletes. Tests
725
  // whether empty flushes cause issues.
726
26
  do {
727
26
    Random rnd(301);
728
729
26
    Options options = CurrentOptions();
730
26
    options.disable_auto_compactions = true;
731
26
    CreateAndReopenWithCF({"pikachu"}, options);
732
733
26
    ASSERT_OK(Put(1, "a", Slice()));
734
26
    ASSERT_OK(SingleDelete(1, "a"));
735
26
    ASSERT_OK(Flush(1));
736
737
26
    ASSERT_EQ("[ ]", AllEntriesFor("a", 1));
738
    // FIFO and universal compaction do not apply to the test case.
739
    // Skip MergePut because merges cannot be combined with single deletions.
740
26
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut));
741
1
}
742
743
// Disable because not all platform can run it.
744
// It requires more than 9GB memory to run it, With single allocation
745
// of more than 3GB.
746
0
TEST_F(DBTest, DISABLED_VeryLargeValue) {
747
0
  const size_t kValueSize = 3221225472u;  // 3GB value
748
0
  const size_t kKeySize = 8388608u;       // 8MB key
749
0
  std::string raw(kValueSize, 'v');
750
0
  std::string key1(kKeySize, 'c');
751
0
  std::string key2(kKeySize, 'd');
752
753
0
  Options options;
754
0
  options.env = env_;
755
0
  options.write_buffer_size = 100000;  // Small write buffer
756
0
  options.paranoid_checks = true;
757
0
  options = CurrentOptions(options);
758
0
  DestroyAndReopen(options);
759
760
0
  ASSERT_OK(Put("boo", "v1"));
761
0
  ASSERT_OK(Put("foo", "v1"));
762
0
  ASSERT_OK(Put(key1, raw));
763
0
  raw[0] = 'w';
764
0
  ASSERT_OK(Put(key2, raw));
765
0
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
766
767
0
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
768
769
0
  std::string value;
770
0
  Status s = db_->Get(ReadOptions(), key1, &value);
771
0
  ASSERT_OK(s);
772
0
  ASSERT_EQ(kValueSize, value.size());
773
0
  ASSERT_EQ('v', value[0]);
774
775
0
  s = db_->Get(ReadOptions(), key2, &value);
776
0
  ASSERT_OK(s);
777
0
  ASSERT_EQ(kValueSize, value.size());
778
0
  ASSERT_EQ('w', value[0]);
779
780
  // Compact all files.
781
0
  ASSERT_OK(Flush());
782
0
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
783
784
  // Check DB is not in read-only state.
785
0
  ASSERT_OK(Put("boo", "v1"));
786
787
0
  s = db_->Get(ReadOptions(), key1, &value);
788
0
  ASSERT_OK(s);
789
0
  ASSERT_EQ(kValueSize, value.size());
790
0
  ASSERT_EQ('v', value[0]);
791
792
0
  s = db_->Get(ReadOptions(), key2, &value);
793
0
  ASSERT_OK(s);
794
0
  ASSERT_EQ(kValueSize, value.size());
795
0
  ASSERT_EQ('w', value[0]);
796
0
}
797
798
1
TEST_F(DBTest, GetFromImmutableLayer) {
799
30
  do {
800
30
    Options options;
801
30
    options.env = env_;
802
30
    options.write_buffer_size = 100000;  // Small write buffer
803
30
    options = CurrentOptions(options);
804
30
    CreateAndReopenWithCF({"pikachu"}, options);
805
806
30
    ASSERT_OK(Put(1, "foo", "v1"));
807
30
    ASSERT_EQ("v1", Get(1, "foo"));
808
809
    // Block sync calls
810
30
    env_->delay_sstable_sync_.store(true, std::memory_order_release);
811
30
    ASSERT_OK(Put(1, "k1", std::string(100000, 'x')));          // Fill memtable
812
30
    ASSERT_OK(Put(1, "k2", std::string(100000, 'y')));          // Trigger flush
813
30
    ASSERT_EQ("v1", Get(1, "foo"));
814
30
    ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
815
    // Release sync calls
816
30
    env_->delay_sstable_sync_.store(false, std::memory_order_release);
817
30
  } while (ChangeOptions());
818
1
}
819
820
1
TEST_F(DBTest, GetFromVersions) {
821
30
  do {
822
30
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
823
30
    ASSERT_OK(Put(1, "foo", "v1"));
824
30
    ASSERT_OK(Flush(1));
825
30
    ASSERT_EQ("v1", Get(1, "foo"));
826
30
    ASSERT_EQ("NOT_FOUND", Get(0, "foo"));
827
30
  } while (ChangeOptions());
828
1
}
829
830
#ifndef ROCKSDB_LITE
831
1
// A snapshot taken before an overwrite must keep serving the old value,
// both from the memtable and after the data is flushed to an sstable.
TEST_F(DBTest, GetSnapshot) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
    // Exercise both a short key and a long key.
    for (int i = 0; i < 2; i++) {
      std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
      ASSERT_OK(Put(1, key, "v1"));
      const Snapshot* snap = db_->GetSnapshot();
      ASSERT_OK(Put(1, key, "v2"));
      ASSERT_EQ("v2", Get(1, key));
      ASSERT_EQ("v1", Get(1, key, snap));
      ASSERT_OK(Flush(1));
      ASSERT_EQ("v2", Get(1, key));
      ASSERT_EQ("v1", Get(1, key, snap));
      db_->ReleaseSnapshot(snap);
    }
  } while (ChangeOptions());
}
851
#endif  // ROCKSDB_LITE
852
853
1
// Level-0 files must be consulted newest-first. Two L0 files are created
// where the older file sorts first by smallest key; the newer overwrite
// of "foo" must still win.
TEST_F(DBTest, GetLevel0Ordering) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "bar", "b"));
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}
868
869
1
// Opening a DB with inverted level-0 trigger thresholds
// (stop < slowdown < compaction) must still succeed — the options
// are expected to be sanitized rather than rejected.
TEST_F(DBTest, WrongLevel0Config) {
  Options opts = CurrentOptions();
  Close();
  ASSERT_OK(DestroyDB(dbname_, opts));
  opts.level0_stop_writes_trigger = 1;
  opts.level0_slowdown_writes_trigger = 2;
  opts.level0_file_num_compaction_trigger = 3;
  ASSERT_OK(DB::Open(opts, dbname_, &db_));
}
878
879
#ifndef ROCKSDB_LITE
880
1
// A fresh memtable/L0 write must shadow an older value that was
// compacted into a lower level.
TEST_F(DBTest, GetOrderedByLevels) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    Compact(1, "a", "z");
    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v2"));
    ASSERT_EQ("v2", Get(1, "foo"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("v2", Get(1, "foo"));
  } while (ChangeOptions());
}
892
893
1
// With several non-overlapping files in a non-level-0 level,
// Get must route each key to the file that actually holds it.
TEST_F(DBTest, GetPicksCorrectFile) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Build multiple files in a level above 0.
    ASSERT_OK(Put(1, "a", "va"));
    Compact(1, "a", "b");
    ASSERT_OK(Put(1, "x", "vx"));
    Compact(1, "x", "y");
    ASSERT_OK(Put(1, "f", "vf"));
    Compact(1, "f", "g");
    ASSERT_EQ("va", Get(1, "a"));
    ASSERT_EQ("vf", Get(1, "f"));
    ASSERT_EQ("vx", Get(1, "x"));
  } while (ChangeOptions());
}
908
909
1
// Regression test: with sstable A in level 0, level 1 empty, and sstable B
// in level 2, repeated misses must schedule the seek-triggered compaction
// at level 0 — a historical bug marked it as level 1 instead.
TEST_F(DBTest, GetEncountersEmptyLevel) {
  do {
    Options opts = CurrentOptions();
    opts.disableDataSync = true;
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Step 1: place sstables in levels 0 and 2.
    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
    ASSERT_OK(Put(1, "a", "begin"));
    ASSERT_OK(Put(1, "z", "end"));
    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);

    // Step 2: make sure level 1 is empty.
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);
    ASSERT_EQ(NumTableFilesAtLevel(1, 1), 0);
    ASSERT_EQ(NumTableFilesAtLevel(2, 1), 1);

    // Step 3: generate enough misses to trigger a seek compaction.
    for (int i = 0; i < 1000; i++) {
      ASSERT_EQ("NOT_FOUND", Get(1, "missing"));
    }

    // Step 4: let any scheduled compaction run to completion.
    ASSERT_OK(dbfull()->TEST_WaitForCompact());

    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 1);  // XXX
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
951
#endif  // ROCKSDB_LITE
952
953
1
// A kBlockCacheTier iterator must serve data from the memtable or block
// cache only: it sees memtable data, reports Incomplete after a flush
// (without touching storage), and sees the data again once a regular Get
// has pulled the block into the cache.
TEST_F(DBTest, NonBlockingIteration) {
  do {
    ReadOptions non_blocking_opts, regular_opts;
    Options opts = CurrentOptions();
    opts.statistics = rocksdb::CreateDBStatisticsForTests();
    non_blocking_opts.read_tier = kBlockCacheTier;
    CreateAndReopenWithCF({"pikachu"}, opts);
    // Seed the database with a single key-value pair.
    ASSERT_OK(Put(1, "a", "b"));

    // The non-blocking iterator must see it while it lives in the memtable.
    Iterator* it = db_->NewIterator(non_blocking_opts, handles_[1]);
    int seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      ASSERT_OK(it->status());
      seen++;
    }
    ASSERT_EQ(seen, 1);
    delete it;

    // Flush: the key now lives only on storage, not in memtable or cache.
    ASSERT_OK(Flush(1));

    // The non-blocking iterator must find nothing and must not do any IO.
    uint64_t file_opens = TestGetTickerCount(opts, NO_FILE_OPENS);
    uint64_t cache_adds = TestGetTickerCount(opts, BLOCK_CACHE_ADD);
    it = db_->NewIterator(non_blocking_opts, handles_[1]);
    seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      seen++;
    }
    ASSERT_EQ(seen, 0);
    ASSERT_TRUE(it->status().IsIncomplete());
    ASSERT_EQ(file_opens, TestGetTickerCount(opts, NO_FILE_OPENS));
    ASSERT_EQ(cache_adds, TestGetTickerCount(opts, BLOCK_CACHE_ADD));
    delete it;

    // A regular Get loads the block into the cache.
    ASSERT_EQ(Get(1, "a"), "b");

    // Now a non-blocking scan must succeed, still without new IO.
    file_opens = TestGetTickerCount(opts, NO_FILE_OPENS);
    cache_adds = TestGetTickerCount(opts, BLOCK_CACHE_ADD);
    it = db_->NewIterator(non_blocking_opts, handles_[1]);
    seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      ASSERT_OK(it->status());
      seen++;
    }
    ASSERT_EQ(seen, 1);
    ASSERT_EQ(file_opens, TestGetTickerCount(opts, NO_FILE_OPENS));
    ASSERT_EQ(cache_adds, TestGetTickerCount(opts, BLOCK_CACHE_ADD));
    delete it;

    // Block-cache behavior is irrelevant to plain table format, so skip it.
  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
}
1014
1015
#ifndef ROCKSDB_LITE
1016
1
// Same contract as NonBlockingIteration, but exercised through a managed
// iterator (ReadOptions::managed = true): memtable reads succeed, a flushed
// key yields Incomplete with zero IO, and the key reappears once a Get has
// warmed the block cache.
TEST_F(DBTest, ManagedNonBlockingIteration) {
  do {
    ReadOptions non_blocking_opts, regular_opts;
    Options opts = CurrentOptions();
    opts.statistics = rocksdb::CreateDBStatisticsForTests();
    non_blocking_opts.read_tier = kBlockCacheTier;
    non_blocking_opts.managed = true;
    CreateAndReopenWithCF({"pikachu"}, opts);
    // Seed the database with a single key-value pair.
    ASSERT_OK(Put(1, "a", "b"));

    // The managed non-blocking iterator must see it in the memtable.
    Iterator* it = db_->NewIterator(non_blocking_opts, handles_[1]);
    int seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      ASSERT_OK(it->status());
      seen++;
    }
    ASSERT_EQ(seen, 1);
    delete it;

    // Flush: the key now lives only on storage, not in memtable or cache.
    ASSERT_OK(Flush(1));

    // The iterator must find nothing and must not perform any IO.
    int64_t file_opens = TestGetTickerCount(opts, NO_FILE_OPENS);
    int64_t cache_adds = TestGetTickerCount(opts, BLOCK_CACHE_ADD);
    it = db_->NewIterator(non_blocking_opts, handles_[1]);
    seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      seen++;
    }
    ASSERT_EQ(seen, 0);
    ASSERT_TRUE(it->status().IsIncomplete());
    ASSERT_EQ(file_opens, TestGetTickerCount(opts, NO_FILE_OPENS));
    ASSERT_EQ(cache_adds, TestGetTickerCount(opts, BLOCK_CACHE_ADD));
    delete it;

    // A regular Get loads the block into the cache.
    ASSERT_EQ(Get(1, "a"), "b");

    // Now the managed non-blocking scan must succeed without new IO.
    file_opens = TestGetTickerCount(opts, NO_FILE_OPENS);
    cache_adds = TestGetTickerCount(opts, BLOCK_CACHE_ADD);
    it = db_->NewIterator(non_blocking_opts, handles_[1]);
    seen = 0;
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      ASSERT_OK(it->status());
      seen++;
    }
    ASSERT_EQ(seen, 1);
    ASSERT_EQ(file_opens, TestGetTickerCount(opts, NO_FILE_OPENS));
    ASSERT_EQ(cache_adds, TestGetTickerCount(opts, BLOCK_CACHE_ADD));
    delete it;

    // Block-cache behavior is irrelevant to plain table format, so skip it.
  } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
}
1078
#endif  // ROCKSDB_LITE
1079
1080
1
// Seek followed by Prev must not crash or misbehave when data is spread
// across two flushed files plus the memtable.
TEST_F(DBTest, IterSeekBeforePrev) {
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put("0", "f"));
  ASSERT_OK(Put("1", "h"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put("2", "j"));
  auto it = db_->NewIterator(ReadOptions());
  it->Seek(Slice("c"));
  it->Prev();
  it->Seek(Slice("a"));
  it->Prev();
  delete it;
}
1095
1096
namespace {
// Builds a key of the requested length consisting of `length` copies of `c`.
std::string MakeLongKey(size_t length, char c) {
  std::string key(length, c);
  return key;
}
}  // namespace
1101
1102
1
// Iteration must handle keys of widely varying lengths (20..127 bytes)
// mixed across a flushed file and the live memtable.
TEST_F(DBTest, IterLongKeys) {
  ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
  ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
  ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
  ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
  auto it = db_->NewIterator(ReadOptions());

  // Forward scan over all long keys, starting from the shortest.
  it->Seek(MakeLongKey(20, 0));
  ASSERT_EQ(IterStatus(it), MakeLongKey(20, 0) + "->0");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(50, 1) + "->1");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(32, 2) + "->2");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(127, 3) + "->3");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(64, 4) + "->4");
  delete it;

  // Seek into the middle of the key space and continue forward.
  it = db_->NewIterator(ReadOptions());
  it->Seek(MakeLongKey(50, 1));
  ASSERT_EQ(IterStatus(it), MakeLongKey(50, 1) + "->1");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(32, 2) + "->2");
  it->Next();
  ASSERT_EQ(IterStatus(it), MakeLongKey(127, 3) + "->3");
  delete it;
}
1134
1135
1
// Next must skip over a key ("b") whose every version has a sequence
// number newer than the iterator's snapshot, even past the sequential
// skip limit (which forces a reseek internally).
TEST_F(DBTest, IterNextWithNewerSeq) {
  ASSERT_OK(Put("0", "0"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto it = db_->NewIterator(ReadOptions());

  // Pile up versions of "b" newer than the iterator's snapshot.
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
       i++) {
    ASSERT_OK(Put("b", "f"));
  }

  it->Seek(Slice("a"));
  ASSERT_EQ(IterStatus(it), "a->b");
  it->Next();
  ASSERT_EQ(IterStatus(it), "c->d");
  delete it;
}
1155
1156
1
// Prev must skip over a key ("b") whose every version is newer than the
// iterator's snapshot, even when that exceeds the sequential skip limit.
TEST_F(DBTest, IterPrevWithNewerSeq) {
  ASSERT_OK(Put("0", "0"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto it = db_->NewIterator(ReadOptions());

  // Pile up versions of "b" newer than the iterator's snapshot.
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
       i++) {
    ASSERT_OK(Put("b", "f"));
  }

  it->Seek(Slice("d"));
  ASSERT_EQ(IterStatus(it), "d->e");
  it->Prev();
  ASSERT_EQ(IterStatus(it), "c->d");
  it->Prev();
  ASSERT_EQ(IterStatus(it), "a->b");

  it->Prev();
  delete it;
}
1180
1181
1
// Variant of IterPrevWithNewerSeq: the too-new versions of "b" are written
// AFTER the iterator is already positioned; Prev must still skip past them.
TEST_F(DBTest, IterPrevWithNewerSeq2) {
  ASSERT_OK(Put("0", "0"));
  ASSERT_OK(dbfull()->Flush(FlushOptions()));
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Put("d", "e"));
  auto it = db_->NewIterator(ReadOptions());
  it->Seek(Slice("c"));
  ASSERT_EQ(IterStatus(it), "c->d");

  // Pile up versions of "b" newer than the iterator's snapshot.
  for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
      i++) {
    ASSERT_OK(Put("b", "f"));
  }

  it->Prev();
  ASSERT_EQ(IterStatus(it), "a->b");

  it->Prev();
  delete it;
}
1203
1204
1
// Every positioning operation on an iterator over an empty column family
// must land on an invalid position.
TEST_F(DBTest, IterEmpty) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);

    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->Seek("foo");
    ASSERT_EQ(IterStatus(it), "(invalid)");

    delete it;
  } while (ChangeCompactOptions());
}
1221
1222
1
// Exhaustive positioning checks on an iterator over exactly one entry:
// every combination of SeekToFirst/SeekToLast/Seek with Next/Prev must
// land either on "a" or on an invalid position.
TEST_F(DBTest, IterSingle) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "a", "va"));
    Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);

    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");
    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");
    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->Seek("");
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->Seek("a");
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    it->Seek("b");
    ASSERT_EQ(IterStatus(it), "(invalid)");

    delete it;
  } while (ChangeCompactOptions());
}
1262
1263
1
// Full positioning matrix over three entries a/b/c: forward and reverse
// scans, seeks to present/absent/boundary keys, direction switches, and
// snapshot isolation (the iterator must ignore writes made after it was
// created, including a delete of "b").
TEST_F(DBTest, IterMulti) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "a", "va"));
    ASSERT_OK(Put(1, "b", "vb"));
    ASSERT_OK(Put(1, "c", "vc"));
    Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);

    // Forward scan from the beginning.
    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "b->vb");
    it->Next();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");
    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    // Reverse scan from the end.
    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "b->vb");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");
    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    // Seeks: empty target, exact hit, and a gap between keys.
    it->Seek("");
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Seek("a");
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Seek("ax");
    ASSERT_EQ(IterStatus(it), "b->vb");

    it->Seek("b");
    ASSERT_EQ(IterStatus(it), "b->vb");
    it->Seek("z");
    ASSERT_EQ(IterStatus(it), "(invalid)");

    // Switch from reverse to forward
    it->SeekToLast();
    it->Prev();
    it->Prev();
    it->Next();
    ASSERT_EQ(IterStatus(it), "b->vb");

    // Switch from forward to reverse
    it->SeekToFirst();
    it->Next();
    it->Next();
    it->Prev();
    ASSERT_EQ(IterStatus(it), "b->vb");

    // Make sure iter stays at snapshot
    ASSERT_OK(Put(1, "a", "va2"));
    ASSERT_OK(Put(1, "a2", "va3"));
    ASSERT_OK(Put(1, "b", "vb2"));
    ASSERT_OK(Put(1, "c", "vc2"));
    ASSERT_OK(Delete(1, "b"));
    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "b->vb");
    it->Next();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");
    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "b->vb");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    delete it;
  } while (ChangeCompactOptions());
}
1349
1350
// Check that we can skip over a run of user keys
1351
// by using reseek rather than sequential scan
1352
1
// Verifies that the iterator switches from sequential skipping to an
// internal reseek only once a key accumulates more than
// max_sequential_skip_in_iterations (= 3) hidden versions, tracked via
// the NUMBER_OF_RESEEKS_IN_ITERATION ticker, in both directions.
TEST_F(DBTest, IterReseek) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options opts = CurrentOptions(options_override);
  opts.max_sequential_skip_in_iterations = 3;
  opts.create_if_missing = true;
  opts.statistics = rocksdb::CreateDBStatisticsForTests();
  DestroyAndReopen(opts);
  CreateAndReopenWithCF({"pikachu"}, opts);

  // Three versions of the same user key: still within the skip budget,
  // so no reseek; "b" must be reachable via Next.
  ASSERT_OK(Put(1, "a", "zero"));
  ASSERT_OK(Put(1, "a", "one"));
  ASSERT_OK(Put(1, "a", "two"));
  ASSERT_OK(Put(1, "b", "bone"));
  Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);
  it->SeekToFirst();
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  ASSERT_EQ(IterStatus(it), "a->two");
  it->Next();
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  ASSERT_EQ(IterStatus(it), "b->bone");
  delete it;

  // A fourth write brings the hidden-version count to three — still at the
  // limit, so no reseek yet.
  ASSERT_OK(Put(1, "a", "three"));
  it = db_->NewIterator(ReadOptions(), handles_[1]);
  it->SeekToFirst();
  ASSERT_EQ(IterStatus(it), "a->three");
  it->Next();
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  ASSERT_EQ(IterStatus(it), "b->bone");
  delete it;

  // A fifth write pushes the count past the limit: Next must reseek.
  ASSERT_OK(Put(1, "a", "four"));
  it = db_->NewIterator(ReadOptions(), handles_[1]);
  it->SeekToFirst();
  ASSERT_EQ(IterStatus(it), "a->four");
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  it->Next();
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
  ASSERT_EQ(IterStatus(it), "b->bone");
  delete it;

  // Now exercise the reverse direction. At this point there are four
  // versions of "a" and one of "b"; the reseek counter is already at 1.
  int reseek_base =
      static_cast<int>(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION));

  // One more version of "b": Prev over it stays within the skip budget...
  ASSERT_OK(Put(1, "b", "btwo"));
  it = db_->NewIterator(ReadOptions(), handles_[1]);
  it->SeekToLast();
  ASSERT_EQ(IterStatus(it), "b->btwo");
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION),
            reseek_base);
  it->Prev();
  // ...but skipping the four versions of "a" does require a reseek.
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION),
            reseek_base + 1);
  ASSERT_EQ(IterStatus(it), "a->four");
  delete it;

  // Two more versions of "b": four versions each of "a" and "b".
  ASSERT_OK(Put(1, "b", "bthree"));
  ASSERT_OK(Put(1, "b", "bfour"));
  it = db_->NewIterator(ReadOptions(), handles_[1]);
  it->SeekToLast();
  ASSERT_EQ(IterStatus(it), "b->bfour");
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION),
            reseek_base + 2);
  it->Prev();

  // The Prev above must have triggered another reseek.
  ASSERT_EQ(TestGetTickerCount(opts, NUMBER_OF_RESEEKS_IN_ITERATION),
            reseek_base + 3);
  ASSERT_EQ(IterStatus(it), "a->four");
  delete it;
}
1437
1438
1
// Iteration must work over a mix of tiny values and 100KB values,
// forward and backward.
TEST_F(DBTest, IterSmallAndLargeMix) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "a", "va"));
    ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
    ASSERT_OK(Put(1, "c", "vc"));
    ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
    ASSERT_OK(Put(1, "e", std::string(100000, 'e')));

    Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);

    // Forward pass.
    it->SeekToFirst();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Next();
    ASSERT_EQ(IterStatus(it), "b->" + std::string(100000, 'b'));
    it->Next();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Next();
    ASSERT_EQ(IterStatus(it), "d->" + std::string(100000, 'd'));
    it->Next();
    ASSERT_EQ(IterStatus(it), "e->" + std::string(100000, 'e'));
    it->Next();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    // Backward pass.
    it->SeekToLast();
    ASSERT_EQ(IterStatus(it), "e->" + std::string(100000, 'e'));
    it->Prev();
    ASSERT_EQ(IterStatus(it), "d->" + std::string(100000, 'd'));
    it->Prev();
    ASSERT_EQ(IterStatus(it), "c->vc");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "b->" + std::string(100000, 'b'));
    it->Prev();
    ASSERT_EQ(IterStatus(it), "a->va");
    it->Prev();
    ASSERT_EQ(IterStatus(it), "(invalid)");

    delete it;
  } while (ChangeCompactOptions());
}
1478
1479
1
// A deleted key must be invisible to the iterator: seeking to "kc" and
// stepping backward must skip the deleted "kb" and land on "ka".
TEST_F(DBTest, IterMultiWithDelete) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "ka", "va"));
    ASSERT_OK(Put(1, "kb", "vb"));
    ASSERT_OK(Put(1, "kc", "vc"));
    ASSERT_OK(Delete(1, "kb"));
    ASSERT_EQ("NOT_FOUND", Get(1, "kb"));

    Iterator* it = db_->NewIterator(ReadOptions(), handles_[1]);
    it->Seek("kc");
    ASSERT_EQ(IterStatus(it), "kc->vc");
    if (!CurrentOptions().merge_operator) {
      // TODO: merge operator does not support backward iteration yet
      if (kPlainTableAllBytesPrefix != option_config_&&
          kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
          kHashLinkList != option_config_) {
        it->Prev();
        ASSERT_EQ(IterStatus(it), "ka->va");
      }
    }
    delete it;
  } while (ChangeOptions());
}
1503
1504
1
// After deleting the last key repeatedly, SeekToLast must keep finding the
// new last live key even though each key carries two versions (written in
// the loop below) plus a tombstone.
TEST_F(DBTest, IterPrevMaxSkip) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Write each key twice so every user key has multiple versions.
    for (int round = 0; round < 2; round++) {
      ASSERT_OK(Put(1, "key1", "v1"));
      ASSERT_OK(Put(1, "key2", "v2"));
      ASSERT_OK(Put(1, "key3", "v3"));
      ASSERT_OK(Put(1, "key4", "v4"));
      ASSERT_OK(Put(1, "key5", "v5"));
    }

    VerifyIterLast("key5->v5", 1);

    ASSERT_OK(Delete(1, "key5"));
    VerifyIterLast("key4->v4", 1);

    ASSERT_OK(Delete(1, "key4"));
    VerifyIterLast("key3->v3", 1);

    ASSERT_OK(Delete(1, "key3"));
    VerifyIterLast("key2->v2", 1);

    ASSERT_OK(Delete(1, "key2"));
    VerifyIterLast("key1->v1", 1);

    ASSERT_OK(Delete(1, "key1"));
    VerifyIterLast("(invalid)", 1);
  } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
}
1533
1534
1
// An iterator created with an explicit snapshot must ignore keys written
// after the snapshot was taken, in both scan directions.
TEST_F(DBTest, IterWithSnapshot) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
    ASSERT_OK(Put(1, "key1", "val1"));
    ASSERT_OK(Put(1, "key2", "val2"));
    ASSERT_OK(Put(1, "key3", "val3"));
    ASSERT_OK(Put(1, "key4", "val4"));
    ASSERT_OK(Put(1, "key5", "val5"));

    const Snapshot* snap = db_->GetSnapshot();
    ReadOptions read_opts;
    read_opts.snapshot = snap;
    Iterator* it = db_->NewIterator(read_opts, handles_[1]);

    // These writes are newer than the snapshot and must stay invisible.
    ASSERT_OK(Put(1, "key100", "val100"));
    ASSERT_OK(Put(1, "key101", "val101"));

    it->Seek("key5");
    ASSERT_EQ(IterStatus(it), "key5->val5");
    if (!CurrentOptions().merge_operator) {
      // TODO: merge operator does not support backward iteration yet
      if (kPlainTableAllBytesPrefix != option_config_&&
        kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
        kHashLinkList != option_config_) {
        it->Prev();
        ASSERT_EQ(IterStatus(it), "key4->val4");
        it->Prev();
        ASSERT_EQ(IterStatus(it), "key3->val3");

        it->Next();
        ASSERT_EQ(IterStatus(it), "key4->val4");
        it->Next();
        ASSERT_EQ(IterStatus(it), "key5->val5");
      }
      it->Next();
      ASSERT_TRUE(!it->Valid());
    }
    db_->ReleaseSnapshot(snap);
    delete it;
  } while (ChangeOptions());
}
1578
1579
1
// Data written before a reopen must survive recovery, across two
// consecutive reopen cycles with additional writes in between.
TEST_F(DBTest, Recover) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "baz", "v5"));

    // First recovery cycle.
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_EQ("v1", Get(1, "foo"));

    ASSERT_EQ("v1", Get(1, "foo"));
    ASSERT_EQ("v5", Get(1, "baz"));
    ASSERT_OK(Put(1, "bar", "v2"));
    ASSERT_OK(Put(1, "foo", "v3"));

    // Second recovery cycle: old and new data must both be present.
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
    ASSERT_EQ("v3", Get(1, "foo"));
    ASSERT_OK(Put(1, "foo", "v4"));
    ASSERT_EQ("v4", Get(1, "foo"));
    ASSERT_EQ("v2", Get(1, "bar"));
    ASSERT_EQ("v5", Get(1, "baz"));
  } while (ChangeOptions());
}
1601
1602
1
// After recovery, table reader handles must be pre-loaded for every file
// only when max_open_files is unlimited (kInfiniteMaxOpenFiles); otherwise
// readers are opened lazily and the handles stay null.
TEST_F(DBTest, RecoverWithTableHandle) {
  do {
    Options opts;
    opts.create_if_missing = true;
    opts.write_buffer_size = 100;
    opts.disable_auto_compactions = true;
    opts = CurrentOptions(opts);
    DestroyAndReopen(opts);
    CreateAndReopenWithCF({"pikachu"}, opts);

    // Produce two flushed files plus a memtable that flushes on reopen.
    ASSERT_OK(Put(1, "foo", "v1"));
    ASSERT_OK(Put(1, "bar", "v2"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "foo", "v3"));
    ASSERT_OK(Put(1, "bar", "v4"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "big", std::string(100, 'a')));
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());

    std::vector<std::vector<FileMetaData>> files;
    dbfull()->TEST_GetFilesMetaData(handles_[1], &files);
    size_t file_count = 0;
    for (const auto& level_files : files) {
      file_count += level_files.size();
    }
    ASSERT_EQ(file_count, 3);
    for (const auto& level_files : files) {
      for (const auto& meta : level_files) {
        if (kInfiniteMaxOpenFiles == option_config_) {
          ASSERT_TRUE(meta.table_reader_handle != nullptr);
        } else {
          ASSERT_TRUE(meta.table_reader_handle == nullptr);
        }
      }
    }
  } while (ChangeOptions());
}
1639
1640
1
TEST_F(DBTest, IgnoreRecoveredLog) {
1641
1
  std::string backup_logs = dbname_ + "/backup_logs";
1642
1643
  // delete old files in backup_logs directory
1644
1
  ASSERT_OK(env_->CreateDirIfMissing(backup_logs));
1645
1
  std::vector<std::string> old_files;
1646
1
  ASSERT_OK(env_->GetChildren(backup_logs, &old_files));
1647
2
  for (auto& file : old_files) {
1648
2
    if (file != "." && file != "..") {
1649
0
      ASSERT_OK(env_->DeleteFile(backup_logs + "/" + file));
1650
0
    }
1651
2
  }
1652
1653
30
  do {
1654
30
    Options options = CurrentOptions();
1655
30
    options.create_if_missing = true;
1656
30
    options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1657
30
    options.wal_dir = dbname_ + "/logs";
1658
30
    DestroyAndReopen(options);
1659
1660
    // fill up the DB
1661
30
    std::string one, two;
1662
30
    PutFixed64(&one, 1);
1663
30
    PutFixed64(&two, 2);
1664
30
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
1665
30
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("foo"), Slice(one)));
1666
30
    ASSERT_OK(db_->Merge(WriteOptions(), Slice("bar"), Slice(one)));
1667
1668
    // copy the logs to backup
1669
30
    std::vector<std::string> logs;
1670
30
    ASSERT_OK(env_->GetChildren(options.wal_dir, &logs));
1671
90
    for (auto& log : logs) {
1672
90
      if (log != ".." && log != ".") {
1673
30
        CopyFile(options.wal_dir + "/" + log, backup_logs + "/" + log);
1674
30
      }
1675
90
    }
1676
1677
    // recover the DB
1678
30
    Reopen(options);
1679
30
    ASSERT_EQ(two, Get("foo"));
1680
30
    ASSERT_EQ(one, Get("bar"));
1681
30
    Close();
1682
1683
    // copy the logs from backup back to wal dir
1684
90
    for (auto& log : logs) {
1685
90
      if (log != ".." && log != ".") {
1686
30
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
1687
30
      }
1688
90
    }
1689
    // this should ignore the log files, recovery should not happen again
1690
    // if the recovery happens, the same merge operator would be called twice,
1691
    // leading to incorrect results
1692
30
    Reopen(options);
1693
30
    ASSERT_EQ(two, Get("foo"));
1694
30
    ASSERT_EQ(one, Get("bar"));
1695
30
    Close();
1696
30
    Destroy(options);
1697
30
    Reopen(options);
1698
30
    Close();
1699
1700
    // copy the logs from backup back to wal dir
1701
30
    ASSERT_OK(env_->CreateDirIfMissing(options.wal_dir));
1702
90
    for (auto& log : logs) {
1703
90
      if (log != ".." && log != ".") {
1704
30
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
1705
30
      }
1706
90
    }
1707
    // assert that we successfully recovered only from logs, even though we
1708
    // destroyed the DB
1709
30
    Reopen(options);
1710
30
    ASSERT_EQ(two, Get("foo"));
1711
30
    ASSERT_EQ(one, Get("bar"));
1712
1713
    // Recovery will fail if DB directory doesn't exist.
1714
30
    Destroy(options);
1715
    // copy the logs from backup back to wal dir
1716
30
    ASSERT_OK(env_->CreateDirIfMissing(options.wal_dir));
1717
90
    for (auto& log : logs) {
1718
90
      if (log != ".." && log != ".") {
1719
30
        CopyFile(backup_logs + "/" + log, options.wal_dir + "/" + log);
1720
        // we won't be needing this file no more
1721
30
        ASSERT_OK(env_->DeleteFile(backup_logs + "/" + log));
1722
30
      }
1723
90
    }
1724
30
    Status s = TryReopen(options);
1725
30
    ASSERT_TRUE(!s.ok());
1726
30
  } while (ChangeOptions());
1727
1
}
1728
1729
1
TEST_F(DBTest, CheckLock) {
1730
5
  do {
1731
5
    DB* localdb;
1732
5
    Options options = CurrentOptions();
1733
5
    ASSERT_OK(TryReopen(options));
1734
1735
    // second open should fail
1736
5
    ASSERT_TRUE(!(DB::Open(options, dbname_, &localdb)).ok());
1737
5
  } while (ChangeCompactOptions());
1738
1
}
1739
1740
1
TEST_F(DBTest, FlushMultipleMemtable) {
1741
5
  do {
1742
5
    Options options = CurrentOptions();
1743
5
    WriteOptions writeOpt = WriteOptions();
1744
5
    writeOpt.disableWAL = true;
1745
5
    options.max_write_buffer_number = 4;
1746
5
    options.min_write_buffer_number_to_merge = 3;
1747
5
    options.max_write_buffer_number_to_maintain = -1;
1748
5
    CreateAndReopenWithCF({"pikachu"}, options);
1749
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
1750
5
    ASSERT_OK(Flush(1));
1751
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
1752
1753
5
    ASSERT_EQ("v1", Get(1, "foo"));
1754
5
    ASSERT_EQ("v1", Get(1, "bar"));
1755
5
    ASSERT_OK(Flush(1));
1756
5
  } while (ChangeCompactOptions());
1757
1
}
1758
1759
1
TEST_F(DBTest, FlushEmptyColumnFamily) {
1760
  // Block flush thread and disable compaction thread
1761
1
  env_->SetBackgroundThreads(1, Env::HIGH);
1762
1
  env_->SetBackgroundThreads(1, Env::LOW);
1763
1
  test::SleepingBackgroundTask sleeping_task_low;
1764
1
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
1765
1
                 Env::Priority::LOW);
1766
1
  test::SleepingBackgroundTask sleeping_task_high;
1767
1
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
1768
1
                 &sleeping_task_high, Env::Priority::HIGH);
1769
1770
1
  Options options = CurrentOptions();
1771
  // disable compaction
1772
1
  options.disable_auto_compactions = true;
1773
1
  WriteOptions writeOpt = WriteOptions();
1774
1
  writeOpt.disableWAL = true;
1775
1
  options.max_write_buffer_number = 2;
1776
1
  options.min_write_buffer_number_to_merge = 1;
1777
1
  options.max_write_buffer_number_to_maintain = 1;
1778
1
  CreateAndReopenWithCF({"pikachu"}, options);
1779
1780
  // Compaction can still go through even if no thread can flush the
1781
  // mem table.
1782
1
  ASSERT_OK(Flush(0));
1783
1
  ASSERT_OK(Flush(1));
1784
1785
  // Insert can go through
1786
1
  ASSERT_OK(dbfull()->Put(writeOpt, handles_[0], "foo", "v1"));
1787
1
  ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
1788
1789
1
  ASSERT_EQ("v1", Get(0, "foo"));
1790
1
  ASSERT_EQ("v1", Get(1, "bar"));
1791
1792
1
  sleeping_task_high.WakeUp();
1793
1
  sleeping_task_high.WaitUntilDone();
1794
1795
  // Flush can still go through.
1796
1
  ASSERT_OK(Flush(0));
1797
1
  ASSERT_OK(Flush(1));
1798
1799
1
  sleeping_task_low.WakeUp();
1800
1
  sleeping_task_low.WaitUntilDone();
1801
1
}
1802
1803
1
TEST_F(DBTest, FLUSH) {
1804
5
  do {
1805
5
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
1806
5
    WriteOptions writeOpt = WriteOptions();
1807
5
    writeOpt.disableWAL = true;
1808
5
    SetPerfLevel(PerfLevel::kEnableTime);;
1809
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v1"));
1810
    // this will now also flush the last 2 writes
1811
5
    ASSERT_OK(Flush(1));
1812
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v1"));
1813
1814
5
    perf_context.Reset();
1815
5
    Get(1, "foo");
1816
5
    ASSERT_GT(perf_context.get_from_output_files_time, 0);
1817
1818
5
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1819
5
    ASSERT_EQ("v1", Get(1, "foo"));
1820
5
    ASSERT_EQ("v1", Get(1, "bar"));
1821
1822
5
    writeOpt.disableWAL = true;
1823
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v2"));
1824
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v2"));
1825
5
    ASSERT_OK(Flush(1));
1826
1827
5
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1828
5
    ASSERT_EQ("v2", Get(1, "bar"));
1829
5
    perf_context.Reset();
1830
5
    ASSERT_EQ("v2", Get(1, "foo"));
1831
5
    ASSERT_GT(perf_context.get_from_output_files_time, 0);
1832
1833
5
    writeOpt.disableWAL = false;
1834
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "bar", "v3"));
1835
5
    ASSERT_OK(dbfull()->Put(writeOpt, handles_[1], "foo", "v3"));
1836
5
    ASSERT_OK(Flush(1));
1837
1838
5
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1839
    // 'foo' should be there because its put
1840
    // has WAL enabled.
1841
5
    ASSERT_EQ("v3", Get(1, "foo"));
1842
5
    ASSERT_EQ("v3", Get(1, "bar"));
1843
1844
5
    SetPerfLevel(PerfLevel::kDisable);
1845
5
  } while (ChangeCompactOptions());
1846
1
}
1847
1848
1
TEST_F(DBTest, RecoveryWithEmptyLog) {
1849
30
  do {
1850
30
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
1851
30
    ASSERT_OK(Put(1, "foo", "v1"));
1852
30
    ASSERT_OK(Put(1, "foo", "v2"));
1853
30
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1854
30
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1855
30
    ASSERT_OK(Put(1, "foo", "v3"));
1856
30
    ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
1857
30
    ASSERT_EQ("v3", Get(1, "foo"));
1858
30
  } while (ChangeOptions());
1859
1
}
1860
1861
#ifndef ROCKSDB_LITE
1862
1
TEST_F(DBTest, FlushSchedule) {
1863
1
  Options options = CurrentOptions();
1864
1
  options.disable_auto_compactions = true;
1865
1
  options.level0_stop_writes_trigger = 1 << 10;
1866
1
  options.level0_slowdown_writes_trigger = 1 << 10;
1867
1
  options.min_write_buffer_number_to_merge = 1;
1868
1
  options.max_write_buffer_number_to_maintain = 1;
1869
1
  options.max_write_buffer_number = 2;
1870
1
  options.write_buffer_size = 120 * 1024;
1871
1
  CreateAndReopenWithCF({"pikachu"}, options);
1872
1
  std::vector<std::thread> threads;
1873
1874
1
  std::atomic<int> thread_num(0);
1875
  // each column family will have 5 thread, each thread generating 2 memtables.
1876
  // each column family should end up with 10 table files
1877
10
  std::function<void()> fill_memtable_func = [&]() {
1878
10
    int a = thread_num.fetch_add(1);
1879
10
    Random rnd(a);
1880
10
    WriteOptions wo;
1881
    // this should fill up 2 memtables
1882
49.8k
    for (int k = 0; k < 5000; ++k) {
1883
49.8k
      ASSERT_OK(db_->Put(wo, handles_[a & 1], RandomString(&rnd, 13), ""));
1884
49.8k
    }
1885
10
  };
1886
1887
11
  for (int i = 0; i < 10; ++i) {
1888
10
    threads.emplace_back(fill_memtable_func);
1889
10
  }
1890
1891
10
  for (auto& t : threads) {
1892
10
    t.join();
1893
10
  }
1894
1895
1
  auto default_tables = GetNumberOfSstFilesForColumnFamily(db_, "default");
1896
1
  auto pikachu_tables = GetNumberOfSstFilesForColumnFamily(db_, "pikachu");
1897
1
  ASSERT_LE(default_tables, static_cast<uint64_t>(10));
1898
1
  ASSERT_GT(default_tables, static_cast<uint64_t>(0));
1899
1
  ASSERT_LE(pikachu_tables, static_cast<uint64_t>(10));
1900
1
  ASSERT_GT(pikachu_tables, static_cast<uint64_t>(0));
1901
1
}
1902
#endif  // ROCKSDB_LITE
1903
1904
1
TEST_F(DBTest, ManifestRollOver) {
1905
5
  do {
1906
5
    Options options;
1907
5
    options.max_manifest_file_size = 10; // 10 bytes
1908
5
    options = CurrentOptions(options);
1909
5
    CreateAndReopenWithCF({"pikachu"}, options);
1910
5
    {
1911
5
      ASSERT_OK(Put(1, "manifest_key1", std::string(1000, '1')));
1912
5
      ASSERT_OK(Put(1, "manifest_key2", std::string(1000, '2')));
1913
5
      ASSERT_OK(Put(1, "manifest_key3", std::string(1000, '3')));
1914
5
      uint64_t manifest_before_flush = dbfull()->TEST_Current_Manifest_FileNo();
1915
5
      ASSERT_OK(Flush(1));  // This should trigger LogAndApply.
1916
5
      uint64_t manifest_after_flush = dbfull()->TEST_Current_Manifest_FileNo();
1917
5
      ASSERT_GT(manifest_after_flush, manifest_before_flush);
1918
5
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
1919
5
      ASSERT_GT(dbfull()->TEST_Current_Manifest_FileNo(), manifest_after_flush);
1920
      // check if a new manifest file got inserted or not.
1921
5
      ASSERT_EQ(std::string(1000, '1'), Get(1, "manifest_key1"));
1922
5
      ASSERT_EQ(std::string(1000, '2'), Get(1, "manifest_key2"));
1923
5
      ASSERT_EQ(std::string(1000, '3'), Get(1, "manifest_key3"));
1924
5
    }
1925
5
  } while (ChangeCompactOptions());
1926
1
}
1927
1928
1
TEST_F(DBTest, IdentityAcrossRestarts) {
1929
5
  do {
1930
5
    std::string id1;
1931
5
    ASSERT_OK(db_->GetDbIdentity(&id1));
1932
1933
5
    Options options = CurrentOptions();
1934
5
    Reopen(options);
1935
5
    std::string id2;
1936
5
    ASSERT_OK(db_->GetDbIdentity(&id2));
1937
    // id1 should match id2 because identity was not regenerated
1938
5
    ASSERT_EQ(id1.compare(id2), 0);
1939
1940
5
    std::string idfilename = IdentityFileName(dbname_);
1941
5
    ASSERT_OK(env_->DeleteFile(idfilename));
1942
5
    Reopen(options);
1943
5
    std::string id3;
1944
5
    ASSERT_OK(db_->GetDbIdentity(&id3));
1945
    // id1 should NOT match id3 because identity was regenerated
1946
5
    ASSERT_NE(id1.compare(id3), 0);
1947
5
  } while (ChangeCompactOptions());
1948
1
}
1949
1950
#ifndef ROCKSDB_LITE
1951
1
TEST_F(DBTest, RecoverWithLargeLog) {
1952
5
  do {
1953
5
    {
1954
5
      Options options = CurrentOptions();
1955
5
      CreateAndReopenWithCF({"pikachu"}, options);
1956
5
      ASSERT_OK(Put(1, "big1", std::string(200000, '1')));
1957
5
      ASSERT_OK(Put(1, "big2", std::string(200000, '2')));
1958
5
      ASSERT_OK(Put(1, "small3", std::string(10, '3')));
1959
5
      ASSERT_OK(Put(1, "small4", std::string(10, '4')));
1960
5
      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
1961
5
    }
1962
1963
    // Make sure that if we re-open with a small write buffer size that
1964
    // we flush table files in the middle of a large log file.
1965
5
    Options options;
1966
5
    options.write_buffer_size = 100000;
1967
5
    options = CurrentOptions(options);
1968
5
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
1969
5
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 3);
1970
5
    ASSERT_EQ(std::string(200000, '1'), Get(1, "big1"));
1971
5
    ASSERT_EQ(std::string(200000, '2'), Get(1, "big2"));
1972
5
    ASSERT_EQ(std::string(10, '3'), Get(1, "small3"));
1973
5
    ASSERT_EQ(std::string(10, '4'), Get(1, "small4"));
1974
5
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 1);
1975
5
  } while (ChangeCompactOptions());
1976
1
}
1977
#endif  // ROCKSDB_LITE
1978
1979
namespace {
1980
class KeepFilter : public CompactionFilter {
1981
 public:
1982
  FilterDecision Filter(int level, const Slice& key, const Slice& value,
1983
0
                        std::string* new_value, bool* value_changed) override {
1984
0
    return FilterDecision::kKeep;
1985
0
  }
1986
1987
0
  const char* Name() const override { return "KeepFilter"; }
1988
};
1989
1990
class KeepFilterFactory : public CompactionFilterFactory {
1991
 public:
1992
  explicit KeepFilterFactory(bool check_context = false)
1993
4
      : check_context_(check_context) {}
1994
1995
  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
1996
1.80k
      const CompactionFilter::Context& context) override {
1997
1.80k
    if (check_context_) {
1998
0
      EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction);
1999
0
      EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction);
2000
0
    }
2001
1.80k
    return std::unique_ptr<CompactionFilter>(new KeepFilter());
2002
1.80k
  }
2003
2004
12
  const char* Name() const override { return "KeepFilterFactory"; }
2005
  bool check_context_;
2006
  std::atomic_bool expect_full_compaction_;
2007
  std::atomic_bool expect_manual_compaction_;
2008
};
2009
2010
class DelayFilter : public CompactionFilter {
2011
 public:
2012
4
  explicit DelayFilter(DBTestBase* d) : db_test(d) {}
2013
  FilterDecision Filter(int level, const Slice& key, const Slice& value,
2014
                        std::string* new_value,
2015
184
                        bool* value_changed) override {
2016
184
    db_test->env_->addon_time_.fetch_add(1000);
2017
184
    return FilterDecision::kDiscard;
2018
184
  }
2019
2020
0
  const char* Name() const override { return "DelayFilter"; }
2021
2022
 private:
2023
  DBTestBase* db_test;
2024
};
2025
2026
class DelayFilterFactory : public CompactionFilterFactory {
2027
 public:
2028
4
  explicit DelayFilterFactory(DBTestBase* d) : db_test(d) {}
2029
  virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
2030
4
      const CompactionFilter::Context& context) override {
2031
4
    return std::unique_ptr<CompactionFilter>(new DelayFilter(db_test));
2032
4
  }
2033
2034
24
  const char* Name() const override { return "DelayFilterFactory"; }
2035
2036
 private:
2037
  DBTestBase* db_test;
2038
};
2039
}  // namespace
2040
2041
#ifndef ROCKSDB_LITE
2042
1
TEST_F(DBTest, CompressedCache) {
2043
1
  if (!Snappy_Supported()) {
2044
0
    return;
2045
0
  }
2046
1
  int num_iter = 80;
2047
2048
  // Run this test three iterations.
2049
  // Iteration 1: only a uncompressed block cache
2050
  // Iteration 2: only a compressed block cache
2051
  // Iteration 3: both block cache and compressed cache
2052
  // Iteration 4: both block cache and compressed cache, but DB is not
2053
  // compressed
2054
5
  for (int iter = 0; iter < 4; iter++) {
2055
4
    Options options;
2056
4
    options.write_buffer_size = 64*1024;        // small write buffer
2057
4
    options.statistics = rocksdb::CreateDBStatisticsForTests();
2058
4
    options = CurrentOptions(options);
2059
2060
4
    BlockBasedTableOptions table_options;
2061
4
    switch (iter) {
2062
1
      case 0:
2063
        // only uncompressed block cache
2064
1
        table_options.block_cache = NewLRUCache(8*1024);
2065
1
        table_options.block_cache_compressed = nullptr;
2066
1
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2067
1
        break;
2068
1
      case 1:
2069
        // no block cache, only compressed cache
2070
1
        table_options.no_block_cache = true;
2071
1
        table_options.block_cache = nullptr;
2072
1
        table_options.block_cache_compressed = NewLRUCache(8*1024);
2073
1
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2074
1
        break;
2075
1
      case 2:
2076
        // both compressed and uncompressed block cache
2077
1
        table_options.block_cache = NewLRUCache(1024);
2078
1
        table_options.block_cache_compressed = NewLRUCache(8*1024);
2079
1
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2080
1
        break;
2081
1
      case 3:
2082
        // both block cache and compressed cache, but DB is not compressed
2083
        // also, make block cache sizes bigger, to trigger block cache hits
2084
1
        table_options.block_cache = NewLRUCache(1024 * 1024);
2085
1
        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
2086
1
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2087
1
        options.compression = kNoCompression;
2088
1
        break;
2089
0
      default:
2090
0
        ASSERT_TRUE(false);
2091
4
    }
2092
4
    CreateAndReopenWithCF({"pikachu"}, options);
2093
    // default column family doesn't have block cache
2094
4
    Options no_block_cache_opts;
2095
4
    no_block_cache_opts.statistics = options.statistics;
2096
4
    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
2097
4
    BlockBasedTableOptions table_options_no_bc;
2098
4
    table_options_no_bc.no_block_cache = true;
2099
4
    no_block_cache_opts.table_factory.reset(
2100
4
        NewBlockBasedTableFactory(table_options_no_bc));
2101
4
    ReopenWithColumnFamilies({"default", "pikachu"},
2102
4
        std::vector<Options>({no_block_cache_opts, options}));
2103
2104
4
    Random rnd(301);
2105
2106
    // Write 8MB (80 values, each 100K)
2107
4
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
2108
4
    std::vector<std::string> values;
2109
4
    std::string str;
2110
324
    for (int i = 0; i < num_iter; i++) {
2111
320
      if (i % 4 == 0) {        // high compression ratio
2112
80
        str = RandomString(&rnd, 1000);
2113
80
      }
2114
320
      values.push_back(str);
2115
320
      ASSERT_OK(Put(1, Key(i), values[i]));
2116
320
    }
2117
2118
    // flush all data from memtable so that reads are from block cache
2119
4
    ASSERT_OK(Flush(1));
2120
2121
324
    for (int i = 0; i < num_iter; i++) {
2122
320
      ASSERT_EQ(Get(1, Key(i)), values[i]);
2123
320
    }
2124
2125
    // check that we triggered the appropriate code paths in the cache
2126
4
    switch (iter) {
2127
1
      case 0:
2128
        // only uncompressed block cache
2129
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
2130
1
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
2131
1
        break;
2132
1
      case 1:
2133
        // no block cache, only compressed cache
2134
1
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
2135
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
2136
1
        break;
2137
1
      case 2:
2138
        // both compressed and uncompressed block cache
2139
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
2140
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
2141
1
        break;
2142
1
      case 3:
2143
        // both compressed and uncompressed block cache
2144
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
2145
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
2146
1
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_SINGLE_TOUCH_HIT) +
2147
1
                  TestGetTickerCount(options, BLOCK_CACHE_MULTI_TOUCH_HIT),
2148
1
                  TestGetTickerCount(options, BLOCK_CACHE_HIT));
2149
1
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
2150
        // compressed doesn't have any hits since blocks are not compressed on
2151
        // storage
2152
1
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
2153
1
        break;
2154
0
      default:
2155
0
        ASSERT_TRUE(false);
2156
4
    }
2157
2158
4
    options.create_if_missing = true;
2159
4
    DestroyAndReopen(options);
2160
4
  }
2161
1
}
2162
2163
119
static std::string CompressibleString(Random* rnd, int len) {
2164
119
  std::string r;
2165
119
  CompressibleString(rnd, 0.8, len, &r);
2166
119
  return r;
2167
119
}
2168
#endif  // ROCKSDB_LITE
2169
2170
1
TEST_F(DBTest, FailMoreDbPaths) {
2171
1
  Options options = CurrentOptions();
2172
1
  options.db_paths.emplace_back(dbname_, 10000000);
2173
1
  options.db_paths.emplace_back(dbname_ + "_2", 1000000);
2174
1
  options.db_paths.emplace_back(dbname_ + "_3", 1000000);
2175
1
  options.db_paths.emplace_back(dbname_ + "_4", 1000000);
2176
1
  options.db_paths.emplace_back(dbname_ + "_5", 1000000);
2177
1
  ASSERT_TRUE(TryReopen(options).IsNotSupported());
2178
1
}
2179
2180
100
void CheckColumnFamilyMeta(const ColumnFamilyMetaData& cf_meta) {
2181
100
  uint64_t cf_size = 0;
2182
100
  uint64_t cf_csize = 0;
2183
100
  size_t file_count = 0;
2184
700
  for (auto level_meta : cf_meta.levels) {
2185
700
    uint64_t level_size = 0;
2186
700
    uint64_t level_csize = 0;
2187
700
    file_count += level_meta.files.size();
2188
0
    for (auto file_meta : level_meta.files) {
2189
0
      level_size += file_meta.total_size;
2190
0
    }
2191
700
    ASSERT_EQ(level_meta.size, level_size);
2192
700
    cf_size += level_size;
2193
700
    cf_csize += level_csize;
2194
700
  }
2195
100
  ASSERT_EQ(cf_meta.file_count, file_count);
2196
100
  ASSERT_EQ(cf_meta.size, cf_size);
2197
100
}
2198
2199
#ifndef ROCKSDB_LITE
2200
1
TEST_F(DBTest, ColumnFamilyMetaDataTest) {
2201
1
  Options options = CurrentOptions();
2202
1
  options.create_if_missing = true;
2203
1
  DestroyAndReopen(options);
2204
2205
1
  Random rnd(301);
2206
1
  int key_index = 0;
2207
1
  ColumnFamilyMetaData cf_meta;
2208
101
  for (int i = 0; i < 100; ++i) {
2209
100
    GenerateNewFile(&rnd, &key_index);
2210
100
    db_->GetColumnFamilyMetaData(&cf_meta);
2211
100
    CheckColumnFamilyMeta(cf_meta);
2212
100
  }
2213
1
}
2214
2215
namespace {
2216
4
void MinLevelHelper(DBTest* self, const Options& options) {
2217
4
  Random rnd(301);
2218
2219
4
  for (int num = 0;
2220
12
    num < options.level0_file_num_compaction_trigger - 1;
2221
8
    num++) {
2222
8
    std::vector<std::string> values;
2223
    // Write 120KB (12 values, each 10K)
2224
104
    for (int i = 0; i < 12; i++) {
2225
96
      values.push_back(RandomString(&rnd, 10000));
2226
96
      ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
2227
96
    }
2228
8
    ASSERT_OK(self->dbfull()->TEST_WaitForFlushMemTable());
2229
8
    ASSERT_EQ(self->NumTableFilesAtLevel(0), num + 1);
2230
8
  }
2231
2232
  // generate one more file in level-0, and should trigger level-0 compaction
2233
4
  std::vector<std::string> values;
2234
52
  for (int i = 0; i < 12; i++) {
2235
48
    values.push_back(RandomString(&rnd, 10000));
2236
48
    ASSERT_OK(self->Put(DBTestBase::Key(i), values[i]));
2237
48
  }
2238
4
  ASSERT_OK(self->dbfull()->TEST_WaitForCompact());
2239
2240
4
  ASSERT_EQ(self->NumTableFilesAtLevel(0), 0);
2241
4
  ASSERT_EQ(self->NumTableFilesAtLevel(1), 1);
2242
4
}
2243
2244
// returns false if the calling-Test should be skipped
2245
bool MinLevelToCompress(int wbits, int lev, int strategy,
2246
2
    CompressionType* type, Options* options) {
2247
2
  fprintf(stderr,
2248
2
      "Test with compression options : window_bits = %d, level =  %d, strategy = %d}\n",
2249
2
      wbits, lev, strategy);
2250
2
  options->write_buffer_size = 100 << 10; // 100KB
2251
2
  options->arena_block_size = 4096;
2252
2
  options->num_levels = 3;
2253
2
  options->level0_file_num_compaction_trigger = 3;
2254
2
  options->create_if_missing = true;
2255
2256
2
  if (Snappy_Supported()) {
2257
2
    *type = kSnappyCompression;
2258
2
    fprintf(stderr, "using snappy\n");
2259
0
  } else if (Zlib_Supported()) {
2260
0
    *type = kZlibCompression;
2261
0
    fprintf(stderr, "using zlib\n");
2262
0
  } else if (BZip2_Supported()) {
2263
0
    *type = kBZip2Compression;
2264
0
    fprintf(stderr, "using bzip2\n");
2265
0
  } else if (LZ4_Supported()) {
2266
0
    *type = kLZ4Compression;
2267
0
    fprintf(stderr, "using lz4\n");
2268
0
  } else {
2269
0
    fprintf(stderr, "skipping test, compression disabled\n");
2270
0
    return false;
2271
0
  }
2272
2
  options->compression_per_level.resize(options->num_levels);
2273
2274
  // do not compress L0
2275
4
  for (int i = 0; i < 1; i++) {
2276
2
    options->compression_per_level[i] = kNoCompression;
2277
2
  }
2278
6
  for (int i = 1; i < options->num_levels; i++) {
2279
4
    options->compression_per_level[i] = *type;
2280
4
  }
2281
2
  return true;
2282
2
}
2283
}  // namespace
2284
2285
1
TEST_F(DBTest, MinLevelToCompress1) {
2286
1
  Options options = CurrentOptions();
2287
1
  CompressionType type = kSnappyCompression;
2288
1
  if (!MinLevelToCompress(-14, -1, 0, &type, &options)) {
2289
0
    return;
2290
0
  }
2291
1
  Reopen(options);
2292
1
  MinLevelHelper(this, options);
2293
2294
  // do not compress L0 and L1
2295
3
  for (int i = 0; i < 2; i++) {
2296
2
    options.compression_per_level[i] = kNoCompression;
2297
2
  }
2298
2
  for (int i = 2; i < options.num_levels; i++) {
2299
1
    options.compression_per_level[i] = type;
2300
1
  }
2301
1
  DestroyAndReopen(options);
2302
1
  MinLevelHelper(this, options);
2303
1
}
2304
2305
1
TEST_F(DBTest, MinLevelToCompress2) {
2306
1
  Options options = CurrentOptions();
2307
1
  CompressionType type = kSnappyCompression;
2308
1
  if (!MinLevelToCompress(15, -1, 0, &type, &options)) {
2309
0
    return;
2310
0
  }
2311
1
  Reopen(options);
2312
1
  MinLevelHelper(this, options);
2313
2314
  // do not compress L0 and L1
2315
3
  for (int i = 0; i < 2; i++) {
2316
2
    options.compression_per_level[i] = kNoCompression;
2317
2
  }
2318
2
  for (int i = 2; i < options.num_levels; i++) {
2319
1
    options.compression_per_level[i] = type;
2320
1
  }
2321
1
  DestroyAndReopen(options);
2322
1
  MinLevelHelper(this, options);
2323
1
}
2324
2325
1
TEST_F(DBTest, RepeatedWritesToSameKey) {
2326
5
  do {
2327
5
    Options options;
2328
5
    options.env = env_;
2329
5
    options.write_buffer_size = 100000;  // Small write buffer
2330
5
    options = CurrentOptions(options);
2331
5
    CreateAndReopenWithCF({"pikachu"}, options);
2332
2333
    // We must have at most one file per level except for level-0,
2334
    // which may have up to kL0_StopWritesTrigger files.
2335
5
    const int kMaxFiles =
2336
5
        options.num_levels + options.level0_stop_writes_trigger;
2337
2338
5
    Random rnd(301);
2339
5
    std::string value =
2340
5
        RandomString(&rnd, static_cast<int>(2 * options.write_buffer_size));
2341
760
    for (int i = 0; i < 5 * kMaxFiles; i++) {
2342
755
      ASSERT_OK(Put(1, "key", value));
2343
755
      ASSERT_LE(TotalTableFiles(1), kMaxFiles);
2344
755
    }
2345
5
  } while (ChangeCompactOptions());
2346
1
}
2347
#endif  // ROCKSDB_LITE
2348
2349
1
TEST_F(DBTest, SparseMerge) {
2350
5
  do {
2351
5
    Options options = CurrentOptions();
2352
5
    options.compression = kNoCompression;
2353
5
    CreateAndReopenWithCF({"pikachu"}, options);
2354
2355
5
    FillLevels("A", "Z", 1);
2356
2357
    // Suppose there is:
2358
    //    small amount of data with prefix A
2359
    //    large amount of data with prefix B
2360
    //    small amount of data with prefix C
2361
    // and that recent updates have made small changes to all three prefixes.
2362
    // Check that we do not do a compaction that merges all of B in one shot.
2363
5
    const std::string value(1000, 'x');
2364
5
    ASSERT_OK(Put(1, "A", "va"));
2365
    // Write approximately 100MB of "B" values
2366
500k
    for (int i = 0; i < 100000; i++) {
2367
500k
      char key[100];
2368
500k
      snprintf(key, sizeof(key), "B%010d", i);
2369
500k
      ASSERT_OK(Put(1, key, value));
2370
500k
    }
2371
5
    ASSERT_OK(Put(1, "C", "vc"));
2372
5
    ASSERT_OK(Flush(1));
2373
5
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
2374
2375
    // Make sparse update
2376
5
    ASSERT_OK(Put(1, "A", "va2"));
2377
5
    ASSERT_OK(Put(1, "B100", "bvalue2"));
2378
5
    ASSERT_OK(Put(1, "C", "vc2"));
2379
5
    ASSERT_OK(Flush(1));
2380
2381
    // Compactions should not cause us to create a situation where
2382
    // a file overlaps too much data at the next level.
2383
5
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
2384
5
              20 * 1048576);
2385
5
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
2386
5
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
2387
5
              20 * 1048576);
2388
5
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr));
2389
5
    ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(handles_[1]),
2390
5
              20 * 1048576);
2391
5
  } while (ChangeCompactOptions());
2392
1
}
2393
2394
#ifndef ROCKSDB_LITE
2395
13.9k
// Returns true iff val lies in the inclusive range [low, high]; on failure
// logs the offending value and bounds to stderr to aid test debugging.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  if (val >= low && val <= high) {
    return true;
  }
  // Out of range: report before returning so ASSERT_TRUE(Between(...)) call
  // sites produce a diagnostic with the actual value.
  fprintf(stderr, "Value %" PRIu64 " is not in range [%" PRIu64 ", %" PRIu64 "]\n",
          val,
          low,
          high);
  return false;
}
2405
2406
1
// Verifies DB::GetApproximateSizes() with respect to memtable contents:
// ranges resident only in the memtable contribute size only when the
// include-memtable flag is true, absent ranges report zero, and after a
// flush the with-memtable size strictly exceeds the without-memtable size.
//
// Fix: std::random_shuffle was deprecated in C++14 and removed in C++17;
// replaced with a deterministic Fisher-Yates shuffle driven by the test's
// own Random instance, keeping the test reproducible and standards-clean.
TEST_F(DBTest, ApproximateSizesMemTable) {
  Options options;
  options.write_buffer_size = 100000000;  // Large write buffer
  options.compression = kNoCompression;
  options.create_if_missing = true;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  const int N = 128;
  Random rnd(301);
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  }

  uint64_t size;
  std::string start = Key(50);
  std::string end = Key(60);
  Range r(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_GT(size, 6000);
  ASSERT_LT(size, 204800);
  // Zero if not including mem table
  db_->GetApproximateSizes(&r, 1, &size, false);
  ASSERT_EQ(size, 0);

  // Range with no keys at all must report zero even with memtable included.
  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_EQ(size, 0);

  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(1000 + i), RandomString(&rnd, 1024)));
  }

  // [500, 600) is still empty (writes went to [1000, 1128)).
  start = Key(500);
  end = Key(600);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_EQ(size, 0);

  // Range spanning the populated [1000, 1020) region has nonzero size.
  start = Key(100);
  end = Key(1020);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_GT(size, 6000);

  // Re-run with multiple small memtables that are merged lazily.
  options.max_write_buffer_number = 8;
  options.min_write_buffer_number_to_merge = 5;
  options.write_buffer_size = 1024 * N;  // Not very large
  DestroyAndReopen(options);

  int keys[N * 3];
  for (int i = 0; i < N; i++) {
    keys[i * 3] = i * 5;
    keys[i * 3 + 1] = i * 5 + 1;
    keys[i * 3 + 2] = i * 5 + 2;
  }
  // Deterministic Fisher-Yates shuffle (replacement for the removed
  // std::random_shuffle); reuses the test's seeded RNG for reproducibility.
  for (int i = N * 3 - 1; i > 0; --i) {
    std::swap(keys[i], keys[rnd.Uniform(i + 1)]);
  }

  for (int i = 0; i < N * 3; i++) {
    ASSERT_OK(Put(Key(keys[i] + 1000), RandomString(&rnd, 1024)));
  }

  // [100, 300) was never written in this incarnation.
  start = Key(100);
  end = Key(300);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_EQ(size, 0);

  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_GT(size, 6000);

  start = Key(2100);
  end = Key(2300);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size, true);
  ASSERT_EQ(size, 0);

  // Before any flush: data is memtable-only, so size shows up only when
  // the memtable is included.
  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  uint64_t size_with_mt, size_without_mt;
  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
  ASSERT_GT(size_with_mt, 6000);
  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
  ASSERT_EQ(size_without_mt, 0);

  ASSERT_OK(Flush());

  // Overwrite the range again so both SST files and the memtable hold data.
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i + 1000), RandomString(&rnd, 1024)));
  }

  start = Key(1050);
  end = Key(1080);
  r = Range(start, end);
  db_->GetApproximateSizes(&r, 1, &size_with_mt, true);
  db_->GetApproximateSizes(&r, 1, &size_without_mt, false);
  ASSERT_GT(size_with_mt, size_without_mt);
  ASSERT_GT(size_without_mt, 6000);
}
2511
2512
1
// Verifies DB::GetApproximateSizes() over SST files: sizes are zero for an
// empty DB, remain zero while data is memtable-only, and once flushed scale
// linearly with the number of 100K values — stable across reopen/recovery
// and across incremental compactions of 10-key sub-ranges.
TEST_F(DBTest, ApproximateSizes) {
  do {
    Options options;
    options.write_buffer_size = 100000000;        // Large write buffer
    options.compression = kNoCompression;
    options.create_if_missing = true;
    options = CurrentOptions(options);
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));
    ReopenWithColumnFamilies({"default", "pikachu"}, options);
    ASSERT_TRUE(Between(Size("", "xyz", 1), 0, 0));

    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    const int N = 80;
    static const int S1 = 100000;
    static const int S2 = 105000;  // Allow some expansion from metadata
    Random rnd(301);
    for (int i = 0; i < N; i++) {
      ASSERT_OK(Put(1, Key(i), RandomString(&rnd, S1)));
    }

    // 0 because GetApproximateSizes() does not account for memtable space
    ASSERT_TRUE(Between(Size("", Key(50), 1), 0, 0));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      for (int compact_start = 0; compact_start < N; compact_start += 10) {
        // Each prefix of i keys should occupy between S1*i and S2*i bytes.
        for (int i = 0; i < N; i += 10) {
          ASSERT_TRUE(Between(Size("", Key(i), 1), S1 * i, S2 * i));
          ASSERT_TRUE(Between(Size("", Key(i) + ".suffix", 1), S1 * (i + 1),
                              S2 * (i + 1)));
          ASSERT_TRUE(Between(Size(Key(i), Key(i + 10), 1), S1 * 10, S2 * 10));
        }
        ASSERT_TRUE(Between(Size("", Key(50), 1), S1 * 50, S2 * 50));
        ASSERT_TRUE(
            Between(Size("", Key(50) + ".suffix", 1), S1 * 50, S2 * 50));

        // Compact one 10-key window per iteration; size estimates must stay
        // consistent as files migrate between levels.
        std::string cstart_str = Key(compact_start);
        std::string cend_str = Key(compact_start + 9);
        Slice cstart = cstart_str;
        Slice cend = cend_str;
        ASSERT_OK(dbfull()->TEST_CompactRange(0, &cstart, &cend, handles_[1]));
      }

      ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
      ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction |
                         kSkipPlainTable | kSkipHashIndex));
}
2568
2569
1
// Verifies GetApproximateSizes() accuracy when value sizes vary widely
// (10K values interleaved with 100K and 300K values): each cumulative
// prefix size must fall inside a tight expected window, surviving
// reopen/recovery and repeated level-0 compactions.
TEST_F(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  do {
    Options options = CurrentOptions();
    options.compression = kNoCompression;
    CreateAndReopenWithCF({"pikachu"}, options);

    Random rnd(301);
    std::string big1 = RandomString(&rnd, 100000);
    ASSERT_OK(Put(1, Key(0), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(1), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(2), big1));
    ASSERT_OK(Put(1, Key(3), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(4), big1));
    ASSERT_OK(Put(1, Key(5), RandomString(&rnd, 10000)));
    ASSERT_OK(Put(1, Key(6), RandomString(&rnd, 300000)));
    ASSERT_OK(Put(1, Key(7), RandomString(&rnd, 10000)));

    // Check sizes across recovery by reopening a few times
    for (int run = 0; run < 3; run++) {
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      // Expected cumulative sizes: 10K + 10K + 100K + 10K + 100K + 10K
      // + 300K + 10K, each window allowing ~1K-10K of metadata slack.
      ASSERT_TRUE(Between(Size("", Key(0), 1), 0, 0));
      ASSERT_TRUE(Between(Size("", Key(1), 1), 10000, 11000));
      ASSERT_TRUE(Between(Size("", Key(2), 1), 20000, 21000));
      ASSERT_TRUE(Between(Size("", Key(3), 1), 120000, 121000));
      ASSERT_TRUE(Between(Size("", Key(4), 1), 130000, 131000));
      ASSERT_TRUE(Between(Size("", Key(5), 1), 230000, 231000));
      ASSERT_TRUE(Between(Size("", Key(6), 1), 240000, 241000));
      ASSERT_TRUE(Between(Size("", Key(7), 1), 540000, 541000));
      ASSERT_TRUE(Between(Size("", Key(8), 1), 550000, 560000));

      ASSERT_TRUE(Between(Size(Key(3), Key(5), 1), 110000, 111000));

      ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[1]));
    }
    // ApproximateOffsetOf() is not yet implemented in plain table format.
  } while (ChangeOptions(kSkipPlainTable));
}
2607
#endif  // ROCKSDB_LITE
2608
2609
1
// Verifies that a live iterator pins the DB state it was created against:
// despite later overwrites and compaction-inducing bulk writes, the
// iterator still observes exactly the single original key/value.
TEST_F(DBTest, IteratorPinsRef) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "foo", "hello"));

    // Get iterator that will yield the current contents of the DB.
    Iterator* iter = db_->NewIterator(ReadOptions(), handles_[1]);

    // Write to force compactions
    ASSERT_OK(Put(1, "foo", "newvalue1"));
    for (int i = 0; i < 100; i++) {
      // 100K values
      ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
    }
    ASSERT_OK(Put(1, "foo", "newvalue2"));

    // The iterator must still see only the pre-write snapshot: one entry.
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo", iter->key().ToString());
    ASSERT_EQ("hello", iter->value().ToString());
    iter->Next();
    ASSERT_TRUE(!iter->Valid());
    delete iter;
  } while (ChangeCompactOptions());
}
2634
2635
#ifndef ROCKSDB_LITE
2636
1
// Verifies snapshot isolation across two column families: each snapshot
// reads the values current at its creation, snapshot counts and the
// oldest-snapshot timestamp track GetSnapshot/ReleaseSnapshot correctly,
// and ManagedSnapshot releases its snapshot on scope exit.
TEST_F(DBTest, Snapshot) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
    ASSERT_OK(Put(0, "foo", "0v1"));
    ASSERT_OK(Put(1, "foo", "1v1"));

    const Snapshot* s1 = db_->GetSnapshot();
    ASSERT_EQ(1U, GetNumSnapshots());
    uint64_t time_snap1 = GetTimeOldestSnapshots();
    ASSERT_GT(time_snap1, 0U);
    ASSERT_OK(Put(0, "foo", "0v2"));
    ASSERT_OK(Put(1, "foo", "1v2"));

    // Advance the mock clock so s2 gets a later creation time than s1.
    env_->addon_time_.fetch_add(1);

    const Snapshot* s2 = db_->GetSnapshot();
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_OK(Put(0, "foo", "0v3"));
    ASSERT_OK(Put(1, "foo", "1v3"));

    {
      // RAII snapshot: released automatically at the end of this scope.
      ManagedSnapshot s3(db_);
      ASSERT_EQ(3U, GetNumSnapshots());
      ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());

      ASSERT_OK(Put(0, "foo", "0v4"));
      ASSERT_OK(Put(1, "foo", "1v4"));
      // Each snapshot sees the value current at its creation time.
      ASSERT_EQ("0v1", Get(0, "foo", s1));
      ASSERT_EQ("1v1", Get(1, "foo", s1));
      ASSERT_EQ("0v2", Get(0, "foo", s2));
      ASSERT_EQ("1v2", Get(1, "foo", s2));
      ASSERT_EQ("0v3", Get(0, "foo", s3.snapshot()));
      ASSERT_EQ("1v3", Get(1, "foo", s3.snapshot()));
      ASSERT_EQ("0v4", Get(0, "foo"));
      ASSERT_EQ("1v4", Get(1, "foo"));
    }

    // s3 is gone; s1 and s2 remain and still read their own versions.
    ASSERT_EQ(2U, GetNumSnapshots());
    ASSERT_EQ(time_snap1, GetTimeOldestSnapshots());
    ASSERT_EQ("0v1", Get(0, "foo", s1));
    ASSERT_EQ("1v1", Get(1, "foo", s1));
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));

    db_->ReleaseSnapshot(s1);
    ASSERT_EQ("0v2", Get(0, "foo", s2));
    ASSERT_EQ("1v2", Get(1, "foo", s2));
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
    ASSERT_EQ(1U, GetNumSnapshots());
    // With s1 released, the oldest snapshot is now the later s2.
    ASSERT_LT(time_snap1, GetTimeOldestSnapshots());

    db_->ReleaseSnapshot(s2);
    ASSERT_EQ(0U, GetNumSnapshots());
    ASSERT_EQ("0v4", Get(0, "foo"));
    ASSERT_EQ("1v4", Get(1, "foo"));
  } while (ChangeOptions());
}
2699
2700
1
// Verifies that overwritten ("hidden") values protected only by a released
// snapshot are physically dropped by subsequent compactions: after the
// snapshot is released and the range is compacted, only the newest value
// remains and the approximate size of the range shrinks accordingly.
TEST_F(DBTest, HiddenValuesAreRemoved) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    CreateAndReopenWithCF({"pikachu"}, options);
    Random rnd(301);
    FillLevels("a", "z", 1);

    std::string big = RandomString(&rnd, 50000);
    ASSERT_OK(Put(1, "foo", big));
    ASSERT_OK(Put(1, "pastfoo", "v"));
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(Put(1, "foo", "tiny"));
    ASSERT_OK(Put(1, "pastfoo2", "v2"));  // Advance sequence number one more

    ASSERT_OK(Flush(1));
    ASSERT_GT(NumTableFilesAtLevel(0, 1), 0);

    // While the snapshot lives, the big value is readable and counted.
    ASSERT_EQ(big, Get(1, "foo", snapshot));
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 50000, 60000));
    db_->ReleaseSnapshot(snapshot);
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny, " + big + " ]");
    Slice x("x");
    // First compaction pass: the obsolete big value is dropped.
    ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, &x, handles_[1]));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    ASSERT_GE(NumTableFilesAtLevel(1, 1), 1);
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, &x, handles_[1]));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ tiny ]");

    // Only the tiny value remains, so the range size is now near zero.
    ASSERT_TRUE(Between(Size("", "pastfoo", 1), 0, 1000));
    // ApproximateOffsetOf() is not yet implemented in plain table format,
    // which is used by Size().
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction | kSkipPlainTable));
}
2736
#endif  // ROCKSDB_LITE
2737
2738
1
// Verifies that flush/compaction keeps exactly the versions visible to
// live snapshots: intermediate overwrites not covered by any snapshot are
// purged, and each ReleaseSnapshot allows the next compaction to drop the
// version that snapshot was protecting.
TEST_F(DBTest, CompactBetweenSnapshots) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);
    Random rnd(301);
    FillLevels("a", "z", 1);

    ASSERT_OK(Put(1, "foo", "first"));
    const Snapshot* snapshot1 = db_->GetSnapshot();
    ASSERT_OK(Put(1, "foo", "second"));
    ASSERT_OK(Put(1, "foo", "third"));
    ASSERT_OK(Put(1, "foo", "fourth"));
    const Snapshot* snapshot2 = db_->GetSnapshot();
    ASSERT_OK(Put(1, "foo", "fifth"));
    ASSERT_OK(Put(1, "foo", "sixth"));

    // All entries (including duplicates) exist
    // before any compaction or flush is triggered.
    ASSERT_EQ(AllEntriesFor("foo", 1),
              "[ sixth, fifth, fourth, third, second, first ]");
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
    ASSERT_EQ("first", Get(1, "foo", snapshot1));

    // After a flush, "second", "third" and "fifth" should
    // be removed
    ASSERT_OK(Flush(1));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth, first ]");

    // after we release the snapshot1, only two values left
    db_->ReleaseSnapshot(snapshot1);
    FillLevels("a", "z", 1);
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));

    // We have only one valid snapshot snapshot2. Since snapshot1 is
    // not valid anymore, "first" should be removed by a compaction.
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ("fourth", Get(1, "foo", snapshot2));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth, fourth ]");

    // after we release the snapshot2, only one value should be left
    db_->ReleaseSnapshot(snapshot2);
    FillLevels("a", "z", 1);
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
    ASSERT_EQ("sixth", Get(1, "foo"));
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ sixth ]");
  } while (ChangeOptions(kSkipFIFOCompaction));
}
2789
2790
1
// Verifies that compaction does not drop a SingleDelete marker that is
// needed to hide a snapshot-protected older value (see scenario below);
// otherwise a later SingleDelete would resurrect the old value.
TEST_F(DBTest, UnremovableSingleDelete) {
  // If we compact:
  //
  // Put(A, v1) Snapshot SingleDelete(A) Put(A, v2)
  //
  // We do not want to end up with:
  //
  // Put(A, v1) Snapshot Put(A, v2)
  //
  // Because a subsequent SingleDelete(A) would delete the Put(A, v2)
  // but not Put(A, v1), so Get(A) would return v1.
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  do {
    Options options = CurrentOptions(options_override);
    options.disable_auto_compactions = true;
    CreateAndReopenWithCF({"pikachu"}, options);

    ASSERT_OK(Put(1, "foo", "first"));
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(SingleDelete(1, "foo"));
    ASSERT_OK(Put(1, "foo", "second"));
    ASSERT_OK(Flush(1));

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("second", Get(1, "foo"));

    // After compaction the SDEL marker must survive alongside both values.
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
    ASSERT_EQ("[ second, SDEL, first ]", AllEntriesFor("foo", 1));

    ASSERT_OK(SingleDelete(1, "foo"));

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));

    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));

    ASSERT_EQ("first", Get(1, "foo", snapshot));
    ASSERT_EQ("NOT_FOUND", Get(1, "foo"));
    db_->ReleaseSnapshot(snapshot);
    // FIFO and universal compaction do not apply to the test case.
    // Skip MergePut because single delete does not get removed when it encounters a merge.
  } while (ChangeOptions(kSkipFIFOCompaction | kSkipUniversalCompaction | kSkipMergePut));
}
2834
2835
#ifndef ROCKSDB_LITE
2836
1
// Verifies deletion-marker elimination rules: a DEL can be dropped when a
// newer value for the same key exists in the output ("foo=>v2" hides v1),
// but the hidden value itself survives until its own level is compacted
// against the base level.
TEST_F(DBTest, DeletionMarkers1) {
  Options options = CurrentOptions();
  options.max_background_flushes = 0;
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_OK(Put(1, "foo", "v1"));
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  ASSERT_OK(Put(1, "a", "begin"));
  ASSERT_OK(Put(1, "z", "end"));
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  ASSERT_OK(Delete(1, "foo"));
  ASSERT_OK(Put(1, "foo", "v2"));
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  // Flush already collapses v2/DEL since v2 supersedes the delete.
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  Slice z("z");
  ASSERT_OK(dbfull()->TEST_CompactRange(last - 2, nullptr, &z, handles_[1]));
  // DEL eliminated, but v1 remains because we aren't compacting that level
  // (DEL can be eliminated because v2 hides v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]));
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
}
2870
2871
1
// Companion to DeletionMarkers1 but with no newer value above the DEL:
// the deletion marker must be retained while a lower level may still hold
// the key, and is dropped only when compaction reaches the base level.
TEST_F(DBTest, DeletionMarkers2) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_OK(Put(1, "foo", "v1"));
  ASSERT_OK(Flush(1));
  const int last = 2;
  MoveFilesToLevel(last, 1);
  // foo => v1 is now in last level
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);

  // Place a table at level last-1 to prevent merging with preceding mutation
  ASSERT_OK(Put(1, "a", "begin"));
  ASSERT_OK(Put(1, "z", "end"));
  ASSERT_OK(Flush(1));
  MoveFilesToLevel(last - 1, 1);
  ASSERT_EQ(NumTableFilesAtLevel(last, 1), 1);
  ASSERT_EQ(NumTableFilesAtLevel(last - 1, 1), 1);

  ASSERT_OK(Delete(1, "foo"));
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(Flush(1));  // Moves to level last-2
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactRange(last - 2, nullptr, nullptr, handles_[1]));
  // DEL kept: "last" file overlaps
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v1 ]");
  ASSERT_OK(dbfull()->TEST_CompactRange(last - 1, nullptr, nullptr, handles_[1]));
  // Merging last-1 w/ last, so we are the base level for "foo", so
  // DEL is removed.  (as is v1).
  ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
}
2901
2902
1
// Regression test: a memtable flush must detect overlap with existing
// level-0 files and place its output in level 0 (not a deeper level),
// otherwise a deletion could be shadowed by older level-0 data.
TEST_F(DBTest, OverlapInLevel0) {
  do {
    Options options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);

    // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
    ASSERT_OK(Put(1, "100", "v100"));
    ASSERT_OK(Put(1, "999", "v999"));
    ASSERT_OK(Flush(1));
    MoveFilesToLevel(2, 1);
    ASSERT_OK(Delete(1, "100"));
    ASSERT_OK(Delete(1, "999"));
    ASSERT_OK(Flush(1));
    MoveFilesToLevel(1, 1);
    ASSERT_EQ("0,1,1", FilesPerLevel(1));

    // Make files spanning the following ranges in level-0:
    //  files[0]  200 .. 900
    //  files[1]  300 .. 500
    // Note that files are sorted by smallest key.
    ASSERT_OK(Put(1, "300", "v300"));
    ASSERT_OK(Put(1, "500", "v500"));
    ASSERT_OK(Flush(1));
    ASSERT_OK(Put(1, "200", "v200"));
    ASSERT_OK(Put(1, "600", "v600"));
    ASSERT_OK(Put(1, "900", "v900"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("2,1,1", FilesPerLevel(1));

    // Compact away the placeholder files we created initially
    ASSERT_OK(dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[1]));
    ASSERT_OK(dbfull()->TEST_CompactRange(2, nullptr, nullptr, handles_[1]));
    ASSERT_EQ("2", FilesPerLevel(1));

    // Do a memtable compaction.  Before bug-fix, the compaction would
    // not detect the overlap with level-0 files and would incorrectly place
    // the deletion in a deeper level.
    ASSERT_OK(Delete(1, "600"));
    ASSERT_OK(Flush(1));
    ASSERT_EQ("3", FilesPerLevel(1));
    ASSERT_EQ("NOT_FOUND", Get(1, "600"));
  } while (ChangeOptions(kSkipUniversalCompaction | kSkipFIFOCompaction));
}
2945
#endif  // ROCKSDB_LITE
2946
2947
1
// Verifies that reopening a column family with a comparator whose Name()
// differs from the one it was created with is rejected with an error
// message mentioning "comparator".
TEST_F(DBTest, ComparatorCheck) {
  // Behaves exactly like the bytewise comparator but reports a different
  // name, so only the name check can fail.
  class NewComparator : public Comparator {
   public:
    const char* Name() const override {
      return "rocksdb.NewComparator";
    }
    int Compare(const Slice& a, const Slice& b) const override {
      return BytewiseComparator()->Compare(a, b);
    }
    virtual void FindShortestSeparator(std::string* s,
                                       const Slice& l) const override {
      BytewiseComparator()->FindShortestSeparator(s, l);
    }
    void FindShortSuccessor(std::string* key) const override {
      BytewiseComparator()->FindShortSuccessor(key);
    }
  };
  Options new_options, options;
  NewComparator cmp;
  do {
    options = CurrentOptions();
    CreateAndReopenWithCF({"pikachu"}, options);
    new_options = CurrentOptions();
    new_options.comparator = &cmp;
    // only the non-default column family has non-matching comparator
    Status s = TryReopenWithColumnFamilies({"default", "pikachu"},
        std::vector<Options>({options, new_options}));
    ASSERT_TRUE(!s.ok());
    ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
        << s.ToString();
  } while (ChangeCompactOptions());
}
2979
2980
1
// Verifies that a user-defined comparator with non-bytewise ordering
// (numeric "[N]" keys, so e.g. "[10]" and "[0xa]" are equal) works through
// puts, gets, and repeated compactions, and that the DB only ever hands
// the comparator keys in the expected "[...]" format.
TEST_F(DBTest, CustomComparator) {
  // Orders keys by the integer parsed from the "[N]" form; EXPECT checks in
  // ToNumber double as format validation for every key the DB passes in.
  class NumberComparator : public Comparator {
   public:
    const char* Name() const override {
      return "test.NumberComparator";
    }
    int Compare(const Slice& a, const Slice& b) const override {
      return ToNumber(a) - ToNumber(b);
    }
    virtual void FindShortestSeparator(std::string* s,
                                       const Slice& l) const override {
      ToNumber(*s);     // Check format
      ToNumber(l);      // Check format
    }
    void FindShortSuccessor(std::string* key) const override {
      ToNumber(*key);   // Check format
    }
   private:
    static int ToNumber(const Slice& x) {
      // Check that there are no extra characters.
      EXPECT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size() - 1] == ']')
          << EscapeString(x);
      int val;
      char ignored;
      EXPECT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
          << EscapeString(x);
      return val;
    }
  };
  Options new_options;
  NumberComparator cmp;
  do {
    new_options = CurrentOptions();
    new_options.create_if_missing = true;
    new_options.comparator = &cmp;
    new_options.write_buffer_size = 4096;  // Compact more often
    new_options.arena_block_size = 4096;
    new_options = CurrentOptions(new_options);
    DestroyAndReopen(new_options);
    CreateAndReopenWithCF({"pikachu"}, new_options);
    ASSERT_OK(Put(1, "[10]", "ten"));
    ASSERT_OK(Put(1, "[0x14]", "twenty"));
    for (int i = 0; i < 2; i++) {
      // Decimal and hex spellings of the same number must hit the same key.
      ASSERT_EQ("ten", Get(1, "[10]"));
      ASSERT_EQ("ten", Get(1, "[0xa]"));
      ASSERT_EQ("twenty", Get(1, "[20]"));
      ASSERT_EQ("twenty", Get(1, "[0x14]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[15]"));
      ASSERT_EQ("NOT_FOUND", Get(1, "[0xf]"));
      Compact(1, "[0]", "[9999]");
    }

    // Bulk-write enough keys to trigger flushes/compactions under the small
    // write buffer, exercising the comparator across levels.
    for (int run = 0; run < 2; run++) {
      for (int i = 0; i < 1000; i++) {
        char buf[100];
        snprintf(buf, sizeof(buf), "[%d]", i*10);
        ASSERT_OK(Put(1, buf, buf));
      }
      Compact(1, "[0]", "[1000000]");
    }
  } while (ChangeCompactOptions());
}
3042
3043
1
// Verifies DB::Open's handling of the create_if_missing and error_if_exists
// flags across the four combinations of "DB exists" x "flag settings".
TEST_F(DBTest, DBOpen_Options) {
  Options options = CurrentOptions();
  std::string dbname = test::TmpDir(env_) + "/db_options_test";
  ASSERT_OK(DestroyDB(dbname, options));

  // Does not exist, and create_if_missing == false: error
  DB* db = nullptr;
  options.create_if_missing = false;
  Status s = DB::Open(options, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does not exist, and create_if_missing == true: OK
  options.create_if_missing = true;
  s = DB::Open(options, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;

  // Does exist, and error_if_exists == true: error
  options.create_if_missing = false;
  options.error_if_exists = true;
  s = DB::Open(options, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != nullptr);
  ASSERT_TRUE(db == nullptr);

  // Does exist, and error_if_exists == false: OK
  options.create_if_missing = true;
  options.error_if_exists = false;
  s = DB::Open(options, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != nullptr);

  delete db;
  db = nullptr;
}
3081
3082
1
// Verifies that reopening a DB with fewer levels than its files occupy
// (data was moved to level 3, reopen with num_levels = 2) fails with an
// Invalid argument error instead of silently losing data.
TEST_F(DBTest, DBOpen_Change_NumLevels) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_TRUE(db_ != nullptr);
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "a", "123"));
  ASSERT_OK(Put(1, "b", "234"));
  ASSERT_OK(Flush(1));
  // Push the flushed file down to level 3, beyond the new num_levels below.
  MoveFilesToLevel(3, 1);
  Close();

  options.create_if_missing = false;
  options.num_levels = 2;
  Status s = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "Invalid argument") != nullptr);
  ASSERT_TRUE(db_ == nullptr);
}
3101
3102
1
// Verifies that DestroyDB on a database recursively destroys its meta
// databases (and the meta database's own meta database): after destroying
// only the top-level DB, none of the three can be reopened.
TEST_F(DBTest, DestroyDBMetaDatabase) {
  std::string dbname = test::TmpDir(env_) + "/db_meta";
  ASSERT_OK(env_->CreateDirIfMissing(dbname));
  std::string metadbname = MetaDatabaseName(dbname, 0);
  ASSERT_OK(env_->CreateDirIfMissing(metadbname));
  std::string metametadbname = MetaDatabaseName(metadbname, 0);
  ASSERT_OK(env_->CreateDirIfMissing(metametadbname));

  // Destroy previous versions if they exist. Using the long way.
  Options options = CurrentOptions();
  ASSERT_OK(DestroyDB(metametadbname, options));
  ASSERT_OK(DestroyDB(metadbname, options));
  ASSERT_OK(DestroyDB(dbname, options));

  // Setup databases
  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname, &db));
  delete db;
  db = nullptr;
  ASSERT_OK(DB::Open(options, metadbname, &db));
  delete db;
  db = nullptr;
  ASSERT_OK(DB::Open(options, metametadbname, &db));
  delete db;
  db = nullptr;

  // Delete databases
  ASSERT_OK(DestroyDB(dbname, options));

  // Check if deletion worked.
  options.create_if_missing = false;
  ASSERT_TRUE(!(DB::Open(options, dbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metadbname, &db)).ok());
  ASSERT_TRUE(!(DB::Open(options, metametadbname, &db)).ok());
}
3137
3138
#ifndef ROCKSDB_LITE
3139
// Check that number of files does not grow when writes are dropped
3140
1
// Check that number of files does not grow when writes are dropped:
// with the mock env simulating out-of-space, repeated compactions must
// bump the background-error counter (to 5), not accumulate new files,
// and sleep/back off between failed attempts.
TEST_F(DBTest, DropWrites) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.paranoid_checks = false;
    Reopen(options);

    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));
    Compact("a", "z");
    const size_t num_files = CountFiles();
    // Force out-of-space errors
    env_->drop_writes_.store(true, std::memory_order_release);
    env_->sleep_counter_.Reset();
    env_->no_sleep_ = true;
    for (int i = 0; i < 5; i++) {
      // Universal multi-level / subcompaction configs take the whole-range
      // CompactRange path; others compact level by level.
      if (option_config_ != kUniversalCompactionMultiLevel &&
          option_config_ != kUniversalSubcompactions) {
        for (int level = 0; level < dbfull()->NumberLevels(); level++) {
          if (level > 0 && level == dbfull()->NumberLevels() - 1) {
            break;
          }
          WARN_NOT_OK(
              dbfull()->TEST_CompactRange(
                  level, nullptr, nullptr, nullptr, true /* disallow trivial move */),
              "Compact range failed");
        }
      } else {
        WARN_NOT_OK(
            dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr),
            "Compact range failed");
      }
    }

    std::string property_value;
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("5", property_value);

    env_->drop_writes_.store(false, std::memory_order_release);
    ASSERT_LT(CountFiles(), num_files + 3);

    // Check that compaction attempts slept after errors
    // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
    // versions
    ASSERT_GE(env_->sleep_counter_.Read(), 4);
  } while (ChangeCompactOptions());
}
3187
3188
// Check background error counter bumped on flush failures.
3189
1
TEST_F(DBTest, DropWritesFlush) {
3190
5
  do {
3191
5
    Options options = CurrentOptions();
3192
5
    options.env = env_;
3193
5
    options.max_background_flushes = 1;
3194
5
    Reopen(options);
3195
3196
5
    ASSERT_OK(Put("foo", "v1"));
3197
    // Force out-of-space errors
3198
5
    env_->drop_writes_.store(true, std::memory_order_release);
3199
3200
5
    std::string property_value;
3201
    // Background error count is 0 now.
3202
5
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
3203
5
    ASSERT_EQ("0", property_value);
3204
3205
5
    ASSERT_NOK(dbfull()->TEST_FlushMemTable(true));
3206
3207
5
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
3208
5
    ASSERT_EQ("1", property_value);
3209
3210
5
    env_->drop_writes_.store(false, std::memory_order_release);
3211
5
  } while (ChangeCompactOptions());
3212
1
}
3213
#endif  // ROCKSDB_LITE
3214
3215
// Check that CompactRange() returns failure if there is not enough space left
3216
// on device
3217
1
TEST_F(DBTest, NoSpaceCompactRange) {
3218
5
  do {
3219
5
    Options options = CurrentOptions();
3220
5
    options.env = env_;
3221
5
    options.disable_auto_compactions = true;
3222
5
    Reopen(options);
3223
3224
    // generate 5 tables
3225
30
    for (int i = 0; i < 5; ++i) {
3226
25
      ASSERT_OK(Put(Key(i), Key(i) + "v"));
3227
25
      ASSERT_OK(Flush());
3228
25
    }
3229
3230
    // Force out-of-space errors
3231
5
    env_->no_space_.store(true, std::memory_order_release);
3232
3233
5
    Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
3234
5
                                           true /* disallow trivial move */);
3235
5
    ASSERT_TRUE(s.IsIOError());
3236
3237
5
    env_->no_space_.store(false, std::memory_order_release);
3238
5
  } while (ChangeCompactOptions());
3239
1
}
3240
3241
1
TEST_F(DBTest, NonWritableFileSystem) {
3242
5
  do {
3243
5
    Options options = CurrentOptions();
3244
5
    options.write_buffer_size = 4096;
3245
5
    options.arena_block_size = 4096;
3246
5
    options.env = env_;
3247
5
    Reopen(options);
3248
5
    ASSERT_OK(Put("foo", "v1"));
3249
5
    env_->non_writeable_rate_.store(100);
3250
5
    std::string big(100000, 'x');
3251
5
    int errors = 0;
3252
105
    for (int i = 0; i < 20; i++) {
3253
100
      if (!Put("foo", big).ok()) {
3254
95
        errors++;
3255
95
        env_->SleepForMicroseconds(100000);
3256
95
      }
3257
100
    }
3258
5
    ASSERT_GT(errors, 0);
3259
5
    env_->non_writeable_rate_.store(0);
3260
5
  } while (ChangeCompactOptions());
3261
1
}
3262
3263
#ifndef ROCKSDB_LITE
3264
1
TEST_F(DBTest, ManifestWriteError) {
3265
  // Test for the following problem:
3266
  // (a) Compaction produces file F
3267
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
3268
  // (c) GC deletes F
3269
  // (d) After reopening DB, reads fail since deleted F is named in log record
3270
3271
  // We iterate twice.  In the second iteration, everything is the
3272
  // same except the log record never makes it to the MANIFEST file.
3273
3
  for (int iter = 0; iter < 2; iter++) {
3274
2
    std::atomic<bool>* error_type = (iter == 0)
3275
1
        ? &env_->manifest_sync_error_
3276
1
        : &env_->manifest_write_error_;
3277
3278
    // Insert foo=>bar mapping
3279
2
    Options options = CurrentOptions();
3280
2
    options.env = env_;
3281
2
    options.create_if_missing = true;
3282
2
    options.error_if_exists = false;
3283
2
    options.paranoid_checks = true;
3284
2
    DestroyAndReopen(options);
3285
2
    ASSERT_OK(Put("foo", "bar"));
3286
2
    ASSERT_EQ("bar", Get("foo"));
3287
3288
    // Memtable compaction (will succeed)
3289
2
    ASSERT_OK(Flush());
3290
2
    ASSERT_EQ("bar", Get("foo"));
3291
2
    const int last = 2;
3292
2
    MoveFilesToLevel(2);
3293
2
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);   // foo=>bar is now in last level
3294
3295
    // Merging compaction (will fail)
3296
2
    error_type->store(true, std::memory_order_release);
3297
2
    ASSERT_NOK(dbfull()->TEST_CompactRange(last, nullptr, nullptr));  // Should fail
3298
2
    ASSERT_EQ("bar", Get("foo"));
3299
3300
2
    error_type->store(false, std::memory_order_release);
3301
3302
    // Since paranoid_checks=true, writes should fail
3303
2
    ASSERT_NOK(Put("foo2", "bar2"));
3304
3305
    // Recovery: should not lose data
3306
2
    ASSERT_EQ("bar", Get("foo"));
3307
3308
    // Try again with paranoid_checks=false
3309
2
    Close();
3310
2
    options.paranoid_checks = false;
3311
2
    Reopen(options);
3312
3313
    // Merging compaction (will fail)
3314
2
    error_type->store(true, std::memory_order_release);
3315
2
    ASSERT_OK(dbfull()->TEST_CompactRange(last, nullptr, nullptr));  // Should fail
3316
2
    ASSERT_EQ("bar", Get("foo"));
3317
3318
    // Recovery: should not lose data
3319
2
    error_type->store(false, std::memory_order_release);
3320
2
    Reopen(options);
3321
2
    ASSERT_EQ("bar", Get("foo"));
3322
3323
    // Since paranoid_checks=false, writes should succeed
3324
2
    ASSERT_OK(Put("foo2", "bar2"));
3325
2
    ASSERT_EQ("bar", Get("foo"));
3326
2
    ASSERT_EQ("bar2", Get("foo2"));
3327
2
  }
3328
1
}
3329
#endif  // ROCKSDB_LITE
3330
3331
1
TEST_F(DBTest, PutFailsParanoid) {
3332
  // Test the following:
3333
  // (a) A random put fails in paranoid mode (simulate by sync fail)
3334
  // (b) All other puts have to fail, even if writes would succeed
3335
  // (c) All of that should happen ONLY if paranoid_checks = true
3336
3337
1
  Options options = CurrentOptions();
3338
1
  options.env = env_;
3339
1
  options.create_if_missing = true;
3340
1
  options.error_if_exists = false;
3341
1
  options.paranoid_checks = true;
3342
1
  DestroyAndReopen(options);
3343
1
  CreateAndReopenWithCF({"pikachu"}, options);
3344
1
  Status s;
3345
3346
1
  ASSERT_OK(Put(1, "foo", "bar"));
3347
1
  ASSERT_OK(Put(1, "foo1", "bar1"));
3348
  // simulate error
3349
1
  env_->log_write_error_.store(true, std::memory_order_release);
3350
1
  s = Put(1, "foo2", "bar2");
3351
1
  ASSERT_TRUE(!s.ok());
3352
1
  env_->log_write_error_.store(false, std::memory_order_release);
3353
1
  s = Put(1, "foo3", "bar3");
3354
  // the next put should fail, too
3355
1
  ASSERT_TRUE(!s.ok());
3356
  // but we're still able to read
3357
1
  ASSERT_EQ("bar", Get(1, "foo"));
3358
3359
  // do the same thing with paranoid checks off
3360
1
  options.paranoid_checks = false;
3361
1
  DestroyAndReopen(options);
3362
1
  CreateAndReopenWithCF({"pikachu"}, options);
3363
3364
1
  ASSERT_OK(Put(1, "foo", "bar"));
3365
1
  ASSERT_OK(Put(1, "foo1", "bar1"));
3366
  // simulate error
3367
1
  env_->log_write_error_.store(true, std::memory_order_release);
3368
1
  s = Put(1, "foo2", "bar2");
3369
1
  ASSERT_TRUE(!s.ok());
3370
1
  env_->log_write_error_.store(false, std::memory_order_release);
3371
1
  s = Put(1, "foo3", "bar3");
3372
  // the next put should NOT fail
3373
1
  ASSERT_TRUE(s.ok());
3374
1
}
3375
3376
#ifndef ROCKSDB_LITE
3377
1
TEST_F(DBTest, SnapshotFiles) {
3378
5
  do {
3379
5
    Options options = CurrentOptions();
3380
5
    options.write_buffer_size = 100000000;        // Large write buffer
3381
5
    CreateAndReopenWithCF({"pikachu"}, options);
3382
3383
5
    Random rnd(301);
3384
3385
    // Write 8MB (80 values, each 100K)
3386
5
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
3387
5
    std::vector<std::string> values;
3388
405
    for (int i = 0; i < 80; i++) {
3389
400
      values.push_back(RandomString(&rnd, 100000));
3390
400
      ASSERT_OK(Put((i < 40), Key(i), values[i]));
3391
400
    }
3392
3393
    // assert that nothing makes it to disk yet.
3394
5
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
3395
3396
    // get a file snapshot
3397
5
    uint64_t manifest_number = 0;
3398
5
    uint64_t manifest_size = 0;
3399
5
    std::vector<std::string> files;
3400
5
    ASSERT_OK(dbfull()->DisableFileDeletions());
3401
5
    ASSERT_OK(dbfull()->GetLiveFiles(files, &manifest_size));
3402
3403
    // CURRENT, MANIFEST, *.sst, *.sst.sblock files (one for each CF)
3404
5
    ASSERT_EQ(files.size(), 6U);
3405
3406
5
    uint64_t number = 0;
3407
5
    FileType type;
3408
3409
    // copy these files to a new snapshot directory
3410
5
    std::string snapdir = dbname_ + ".snapdir/";
3411
5
    ASSERT_OK(env_->CreateDirIfMissing(snapdir));
3412
3413
35
    for (size_t i = 0; i < files.size(); i++) {
3414
      // our clients require that GetLiveFiles returns
3415
      // files with "/" as first character!
3416
30
      ASSERT_EQ(files[i][0], '/');
3417
30
      std::string src = dbname_ + files[i];
3418
30
      std::string dest = snapdir + files[i];
3419
3420
30
      uint64_t size;
3421
30
      ASSERT_OK(env_->GetFileSize(src, &size));
3422
3423
      // record the number and the size of the
3424
      // latest manifest file
3425
30
      if (ParseFileName(files[i].substr(1), &number, &type)) {
3426
30
        if (type == kDescriptorFile) {
3427
5
          if (number > manifest_number) {
3428
5
            manifest_number = number;
3429
5
            ASSERT_GE(size, manifest_size);
3430
5
            size = manifest_size; // copy only valid MANIFEST data
3431
5
          }
3432
5
        }
3433
30
      }
3434
30
      CopyFile(src, dest, size);
3435
30
    }
3436
3437
    // release file snapshot
3438
5
    ASSERT_OK(dbfull()->DisableFileDeletions());
3439
    // overwrite one key, this key should not appear in the snapshot
3440
5
    std::vector<std::string> extras;
3441
10
    for (unsigned int i = 0; i < 1; i++) {
3442
5
      extras.push_back(RandomString(&rnd, 100000));
3443
5
      ASSERT_OK(Put(0, Key(i), extras[i]));
3444
5
    }
3445
3446
    // verify that data in the snapshot are correct
3447
5
    std::vector<ColumnFamilyDescriptor> column_families;
3448
5
    column_families.emplace_back("default", ColumnFamilyOptions());
3449
5
    column_families.emplace_back("pikachu", ColumnFamilyOptions());
3450
5
    std::vector<ColumnFamilyHandle*> cf_handles;
3451
5
    DB* snapdb;
3452
5
    DBOptions opts;
3453
5
    opts.env = env_;
3454
5
    opts.create_if_missing = false;
3455
5
    Status stat =
3456
5
        DB::Open(opts, snapdir, column_families, &cf_handles, &snapdb);
3457
5
    ASSERT_OK(stat);
3458
3459
5
    ReadOptions roptions;
3460
5
    std::string val;
3461
405
    for (unsigned int i = 0; i < 80; i++) {
3462
400
      stat = snapdb->Get(roptions, cf_handles[i < 40], Key(i), &val);
3463
400
      ASSERT_OK(stat);
3464
400
      ASSERT_EQ(values[i].compare(val), 0);
3465
400
    }
3466
10
    for (auto cfh : cf_handles) {
3467
10
      delete cfh;
3468
10
    }
3469
5
    delete snapdb;
3470
3471
    // look at the new live files after we added an 'extra' key
3472
    // and after we took the first snapshot.
3473
5
    uint64_t new_manifest_number = 0;
3474
5
    uint64_t new_manifest_size = 0;
3475
5
    std::vector<std::string> newfiles;
3476
5
    ASSERT_OK(dbfull()->DisableFileDeletions());
3477
5
    ASSERT_OK(dbfull()->GetLiveFiles(newfiles, &new_manifest_size));
3478
3479
    // find the new manifest file. assert that this manifest file is
3480
    // the same one as in the previous snapshot. But its size should be
3481
    // larger because we added an extra key after taking the
3482
    // previous shapshot.
3483
45
    for (size_t i = 0; i < newfiles.size(); i++) {
3484
40
      std::string src = dbname_ + "/" + newfiles[i];
3485
      // record the lognumber and the size of the
3486
      // latest manifest file
3487
40
      if (ParseFileName(newfiles[i].substr(1), &number, &type)) {
3488
40
        if (type == kDescriptorFile) {
3489
5
          if (number > new_manifest_number) {
3490
5
            uint64_t size;
3491
5
            new_manifest_number = number;
3492
5
            ASSERT_OK(env_->GetFileSize(src, &size));
3493
5
            ASSERT_GE(size, new_manifest_size);
3494
5
          }
3495
5
        }
3496
40
      }
3497
40
    }
3498
5
    ASSERT_EQ(manifest_number, new_manifest_number);
3499
5
    ASSERT_GT(new_manifest_size, manifest_size);
3500
3501
    // release file snapshot
3502
5
    ASSERT_OK(dbfull()->DisableFileDeletions());
3503
5
  } while (ChangeCompactOptions());
3504
1
}
3505
#endif
3506
3507
1
TEST_F(DBTest, CompactOnFlush) {
3508
1
  anon::OptionsOverride options_override;
3509
1
  options_override.skip_policy = kSkipNoSnapshot;
3510
5
  do {
3511
5
    Options options = CurrentOptions(options_override);
3512
5
    options.disable_auto_compactions = true;
3513
5
    CreateAndReopenWithCF({"pikachu"}, options);
3514
3515
5
    ASSERT_OK(Put(1, "foo", "v1"));
3516
5
    ASSERT_OK(Flush(1));
3517
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v1 ]");
3518
3519
    // Write two new keys
3520
5
    ASSERT_OK(Put(1, "a", "begin"));
3521
5
    ASSERT_OK(Put(1, "z", "end"));
3522
5
    ASSERT_OK(Flush(1));
3523
3524
    // Case1: Delete followed by a put
3525
5
    ASSERT_OK(Delete(1, "foo"));
3526
5
    ASSERT_OK(Put(1, "foo", "v2"));
3527
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, DEL, v1 ]");
3528
3529
    // After the current memtable is flushed, the DEL should
3530
    // have been removed
3531
5
    ASSERT_OK(Flush(1));
3532
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2, v1 ]");
3533
3534
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3535
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v2 ]");
3536
3537
    // Case 2: Delete followed by another delete
3538
5
    ASSERT_OK(Delete(1, "foo"));
3539
5
    ASSERT_OK(Delete(1, "foo"));
3540
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, DEL, v2 ]");
3541
5
    ASSERT_OK(Flush(1));
3542
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v2 ]");
3543
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3544
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
3545
3546
    // Case 3: Put followed by a delete
3547
5
    ASSERT_OK(Put(1, "foo", "v3"));
3548
5
    ASSERT_OK(Delete(1, "foo"));
3549
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL, v3 ]");
3550
5
    ASSERT_OK(Flush(1));
3551
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ DEL ]");
3552
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3553
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
3554
3555
    // Case 4: Put followed by another Put
3556
5
    ASSERT_OK(Put(1, "foo", "v4"));
3557
5
    ASSERT_OK(Put(1, "foo", "v5"));
3558
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5, v4 ]");
3559
5
    ASSERT_OK(Flush(1));
3560
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
3561
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3562
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v5 ]");
3563
3564
    // clear database
3565
5
    ASSERT_OK(Delete(1, "foo"));
3566
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3567
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
3568
3569
    // Case 5: Put followed by snapshot followed by another Put
3570
    // Both puts should remain.
3571
5
    ASSERT_OK(Put(1, "foo", "v6"));
3572
5
    const Snapshot* snapshot = db_->GetSnapshot();
3573
5
    ASSERT_OK(Put(1, "foo", "v7"));
3574
5
    ASSERT_OK(Flush(1));
3575
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v7, v6 ]");
3576
5
    db_->ReleaseSnapshot(snapshot);
3577
3578
    // clear database
3579
5
    ASSERT_OK(Delete(1, "foo"));
3580
5
    ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), handles_[1], nullptr, nullptr));
3581
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ ]");
3582
3583
    // Case 5: snapshot followed by a put followed by another Put
3584
    // Only the last put should remain.
3585
5
    const Snapshot* snapshot1 = db_->GetSnapshot();
3586
5
    ASSERT_OK(Put(1, "foo", "v8"));
3587
5
    ASSERT_OK(Put(1, "foo", "v9"));
3588
5
    ASSERT_OK(Flush(1));
3589
5
    ASSERT_EQ(AllEntriesFor("foo", 1), "[ v9 ]");
3590
5
    db_->ReleaseSnapshot(snapshot1);
3591
5
  } while (ChangeCompactOptions());
3592
1
}
3593
3594
namespace {
3595
std::vector<std::uint64_t> ListSpecificFiles(
3596
12
    Env* env, const std::string& path, const FileType expected_file_type) {
3597
12
  std::vector<std::string> files;
3598
12
  std::vector<uint64_t> file_numbers;
3599
12
  CHECK_OK(env->GetChildren(path, &files));
3600
12
  uint64_t number;
3601
12
  FileType type;
3602
248
  for (size_t i = 0; i < files.size(); ++i) {
3603
236
    if (ParseFileName(files[i], &number, &type)) {
3604
187
      if (type == expected_file_type) {
3605
46
        file_numbers.push_back(number);
3606
46
      }
3607
187
    }
3608
236
  }
3609
12
  return file_numbers;
3610
12
}
3611
3612
12
std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path) {
3613
12
  return ListSpecificFiles(env, path, kTableFile);
3614
12
}
3615
}  // namespace
3616
3617
1
TEST_F(DBTest, FlushOneColumnFamily) {
3618
1
  Options options = CurrentOptions();
3619
1
  CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
3620
1
                         "alyosha", "popovich"},
3621
1
                        options);
3622
3623
1
  ASSERT_OK(Put(0, "Default", "Default"));
3624
1
  ASSERT_OK(Put(1, "pikachu", "pikachu"));
3625
1
  ASSERT_OK(Put(2, "ilya", "ilya"));
3626
1
  ASSERT_OK(Put(3, "muromec", "muromec"));
3627
1
  ASSERT_OK(Put(4, "dobrynia", "dobrynia"));
3628
1
  ASSERT_OK(Put(5, "nikitich", "nikitich"));
3629
1
  ASSERT_OK(Put(6, "alyosha", "alyosha"));
3630
1
  ASSERT_OK(Put(7, "popovich", "popovich"));
3631
3632
9
  for (int i = 0; i < 8; ++i) {
3633
8
    ASSERT_OK(Flush(i));
3634
8
    auto tables = ListTableFiles(env_, dbname_);
3635
8
    ASSERT_EQ(tables.size(), i + 1U);
3636
8
  }
3637
1
}
3638
3639
#ifndef ROCKSDB_LITE
3640
// In https://reviews.facebook.net/D20661 we change
3641
// recovery behavior: previously for each log file each column family
3642
// memtable was flushed, even it was empty. Now it's changed:
3643
// we try to create the smallest number of table files by merging
3644
// updates from multiple logs
3645
1
TEST_F(DBTest, RecoverCheckFileAmountWithSmallWriteBuffer) {
3646
1
  Options options = CurrentOptions();
3647
1
  options.write_buffer_size = 5000000;
3648
1
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
3649
3650
  // Since we will reopen DB with smaller write_buffer_size,
3651
  // each key will go to new SST file
3652
1
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
3653
1
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
3654
1
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
3655
1
  ASSERT_OK(Put(1, Key(10), DummyString(1000000)));
3656
3657
1
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
3658
  // Make 'dobrynia' to be flushed and new WAL file to be created
3659
1
  ASSERT_OK(Put(2, Key(10), DummyString(7500000)));
3660
1
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
3661
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3662
1
  {
3663
1
    auto tables = ListTableFiles(env_, dbname_);
3664
1
    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
3665
    // Make sure 'dobrynia' was flushed: check sst files amount
3666
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3667
1
              static_cast<uint64_t>(1));
3668
1
  }
3669
  // New WAL file
3670
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3671
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3672
1
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
3673
1
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
3674
1
  ASSERT_OK(Put(3, Key(10), DummyString(1)));
3675
3676
1
  options.write_buffer_size = 4096;
3677
1
  options.arena_block_size = 4096;
3678
1
  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
3679
1
                           options);
3680
1
  {
3681
    // No inserts => default is empty
3682
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3683
1
              static_cast<uint64_t>(0));
3684
    // First 4 keys goes to separate SSTs + 1 more SST for 2 smaller keys
3685
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3686
1
              static_cast<uint64_t>(5));
3687
    // 1 SST for big key + 1 SST for small one
3688
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3689
1
              static_cast<uint64_t>(2));
3690
    // 1 SST for all keys
3691
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3692
1
              static_cast<uint64_t>(1));
3693
1
  }
3694
1
}
3695
3696
// In https://reviews.facebook.net/D20661 we change
3697
// recovery behavior: previously for each log file each column family
3698
// memtable was flushed, even it wasn't empty. Now it's changed:
3699
// we try to create the smallest number of table files by merging
3700
// updates from multiple logs
3701
1
TEST_F(DBTest, RecoverCheckFileAmount) {
3702
1
  Options options = CurrentOptions();
3703
1
  options.write_buffer_size = 100000;
3704
1
  options.arena_block_size = 4 * 1024;
3705
1
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
3706
3707
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3708
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3709
1
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
3710
3711
  // Make 'nikitich' memtable to be flushed
3712
1
  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
3713
1
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
3714
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3715
  // 4 memtable are not flushed, 1 sst file
3716
1
  {
3717
1
    auto tables = ListTableFiles(env_, dbname_);
3718
1
    ASSERT_EQ(tables.size(), static_cast<size_t>(1));
3719
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3720
1
              static_cast<uint64_t>(1));
3721
1
  }
3722
  // Memtable for 'nikitich' has flushed, new WAL file has opened
3723
  // 4 memtable still not flushed
3724
3725
  // Write to new WAL file
3726
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3727
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3728
1
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
3729
3730
  // Fill up 'nikitich' one more time
3731
1
  ASSERT_OK(Put(3, Key(10), DummyString(1002400)));
3732
  // make it flush
3733
1
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
3734
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3735
  // There are still 4 memtable not flushed, and 2 sst tables
3736
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3737
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3738
1
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
3739
3740
1
  {
3741
1
    auto tables = ListTableFiles(env_, dbname_);
3742
1
    ASSERT_EQ(tables.size(), static_cast<size_t>(2));
3743
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3744
1
              static_cast<uint64_t>(2));
3745
1
  }
3746
3747
1
  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
3748
1
                           options);
3749
1
  {
3750
1
    std::vector<uint64_t> table_files = ListTableFiles(env_, dbname_);
3751
    // Check, that records for 'default', 'dobrynia' and 'pikachu' from
3752
    // first, second and third WALs  went to the same SST.
3753
    // So, there is 6 SSTs: three  for 'nikitich', one for 'default', one for
3754
    // 'dobrynia', one for 'pikachu'
3755
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3756
1
              static_cast<uint64_t>(1));
3757
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3758
1
              static_cast<uint64_t>(3));
3759
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3760
1
              static_cast<uint64_t>(1));
3761
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3762
1
              static_cast<uint64_t>(1));
3763
1
  }
3764
1
}
3765
3766
1
TEST_F(DBTest, SharedWriteBuffer) {
3767
1
  Options options = CurrentOptions();
3768
1
  options.db_write_buffer_size = 100000;  // this is the real limit
3769
1
  options.write_buffer_size    = 500000;  // this is never hit
3770
1
  CreateAndReopenWithCF({"pikachu", "dobrynia", "nikitich"}, options);
3771
3772
  // Trigger a flush on CF "nikitich"
3773
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3774
1
  ASSERT_OK(Put(1, Key(1), DummyString(1)));
3775
1
  ASSERT_OK(Put(3, Key(1), DummyString(90000)));
3776
1
  ASSERT_OK(Put(2, Key(2), DummyString(20000)));
3777
1
  ASSERT_OK(Put(2, Key(1), DummyString(1)));
3778
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
3779
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
3780
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3781
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3782
1
  {
3783
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3784
1
              static_cast<uint64_t>(0));
3785
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3786
1
              static_cast<uint64_t>(0));
3787
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3788
1
              static_cast<uint64_t>(0));
3789
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3790
1
              static_cast<uint64_t>(1));
3791
1
  }
3792
3793
  // "dobrynia": 20KB
3794
  // Flush 'dobrynia'
3795
1
  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
3796
1
  ASSERT_OK(Put(2, Key(2), DummyString(70000)));
3797
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3798
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
3799
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3800
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3801
1
  {
3802
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3803
1
              static_cast<uint64_t>(0));
3804
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3805
1
              static_cast<uint64_t>(0));
3806
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3807
1
              static_cast<uint64_t>(1));
3808
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3809
1
              static_cast<uint64_t>(1));
3810
1
  }
3811
3812
  // "nikitich" still has has data of 80KB
3813
  // Inserting Data in "dobrynia" triggers "nikitich" flushing.
3814
1
  ASSERT_OK(Put(3, Key(2), DummyString(40000)));
3815
1
  ASSERT_OK(Put(2, Key(2), DummyString(40000)));
3816
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3817
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
3818
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3819
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3820
1
  {
3821
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3822
1
              static_cast<uint64_t>(0));
3823
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3824
1
              static_cast<uint64_t>(0));
3825
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3826
1
              static_cast<uint64_t>(1));
3827
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3828
1
              static_cast<uint64_t>(2));
3829
1
  }
3830
3831
  // "dobrynia" still has 40KB
3832
1
  ASSERT_OK(Put(1, Key(2), DummyString(20000)));
3833
1
  ASSERT_OK(Put(0, Key(1), DummyString(10000)));
3834
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3835
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
3836
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
3837
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3838
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3839
  // This should triggers no flush
3840
1
  {
3841
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3842
1
              static_cast<uint64_t>(0));
3843
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3844
1
              static_cast<uint64_t>(0));
3845
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3846
1
              static_cast<uint64_t>(1));
3847
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3848
1
              static_cast<uint64_t>(2));
3849
1
  }
3850
3851
  // "default": 10KB, "pikachu": 20KB, "dobrynia": 40KB
3852
1
  ASSERT_OK(Put(1, Key(2), DummyString(40000)));
3853
1
  ASSERT_OK(Put(0, Key(1), DummyString(1)));
3854
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[0]));
3855
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
3856
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[2]));
3857
1
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[3]));
3858
  // This should triggers flush of "pikachu"
3859
1
  {
3860
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3861
1
              static_cast<uint64_t>(0));
3862
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3863
1
              static_cast<uint64_t>(1));
3864
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3865
1
              static_cast<uint64_t>(1));
3866
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3867
1
              static_cast<uint64_t>(2));
3868
1
  }
3869
3870
  // "default": 10KB, "dobrynia": 40KB
3871
  // Some remaining writes so 'default', 'dobrynia' and 'nikitich' flush on
3872
  // closure.
3873
1
  ASSERT_OK(Put(3, Key(1), DummyString(1)));
3874
1
  ReopenWithColumnFamilies({"default", "pikachu", "dobrynia", "nikitich"},
3875
1
                           options);
3876
1
  {
3877
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"),
3878
1
              static_cast<uint64_t>(1));
3879
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"),
3880
1
              static_cast<uint64_t>(1));
3881
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "dobrynia"),
3882
1
              static_cast<uint64_t>(2));
3883
1
    ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"),
3884
1
              static_cast<uint64_t>(3));
3885
1
  }
3886
1
}
3887
#endif  // ROCKSDB_LITE
3888
3889
0
TEST_F(DBTest, DISABLED_PurgeInfoLogs) {
3890
0
  Options options = CurrentOptions();
3891
0
  options.keep_log_file_num = 5;
3892
0
  options.create_if_missing = true;
3893
0
  for (int mode = 0; mode <= 1; mode++) {
3894
0
    if (mode == 1) {
3895
0
      options.db_log_dir = dbname_ + "_logs";
3896
0
      ASSERT_OK(env_->CreateDirIfMissing(options.db_log_dir));
3897
0
    } else {
3898
0
      options.db_log_dir = "";
3899
0
    }
3900
0
    for (int i = 0; i < 8; i++) {
3901
0
      Reopen(options);
3902
0
    }
3903
3904
0
    std::vector<std::string> files;
3905
0
    ASSERT_OK(env_->GetChildren(options.db_log_dir.empty() ? dbname_ : options.db_log_dir, &files));
3906
0
    int info_log_count = 0;
3907
0
    for (std::string file : files) {
3908
0
      if (file.find("LOG") != std::string::npos) {
3909
0
        info_log_count++;
3910
0
      }
3911
0
    }
3912
0
    ASSERT_EQ(5, info_log_count);
3913
3914
0
    Destroy(options);
3915
    // For mode (1), test DestroyDB() to delete all the logs under DB dir.
3916
    // For mode (2), no info log file should have been put under DB dir.
3917
0
    std::vector<std::string> db_files;
3918
0
    ASSERT_OK(env_->GetChildren(dbname_, &db_files));
3919
0
    for (std::string file : db_files) {
3920
0
      ASSERT_TRUE(file.find("LOG") == std::string::npos);
3921
0
    }
3922
3923
0
    if (mode == 1) {
3924
      // Cleaning up
3925
0
      ASSERT_OK(env_->GetChildren(options.db_log_dir, &files));
3926
0
      for (std::string file : files) {
3927
0
        ASSERT_OK(env_->DeleteFile(options.db_log_dir + "/" + file));
3928
0
      }
3929
0
      ASSERT_OK(env_->DeleteDir(options.db_log_dir));
3930
0
    }
3931
0
  }
3932
0
}
3933
3934
1
TEST_F(DBTest, SyncMultipleLogs) {
3935
1
  const uint64_t kNumBatches = 2;
3936
1
  const int kBatchSize = 1000;
3937
3938
1
  Options options = CurrentOptions();
3939
1
  options.create_if_missing = true;
3940
1
  options.write_buffer_size = 4096;
3941
1
  Reopen(options);
3942
3943
1
  WriteBatch batch;
3944
1
  WriteOptions wo;
3945
1
  wo.sync = true;
3946
3947
3
  for (uint64_t b = 0; b < kNumBatches; b++) {
3948
2
    batch.Clear();
3949
2.00k
    for (int i = 0; i < kBatchSize; i++) {
3950
2.00k
      batch.Put(Key(i), DummyString(128));
3951
2.00k
    }
3952
3953
2
    ASSERT_OK(dbfull()->Write(wo, &batch));
3954
2
  }
3955
3956
1
  ASSERT_OK(dbfull()->SyncWAL());
3957
1
}
3958
3959
#ifndef ROCKSDB_LITE
3960
//
3961
// Test WAL recovery for the various modes available
3962
//
3963
class RecoveryTestHelper {
3964
 public:
3965
  // Number of WAL files to generate
3966
  // Lower number of wals for tsan due to low perf.
3967
  static constexpr int kWALFilesCount = yb::NonTsanVsTsan(10, 5);
3968
  // Starting number for the WAL file name like 00010.log
3969
  static const int kWALFileOffset = 10;
3970
  // Keys to be written per WAL file
3971
  static const int kKeysPerWALFile = 1024;
3972
  // Size of the value
3973
  static const int kValueSize = 10;
3974
3975
  // Create WAL files with values filled in
3976
  static void FillData(DBTest* test, Options* db_options, const size_t wal_count,
3977
311
                       size_t* count) {
3978
311
    *count = 0;
3979
3980
311
    shared_ptr<Cache> table_cache = NewLRUCache(50000, 16);
3981
311
    EnvOptions env_options;
3982
311
    WriteBuffer write_buffer(db_options->db_write_buffer_size);
3983
3984
311
    unique_ptr<VersionSet> versions;
3985
311
    unique_ptr<WalManager> wal_manager;
3986
311
    WriteController write_controller;
3987
3988
311
    versions.reset(new VersionSet(test->dbname_, db_options, env_options,
3989
311
                                  table_cache.get(), &write_buffer,
3990
311
                                  &write_controller));
3991
3992
311
    wal_manager.reset(new WalManager(*db_options, env_options));
3993
3994
311
    std::unique_ptr<log::Writer> current_log_writer;
3995
3996
3.42k
    for (size_t j = kWALFileOffset; j < wal_count + kWALFileOffset; j++) {
3997
3.11k
      uint64_t current_log_number = j;
3998
3.11k
      std::string fname = LogFileName(test->dbname_, current_log_number);
3999
3.11k
      unique_ptr<WritableFile> file;
4000
3.11k
      ASSERT_OK(db_options->env->NewWritableFile(fname, &file, env_options));
4001
3.11k
      unique_ptr<WritableFileWriter> file_writer(
4002
3.11k
          new WritableFileWriter(std::move(file), env_options));
4003
3.11k
      current_log_writer.reset(
4004
3.11k
          new log::Writer(std::move(file_writer), current_log_number,
4005
3.11k
                          db_options->recycle_log_file_num > 0));
4006
4007
3.18M
      for (int i = 0; i < kKeysPerWALFile; i++) {
4008
3.18M
        std::string key = "key" + ToString((*count)++);
4009
3.18M
        std::string value = test->DummyString(kValueSize);
4010
3.18M
        assert(current_log_writer.get() != nullptr);
4011
3.18M
        uint64_t seq = versions->LastSequence() + 1;
4012
3.18M
        WriteBatch batch;
4013
3.18M
        batch.Put(key, value);
4014
3.18M
        WriteBatchInternal::SetSequence(&batch, seq);
4015
3.18M
        ASSERT_OK(current_log_writer->AddRecord(WriteBatchInternal::Contents(&batch)));
4016
3.18M
        versions->SetLastSequence(seq);
4017
3.18M
      }
4018
3.11k
    }
4019
4020
4021
311
  }
4022
4023
  // Recreate and fill the store with some data
4024
311
  static size_t FillData(DBTest* test, Options* options) {
4025
311
    options->create_if_missing = true;
4026
311
    test->DestroyAndReopen(*options);
4027
311
    test->Close();
4028
4029
311
    size_t count = 0;
4030
311
    FillData(test, options, kWALFilesCount, &count);
4031
311
    return count;
4032
311
  }
4033
4034
  // Read back all the keys we wrote and return the number of keys found
4035
201
  static size_t GetData(DBTest* test) {
4036
201
    size_t count = 0;
4037
2.05M
    for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) {
4038
2.05M
      if (test->Get("key" + ToString(i)) != "NOT_FOUND") {
4039
1.76M
        ++count;
4040
1.76M
      }
4041
2.05M
    }
4042
201
    return count;
4043
201
  }
4044
4045
  // Manuall corrupt the specified WAL
4046
  static void CorruptWAL(DBTest* test, const Options& options, const double off,
4047
                         const double len, const int wal_file_id,
4048
310
                         const bool trunc = false) {
4049
310
    Env* env = options.env;
4050
310
    std::string fname = LogFileName(test->dbname_, wal_file_id);
4051
310
    uint64_t size;
4052
310
    ASSERT_OK(env->GetFileSize(fname, &size));
4053
310
    ASSERT_GT(size, 0);
4054
#ifdef OS_WIN
4055
    // Windows disk cache behaves differently. When we truncate
4056
    // the original content is still in the cache due to the original
4057
    // handle is still open. Generally, in Windows, one prohibits
4058
    // shared access to files and it is not needed for WAL but we allow
4059
    // it to induce corruption at various tests.
4060
    test->Close();
4061
#endif
4062
310
    if (trunc) {
4063
150
      ASSERT_EQ(0, truncate(fname.c_str(),
4064
150
        static_cast<int64_t>(size * off)));
4065
160
    } else {
4066
160
      InduceCorruption(fname, static_cast<size_t>(size * off),
4067
160
        static_cast<size_t>(size * len));
4068
160
    }
4069
310
  }
4070
4071
  // Overwrite data with 'a' from offset for length len
4072
  static void InduceCorruption(const std::string& filename, size_t offset,
4073
160
                               size_t len) {
4074
160
    ASSERT_GT(len, 0U);
4075
4076
160
    int fd = open(filename.c_str(), O_RDWR);
4077
4078
160
    ASSERT_GT(fd, 0);
4079
160
    ASSERT_EQ(offset, lseek(fd, offset, SEEK_SET));
4080
4081
160
    void* buf = alloca(len);
4082
160
    memset(buf, 'a', len);
4083
160
    ASSERT_EQ(len, write(fd, buf, len));
4084
4085
160
    close(fd);
4086
160
  }
4087
};
4088
4089
// Test scope:
4090
// - We expect to open the data store when there is incomplete trailing writes
4091
// at the end of any of the logs
4092
// - We do not expect to open the data store for corruption
4093
1
TEST_F(DBTest, kTolerateCorruptedTailRecords) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;

  for (auto trunc : {true, false}) {                         /* Corruption style */
    for (int offset_pos = 0; offset_pos < 4; offset_pos++) { /* Corruption offset */
      for (int wal_id = jstart; wal_id < jend; wal_id++) {   /* WAL file */
        // Fill the store, then damage one of its WAL files.
        Options options = CurrentOptions();
        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
        // test checksum failure or parsing
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/offset_pos * .3,
                                       /*len%=*/.1, /*wal=*/wal_id, trunc);

        options.wal_recovery_mode =
            WALRecoveryMode::kTolerateCorruptedTailRecords;
        if (trunc) {
          // Truncation looks like an incomplete trailing write: recovery
          // succeeds, but some rows are lost.
          options.create_if_missing = false;
          ASSERT_OK(TryReopen(options));
          const size_t recovered_row_count = RecoveryTestHelper::GetData(this);
          ASSERT_TRUE(offset_pos == 0 || recovered_row_count > 0);
          ASSERT_LT(recovered_row_count, row_count);
        } else {
          // Overwritten garbage is genuine corruption: the open must fail.
          ASSERT_NOK(TryReopen(options));
        }
      }
    }
  }
}
4124
4125
// Test scope:
4126
// We don't expect the data store to be opened if there is any corruption
4127
// (leading, middle or trailing -- incomplete writes or corruption)
4128
1
TEST_F(DBTest, kAbsoluteConsistency) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;

  // Sanity check: with pristine WALs, absolute-consistency recovery keeps
  // every row.
  Options options = CurrentOptions();
  const size_t row_count = RecoveryTestHelper::FillData(this, &options);
  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
  options.create_if_missing = false;
  ASSERT_OK(TryReopen(options));
  ASSERT_EQ(RecoveryTestHelper::GetData(this), row_count);

  for (auto trunc : {true, false}) {                         /* Corruption style */
    for (int offset_pos = 0; offset_pos < 4; offset_pos++) { /* Corruption offset */
      // Truncating at offset 0 leaves an empty (hence still valid) WAL; skip.
      if (trunc && offset_pos == 0) {
        continue;
      }

      for (int wal_id = jstart; wal_id < jend; wal_id++) { /* wal files */
        // Refill with fresh data, then corrupt one WAL.
        RecoveryTestHelper::FillData(this, &options);
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/offset_pos * .3,
                                       /*len%=*/.1, wal_id, trunc);
        // In this mode any damage at all must make the open fail.
        // (FillData reset create_if_missing, so set both options again.)
        options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
        options.create_if_missing = false;
        ASSERT_NOK(TryReopen(options));
      }
    }
  }
}
4160
4161
// Test scope:
4162
// - We expect to open data store under all circumstances
4163
// - We expect only data up to the point where the first error was encountered
4164
1
TEST_F(DBTest, kPointInTimeRecovery) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;
  const size_t maxkeys = static_cast<size_t>(
      RecoveryTestHelper::kWALFilesCount * RecoveryTestHelper::kKeysPerWALFile);

  for (auto trunc : {true, false}) {        /* Corruption style */
    for (int i = 0; i < 4; i++) {           /* Offset of corruption */
      for (int j = jstart; j < jend; j++) { /* WAL file */
        // Fill data for testing
        Options options = CurrentOptions();
        const size_t row_count = RecoveryTestHelper::FillData(this, &options);

        // Corrupt the wal
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/i * .3,
                                       /*len%=*/.1, j, trunc);

        // Point-in-time recovery must always succeed in opening the DB...
        options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
        options.create_if_missing = false;
        ASSERT_OK(TryReopen(options));

        // ...but rows at/after the corruption point must have been dropped.
        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
        ASSERT_LT(recovered_row_count, row_count);

        // The recovered keys must form a contiguous prefix "key0".."keyN":
        // once the first missing key is seen, no later key may be present.
        // BUG FIX: this used to probe Get("key" + ToString(i)) — the
        // corruption-offset loop index, constant across the whole loop — so
        // the prefix invariant was never actually verified. Probe key `k`.
        bool expect_data = true;
        for (size_t k = 0; k < maxkeys; ++k) {
          bool found = Get("key" + ToString(k)) != "NOT_FOUND";
          if (expect_data && !found) {
            expect_data = false;
          }
          ASSERT_EQ(found, expect_data);
        }

        // Everything written before the corrupted WAL must survive.
        const size_t min = RecoveryTestHelper::kKeysPerWALFile *
                           (j - RecoveryTestHelper::kWALFileOffset);
        ASSERT_GE(recovered_row_count, min);
        if (!trunc && i != 0) {
          // Mid-file corruption can at most save the corrupted file itself.
          const size_t max = RecoveryTestHelper::kKeysPerWALFile *
                             (j - RecoveryTestHelper::kWALFileOffset + 1);
          ASSERT_LE(recovered_row_count, max);
        }
      }
    }
  }
}
4211
4212
// Test scope:
4213
// - We expect to open the data store under all scenarios
4214
// - We expect to have recovered records past the corruption zone
4215
1
TEST_F(DBTest, kSkipAnyCorruptedRecords) {
  const int jstart = RecoveryTestHelper::kWALFileOffset;
  const int jend = jstart + RecoveryTestHelper::kWALFilesCount;

  for (auto trunc : {true, false}) {                         /* Corruption style */
    for (int offset_pos = 0; offset_pos < 4; offset_pos++) { /* Corruption offset */
      for (int wal_id = jstart; wal_id < jend; wal_id++) {   /* wal files */
        // Fill the store and damage one WAL file.
        Options options = CurrentOptions();
        const size_t row_count = RecoveryTestHelper::FillData(this, &options);
        RecoveryTestHelper::CorruptWAL(this, options, /*off=*/offset_pos * .3,
                                       /*len%=*/.1, wal_id, trunc);

        // Skip-any-corruption mode must always manage to open the store...
        options.wal_recovery_mode = WALRecoveryMode::kSkipAnyCorruptedRecords;
        options.create_if_missing = false;
        ASSERT_OK(TryReopen(options));

        // ...at the cost of losing at least the corrupted records.
        size_t recovered_row_count = RecoveryTestHelper::GetData(this);
        ASSERT_LT(recovered_row_count, row_count);

        if (!trunc) {
          // Overwrite corruption leaves valid records on both sides, so
          // unless the file was damaged from the very start, something must
          // survive past the corruption zone.
          ASSERT_TRUE(offset_pos != 0 || recovered_row_count > 0);
        }
      }
    }
  }
}
4246
4247
// Multi-threaded test:
4248
namespace {
4249
4250
static const int kColumnFamilies = 10;
4251
static const int kNumThreads = 10;
4252
static const int kTestSeconds = 10;
4253
static const int kNumKeys = 1000;
4254
4255
struct MTState {
4256
  DBTest* test;
4257
  std::atomic<bool> stop;
4258
  std::atomic<int> counter[kNumThreads];
4259
  std::atomic<bool> thread_done[kNumThreads];
4260
};
4261
4262
struct MTThread {
4263
  MTState* state;
4264
  int id;
4265
};
4266
4267
297
static void MTThreadBody(void* arg) {
4268
297
  MTThread* t = reinterpret_cast<MTThread*>(arg);
4269
297
  int id = t->id;
4270
297
  DB* db = t->state->test->db_;
4271
297
  int counter = 0;
4272
297
  fprintf(stderr, "... starting thread %d\n", id);
4273
297
  Random rnd(1000 + id);
4274
297
  char valbuf[1500];
4275
2.95M
  while (t->state->stop.load(std::memory_order_acquire) == false) {
4276
2.95M
    t->state->counter[id].store(counter, std::memory_order_release);
4277
4278
2.95M
    int key = rnd.Uniform(kNumKeys);
4279
2.95M
    char keybuf[20];
4280
2.95M
    snprintf(keybuf, sizeof(keybuf), "%016d", key);
4281
4282
2.95M
    if (rnd.OneIn(2)) {
4283
      // Write values of the form <key, my id, counter, cf, unique_id>.
4284
      // into each of the CFs
4285
      // We add some padding for force compactions.
4286
1.48M
      int unique_id = rnd.Uniform(1000000);
4287
4288
      // Half of the time directly use WriteBatch. Half of the time use
4289
      // WriteBatchWithIndex.
4290
1.48M
      if (rnd.OneIn(2)) {
4291
745k
        WriteBatch batch;
4292
8.17M
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
4293
7.43M
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
4294
7.43M
                   static_cast<int>(counter), cf, unique_id);
4295
7.43M
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
4296
7.43M
        }
4297
745k
        ASSERT_OK(db->Write(WriteOptions(), &batch));
4298
738k
      } else {
4299
738k
        WriteBatchWithIndex batch(db->GetOptions().comparator);
4300
8.08M
        for (int cf = 0; cf < kColumnFamilies; ++cf) {
4301
7.35M
          snprintf(valbuf, sizeof(valbuf), "%d.%d.%d.%d.%-1000d", key, id,
4302
7.35M
                   static_cast<int>(counter), cf, unique_id);
4303
7.35M
          batch.Put(t->state->test->handles_[cf], Slice(keybuf), Slice(valbuf));
4304
7.35M
        }
4305
738k
        ASSERT_OK(db->Write(WriteOptions(), batch.GetWriteBatch()));
4306
738k
      }
4307
1.47M
    } else {
4308
      // Read a value and verify that it matches the pattern written above
4309
      // and that writes to all column families were atomic (unique_id is the
4310
      // same)
4311
1.47M
      std::vector<Slice> keys(kColumnFamilies, Slice(keybuf));
4312
1.47M
      std::vector<std::string> values;
4313
1.47M
      std::vector<Status> statuses =
4314
1.47M
          db->MultiGet(ReadOptions(), t->state->test->handles_, keys, &values);
4315
1.47M
      Status s = statuses[0];
4316
      // all statuses have to be the same
4317
14.8M
      for (size_t i = 1; i < statuses.size(); ++i) {
4318
        // they are either both ok or both not-found
4319
13.3M
        ASSERT_TRUE((s.ok() && statuses[i].ok()) ||
4320
13.3M
                    (s.IsNotFound() && statuses[i].IsNotFound()));
4321
13.3M
      }
4322
1.47M
      if (s.IsNotFound()) {
4323
        // Key has not yet been written
4324
1.44M
      } else {
4325
        // Check that the writer thread counter is >= the counter in the value
4326
1.44M
        ASSERT_OK(s);
4327
1.45M
        int unique_id = -1;
4328
15.9M
        for (int i = 0; i < kColumnFamilies; ++i) {
4329
14.5M
          int k, w, c, cf, u;
4330
29.0M
          ASSERT_EQ(5, sscanf(values[i].c_str(), "%d.%d.%d.%d.%d", &k, &w,
4331
29.0M
                              &c, &cf, &u))
4332
29.0M
              << values[i];
4333
14.5M
          ASSERT_EQ(k, key);
4334
14.5M
          ASSERT_GE(w, 0);
4335
14.5M
          ASSERT_LT(w, kNumThreads);
4336
14.5M
          ASSERT_LE(c, t->state->counter[w].load(std::memory_order_acquire));
4337
14.5M
          ASSERT_EQ(cf, i);
4338
14.5M
          if (i == 0) {
4339
1.45M
            unique_id = u;
4340
13.0M
          } else {
4341
            // this checks that updates across column families happened
4342
            // atomically -- all unique ids are the same
4343
13.0M
            ASSERT_EQ(u, unique_id);
4344
13.0M
          }
4345
14.5M
        }
4346
1.45M
      }
4347
1.47M
    }
4348
2.93M
    counter++;
4349
2.93M
  }
4350
18.4E
  t->state->thread_done[id].store(true, std::memory_order_release);
4351
18.4E
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, counter);
4352
18.4E
}
4353
4354
}  // namespace
4355
4356
class MultiThreadedDBTest : public DBTest,
4357
                            public ::testing::WithParamInterface<int> {
4358
 public:
4359
30
  void SetUp() override { option_config_ = GetParam(); }
4360
4361
220
  static std::vector<int> GenerateOptionConfigs() {
4362
220
    std::vector<int> optionConfigs;
4363
6.82k
    for (int optionConfig = kDefault; optionConfig < kEnd; ++optionConfig) {
4364
6.60k
      optionConfigs.push_back(optionConfig);
4365
6.60k
    }
4366
220
    return optionConfigs;
4367
220
  }
4368
};
4369
4370
30
TEST_P(MultiThreadedDBTest, MultiThreaded) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;

  // Create CFs "1".."9" in addition to the default one.
  std::vector<std::string> cfs;
  for (int cf = 1; cf < kColumnFamilies; ++cf) {
    cfs.push_back(ToString(cf));
  }
  CreateAndReopenWithCF(cfs, CurrentOptions(options_override));

  // Initialize the shared state.
  MTState mt;
  mt.test = this;
  mt.stop.store(false, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    mt.counter[id].store(0, std::memory_order_release);
    mt.thread_done[id].store(false, std::memory_order_release);
  }

  // Launch the worker threads.
  MTThread thread[kNumThreads];
  for (int id = 0; id < kNumThreads; id++) {
    thread[id].state = &mt;
    thread[id].id = id;
    env_->StartThread(MTThreadBody, &thread[id]);
  }

  // Let them hammer the DB for a while.
  env_->SleepForMicroseconds(kTestSeconds * 1000000);

  // Signal shutdown and wait until every worker has acknowledged.
  mt.stop.store(true, std::memory_order_release);
  for (int id = 0; id < kNumThreads; id++) {
    while (mt.thread_done[id].load(std::memory_order_acquire) == false) {
      env_->SleepForMicroseconds(100000);
    }
  }
}
4406
4407
// Instantiate MultiThreadedDBTest once per option configuration id.
INSTANTIATE_TEST_CASE_P(
4408
    MultiThreaded, MultiThreadedDBTest,
4409
    ::testing::ValuesIn(MultiThreadedDBTest::GenerateOptionConfigs()));
4410
#endif  // ROCKSDB_LITE
4411
4412
// Group commit test:
4413
namespace {
4414
4415
static const int kGCNumThreads = 4;
4416
static const int kGCNumKeys = 1000;
4417
4418
struct GCThread {
4419
  DB* db;
4420
  int id;
4421
  std::atomic<bool> done;
4422
};
4423
4424
112
static void GCThreadBody(void* arg) {
4425
112
  GCThread* t = reinterpret_cast<GCThread*>(arg);
4426
112
  int id = t->id;
4427
112
  DB* db = t->db;
4428
112
  WriteOptions wo;
4429
4430
111k
  for (int i = 0; i < kGCNumKeys; ++i) {
4431
111k
    std::string kv(ToString(i + id * kGCNumKeys));
4432
111k
    ASSERT_OK(db->Put(wo, kv, kv));
4433
111k
  }
4434
18.4E
  t->done = true;
4435
18.4E
}
4436
4437
}  // namespace
4438
4439
1
TEST_F(DBTest, GroupCommitTest) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    // Slow down WAL appends so concurrent writers pile up behind a leader
    // and group commit actually kicks in.
    env_->log_write_slowdown_.store(100);
    options.statistics = rocksdb::CreateDBStatisticsForTests();
    Reopen(options);

    // Start threads
    GCThread thread[kGCNumThreads];
    for (int id = 0; id < kGCNumThreads; id++) {
      thread[id].id = id;
      thread[id].db = db_;
      thread[id].done = false;
      env_->StartThread(GCThreadBody, &thread[id]);
    }

    // Wait for every writer to finish.
    for (int id = 0; id < kGCNumThreads; id++) {
      while (thread[id].done == false) {
        env_->SleepForMicroseconds(100000);
      }
    }
    env_->log_write_slowdown_.store(0);

    // At least one write must have been committed by another thread's leader.
    ASSERT_GT(TestGetTickerCount(options, WRITE_DONE_BY_OTHER), 0);

    // All keys from all threads must be present, in sorted order.
    std::vector<std::string> expected_db;
    for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
      expected_db.push_back(ToString(i));
    }
    std::sort(expected_db.begin(), expected_db.end());

    // FIX: hold the iterator in a unique_ptr so it is released even when a
    // fatal ASSERT below returns early — the raw pointer + manual delete
    // leaked it on any verification failure.
    std::unique_ptr<Iterator> itr(db_->NewIterator(ReadOptions()));
    itr->SeekToFirst();
    for (const auto& x : expected_db) {  // const& avoids a copy per key
      ASSERT_TRUE(itr->Valid());
      ASSERT_EQ(itr->key().ToString(), x);
      ASSERT_EQ(itr->value().ToString(), x);
      itr->Next();
    }
    ASSERT_TRUE(!itr->Valid());

    // Group commit must still record per-write latency.
    HistogramData hist_data = {0, 0, 0, 0, 0};
    options.statistics->histogramData(DB_WRITE, &hist_data);
    ASSERT_GT(hist_data.average, 0.0);
  } while (ChangeOptions(kSkipNoSeekToLast));
}
4487
4488
namespace {
// In-memory reference model of DB contents: an ordered key -> value map.
using KVMap = std::map<std::string, std::string>;
}  // namespace
4491
4492
class ModelDB: public DB {
4493
 public:
4494
  class ModelSnapshot : public Snapshot {
4495
   public:
4496
    KVMap map_;
4497
4498
0
    SequenceNumber GetSequenceNumber() const override {
4499
      // no need to call this
4500
0
      assert(false);
4501
0
      return 0;
4502
0
    }
4503
  };
4504
4505
28
  explicit ModelDB(const Options& options) : options_(options) {}
4506
  using DB::Put;
4507
  virtual Status Put(const WriteOptions& o, ColumnFamilyHandle* cf,
4508
126k
                     const Slice& k, const Slice& v) override {
4509
126k
    WriteBatch batch;
4510
126k
    batch.Put(cf, k, v);
4511
126k
    return Write(o, &batch);
4512
126k
  }
4513
  using DB::Delete;
4514
  virtual Status Delete(const WriteOptions& o, ColumnFamilyHandle* cf,
4515
125k
                        const Slice& key) override {
4516
125k
    WriteBatch batch;
4517
125k
    batch.Delete(cf, key);
4518
125k
    return Write(o, &batch);
4519
125k
  }
4520
  using DB::SingleDelete;
4521
  virtual Status SingleDelete(const WriteOptions& o, ColumnFamilyHandle* cf,
4522
0
                              const Slice& key) override {
4523
0
    WriteBatch batch;
4524
0
    batch.SingleDelete(cf, key);
4525
0
    return Write(o, &batch);
4526
0
  }
4527
  using DB::Merge;
4528
  virtual Status Merge(const WriteOptions& o, ColumnFamilyHandle* cf,
4529
0
                       const Slice& k, const Slice& v) override {
4530
0
    WriteBatch batch;
4531
0
    batch.Merge(cf, k, v);
4532
0
    return Write(o, &batch);
4533
0
  }
4534
  using DB::Get;
4535
  virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* cf,
4536
0
                     const Slice& key, std::string* value) override {
4537
0
    return STATUS(NotSupported, key);
4538
0
  }
4539
4540
  using DB::MultiGet;
4541
  virtual std::vector<Status> MultiGet(
4542
      const ReadOptions& options,
4543
      const std::vector<ColumnFamilyHandle*>& column_family,
4544
      const std::vector<Slice>& keys,
4545
0
      std::vector<std::string>* values) override {
4546
0
    std::vector<Status> s(keys.size(),
4547
0
                          STATUS(NotSupported, "Not implemented."));
4548
0
    return s;
4549
0
  }
4550
4551
#ifndef ROCKSDB_LITE
4552
  using DB::AddFile;
4553
  virtual Status AddFile(ColumnFamilyHandle* column_family,
4554
                         const ExternalSstFileInfo* file_path,
4555
0
                         bool move_file) override {
4556
0
    return STATUS(NotSupported, "Not implemented.");
4557
0
  }
4558
  virtual Status AddFile(ColumnFamilyHandle* column_family,
4559
                         const std::string& file_path,
4560
0
                         bool move_file) override {
4561
0
    return STATUS(NotSupported, "Not implemented.");
4562
0
  }
4563
4564
  using DB::GetPropertiesOfAllTables;
4565
  virtual Status GetPropertiesOfAllTables(
4566
      ColumnFamilyHandle* column_family,
4567
0
      TablePropertiesCollection* props) override {
4568
0
    return Status();
4569
0
  }
4570
4571
  virtual Status GetPropertiesOfTablesInRange(
4572
      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
4573
0
      TablePropertiesCollection* props) override {
4574
0
    return Status();
4575
0
  }
4576
#endif  // ROCKSDB_LITE
4577
4578
  using DB::KeyMayExist;
4579
  virtual bool KeyMayExist(const ReadOptions& options,
4580
                           ColumnFamilyHandle* column_family, const Slice& key,
4581
                           std::string* value,
4582
0
                           bool* value_found = nullptr) override {
4583
0
    if (value_found != nullptr) {
4584
0
      *value_found = false;
4585
0
    }
4586
0
    return true; // Not Supported directly
4587
0
  }
4588
  using DB::NewIterator;
4589
  virtual Iterator* NewIterator(const ReadOptions& options,
4590
8.00k
                                ColumnFamilyHandle* column_family) override {
4591
8.00k
    if (options.snapshot == nullptr) {
4592
5.42k
      KVMap* saved = new KVMap;
4593
5.42k
      *saved = map_;
4594
5.42k
      return new ModelIter(saved, true);
4595
2.57k
    } else {
4596
2.57k
      const KVMap* snapshot_state =
4597
2.57k
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
4598
2.57k
      return new ModelIter(snapshot_state, false);
4599
2.57k
    }
4600
8.00k
  }
4601
  virtual Status NewIterators(
4602
      const ReadOptions& options,
4603
      const std::vector<ColumnFamilyHandle*>& column_family,
4604
0
      std::vector<Iterator*>* iterators) override {
4605
0
    return STATUS(NotSupported, "Not supported yet");
4606
0
  }
4607
2.80k
  const Snapshot* GetSnapshot() override {
4608
2.80k
    ModelSnapshot* snapshot = new ModelSnapshot;
4609
2.80k
    snapshot->map_ = map_;
4610
2.80k
    return snapshot;
4611
2.80k
  }
4612
4613
2.80k
  void ReleaseSnapshot(const Snapshot* snapshot) override {
4614
2.80k
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
4615
2.80k
  }
4616
4617
  // Applies a write batch to the in-memory map by replaying its operations
  // through a WriteBatch::Handler.
  virtual Status Write(const WriteOptions& options,
                       WriteBatch* batch) override {
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
      void Put(const Slice& key, const Slice& value) override {
        (*map_)[key.ToString()] = value.ToString();
      }
      void Merge(const Slice& key, const Slice& value) override {
        // Merge is deliberately ignored by the model for now.
      }
      void Delete(const Slice& key) override {
        map_->erase(key.ToString());
      }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
  }
4637
4638
  using DB::GetProperty;
4639
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
4640
0
                           const Slice& property, std::string* value) override {
4641
0
    return false;
4642
0
  }
4643
  using DB::GetIntProperty;
4644
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
4645
0
                              const Slice& property, uint64_t* value) override {
4646
0
    return false;
4647
0
  }
4648
  using DB::GetAggregatedIntProperty;
4649
  virtual bool GetAggregatedIntProperty(const Slice& property,
4650
0
                                        uint64_t* value) override {
4651
0
    return false;
4652
0
  }
4653
  using DB::GetApproximateSizes;
4654
  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
4655
                                   const Range* range, int n, uint64_t* sizes,
4656
0
                                   bool include_memtable) override {
4657
0
    for (int i = 0; i < n; i++) {
4658
0
      sizes[i] = 0;
4659
0
    }
4660
0
  }
4661
  using DB::CompactRange;
4662
  virtual Status CompactRange(const CompactRangeOptions& options,
4663
                              ColumnFamilyHandle* column_family,
4664
0
                              const Slice* start, const Slice* end) override {
4665
0
    return STATUS(NotSupported, "Not supported operation.");
4666
0
  }
4667
4668
  using DB::CompactFiles;
4669
  virtual Status CompactFiles(
4670
      const CompactionOptions& compact_options,
4671
      ColumnFamilyHandle* column_family,
4672
      const std::vector<std::string>& input_file_names,
4673
0
      const int output_level, const int output_path_id = -1) override {
4674
0
    return STATUS(NotSupported, "Not supported operation.");
4675
0
  }
4676
4677
0
  // Background work does not exist in the model.
  Status PauseBackgroundWork() override {
    return STATUS(NotSupported, "Not supported operation.");
  }

  Status ContinueBackgroundWork() override {
    return STATUS(NotSupported, "Not supported operation.");
  }

  Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& column_family_handles) override {
    return STATUS(NotSupported, "Not supported operation.");
  }
4689
4690
  using DB::NumberLevels;
4691
0
  int NumberLevels(ColumnFamilyHandle* column_family) override {
4692
0
    return 1;
4693
0
  }
4694
4695
  using DB::MaxMemCompactionLevel;
4696
  virtual int MaxMemCompactionLevel(
4697
0
      ColumnFamilyHandle* column_family) override {
4698
0
    return 1;
4699
0
  }
4700
4701
  using DB::Level0StopWriteTrigger;
4702
  virtual int Level0StopWriteTrigger(
4703
0
      ColumnFamilyHandle* column_family) override {
4704
0
    return -1;
4705
0
  }
4706
4707
0
  const std::string& GetName() const override { return name_; }
4708
4709
0
  Env* GetEnv() const override { return nullptr; }
4710
4711
0
  Env* GetCheckpointEnv() const override { return nullptr; }
4712
4713
  using DB::GetOptions;
4714
  virtual const Options& GetOptions(
4715
0
      ColumnFamilyHandle* column_family) const override {
4716
0
    return options_;
4717
0
  }
4718
4719
  using DB::GetDBOptions;
4720
0
  const DBOptions& GetDBOptions() const override { return options_; }
4721
4722
  using DB::Flush;
4723
  virtual Status Flush(const rocksdb::FlushOptions& options,
4724
0
                       ColumnFamilyHandle* column_family) override {
4725
0
    return Status::OK();
4726
0
  }
4727
4728
  using DB::WaitForFlush;
4729
0
  virtual Status WaitForFlush(ColumnFamilyHandle* column_family) override {
4730
0
    return Status::OK();
4731
0
  }
4732
4733
0
  Status SyncWAL() override {
4734
0
    return Status::OK();
4735
0
  }
4736
4737
#ifndef ROCKSDB_LITE
4738
0
  Status DisableFileDeletions() override { return Status::OK(); }
4739
4740
0
  Status EnableFileDeletions(bool force) override {
4741
0
    return Status::OK();
4742
0
  }
4743
  virtual Status GetLiveFiles(std::vector<std::string>&, uint64_t* size,
4744
0
                              bool flush_memtable = true) override {
4745
0
    return Status::OK();
4746
0
  }
4747
4748
0
  Status GetSortedWalFiles(VectorLogPtr* files) override {
4749
0
    return Status::OK();
4750
0
  }
4751
4752
0
  Status DeleteFile(std::string name) override { return Status::OK(); }
4753
4754
  virtual Status GetUpdatesSince(
4755
      rocksdb::SequenceNumber, unique_ptr<rocksdb::TransactionLogIterator>*,
4756
      const TransactionLogIterator::ReadOptions&
4757
0
          read_options = TransactionLogIterator::ReadOptions()) override {
4758
0
    return NotSupported();
4759
0
  }
4760
4761
  virtual void GetColumnFamilyMetaData(
4762
      ColumnFamilyHandle* column_family,
4763
0
      ColumnFamilyMetaData* metadata) override {}
4764
4765
  virtual void GetColumnFamiliesOptions(
4766
      std::vector<std::string>* column_family_names,
4767
0
      std::vector<ColumnFamilyOptions>* column_family_options) override {}
4768
#endif  // ROCKSDB_LITE
4769
4770
0
  Status GetDbIdentity(std::string* identity) const override {
4771
0
    return Status::OK();
4772
0
  }
4773
4774
0
  SequenceNumber GetLatestSequenceNumber() const override { return 0; }
4775
4776
260k
  ColumnFamilyHandle* DefaultColumnFamily() const override {
4777
260k
    return nullptr;
4778
260k
  }
4779
4780
0
  Result<std::string> GetMiddleKey() override {
4781
0
    return NotSupported();
4782
0
  }
4783
4784
 private:
4785
0
  CHECKED_STATUS NotSupported() const {
4786
0
    return STATUS(NotSupported, "Not supported in Model DB");
4787
0
  }
4788
4789
  class ModelIter: public Iterator {
4790
   public:
4791
    ModelIter(const KVMap* map, bool owned)
4792
8.00k
        : map_(map), owned_(owned), iter_(map_->end()) {
4793
8.00k
    }
4794
8.00k
    ~ModelIter() {
4795
8.00k
      if (owned_) delete map_;
4796
8.00k
    }
4797
11.3M
    bool Valid() const override { return iter_ != map_->end(); }
4798
8.00k
    void SeekToFirst() override { iter_ = map_->begin(); }
4799
0
    void SeekToLast() override {
4800
0
      if (map_->empty()) {
4801
0
        iter_ = map_->end();
4802
0
      } else {
4803
0
        iter_ = map_->find(map_->rbegin()->first);
4804
0
      }
4805
0
    }
4806
0
    void Seek(const Slice& k) override {
4807
0
      iter_ = map_->lower_bound(k.ToString());
4808
0
    }
4809
11.3M
    void Next() override { ++iter_; }
4810
0
    void Prev() override {
4811
0
      if (iter_ == map_->begin()) {
4812
0
        iter_ = map_->end();
4813
0
        return;
4814
0
      }
4815
0
      --iter_;
4816
0
    }
4817
4818
11.3M
    Slice key() const override { return iter_->first; }
4819
11.3M
    Slice value() const override { return iter_->second; }
4820
0
    Status status() const override { return Status::OK(); }
4821
4822
   private:
4823
    const KVMap* const map_;
4824
    const bool owned_;  // Do we own map_
4825
    KVMap::const_iterator iter_;
4826
  };
4827
  const Options options_;
4828
  KVMap map_;
4829
  std::string name_ = "";
4830
};
4831
4832
342k
// Produces a random key at least `minimum` characters long.  One third of
// the keys are a single character (to encourage collisions); the rest use
// a uniform — occasionally skewed — length below 10.
static std::string RandomKey(Random* rnd, int minimum = 0) {
  for (;;) {
    int len;
    if (rnd->OneIn(3)) {
      len = 1;  // Short sometimes to encourage collisions.
    } else if (rnd->OneIn(100)) {
      len = rnd->Skewed(10);
    } else {
      len = rnd->Uniform(10);
    }
    if (len >= minimum) {
      return test::RandomKey(rnd, len);
    }
  }
}
4841
4842
static bool CompareIterators(int step,
4843
                             DB* model,
4844
                             DB* db,
4845
                             const Snapshot* model_snap,
4846
8.00k
                             const Snapshot* db_snap) {
4847
8.00k
  ReadOptions options;
4848
8.00k
  options.snapshot = model_snap;
4849
8.00k
  Iterator* miter = model->NewIterator(options);
4850
8.00k
  options.snapshot = db_snap;
4851
8.00k
  Iterator* dbiter = db->NewIterator(options);
4852
8.00k
  bool ok = true;
4853
8.00k
  int count = 0;
4854
8.00k
  for (miter->SeekToFirst(), dbiter->SeekToFirst();
4855
11.3M
       ok && miter->Valid() && dbiter->Valid();
4856
11.3M
       miter->Next(), dbiter->Next()) {
4857
11.3M
    count++;
4858
11.3M
    if (miter->key().compare(dbiter->key()) != 0) {
4859
0
      fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
4860
0
              step,
4861
0
              EscapeString(miter->key()).c_str(),
4862
0
              EscapeString(dbiter->key()).c_str());
4863
0
      ok = false;
4864
0
      break;
4865
0
    }
4866
4867
11.3M
    if (miter->value().compare(dbiter->value()) != 0) {
4868
0
      fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
4869
0
              step,
4870
0
              EscapeString(miter->key()).c_str(),
4871
0
              EscapeString(miter->value()).c_str(),
4872
0
              EscapeString(miter->value()).c_str());
4873
0
      ok = false;
4874
0
    }
4875
11.3M
  }
4876
4877
8.00k
  if (ok) {
4878
8.00k
    if (miter->Valid() != dbiter->Valid()) {
4879
0
      fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
4880
0
              step, miter->Valid(), dbiter->Valid());
4881
0
      ok = false;
4882
0
    }
4883
8.00k
  }
4884
8.00k
  delete miter;
4885
8.00k
  delete dbiter;
4886
8.00k
  return ok;
4887
8.00k
}
4888
4889
class DBTestRandomized : public DBTest,
4890
                         public ::testing::WithParamInterface<int> {
4891
 public:
4892
28
  void SetUp() override { option_config_ = GetParam(); }
4893
4894
220
  static std::vector<int> GenerateOptionConfigs() {
4895
220
    std::vector<int> option_configs;
4896
6.82k
    for (int option_config = kDefault; option_config < kEnd; ++option_config) {
4897
6.60k
      if (!ShouldSkipOptions(option_config, kSkipDeletesFilterFirst | kSkipNoSeekToLast)) {
4898
5.94k
        option_configs.push_back(option_config);
4899
5.94k
      }
4900
6.60k
    }
4901
220
    option_configs.push_back(kBlockBasedTableWithIndexRestartInterval);
4902
220
    return option_configs;
4903
220
  }
4904
};
4905
4906
INSTANTIATE_TEST_CASE_P(
4907
    DBTestRandomized, DBTestRandomized,
4908
    ::testing::ValuesIn(DBTestRandomized::GenerateOptionConfigs()));
4909
4910
28
// Applies N random operations (Put / Delete / multi-op WriteBatch) to
// both the real DB and an in-memory ModelDB, periodically reopening the
// DB and comparing full iterations of both — with and without snapshots.
TEST_P(DBTestRandomized, Randomized) {
  anon::OptionsOverride options_override;
  options_override.skip_policy = kSkipNoSnapshot;
  Options options = CurrentOptions(options_override);
  DestroyAndReopen(options);

  Random rnd(test::RandomSeed() + GetParam());
  ModelDB model(options);
  const int N = 10000;
  const Snapshot* model_snap = nullptr;
  const Snapshot* db_snap = nullptr;
  std::string key, value;
  for (int step = 0; step < N; step++) {
    // TODO(sanjay): Test Get() works
    const int op_choice = rnd.Uniform(100);
    int minimum = 0;
    // Prefix-based configurations cannot handle empty keys, so force a
    // minimum key length of one for them.
    if (option_config_ == kHashSkipList ||
        option_config_ == kHashLinkList ||
        option_config_ == kPlainTableFirstBytePrefix ||
        option_config_ == kBlockBasedTableWithWholeKeyHashIndex ||
        option_config_ == kBlockBasedTableWithPrefixHashIndex) {
      minimum = 1;
    }
    if (op_choice < 45) {  // 45%: single Put
      key = RandomKey(&rnd, minimum);
      value = RandomString(
          &rnd, rnd.OneIn(20) ? 100 + rnd.Uniform(100) : rnd.Uniform(8));
      ASSERT_OK(model.Put(WriteOptions(), key, value));
      ASSERT_OK(db_->Put(WriteOptions(), key, value));
    } else if (op_choice < 90) {  // 45%: single Delete
      key = RandomKey(&rnd, minimum);
      ASSERT_OK(model.Delete(WriteOptions(), key));
      ASSERT_OK(db_->Delete(WriteOptions(), key));
    } else {  // 10%: multi-element write batch
      WriteBatch b;
      const int num = rnd.Uniform(8);
      for (int i = 0; i < num; i++) {
        if (i == 0 || !rnd.OneIn(10)) {
          key = RandomKey(&rnd, minimum);
        } else {
          // Periodically re-use the same key from the previous iter, so
          // we have multiple entries in the write batch for the same key.
        }
        if (rnd.OneIn(2)) {
          value = RandomString(&rnd, rnd.Uniform(10));
          b.Put(key, value);
        } else {
          b.Delete(key);
        }
      }
      ASSERT_OK(model.Write(WriteOptions(), &b));
      ASSERT_OK(db_->Write(WriteOptions(), &b));
    }

    if ((step % 100) == 0) {
      // For DB instances that use the hash index + block-based table, the
      // iterator will be invalid right when seeking a non-existent key,
      // rather than returning a key that is close to it — so skip the
      // iterator comparison for those configurations.
      if (option_config_ != kBlockBasedTableWithWholeKeyHashIndex &&
          option_config_ != kBlockBasedTableWithPrefixHashIndex) {
        ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));
        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
      }

      // Save a snapshot from each DB this time that we'll use next time
      // we compare things, to make sure the current state is preserved
      // with the snapshot.
      if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
      if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);

      Reopen(options);
      ASSERT_TRUE(CompareIterators(step, &model, db_, nullptr, nullptr));

      model_snap = model.GetSnapshot();
      db_snap = db_->GetSnapshot();
    }
  }
  if (model_snap != nullptr) model.ReleaseSnapshot(model_snap);
  if (db_snap != nullptr) db_->ReleaseSnapshot(db_snap);
}
4992
4993
1
// MultiGet over one column family: live keys return their values while
// deleted and never-written keys report NotFound.
TEST_F(DBTest, MultiGetSimple) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    ASSERT_OK(Put(1, "k1", "v1"));
    ASSERT_OK(Put(1, "k2", "v2"));
    ASSERT_OK(Put(1, "k3", "v3"));
    ASSERT_OK(Put(1, "k4", "v4"));
    ASSERT_OK(Delete(1, "k4"));
    ASSERT_OK(Put(1, "k5", "v5"));
    ASSERT_OK(Delete(1, "no_key"));

    std::vector<Slice> keys({"k1", "k2", "k3", "k4", "k5", "no_key"});

    // Deliberately oversized and pre-filled: MultiGet must resize the
    // vector to match the key count and overwrite its contents.
    std::vector<std::string> values(20, "Temporary data to be overwritten");
    std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);

    std::vector<Status> statuses = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(values.size(), keys.size());
    ASSERT_EQ(values[0], "v1");
    ASSERT_EQ(values[1], "v2");
    ASSERT_EQ(values[2], "v3");
    ASSERT_EQ(values[4], "v5");

    ASSERT_OK(statuses[0]);
    ASSERT_OK(statuses[1]);
    ASSERT_OK(statuses[2]);
    ASSERT_TRUE(statuses[3].IsNotFound());  // "k4" was deleted.
    ASSERT_OK(statuses[4]);
    ASSERT_TRUE(statuses[5].IsNotFound());  // "no_key" never existed.
  } while (ChangeCompactOptions());
}
5024
5025
1
// MultiGet edge cases: an empty key set (on populated and fresh DBs) and
// lookups of keys that were never written.
TEST_F(DBTest, MultiGetEmpty) {
  do {
    CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
    // Empty key set.
    std::vector<Slice> keys;
    std::vector<std::string> values;
    std::vector<ColumnFamilyHandle*> cfs;
    std::vector<Status> statuses = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(statuses.size(), 0U);

    // Empty database, empty key set.
    Options options = CurrentOptions();
    options.create_if_missing = true;
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);
    statuses = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(statuses.size(), 0U);

    // Empty database, search for keys.
    keys.resize(2);
    keys[0] = "a";
    keys[1] = "b";
    cfs.push_back(handles_[0]);
    cfs.push_back(handles_[1]);
    statuses = db_->MultiGet(ReadOptions(), cfs, keys, &values);
    ASSERT_EQ(statuses.size(), 2);
    ASSERT_TRUE(statuses[0].IsNotFound() && statuses[1].IsNotFound());
  } while (ChangeCompactOptions());
}
5054
5055
1
// A DB written with a hash-search (prefix) index must stay readable after
// reopening with a binary-search index and no prefix extractor.
TEST_F(DBTest, BlockBasedTablePrefixIndexTest) {
  // Create a DB whose tables use the block prefix index.
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();
  table_options.index_type = IndexType::kHashSearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  Reopen(options);
  ASSERT_OK(Put("k1", "v1"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("k2", "v2"));

  // Reopen it without a prefix extractor and make sure everything still
  // works: RocksDB should just fall back to the binary index.
  table_options.index_type = IndexType::kBinarySearch;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.prefix_extractor.reset();

  Reopen(options);
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("v2", Get("k2"));
}
5079
5080
1
// Writes one table with a CRC32c block checksum and one with xxHash, then
// verifies both tables read back correctly regardless of which checksum
// type the table factory is currently configured with.
TEST_F(DBTest, ChecksumTest) {
  BlockBasedTableOptions table_options;
  Options options = CurrentOptions();

  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("a", "b"));
  ASSERT_OK(Put("c", "d"));
  ASSERT_OK(Flush());  // table with crc checksum

  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("e", "f"));
  ASSERT_OK(Put("g", "h"));
  ASSERT_OK(Flush());  // table with xxhash checksum

  // Verify all keys while configured for CRC32c.
  table_options.checksum = kCRC32c;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));

  // Bug fix: this second verification pass previously set kCRC32c again,
  // byte-duplicating the block above.  It is meant to verify reads with
  // the factory configured for xxHash (as in upstream RocksDB).
  table_options.checksum = kxxHash;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_EQ("b", Get("a"));
  ASSERT_EQ("d", Get("c"));
  ASSERT_EQ("f", Get("e"));
  ASSERT_EQ("h", Get("g"));
}
5114
5115
#ifndef ROCKSDB_LITE
5116
4
// FIFO compaction should evict the oldest SST files once the total table
// size exceeds max_table_files_size — both via automatic compaction and
// via an explicit CompactRange call.
TEST_P(DBTestWithParam, FIFOCompactionTest) {
  for (int iter = 0; iter < 2; ++iter) {
    // iter 0 exercises automatic compaction; iter 1 manual compaction.
    Options options;
    options.compaction_style = kCompactionStyleFIFO;
    options.write_buffer_size = 100 << 10;                             // 100KB
    options.arena_block_size = 4096;
    options.compaction_options_fifo.max_table_files_size = 500 << 10;  // 500KB
    options.compression = kNoCompression;
    options.create_if_missing = true;
    options.max_subcompactions = max_subcompactions_;
    options.disable_auto_compactions = (iter == 1);
    options = CurrentOptions(options);
    DestroyAndReopen(options);

    Random rnd(301);
    for (int file = 0; file < 6; ++file) {
      for (int row = 0; row < 110; ++row) {
        ASSERT_OK(Put(ToString(file * 100 + row), RandomString(&rnd, 980)));
      }
      // 110 rows of ~1KB overflow the 100KB memtable, so a flush should
      // happen here.
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
    if (iter == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    } else {
      CompactRangeOptions cro;
      cro.exclusive_manual_compaction = exclusive_manual_compaction_;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    }
    // Only 5 files (~500KB worth) should survive FIFO eviction.
    ASSERT_EQ(NumTableFilesAtLevel(0), 5);
    for (int i = 0; i < 50; ++i) {
      // These keys belong to the oldest file, which must have been evicted
      // by the previous compaction.
      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
    }
  }
}
5157
#endif  // ROCKSDB_LITE
5158
5159
// verify that we correctly deprecated timeout_hint_us
5160
1
// Verify that we correctly deprecated timeout_hint_us: a zero hint is a
// no-op and the write succeeds, while any non-zero hint fails the write.
TEST_F(DBTest, SimpleWriteTimeoutTest) {
  const std::string value = Key(1) + std::string(100, 'v');
  WriteOptions write_opt;
  write_opt.timeout_hint_us = 0;
  ASSERT_OK(Put(Key(1), value, write_opt));
  write_opt.timeout_hint_us = 10;
  ASSERT_NOK(Put(Key(1), value, write_opt));
}
5167
5168
// Class that wraps DB write instructions
5169
class DBWriter : public DBHolder {
5170
 public:
5171
  struct WriteStat {
5172
    int64_t bytes_count_ = 0;
5173
    yb::MonoTime start_time_;
5174
    yb::MonoTime stop_time_;
5175
5176
3
    Result<double> GetRate() const {
5177
3
      RETURN_NOT_OK(Verify());
5178
3
      return bytes_count_ / (stop_time_ - start_time_).ToSeconds();
5179
3
    }
5180
5181
6
    CHECKED_STATUS Verify() const {
5182
6
      SCHECK_GT(bytes_count_, 0, IllegalState, "Bytes count must be greater than zero");
5183
6
      SCHECK_LT(start_time_, stop_time_, IllegalState, "Start time must be less than stop time");
5184
6
      return Status::OK();
5185
6
    }
5186
  };
5187
5188
  explicit DBWriter(std::string path = "/db_test_write")
5189
7
      : DBHolder(path) , write_options_(CurrentOptions()) {
5190
7
    write_options_.write_buffer_size = 1_MB;
5191
7
    write_options_.level0_file_num_compaction_trigger = 2;
5192
7
    write_options_.target_file_size_base = 1_MB;
5193
7
    write_options_.max_bytes_for_level_base = 4_MB;
5194
7
    write_options_.max_bytes_for_level_multiplier = 4;
5195
7
    write_options_.compression = kNoCompression;
5196
7
    write_options_.create_if_missing = true;
5197
7
    write_options_.env = env_;
5198
7
    write_options_.IncreaseParallelism(4);
5199
7
  }
5200
5201
1
  CHECKED_STATUS InitRate(boost::optional<double> rate_bytes_per_sec = boost::none) {
5202
1
    if (rate_bytes_per_sec) {
5203
0
      write_reference_rate_bytes_per_sec_ = *rate_bytes_per_sec;
5204
0
      SCHECK_GT(write_reference_rate_bytes_per_sec_, 0.0,
5205
0
                IllegalState, "Reference rate must be greater than zero");
5206
1
    } else {
5207
1
      RETURN_NOT_OK(ExecWrite());
5208
1
      write_reference_rate_bytes_per_sec_ = VERIFY_RESULT(write_stat_.GetRate());
5209
1
    }
5210
1
    return Status::OK();
5211
1
  }
5212
5213
9
  CHECKED_STATUS ExecWrite() {
5214
9
    env_->bytes_written_ = 0;
5215
9
    DestroyAndReopen(write_options_);
5216
9
    WriteOptions wo;
5217
9
    wo.disableWAL = true;
5218
5219
    // Write ~48M data
5220
9
    auto rnd = &yb::ThreadLocalRandom();
5221
9
    write_stat_.start_time_ = yb::MonoTime::Now();
5222
233k
    for (size_t i = 0; i < (48 << 10); ++i) {
5223
233k
      RETURN_NOT_OK_PREPEND(
5224
233k
        Put(yb::RandomString(32, rnd), yb::RandomString(1_KB + 1, rnd), wo),
5225
233k
        "iteration #" + std::to_string(i));
5226
233k
    }
5227
3
    write_stat_.stop_time_ = yb::MonoTime::Now();
5228
3
    Close();
5229
3
    write_stat_.bytes_count_ = env_->bytes_written_.load();
5230
3
    return write_stat_.Verify();
5231
9
  }
5232
5233
2
  CHECKED_STATUS ExecWriteWithNewRateLimiter(double rate_bytes_per_sec) {
5234
2
    write_options_.rate_limiter.reset(
5235
2
      NewGenericRateLimiter(static_cast<int64_t>(rate_bytes_per_sec)));
5236
2
    RETURN_NOT_OK(ExecWrite());
5237
2
    SCHECK_EQ(write_stat_.bytes_count_, write_options_.rate_limiter->GetTotalBytesThrough(),
5238
2
              IllegalState, "Bytes count vs rate limiter total bytes inconsistency");
5239
2
    return Status::OK();
5240
2
  }
5241
5242
2
  CHECKED_STATUS MeasureWrite(double rate_ratio, double max_rate_ratio) {
5243
2
    SCHECK_GT(rate_ratio, 0.0, IllegalState, "Rate ratio must be greater than zero");
5244
2
    SCHECK_LE(rate_ratio, max_rate_ratio,
5245
2
              IllegalState, "Max rate ratio must be greater than rate ratio");
5246
2
    RETURN_NOT_OK(ExecWriteWithNewRateLimiter(write_reference_rate_bytes_per_sec_ * rate_ratio));
5247
2
    return CheckRatio(write_stat_, max_rate_ratio);
5248
2
  }
5249
5250
2
  CHECKED_STATUS CheckRatio(const WriteStat& period, double expected_ratio) const {
5251
2
    SCHECK_GT(write_reference_rate_bytes_per_sec_, 0.0,
5252
2
              IllegalState, "Reference rate must be greater than zero");
5253
2
    auto ratio = VERIFY_RESULT(period.GetRate()) / write_reference_rate_bytes_per_sec_;
5254
2
    LOG(INFO) << "Write rate ratio = " << std::fixed << std::setprecision(2) << ratio
5255
2
              << ", expected ratio = " << std::fixed << std::setprecision(2) << expected_ratio;
5256
2
    SCHECK_LE(ratio, expected_ratio, IllegalState, "Ratio must be less than expected ratio");
5257
2
    return Status::OK();
5258
2
  }
5259
5260
 public:
5261
  Options write_options_;
5262
  WriteStat write_stat_;
5263
  double write_reference_rate_bytes_per_sec_ = 0.0;
5264
};
5265
5266
/*
5267
 * This test is not reliable enough as it heavily depends on disk behavior.
5268
 */
5269
1
// Measures write throughput under per-DB and shared rate limiters.
// NOTE(review): inherently timing-sensitive — results depend on disk
// behavior (see comment above the test).
TEST_F_EX(DBTest, RateLimitingTest, RocksDBTest) {
  DBWriter dbw;

  // Phase 1: unthrottled run establishes the reference write rate.
  ASSERT_OK(dbw.InitRate());

  // Phase 2: throttle to 0.7x of the reference; expect at most 0.8x.
  ASSERT_OK(dbw.MeasureWrite(0.7, 0.8));

  // Phase 3: throttle to half of the reference; expect at most 0.6x.
  ASSERT_OK(dbw.MeasureWrite(0.5, 0.6));

  // Phase 4: several writers sharing one limiter set to the full
  // reference rate.
  {
    const size_t kNumDBs = yb::RandomUniformInt(4, 8, &yb::ThreadLocalRandom());
    LOG(INFO) << "Number of writers: " << kNumDBs;

    std::vector<std::unique_ptr<DBWriter>> writers;
    yb::TestThreadHolder workers;
    yb::CountDownLatch ready_latch(kNumDBs);
    yb::CountDownLatch done_latch(kNumDBs);
    std::shared_ptr<RateLimiter> shared_limiter(
        NewGenericRateLimiter(static_cast<int64_t>(dbw.write_reference_rate_bytes_per_sec_)));

    while (writers.size() != kNumDBs) {
      writers.push_back(std::make_unique<DBWriter>(std::to_string(writers.size())));
      auto& one_writer = *writers.back();
      one_writer.write_options_.rate_limiter = shared_limiter;
      workers.AddThread([&one_writer, &ready_latch, &done_latch](){
        // Rendezvous so all writers start at (roughly) the same moment.
        ready_latch.CountDown();
        ready_latch.Wait();
        EXPECT_OK(one_writer.ExecWrite());
        done_latch.CountDown(); // To make TSAN happy
      });
    }

    // Wait until every writer has finished.
    done_latch.Wait();
    workers.JoinAll();

    // Aggregate across writers: total bytes over the union of the write
    // intervals.  With a shared limiter, the combined rate for multiple
    // parallel writers should stay close to the single-writer rate.
    DBWriter::WriteStat stat;
    stat.start_time_ = yb::MonoTime::Max();
    stat.stop_time_  = yb::MonoTime::Min();
    for (const auto& w : writers) {
      stat.bytes_count_ += w->write_stat_.bytes_count_;
      stat.start_time_ = std::min(stat.start_time_, w->write_stat_.start_time_);
      stat.stop_time_  = std::max(stat.stop_time_, w->write_stat_.stop_time_);
    }
    ASSERT_EQ(stat.bytes_count_, shared_limiter->GetTotalBytesThrough());
    ASSERT_OK(dbw.CheckRatio(stat, 1.1));
  }
}
5324
5325
1
// Sanity checks on table-option sanitization performed at DB open time.
TEST_F(DBTest, TableOptionsSanitizeTest) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  DestroyAndReopen(options);
  ASSERT_EQ(db_->GetOptions().allow_mmap_reads, false);

  // Plain table with a (noop) prefix extractor: the reopen must not be
  // rejected as unsupported.
  options.table_factory.reset(new PlainTableFactory());
  options.prefix_extractor.reset(NewNoopTransform());
  Destroy(options);
  ASSERT_TRUE(!TryReopen(options).IsNotSupported());

  // The hash-search index of the block-based table requires a prefix
  // extractor: opening without one must fail with InvalidArgument, and
  // succeed once an extractor is supplied.
  BlockBasedTableOptions to;
  to.index_type = IndexType::kHashSearch;
  options = CurrentOptions();
  options.create_if_missing = true;
  options.table_factory.reset(NewBlockBasedTableFactory(to));
  ASSERT_TRUE(TryReopen(options).IsInvalidArgument());
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  ASSERT_OK(TryReopen(options));
}
5347
5348
1
// allow_concurrent_memtable_write only works with memtable factories that
// support concurrent inserts (skip-list).  Incompatible factories must be
// rejected both at DB open and at column-family creation.
TEST_F(DBTest, ConcurrentMemtableNotSupported) {
  Options options = CurrentOptions();
  options.allow_concurrent_memtable_write = true;
  options.soft_pending_compaction_bytes_limit = 0;
  options.hard_pending_compaction_bytes_limit = 100;
  options.create_if_missing = true;

  Close();
  ASSERT_OK(DestroyDB(dbname_, options));
  // Hash link-list memtable does not support concurrent writes.
  options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4));
  ASSERT_NOK(TryReopen(options));

  // Skip-list does, so reopening must succeed.
  options.memtable_factory.reset(new SkipListFactory);
  ASSERT_OK(TryReopen(options));

  // An incompatible factory on a new column family must be rejected too.
  ColumnFamilyOptions cf_options(options);
  cf_options.memtable_factory.reset(
      NewHashLinkListRepFactory(4, 0, 3, true, 4));
  ColumnFamilyHandle* handle;
  ASSERT_NOK(db_->CreateColumnFamily(cf_options, "name", &handle));
}
5369
5370
1
// Checks that max_background_{compactions,flushes} size the LOW and HIGH
// priority thread pools as expected (and that defaults work on the second
// attempt).  The original comments here claimed 5+5 tasks; the loop
// actually schedules 4 LOW and 4 HIGH.
TEST_F(DBTest, SanitizeNumThreads) {
  for (int attempt = 0; attempt < 2; attempt++) {
    const size_t kTotalTasks = 8;
    test::SleepingBackgroundTask sleeping_tasks[kTotalTasks];

    Options options = CurrentOptions();
    if (attempt == 0) {
      options.max_background_compactions = 3;
      options.max_background_flushes = 2;
    }
    options.create_if_missing = true;
    DestroyAndReopen(options);

    // Insert 4 tasks into the low priority queue and 4 into the high
    // priority queue.
    for (size_t i = 0; i < kTotalTasks; i++) {
      env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                     &sleeping_tasks[i],
                     (i < 4) ? Env::Priority::LOW : Env::Priority::HIGH);
    }

    // Wait 100 milliseconds for them to be scheduled.
    env_->SleepForMicroseconds(100000);

    // LOW pool size 3, 4 tasks scheduled -> queue length should be 1.
    ASSERT_EQ(1U, options.env->GetThreadPoolQueueLen(Env::Priority::LOW));
    // HIGH pool size 2, 4 tasks scheduled -> queue length should be 2.
    ASSERT_EQ(2U, options.env->GetThreadPoolQueueLen(Env::Priority::HIGH));

    for (size_t i = 0; i < kTotalTasks; i++) {
      sleeping_tasks[i].WakeUp();
      sleeping_tasks[i].WaitUntilDone();
    }

    // The DB must remain fully functional afterwards.
    ASSERT_OK(Put("abc", "def"));
    ASSERT_EQ("def", Get("abc"));
    ASSERT_OK(Flush());
    ASSERT_EQ("def", Get("abc"));
  }
}
5409
5410
1
// Exercises ReadOptions::iterate_upper_bound with and without a prefix
// extractor, including its interaction with deleted keys.
TEST_F(DBTest, DBIteratorBoundTest) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;

  options.prefix_extractor = nullptr;
  DestroyAndReopen(options);
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // Basic case: no iterate_upper_bound and no prefix_extractor.
  {
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
  }

  // A forward iterator must stop at iterate_upper_bound.
  {
    ReadOptions ro;
    // The bound points just beyond the last expected entry.
    Slice upper("foo2");
    ro.iterate_upper_bound = &upper;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("foo")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("foo1")), 0);

    iter->Next();
    // ...and stop right here, before "g1".
    ASSERT_TRUE(!iter->Valid());
  }

  // SeekToLast with iterate_upper_bound set lands on the last key below
  // the bound.
  {
    ReadOptions ro;

    Slice upper("foo");
    ro.iterate_upper_bound = &upper;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->SeekToLast();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("a")), 0);
  }

  // Now use the first letter of the key as the prefix.
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  DestroyAndReopen(options);
  ASSERT_OK(Put("a", "0"));
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Put("foo1", "bar1"));
  ASSERT_OK(Put("g1", "0"));

  // iterate_upper_bound together with a prefix_extractor: the Seek target
  // and the bound live in different prefixes, so iteration ends at the
  // prefix boundary.
  {
    ReadOptions ro;
    Slice upper_bound("g");
    ro.iterate_upper_bound = &upper_bound;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("foo");

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo", iter->key().ToString());

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("foo1", iter->key().ToString());

    iter->Next();
    ASSERT_TRUE(!iter->Valid());
  }

  // iterate_upper_bound must prevent iterating over deleted entries once
  // the bound has been reached.
  {
    options.prefix_extractor = nullptr;
    DestroyAndReopen(options);
    ASSERT_OK(Put("a", "0"));
    ASSERT_OK(Put("b", "0"));
    ASSERT_OK(Put("b1", "0"));
    ASSERT_OK(Put("c", "0"));
    ASSERT_OK(Put("d", "0"));
    ASSERT_OK(Put("e", "0"));
    ASSERT_OK(Delete("c"));
    ASSERT_OK(Delete("d"));

    // Base case with no bound: the tombstones for "c" and "d" get skipped
    // on the way to "e".
    ReadOptions ro;
    ro.iterate_upper_bound = nullptr;

    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("b1")), 0);

    perf_context.Reset();
    iter->Next();

    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 2);

    // Now repeat with an upper bound of "c".
    Slice upper("c");
    ro.iterate_upper_bound = &upper;

    iter.reset(db_->NewIterator(ro));

    perf_context.Reset();

    iter->Seek("b");
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Slice("b")), 0);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(("b1")), 0);

    iter->Next();
    // Iteration stops as soon as the bound key is reached, even though
    // that key is deleted — hence no tombstones are skipped.
    ASSERT_TRUE(!iter->Valid());
    ASSERT_EQ(static_cast<int>(perf_context.internal_delete_skipped_count), 0);
  }
}
5570
5571
1
// While one thread holds the write slot, a concurrent Put and Flush must
// queue behind it and only complete once the slot is released.
TEST_F(DBTest, WriteSingleThreadEntry) {
  std::vector<std::thread> threads;
  dbfull()->TEST_LockMutex();
  auto w = dbfull()->TEST_BeginWrite();
  threads.emplace_back([&] { ASSERT_OK(Put("a", "b")); });
  env_->SleepForMicroseconds(10000);
  threads.emplace_back([&] { ASSERT_OK(Flush()); });
  env_->SleepForMicroseconds(10000);
  dbfull()->TEST_UnlockMutex();
  // Release the write slot under the mutex so the queued writers proceed.
  dbfull()->TEST_LockMutex();
  dbfull()->TEST_EndWrite(w);
  dbfull()->TEST_UnlockMutex();

  for (auto& t : threads) {
    t.join();
  }
}
5588
5589
1
// With disableDataSync set, no sync calls may reach the env; with it
// cleared, flushing and compacting must issue at least one sync.
TEST_F(DBTest, DisableDataSyncTest) {
  env_->sync_counter_.store(0);
  // iter 0 -- data sync disabled; iter 1 -- data sync enabled.
  for (int iter = 0; iter < 2; ++iter) {
    Options options = CurrentOptions();
    options.disableDataSync = iter == 0;
    options.create_if_missing = true;
    options.num_levels = 10;
    options.env = env_;
    Reopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    MakeTables(10, "a", "z");
    Compact("a", "z");

    if (iter == 0) {
      ASSERT_EQ(env_->sync_counter_.load(), 0);
    } else {
      ASSERT_GT(env_->sync_counter_.load(), 0);
    }
    Destroy(options);
  }
}
5613
5614
#ifndef ROCKSDB_LITE
5615
1
// Verifies that memtable-related options (write_buffer_size,
// max_write_buffer_number) can be changed dynamically via SetOptions() and
// take effect on subsequently allocated memtables.
//
// Fix: the final stall-bound assertion used `266 * 1.2` where every other
// paired GT/LT bound in this test uses the same base for both sides
// (128/128, 512/512); the matching lower bound is `256 * 0.8`, so the upper
// bound base is corrected from 266 to 256.
TEST_F(DBTest, DynamicMemtableOptions) {
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
  const uint64_t k5KB = 5 * 1024;
  const int kNumPutsBeforeWaitForFlush = 64;
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.max_background_compactions = 1;
  options.write_buffer_size = k64KB;
  options.arena_block_size = 16 * 1024;
  options.max_write_buffer_number = 2;
  // Don't trigger compact/slowdown/stop
  options.level0_file_num_compaction_trigger = 1024;
  options.level0_slowdown_writes_trigger = 1024;
  options.level0_stop_writes_trigger = 1024;
  DestroyAndReopen(options);

  // Writes `size` KB of ~1KB values, flushing any full memtables as we go.
  auto gen_l0_kb = [this](int size) {
    Random rnd(301);
    for (int i = 0; i < size; i++) {
      ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));

      // The following condition prevents a race condition between flush jobs
      // acquiring work and this thread filling up multiple memtables. Without
      // this, the flush might produce less files than expected because
      // multiple memtables are flushed into a single L0 file. This race
      // condition affects assertion (A).
      if (i % kNumPutsBeforeWaitForFlush == kNumPutsBeforeWaitForFlush - 1) {
        ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
      }
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  };

  // Test write_buffer_size
  gen_l0_kb(64);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  ASSERT_LT(SizeAtLevel(0), k64KB + k5KB);
  ASSERT_GT(SizeAtLevel(0), k64KB - k5KB * 2);

  // Clean up L0
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Increase buffer size
  ASSERT_OK(dbfull()->SetOptions({
    {"write_buffer_size", "131072"},
  }));

  // The existing memtable is still 64KB in size, after it becomes immutable,
  // the next memtable will be 128KB in size. Write 256KB total, we should
  // have a 64KB L0 file, a 128KB L0 file, and a memtable with 64KB data
  gen_l0_kb(256);
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);  // (A)
  ASSERT_LT(SizeAtLevel(0), k128KB + k64KB + 2 * k5KB);
  ASSERT_GT(SizeAtLevel(0), k128KB + k64KB - 4 * k5KB);

  // Test max_write_buffer_number
  // Block compaction thread, which will also block the flushes because
  // max_background_flushes == 0, so flushes are getting executed by the
  // compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  // Start from scratch and disable compaction/flush. Flush can only happen
  // during compaction but trigger is pretty high
  options.max_background_flushes = 0;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Put until writes are stopped, bounded by 256 puts. We should see stop at
  // ~128KB
  int count = 0;
  Random rnd(301);

  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DelayWrite:Wait",
      [&](void* arg) { sleeping_task_low.WakeUp(); });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  while (!sleeping_task_low.WokenUp() && count < 256) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
    count++;
  }
  ASSERT_GT(static_cast<double>(count), 128 * 0.8);
  ASSERT_LT(static_cast<double>(count), 128 * 1.2);

  sleeping_task_low.WaitUntilDone();

  // Increase
  ASSERT_OK(dbfull()->SetOptions({
    {"max_write_buffer_number", "8"},
  }));
  // Clean up memtable and L0
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  count = 0;
  while (!sleeping_task_low.WokenUp() && count < 1024) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
    count++;
  }
  // Windows fails this test. Will tune in the future and figure out
  // approp number
#ifndef OS_WIN
  ASSERT_GT(static_cast<double>(count), 512 * 0.8);
  ASSERT_LT(static_cast<double>(count), 512 * 1.2);
#endif
  sleeping_task_low.WaitUntilDone();

  // Decrease
  ASSERT_OK(dbfull()->SetOptions({
    {"max_write_buffer_number", "4"},
  }));
  // Clean up memtable and L0
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));

  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  count = 0;
  while (!sleeping_task_low.WokenUp() && count < 1024) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), WriteOptions()));
    count++;
  }
  // Windows fails this test. Will tune in the future and figure out
  // approp number
#ifndef OS_WIN
  ASSERT_GT(static_cast<double>(count), 256 * 0.8);
  // Upper bound base corrected from 266 to 256 to match the lower bound.
  ASSERT_LT(static_cast<double>(count), 256 * 1.2);
#endif
  sleeping_task_low.WaitUntilDone();

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
5756
#endif  // ROCKSDB_LITE
5757
5758
#ifndef ROCKSDB_LITE
5759
1
TEST_F(DBTest, FlushOnDestroy) {
  // Write without a WAL, then cancel background work before teardown;
  // exercises the flush path taken during DB destruction.
  WriteOptions write_opts;
  write_opts.disableWAL = true;
  ASSERT_OK(Put("foo", "v1", write_opts));
  CancelAllBackgroundWork(db_);
}
5765
5766
1
// Verifies per-level compression settings under dynamic level bytes: with
// num_levels = 5 and compression_per_level of size 3, the base level (L4)
// stays uncompressed until the base level moves up, after which deeper data
// is Snappy-compressed.
//
// Fix: iterate cf_meta.levels[4].files by const reference — the previous
// by-value range-for copied each SstFileMetaData (strings included) per
// iteration.
TEST_F(DBTest, DynamicLevelCompressionPerLevel) {
  if (!Snappy_Supported()) {
    return;
  }
  const int kNKeys = 120;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  std::random_shuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 20480;
  options.write_buffer_size = 20480;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.target_file_size_base = 2048;
  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 102400;
  options.max_bytes_for_level_multiplier = 4;
  options.max_background_compactions = 1;
  options.num_levels = 5;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kNoCompression;
  options.compression_per_level[2] = kSnappyCompression;

  // Ownership passes to options.listeners (shared_ptr constructed in place);
  // the raw pointer is kept only to drive the listener below.
  OnFileDeletionListener* listener = new OnFileDeletionListener();
  options.listeners.emplace_back(listener);

  DestroyAndReopen(options);

  // Insert more than 80K. L4 should be base level. Neither L0 nor L4 should
  // be compressed, so total data size should be more than 80K.
  for (int i = 0; i < 20; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(4), 20U * 4000U);

  // Insert 400KB. Some data will be compressed.
  // NOTE(review): the loop starts at 21, so keys[20] is never written —
  // looks intentional (counts below are iterator-derived), but confirm.
  for (int i = 21; i < 120; i++) {
    ASSERT_OK(Put(Key(keys[i]), CompressibleString(&rnd, 4000)));
  }
  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_LT(SizeAtLevel(0) + SizeAtLevel(3) + SizeAtLevel(4), 120U * 4000U);
  // Make sure data in files in L3 is not compacted by removing all files
  // in L4 and calculate number of rows
  ASSERT_OK(dbfull()->SetOptions({
      {"disable_auto_compactions", "true"},
  }));
  ColumnFamilyMetaData cf_meta;
  db_->GetColumnFamilyMetaData(&cf_meta);
  for (const auto& file : cf_meta.levels[4].files) {
    listener->SetExpectedFileName(dbname_ + file.name);
    ASSERT_OK(dbfull()->DeleteFile(file.name));
  }
  listener->VerifyMatchedCount(cf_meta.levels[4].files.size());

  int num_keys = 0;
  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    num_keys++;
  }
  ASSERT_OK(iter->status());
  // Remaining (uncompressed) data must exceed raw payload of the kept keys.
  ASSERT_GT(SizeAtLevel(0) + SizeAtLevel(3), num_keys * 4000U);
}
5846
5847
1
TEST_F(DBTest, DynamicLevelCompressionPerLevel2) {
  if (!Snappy_Supported() || !LZ4_Supported() || !Zlib_Supported()) {
    return;
  }
  const int kNKeys = 500;
  int keys[kNKeys];
  for (int i = 0; i < kNKeys; i++) {
    keys[i] = i;
  }
  std::random_shuffle(std::begin(keys), std::end(keys));

  Random rnd(301);
  Options options;
  options.create_if_missing = true;
  options.db_write_buffer_size = 6000;
  options.write_buffer_size = 6000;
  options.max_write_buffer_number = 2;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 2;
  options.soft_pending_compaction_bytes_limit = 1024 * 1024;

  // Use file size to distinguish levels
  // L1: 10, L2: 20, L3 40, L4 80
  // L0 is less than 30
  options.target_file_size_base = 10;
  options.target_file_size_multiplier = 2;

  options.level_compaction_dynamic_level_bytes = true;
  options.max_bytes_for_level_base = 200;
  options.max_bytes_for_level_multiplier = 8;
  options.max_background_compactions = 1;
  options.num_levels = 5;
  std::shared_ptr<mock::MockTableFactory> mock_factory(
      new mock::MockTableFactory);
  options.table_factory = mock_factory;

  options.compression_per_level.resize(3);
  options.compression_per_level[0] = kNoCompression;
  options.compression_per_level[1] = kLZ4Compression;
  options.compression_per_level[2] = kZlibCompression;

  DestroyAndReopen(options);

  // Phase 1: while the base level is L4, flushes stay uncompressed and
  // compactions into L4 use LZ4.
  std::atomic<int> num_zlib(0);
  std::atomic<int> num_lz4(0);
  std::atomic<int> num_no(0);
  auto* sync_point = rocksdb::SyncPoint::GetInstance();
  sync_point->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4) {
          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
          num_lz4.fetch_add(1);
        }
      });
  sync_point->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
  sync_point->EnableProcessing();

  for (int i = 0; i < 100; i++) {
    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));

    if (i % 25 == 0) {
      ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    }
  }

  ASSERT_OK(Flush());
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  sync_point->DisableProcessing();
  sync_point->ClearAllCallBacks();

  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_EQ(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(NumTableFilesAtLevel(4), 0);
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
  int prev_num_files_l4 = NumTableFilesAtLevel(4);

  // Phase 2: after the base level moves to L3, L3 output is LZ4 and
  // L3->L4 compactions switch to Zlib.
  num_lz4.store(0);
  num_no.store(0);
  sync_point->SetCallBack(
      "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) {
        Compaction* compaction = reinterpret_cast<Compaction*>(arg);
        if (compaction->output_level() == 4 && compaction->start_level() == 3) {
          ASSERT_TRUE(compaction->output_compression() == kZlibCompression);
          num_zlib.fetch_add(1);
        } else {
          ASSERT_TRUE(compaction->output_compression() == kLZ4Compression);
          num_lz4.fetch_add(1);
        }
      });
  sync_point->SetCallBack(
      "FlushJob::WriteLevel0Table:output_compression", [&](void* arg) {
        auto* compression = reinterpret_cast<CompressionType*>(arg);
        ASSERT_TRUE(*compression == kNoCompression);
        num_no.fetch_add(1);
      });
  sync_point->EnableProcessing();

  for (int i = 101; i < 500; i++) {
    ASSERT_OK(Put(Key(keys[i]), RandomString(&rnd, 200)));
    if (i % 100 == 99) {
      ASSERT_OK(Flush());
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
    }
  }

  sync_point->ClearAllCallBacks();
  sync_point->DisableProcessing();
  ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2), 0);
  ASSERT_GT(NumTableFilesAtLevel(3), 0);
  ASSERT_GT(NumTableFilesAtLevel(4), prev_num_files_l4);
  ASSERT_GT(num_no.load(), 2);
  ASSERT_GT(num_lz4.load(), 0);
  ASSERT_GT(num_zlib.load(), 0);
}
5971
5972
1
TEST_F(DBTest, DynamicCompactionOptions) {
  // minimum write buffer size is enforced at 64KB
  const uint64_t k32KB = 1 << 15;
  const uint64_t k64KB = 1 << 16;
  const uint64_t k128KB = 1 << 17;
  const uint64_t k1MB = 1 << 20;
  const uint64_t k4KB = 1 << 12;
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.compression = kNoCompression;
  options.soft_pending_compaction_bytes_limit = 1024 * 1024;
  options.write_buffer_size = k64KB;
  options.arena_block_size = 4 * k4KB;
  options.max_write_buffer_number = 2;
  // Compaction related options
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 4;
  options.level0_stop_writes_trigger = 8;
  options.max_grandparent_overlap_factor = 10;
  options.expanded_compaction_factor = 25;
  options.source_compaction_factor = 1;
  options.target_file_size_base = k64KB;
  options.target_file_size_multiplier = 1;
  options.max_bytes_for_level_base = k128KB;
  options.max_bytes_for_level_multiplier = 4;

  // Block flush thread and disable compaction thread
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  DestroyAndReopen(options);

  // Writes `size` values of ~1KB at keys start, start+stride, ... and waits
  // for the resulting memtable flush.
  auto gen_l0_kb = [this](int start, int size, int stride) {
    Random rnd(301);
    for (int i = 0; i < size; i++) {
      ASSERT_OK(Put(Key(start + stride * i), RandomString(&rnd, 1024)));
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  };

  // Write 3 files that have the same key range.
  // Since level0_file_num_compaction_trigger is 3, compaction should be
  // triggered. The compaction should result in one L1 file
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  gen_l0_kb(0, 64, 1);
  ASSERT_EQ(NumTableFilesAtLevel(0), 2);
  gen_l0_kb(0, 64, 1);
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,1", FilesPerLevel());
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(1U, metadata.size());
  ASSERT_LE(metadata[0].total_size, k64KB + k4KB);
  ASSERT_GE(metadata[0].total_size, k64KB - k4KB);

  // Test compaction trigger and target_file_size_base
  // Reduce compaction trigger to 2, and reduce L1 file size to 32KB.
  // Writing to 64KB L0 files should trigger a compaction. Since these
  // 2 L0 files have the same key range, compaction merge them and should
  // result in 2 32KB L1 files.
  ASSERT_OK(dbfull()->SetOptions({
    {"level0_file_num_compaction_trigger", "2"},
    {"target_file_size_base", ToString(k32KB) }
  }));

  gen_l0_kb(0, 64, 1);
  ASSERT_EQ("1,1", FilesPerLevel());
  gen_l0_kb(0, 64, 1);
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ("0,2", FilesPerLevel());
  metadata.clear();
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ(2U, metadata.size());
  ASSERT_LE(metadata[0].total_size, k32KB + k4KB);
  ASSERT_GE(metadata[0].total_size, k32KB - k4KB);
  ASSERT_LE(metadata[1].total_size, k32KB + k4KB);
  ASSERT_GE(metadata[1].total_size, k32KB - k4KB);

  // Test max_bytes_for_level_base
  // Increase level base size to 256KB and write enough data that will
  // fill L1 and L2. L1 size should be around 256KB while L2 size should be
  // around 256KB x 4.
  ASSERT_OK(dbfull()->SetOptions({
    {"max_bytes_for_level_base", ToString(k1MB) }
  }));

  // writing 96 x 64KB => 6 * 1024KB
  // (L1 + L2) = (1 + 4) * 1024KB
  for (int i = 0; i < 96; ++i) {
    gen_l0_kb(i, 64, 96);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_GT(SizeAtLevel(1), k1MB / 2);
  ASSERT_LT(SizeAtLevel(1), k1MB + k1MB / 2);

  // Within (0.5, 1.5) of 4MB.
  ASSERT_GT(SizeAtLevel(2), 2 * k1MB);
  ASSERT_LT(SizeAtLevel(2), 6 * k1MB);

  // Test max_bytes_for_level_multiplier and
  // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
  // After filling enough data that can fit in L1 - L3, we should see L1 size
  // reduces to 128KB from 256KB which was asserted previously. Same for L2.
  ASSERT_OK(dbfull()->SetOptions({
    {"max_bytes_for_level_multiplier", "2"},
    {"max_bytes_for_level_base", ToString(k128KB) }
  }));

  // writing 20 x 64KB = 10 x 128KB
  // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
  for (int i = 0; i < 20; ++i) {
    gen_l0_kb(i, 64, 32);
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  uint64_t total_size =
    SizeAtLevel(1) + SizeAtLevel(2) + SizeAtLevel(3);
  ASSERT_TRUE(total_size < k128KB * 7 * 1.5);

  // Test level0_stop_writes_trigger.
  // Clean up memtable and L0. Block compaction threads. If continue to write
  // and flush memtables. We should see put stop after 8 memtable flushes
  // since level0_stop_writes_trigger = 8
  ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  // Block compaction
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  int count = 0;
  Random rnd(301);
  WriteOptions wo;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  // Stop trigger = 8
  ASSERT_EQ(count, 8);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Now reduce level0_stop_writes_trigger to 6. Clear up memtables and L0.
  // Block compaction thread again. Perform the put and memtable flushes
  // until we see the stop after 6 memtable flushes.
  ASSERT_OK(dbfull()->SetOptions({
    {"level0_stop_writes_trigger", "6"}
  }));
  ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  // Block compaction again
  sleeping_task_low.Reset();
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  count = 0;
  while (count < 64) {
    ASSERT_OK(Put(Key(count), RandomString(&rnd, 1024), wo));
    ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
    count++;
    if (dbfull()->TEST_write_controler().IsStopped()) {
      sleeping_task_low.WakeUp();
      break;
    }
  }
  ASSERT_EQ(count, 6);
  // Unblock
  sleeping_task_low.WaitUntilDone();

  // Test disable_auto_compactions
  // Compaction thread is unblocked but auto compaction is disabled. Write
  // 4 L0 files and compaction should be triggered. If auto compaction is
  // disabled, then TEST_WaitForCompact will be waiting for nothing. Number of
  // L0 files do not change after the call.
  ASSERT_OK(dbfull()->SetOptions({
    {"disable_auto_compactions", "true"}
  }));
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(NumTableFilesAtLevel(0), 4);

  // Enable auto compaction and perform the same test, # of L0 files should be
  // reduced after compaction.
  ASSERT_OK(dbfull()->SetOptions({
    {"disable_auto_compactions", "false"}
  }));
  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);

  for (int i = 0; i < 4; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
    // Wait for compaction so that put won't stop
    ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  }
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_LT(NumTableFilesAtLevel(0), 4);
}
6184
#endif  // ROCKSDB_LITE
6185
6186
1
// Verifies crash-consistency under randomly failing file creation: after the
// environment starts rejecting 90% of file writes, every key must still read
// back as the last value whose Put() succeeded, both before and after reopen.
//
// Fix: `values` was filled with 16384 "NOT_FOUND" strings via a push_back
// loop; the std::vector fill constructor does the same in one allocation.
TEST_F(DBTest, FileCreationRandomFailure) {
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.write_buffer_size = 100000;  // Small write buffer
  options.target_file_size_base = 200000;
  options.max_bytes_for_level_base = 1000000;
  options.max_bytes_for_level_multiplier = 2;

  DestroyAndReopen(options);
  Random rnd(301);

  const int kCDTKeysPerBuffer = 4;
  const int kTestSize = kCDTKeysPerBuffer * 4096;
  const int kTotalIteration = 100;
  // the second half of the test involves in random failure
  // of file creation.
  const int kRandomFailureTest = kTotalIteration / 2;
  // Latest successfully-written value per key; "NOT_FOUND" matches Get()'s
  // result for keys that were never written.
  std::vector<std::string> values(kTestSize, "NOT_FOUND");
  for (int j = 0; j < kTotalIteration; ++j) {
    if (j == kRandomFailureTest) {
      env_->non_writeable_rate_.store(90);
    }
    for (int k = 0; k < kTestSize; ++k) {
      // here we expect some of the Put fails.
      std::string value = RandomString(&rnd, 100);
      Status s = Put(Key(k), Slice(value));
      if (s.ok()) {
        // update the latest successful put
        values[k] = value;
      }
      // But everything before we simulate the failure-test should succeed.
      if (j < kRandomFailureTest) {
        ASSERT_OK(s);
      }
    }
  }

  // If rocksdb does not do the correct job, internal assert will fail here.
  ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_NOK(dbfull()->TEST_WaitForCompact());

  // verify we have the latest successful update
  for (int k = 0; k < kTestSize; ++k) {
    auto v = Get(Key(k));
    ASSERT_EQ(v, values[k]);
  }

  // reopen and reverify we have the latest successful update
  env_->non_writeable_rate_.store(0);
  Reopen(options);
  for (int k = 0; k < kTestSize; ++k) {
    auto v = Get(Key(k));
    ASSERT_EQ(v, values[k]);
  }
}
6245
6246
#ifndef ROCKSDB_LITE
6247
1
TEST_F(DBTest, DynamicMiscOptions) {
  // Exercises dynamically changing max_sequential_skip_in_iterations and
  // observing the reseek ticker.
  Options options;
  options.env = env_;
  options.create_if_missing = true;
  options.max_sequential_skip_in_iterations = 16;
  options.compression = kNoCompression;
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  DestroyAndReopen(options);

  // Writes key_start+1 ten times (stacking overwrites), then checks that
  // stepping an iterator past it performs exactly `num_reseek` reseeks.
  auto assert_reseek_count = [this, &options](int key_start, int num_reseek) {
    const int key0 = key_start;
    const int key1 = key_start + 1;
    const int key2 = key_start + 2;
    Random rnd(301);
    ASSERT_OK(Put(Key(key0), RandomString(&rnd, 8)));
    for (int i = 0; i < 10; ++i) {
      ASSERT_OK(Put(Key(key1), RandomString(&rnd, 8)));
    }
    ASSERT_OK(Put(Key(key2), RandomString(&rnd, 8)));
    std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
    iter->Seek(Key(key1));
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Key(key1)), 0);
    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().compare(Key(key2)), 0);
    ASSERT_EQ(num_reseek,
              TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
  };
  // With skip threshold 16, ten stacked versions trigger no reseek.
  assert_reseek_count(100, 0);

  ASSERT_OK(dbfull()->SetOptions({
    {"max_sequential_skip_in_iterations", "4"}
  }));
  // Clear memtable and make new option effective
  ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  // Threshold 4 < 10 versions, so one reseek is expected.
  assert_reseek_count(200, 1);

  ASSERT_OK(dbfull()->SetOptions({
    {"max_sequential_skip_in_iterations", "16"}
  }));
  // Clear memtable and make new option effective
  ASSERT_OK(dbfull()->TEST_FlushMemTable(true));
  // Back to 16: no additional reseek, ticker stays at 1.
  assert_reseek_count(300, 1);
}
6296
#endif  // ROCKSDB_LITE
6297
6298
1
TEST_F(DBTest, L0L1L2AndUpHitCounter) {
  Options options = CurrentOptions();
  options.write_buffer_size = 32 * 1024;
  options.target_file_size_base = 32 * 1024;
  options.level0_file_num_compaction_trigger = 2;
  options.level0_slowdown_writes_trigger = 2;
  options.level0_stop_writes_trigger = 4;
  options.max_bytes_for_level_base = 64 * 1024;
  options.max_write_buffer_number = 2;
  options.max_background_compactions = 8;
  options.max_background_flushes = 8;
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  CreateAndReopenWithCF({"mypikachu"}, options);

  const int kNumKeys = 20000;
  for (int i = 0; i < kNumKeys; i++) {
    ASSERT_OK(Put(1, Key(i), "val"));
  }
  // No reads yet, so every per-level hit counter must be zero.
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
  ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));

  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  for (int i = 0; i < kNumKeys; i++) {
    ASSERT_EQ(Get(1, Key(i)), "val");
  }

  // Data should now be spread across L0, L1 and deeper levels.
  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L0), 100);
  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L1), 100);
  ASSERT_GT(TestGetTickerCount(options, GET_HIT_L2_AND_UP), 100);

  // Every read hit exactly one level.
  ASSERT_EQ(kNumKeys, TestGetTickerCount(options, GET_HIT_L0) +
                          TestGetTickerCount(options, GET_HIT_L1) +
                          TestGetTickerCount(options, GET_HIT_L2_AND_UP));
}
6335
6336
1
TEST_F(DBTest, EncodeDecompressedBlockSizeTest) {
  // For each supported compression codec, write with one block-based table
  // format_version and read back with the other (1 <-> 2) to verify the
  // decompressed-size encoding is cross-compatible.
  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
                                    kLZ4Compression,  kLZ4HCCompression};
  for (auto compression : compressions) {
    if (!CompressionTypeSupported(compression)) {
      continue;
    }
    // first_table_version 1 -- generate with table_version == 1, read with
    // table_version == 2
    // first_table_version 2 -- generate with table_version == 2, read with
    // table_version == 1
    for (int first_table_version = 1; first_table_version <= 2;
         ++first_table_version) {
      BlockBasedTableOptions table_options;
      table_options.format_version = first_table_version;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      Options options = CurrentOptions();
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      options.create_if_missing = true;
      options.compression = compression;
      DestroyAndReopen(options);

      const int kNumKeysWritten = 100000;

      Random rnd(301);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        // compressible string
        ASSERT_OK(Put(Key(i), RandomString(&rnd, 128) + std::string(128, 'a')));
      }

      // Flip to the other format version for the read pass.
      table_options.format_version = first_table_version == 1 ? 2 : 1;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      Reopen(options);
      for (int i = 0; i < kNumKeysWritten; ++i) {
        auto r = Get(Key(i));
        ASSERT_EQ(r.substr(128), std::string(128, 'a'));
      }
    }
  }
}
6380
6381
1
// Verifies that the DB_MUTEX_WAIT_MICROS ticker stays at zero out of the box,
// i.e. mutex wait timing is not collected by default.
//
// Fix: removed the local `kMutexWaitDelay`, which was declared but never
// used anywhere in the test.
TEST_F(DBTest, MutexWaitStatsDisabledByDefault) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  CreateAndReopenWithCF({"pikachu"}, options);
  ASSERT_OK(Put("hello", "rocksdb"));
  ASSERT_EQ(TestGetTickerCount(options, DB_MUTEX_WAIT_MICROS), 0);
}
6390
6391
1
TEST_F(DBTest, CloseSpeedup) {
  Options options = CurrentOptions();
  options.compaction_style = kCompactionStyleLevel;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 2;
  options.num_levels = 4;
  options.max_bytes_for_level_base = 400 * 1024;
  options.max_write_buffer_number = 16;

  // Park both background pools so no flush/compaction can run; Close() must
  // still complete without waiting on them.
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  test::SleepingBackgroundTask sleeping_task_high;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                 &sleeping_task_high, Env::Priority::HIGH);

  ASSERT_OK(DeleteRecursively(env_, dbname_));
  DestroyAndReopen(options);

  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  env_->SetBackgroundThreads(1, Env::LOW);
  env_->SetBackgroundThreads(1, Env::HIGH);
  Random rnd(301);
  int key_idx = 0;

  // First three 110KB files are not going to level 2
  // After that, (100K, 200K)
  for (int round = 0; round < 5; round++) {
    GenerateNewFile(&rnd, &key_idx, true);
  }

  ASSERT_EQ(0, GetSstFileCount(dbname_));

  Close();
  ASSERT_EQ(0, GetSstFileCount(dbname_));

  // Unblock background threads
  sleeping_task_high.WakeUp();
  sleeping_task_high.WaitUntilDone();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();

  Destroy(options);
}
6439
6440
class DelayedMergeOperator : public MergeOperator {
6441
 private:
6442
  DBTest* db_test_;
6443
6444
 public:
6445
5
  explicit DelayedMergeOperator(DBTest* d) : db_test_(d) {}
6446
  virtual bool FullMerge(const Slice& key, const Slice* existing_value,
6447
                         const std::deque<std::string>& operand_list,
6448
                         std::string* new_value,
6449
2
                         Logger* logger) const override {
6450
2
    db_test_->env_->addon_time_.fetch_add(1000);
6451
2
    *new_value = "";
6452
2
    return true;
6453
2
  }
6454
6455
15
  const char* Name() const override { return "DelayedMergeOperator"; }
6456
};
6457
6458
1
// Verifies exact accounting of MERGE_OPERATION_TOTAL_TIME. The
// DelayedMergeOperator adds 1000 micros of fake time per FullMerge, and with
// time_elapse_only_sleep_/no_sleep_ set, the ticker advances only via that
// fake clock — so the expected values (1000000, 2000000) are deterministic.
TEST_F(DBTest, MergeTestTime) {
  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);

  // Enable time profiling
  SetPerfLevel(PerfLevel::kEnableTime);
  this->env_->addon_time_.store(0);
  this->env_->time_elapse_only_sleep_ = true;
  this->env_->no_sleep_ = true;
  Options options;
  options = CurrentOptions(options);
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  options.merge_operator.reset(new DelayedMergeOperator(this));
  DestroyAndReopen(options);

  // No merges have run yet.
  ASSERT_EQ(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
  // One base value plus two merge operands, each flushed to its own file so
  // the later Get() has to combine them.
  ASSERT_OK(db_->Put(WriteOptions(), "foo", one));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foo", two));
  ASSERT_OK(Flush());
  ASSERT_OK(db_->Merge(WriteOptions(), "foo", three));
  ASSERT_OK(Flush());

  ReadOptions opt;
  opt.verify_checksums = true;
  opt.snapshot = nullptr;
  std::string result;
  ASSERT_OK(db_->Get(opt, "foo", &result));

  // The Get() merged the operands once: 1000 micros in ticker units.
  ASSERT_EQ(1000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));

  ReadOptions read_options;
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  int count = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_OK(iter->status());
    ++count;
  }

  // Only one key exists; iterating merged it a second time.
  ASSERT_EQ(1, count);
  ASSERT_EQ(2000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME));
  this->env_->time_elapse_only_sleep_ = false;
}
6503
6504
#ifndef ROCKSDB_LITE
6505
4
// Verifies that merge-operator work performed during compaction is counted
// in the MERGE_OPERATION_TOTAL_TIME ticker (DelayedMergeOperator advances
// the fake clock on every merge).
TEST_P(DBTestWithParam, MergeCompactionTimeTest) {
  SetPerfLevel(PerfLevel::kEnableTime);
  Options options;
  options = CurrentOptions(options);
  options.compaction_filter_factory = std::make_shared<KeepFilterFactory>();
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  options.merge_operator.reset(new DelayedMergeOperator(this));
  options.compaction_style = kCompactionStyleUniversal;
  options.max_subcompactions = max_subcompactions_;
  DestroyAndReopen(options);

  // One merge operand per file so universal compaction has plenty to fold.
  const int kNumMerges = 1000;
  for (int merge_idx = 0; merge_idx < kNumMerges; ++merge_idx) {
    ASSERT_OK(db_->Merge(WriteOptions(), "foo", "TEST"));
    ASSERT_OK(Flush());
  }
  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Compaction must have invoked the merge operator at least once.
  ASSERT_NE(TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME), 0);
}
6525
6526
4
// Exercises manual compaction with a DelayFilterFactory (a compaction filter
// that drops entries), then reopens and iterates. The original
// FILTER_OPERATION_TOTAL_TIME assertion is disabled because the stopwatch was
// removed from the compaction iterator.
TEST_P(DBTestWithParam, FilterCompactionTimeTest) {
  Options options;
  options.compaction_filter_factory =
      std::make_shared<DelayFilterFactory>(this);
  options.disable_auto_compactions = true;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  options.max_subcompactions = max_subcompactions_;
  options = CurrentOptions(options);
  DestroyAndReopen(options);

  // put some data
  for (int table = 0; table < 4; ++table) {
    for (int i = 0; i < 10 + table; ++i) {
      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
    }
    ASSERT_OK(Flush());
  }

  CompactRangeOptions cro;
  cro.exclusive_manual_compaction = exclusive_manual_compaction_;
  ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  // The filter dropped everything, so no live files remain.
  ASSERT_EQ(0U, CountLiveFiles());

  Reopen(options);

  // Own the iterator with unique_ptr instead of raw new/delete: a failing
  // ASSERT between creation and delete would otherwise leak it, and the
  // sibling MergeTestTime test already uses unique_ptr for the same purpose.
  std::unique_ptr<Iterator> itr(db_->NewIterator(ReadOptions()));
  itr->SeekToFirst();
  // Stopwatch has been removed from compaction iterator. Disable assert below.
  // ASSERT_NE(TestGetTickerCount(options, FILTER_OPERATION_TOTAL_TIME), 0);
}
6558
#endif  // ROCKSDB_LITE
6559
6560
1
// With only two memtables allowed, at most two WAL files can be alive at any
// time; verifies the logs-to-free queue never grows beyond that.
TEST_F(DBTest, TestLogCleanup) {
  Options opts = CurrentOptions();
  opts.write_buffer_size = 64 * 1024;  // very small
  // only two memtables allowed ==> only two log files
  opts.max_write_buffer_number = 2;
  Reopen(opts);

  const int kNumKeys = 100000;
  for (int key = 0; key < kNumKeys; ++key) {
    ASSERT_OK(Put(Key(key), "val"));
    // only 2 memtables will be alive, so logs_to_free needs to always be
    // below 2
    ASSERT_LT(dbfull()->TEST_LogsToFreeSize(), static_cast<size_t>(3));
  }
}
6574
6575
#ifndef ROCKSDB_LITE
6576
1
// A read-only reopen of an empty DB must reject writes with NotSupported.
TEST_F(DBTest, EmptyCompactedDB) {
  Options opts;
  opts.max_open_files = -1;
  opts = CurrentOptions(opts);
  Close();
  ASSERT_OK(ReadOnlyReopen(opts));
  const Status put_status = Put("new", "value");
  ASSERT_TRUE(put_status.IsNotSupported());
  Close();
}
6586
#endif  // ROCKSDB_LITE
6587
6588
#ifndef ROCKSDB_LITE
6589
1
// Verifies experimental::SuggestCompactRange: full-range suggestions compact
// everything down, a suggestion over a range disjoint from the L0 file leaves
// it alone, and an overlapping suggestion compacts it. Also checks that such
// compactions are not reported to the filter factory as manual until an
// overlapping suggestion triggers one.
TEST_F(DBTest, SuggestCompactRangeTest) {
  // Records the last CompactionFilter::Context so the test can inspect the
  // is_manual_compaction flag; produces no actual filter.
  class CompactionFilterFactoryGetContext : public CompactionFilterFactory {
   public:
    std::unique_ptr<CompactionFilter> CreateCompactionFilter(
        const CompactionFilter::Context& context) override {
      saved_context = context;
      std::unique_ptr<CompactionFilter> empty_filter;
      return empty_filter;
    }
    const char* Name() const override {
      return "CompactionFilterFactoryGetContext";
    }
    static bool IsManual(CompactionFilterFactory* compaction_filter_factory) {
      // static_cast is the correct named cast for this known downcast;
      // reinterpret_cast (as before) only happened to work because the
      // hierarchy is single-inheritance.
      return static_cast<CompactionFilterFactoryGetContext*>(
                 compaction_filter_factory)->saved_context.is_manual_compaction;
    }
    CompactionFilter::Context saved_context;
  };

  Options options = CurrentOptions();
  options.memtable_factory.reset(
      new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile));
  options.compaction_style = kCompactionStyleLevel;
  options.compaction_filter_factory.reset(
      new CompactionFilterFactoryGetContext());
  options.write_buffer_size = 200 << 10;
  options.arena_block_size = 4 << 10;
  options.level0_file_num_compaction_trigger = 4;
  options.num_levels = 4;
  options.compression = kNoCompression;
  options.max_bytes_for_level_base = 450 << 10;
  options.target_file_size_base = 98 << 10;
  options.max_grandparent_overlap_factor = 1 << 20;  // inf

  Reopen(options);

  Random rnd(301);

  for (int num = 0; num < 3; num++) {
    GenerateNewRandomFile(&rnd);
  }

  // Each new file grows L0 until the trigger (4) compacts L0 into L1,
  // giving the deterministic per-level file counts asserted below.
  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4", FilesPerLevel(0));
  ASSERT_TRUE(!CompactionFilterFactoryGetContext::IsManual(
                   options.compaction_filter_factory.get()));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("2,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("3,4,4", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("0,4,8", FilesPerLevel(0));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ("1,4,8", FilesPerLevel(0));

  // compact it three times
  for (int i = 0; i < 3; ++i) {
    ASSERT_OK(experimental::SuggestCompactRange(db_, nullptr, nullptr));
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
  }

  // All files are compacted
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  GenerateNewRandomFile(&rnd);
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // nonoverlapping with the file on level 0
  Slice start("a"), end("b");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // should not compact the level 0 file
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  start = Slice("j");
  end = Slice("m");
  ASSERT_OK(experimental::SuggestCompactRange(db_, &start, &end));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // The suggested-range compaction goes through the manual-compaction path.
  ASSERT_TRUE(CompactionFilterFactoryGetContext::IsManual(
      options.compaction_filter_factory.get()));

  // now it should compact the level 0 file
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
}
6695
6696
1
// Fills L0 with non-overlapping files, promotes them straight to L2 via
// experimental::PromoteL0, and verifies every key is still readable.
TEST_F(DBTest, PromoteL0) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // non overlapping ranges
  const std::vector<std::pair<int32_t, int32_t>> key_ranges = {
      {81, 160}, {0, 80}, {161, 240}, {241, 320}};

  const int32_t kValueSize = 10 * 1024;  // 10 KB

  Random rnd(301);
  std::map<int32_t, std::string> expected;
  for (const auto& key_range : key_ranges) {
    // One flush per range => one L0 file per range.
    for (int32_t key = key_range.first; key < key_range.second; ++key) {
      expected[key] = RandomString(&rnd, kValueSize);
      ASSERT_OK(Put(Key(key), expected[key]));
    }
    ASSERT_OK(Flush());
  }

  const int32_t num_l0_files = NumTableFilesAtLevel(0, 0);
  ASSERT_EQ(num_l0_files, key_ranges.size());
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);  // No files in L1

  // Promote L0 level to L2.
  ASSERT_OK(experimental::PromoteL0(db_, db_->DefaultColumnFamily(), 2));
  // We expect that all the files were trivially moved from L0 to L2
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(2, 0), num_l0_files);

  for (const auto& entry : expected) {
    ASSERT_EQ(Get(Key(entry.first)), entry.second);
  }
}
6732
6733
1
// PromoteL0 must return InvalidArgument when L0 files overlap each other or
// when the target path below L0 is not empty.
TEST_F(DBTest, PromoteL0Failure) {
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.write_buffer_size = 10 * 1024 * 1024;
  DestroyAndReopen(options);

  // Produce two L0 files with overlapping ranges.
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(Put(Key(3), ""));
  ASSERT_OK(Flush());
  ASSERT_OK(Put(Key(1), ""));
  ASSERT_OK(Flush());

  // Fails because L0 has overlapping files.
  Status promote_status =
      experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(promote_status.IsInvalidArgument());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  // Now there is a file in L1.
  ASSERT_GE(NumTableFilesAtLevel(1, 0), 1);

  ASSERT_OK(Put(Key(5), ""));
  ASSERT_OK(Flush());
  // Fails because L1 is non-empty.
  promote_status = experimental::PromoteL0(db_, db_->DefaultColumnFamily());
  ASSERT_TRUE(promote_status.IsInvalidArgument());
}
6761
#endif  // ROCKSDB_LITE
6762
6763
// Github issue #596
6764
1
// Regression test: a DB configured with many levels plus dynamic level bytes
// must survive a heavy write load followed by a full manual compaction.
TEST_F(DBTest, HugeNumberOfLevels) {
  Options options = CurrentOptions();
  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
  options.num_levels = 12;
  options.max_background_compactions = 10;
  options.max_bytes_for_level_multiplier = 2;
  options.level_compaction_dynamic_level_bytes = true;
  DestroyAndReopen(options);

  Random rnd(301);
  const int kNumKeys = 300000;
  for (int key = 0; key < kNumKeys; ++key) {
    ASSERT_OK(Put(Key(key), RandomString(&rnd, 1024)));
  }

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
}
6781
6782
1
// Verifies that a non-exclusive manual CompactRange running concurrently with
// automatic compactions hits the RunManualCompaction conflict path at least
// once, and that all data remains readable afterwards.
TEST_F(DBTest, AutomaticConflictsWithManualCompaction) {
  Options options = CurrentOptions();
  options.write_buffer_size = 2 * 1024 * 1024;         // 2MB
  options.max_bytes_for_level_base = 2 * 1024 * 1024;  // 2MB
  options.num_levels = 12;
  options.max_background_compactions = 10;
  options.max_bytes_for_level_multiplier = 2;
  options.level_compaction_dynamic_level_bytes = true;
  DestroyAndReopen(options);

  Random rnd(301);
  // Heavy load so automatic compactions are in flight when the manual
  // compaction starts below.
  for (int i = 0; i < 300000; ++i) {
    ASSERT_OK(Put(Key(i), RandomString(&rnd, 1024)));
  }

  // Count every time the manual compaction detects a conflict with an
  // automatic one via the sync point.
  std::atomic<int> callback_count(0);
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::RunManualCompaction()::Conflict",
      [&](void* arg) { callback_count.fetch_add(1); });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
  CompactRangeOptions croptions;
  croptions.exclusive_manual_compaction = false;
  ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
  ASSERT_GE(callback_count.load(), 1);
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  // No key may have been lost by the concurrent compactions.
  for (int i = 0; i < 300000; ++i) {
    ASSERT_NE("NOT_FOUND", Get(Key(i)));
  }
}
6811
6812
// Github issue #595
6813
// Large write batch with column families
6814
1
// Writes progressively larger batches (5MB..9MB, three passes each) into a
// DB with an extra column family and a tiny write buffer, then verifies the
// DB can be reopened with both column families (Github issue #595).
TEST_F(DBTest, LargeBatchWithColumnFamilies) {
  Options options;
  options.env = env_;
  options = CurrentOptions(options);
  options.write_buffer_size = 100000;  // Small write buffer
  CreateAndReopenWithCF({"pikachu"}, options);
  int64_t j = 0;  // monotonically increasing key seed across all batches
  for (int i = 0; i < 5; i++) {
    for (int pass = 1; pass <= 3; pass++) {
      WriteBatch batch;
      size_t write_size = 1024 * 1024 * (5 + i);
      fprintf(stderr, "prepare: %" ROCKSDB_PRIszt " MB, pass:%d\n", (write_size / 1024 / 1024),
              pass);
      // Fill the batch with ~3KB entries until it exceeds write_size.
      for (;;) {
        std::string data(3000, j++ % 127 + 20);
        data += ToString(j);
        batch.Put(handles_[0], Slice(data), Slice(data));
        if (batch.GetDataSize() > write_size) {
          break;
        }
      }
      fprintf(stderr, "write: %" ROCKSDB_PRIszt " MB\n", (batch.GetDataSize() / 1024 / 1024));
      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
      fprintf(stderr, "done\n");
    }
  }
  // make sure we can re-open it.
  ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options));
}
6843
6844
// Make sure that Flushes can proceed in parallel with CompactRange()
6845
1
// Make sure that Flushes can proceed in parallel with CompactRange(): while a
// manual compaction is held open by sync-point dependencies, three more files
// are generated — which requires flushes to run, since only two write buffers
// are allowed.
TEST_F(DBTest, FlushesInParallelWithCompactRange) {
  // iter == 0 -- leveled
  // iter == 1 -- leveled, but throw in a flush between two levels compacting
  // iter == 2 -- universal
  for (int iter = 0; iter < 3; ++iter) {
    Options options = CurrentOptions();
    if (iter < 2) {
      options.compaction_style = kCompactionStyleLevel;
    } else {
      options.compaction_style = kCompactionStyleUniversal;
    }
    options.write_buffer_size = 110 << 10;
    options.level0_file_num_compaction_trigger = 4;
    options.num_levels = 4;
    options.compression = kNoCompression;
    options.max_bytes_for_level_base = 450 << 10;
    options.target_file_size_base = 98 << 10;
    // Only two memtables: if flushes were blocked by the compaction, the
    // GenerateNewRandomFile calls below would need a third and fail.
    options.max_write_buffer_number = 2;

    DestroyAndReopen(options);

    Random rnd(301);
    for (int num = 0; num < 14; num++) {
      GenerateNewRandomFile(&rnd);
    }

    // Pin the manual compaction open between sync points :1 and :2 so the
    // flushes in the middle must run concurrently with it.
    if (iter == 1) {
      rocksdb::SyncPoint::GetInstance()->LoadDependency(
          {{"DBImpl::RunManualCompaction()::1",
            "DBTest::FlushesInParallelWithCompactRange:1"},
           {"DBTest::FlushesInParallelWithCompactRange:2",
            "DBImpl::RunManualCompaction()::2"}});
    } else {
      rocksdb::SyncPoint::GetInstance()->LoadDependency(
          {{"CompactionJob::Run():Start",
            "DBTest::FlushesInParallelWithCompactRange:1"},
           {"DBTest::FlushesInParallelWithCompactRange:2",
            "CompactionJob::Run():End"}});
    }
    rocksdb::SyncPoint::GetInstance()->EnableProcessing();

    std::vector<std::thread> threads;
    threads.emplace_back([&]() { Compact("a", "z"); });

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:1");

    // this has to start a flush. if flushes are blocked, this will try to
    // create
    // 3 memtables, and that will fail because max_write_buffer_number is 2
    for (int num = 0; num < 3; num++) {
      GenerateNewRandomFile(&rnd, /* nowait */ true);
    }

    TEST_SYNC_POINT("DBTest::FlushesInParallelWithCompactRange:2");

    for (auto& t : threads) {
      t.join();
    }
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  }
}
6906
6907
1
// With compactions blocked and the L0 slowdown trigger tripped, writes are
// rate-limited. Verifies the total (fake-clock) sleep time accumulated by the
// write controller lands within 2x of an estimate computed from the bytes
// written and the geometrically decaying rate.
TEST_F(DBTest, DelayedWriteRate) {
  const int kEntriesPerMemTable = 100;
  const int kTotalFlushes = 20;

  Options options;
  env_->SetBackgroundThreads(1, Env::LOW);
  options.env = env_;
  // With no_sleep_, delays are recorded in addon_time_ instead of really
  // sleeping, so the test can measure them deterministically.
  env_->no_sleep_ = true;
  options = CurrentOptions(options);
  options.write_buffer_size = 100000000;
  options.max_write_buffer_number = 256;
  options.max_background_compactions = 1;
  options.level0_file_num_compaction_trigger = 3;
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 999999;
  options.delayed_write_rate = 20000000;  // Start with 200MB/s
  options.memtable_factory.reset(
      new SpecialSkipListFactory(kEntriesPerMemTable));

  CreateAndReopenWithCF({"pikachu"}, options);

  // Block compactions
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  // Three L0 files reach level0_slowdown_writes_trigger, engaging the delay.
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put(Key(i), std::string(10000, 'x')));
    ASSERT_OK(Flush());
  }

  // These writes will be slowed down to 1KB/s
  uint64_t estimated_sleep_time = 0;
  Random rnd(301);
  ASSERT_OK(Put("", ""));
  uint64_t cur_rate = options.delayed_write_rate;
  for (int i = 0; i < kTotalFlushes; i++) {
    uint64_t size_memtable = 0;
    for (int j = 0; j < kEntriesPerMemTable; j++) {
      auto rand_num = rnd.Uniform(20);
      // Spread the size range to more.
      size_t entry_size = rand_num * rand_num * rand_num;
      WriteOptions wo;
      ASSERT_OK(Put(Key(i), std::string(entry_size, 'x'), wo));
      // +18 approximates per-entry memtable overhead when estimating bytes.
      size_memtable += entry_size + 18;
      // Occasionally sleep a while
      if (rnd.Uniform(20) == 6) {
        env_->SleepForMicroseconds(2666);
      }
    }
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
    estimated_sleep_time += size_memtable * 1000000u / cur_rate;
    // Slow down twice. One for memtable switch and one for flush finishes.
    cur_rate = static_cast<uint64_t>(static_cast<double>(cur_rate) /
                                     kSlowdownRatio / kSlowdownRatio);
  }
  // Estimate the total sleep time fall into the rough range.
  ASSERT_GT(env_->addon_time_.load(),
            static_cast<int64_t>(estimated_sleep_time / 2));
  ASSERT_LT(env_->addon_time_.load(),
            static_cast<int64_t>(estimated_sleep_time * 2));

  env_->no_sleep_ = false;
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}
6974
6975
1
// With the hard pending-compaction-bytes limit set low and compactions
// blocked, continued writes must eventually stall in DelayWrite:Wait. The
// sync-point callback both counts the stalls and wakes the blocked
// compaction so the test can make progress.
TEST_F(DBTest, HardLimit) {
  Options options;
  options.env = env_;
  env_->SetBackgroundThreads(1, Env::LOW);
  options = CurrentOptions(options);
  options.max_write_buffer_number = 256;
  options.write_buffer_size = 110 << 10;  // 110KB
  options.arena_block_size = 4 * 1024;
  options.level0_file_num_compaction_trigger = 4;
  // Slowdown/stop triggers effectively disabled; only the hard pending
  // compaction bytes limit below can stall writes.
  options.level0_slowdown_writes_trigger = 999999;
  options.level0_stop_writes_trigger = 999999;
  options.hard_pending_compaction_bytes_limit = 800 << 10;
  options.max_bytes_for_level_base = 10000000000u;
  options.max_background_compactions = 1;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));

  // Single LOW thread occupied by a sleeping task => compactions blocked.
  env_->SetBackgroundThreads(1, Env::LOW);
  test::SleepingBackgroundTask sleeping_task_low;
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);

  CreateAndReopenWithCF({"pikachu"}, options);

  std::atomic<int> callback_count(0);
  rocksdb::SyncPoint::GetInstance()->SetCallBack("DBImpl::DelayWrite:Wait",
                                                 [&](void* arg) {
                                                   callback_count.fetch_add(1);
                                                   sleeping_task_low.WakeUp();
                                                 });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  int key_idx = 0;
  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
  }

  // Not enough pending compaction bytes yet: no write was stalled.
  ASSERT_EQ(0, callback_count.load());

  for (int num = 0; num < 5; num++) {
    GenerateNewFile(&rnd, &key_idx, true);
    ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  }
  // By now the hard limit must have stalled at least one write.
  ASSERT_GE(callback_count.load(), 1);

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WaitUntilDone();
}
7024
7025
#ifndef ROCKSDB_LITE
7026
1
// Exercises the soft pending-compaction-bytes limit and the L0 slowdown
// trigger: asserts exactly when the write controller's delay is engaged and
// released as files accumulate in L0 and are compacted down, with compaction
// progress controlled one step at a time via a sleeping background task.
TEST_F(DBTest, SoftLimit) {
  Options options;
  options.env = env_;
  options = CurrentOptions(options);
  options.write_buffer_size = 100000;  // Small write buffer
  options.max_write_buffer_number = 256;
  options.level0_file_num_compaction_trigger = 1;
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 999999;
  options.delayed_write_rate = 20000;  // About 200KB/s limited rate
  options.soft_pending_compaction_bytes_limit = 200000;
  options.target_file_size_base = 99999999;  // All into one file
  options.max_bytes_for_level_base = 50000;
  options.max_bytes_for_level_multiplier = 10;
  options.max_background_compactions = 1;
  options.compression = kNoCompression;

  Reopen(options);
  ASSERT_OK(Put(Key(0), ""));

  test::SleepingBackgroundTask sleeping_task_low;
  // Block compactions
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();

  // Create 3 L0 files, making score of L0 to be 3.
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put(Key(i), std::string(5000, 'x')));
    ASSERT_OK(Put(Key(100 - i), std::string(5000, 'x')));
    // Flush the file. File size is around 30KB.
    ASSERT_OK(Flush());
  }
  // L0 score == level0_slowdown_writes_trigger => delay engaged.
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());

  // Let the blocked compaction run to completion.
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
  sleeping_task_low.Reset();
  ASSERT_OK(dbfull()->TEST_WaitForCompact());

  // Now there is one L1 file but doesn't trigger soft_rate_limit
  // The L1 file size is around 30KB.
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // Only allow one compaction going through: each time a small compaction is
  // picked, immediately re-block the LOW pool with a fresh sleeping task.
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl:BackgroundCompaction:SmallCompaction", [&](void* arg) {
        // Schedule a sleeping task.
        sleeping_task_low.Reset();
        env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
                       &sleeping_task_low, Env::Priority::LOW);
      });

  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task_low,
                 Env::Priority::LOW);
  sleeping_task_low.WaitUntilSleeping();
  // Create 3 L0 files, making score of L0 to be 3
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put(Key(10 + i), std::string(5000, 'x')));
    ASSERT_OK(Put(Key(90 - i), std::string(5000, 'x')));
    // Flush the file. File size is around 30KB.
    ASSERT_OK(Flush());
  }

  // (Twice) Wake up sleep task to enable compaction to run and waits
  // for it to go to sleep state again to make sure one compaction
  // goes through.
  for (int i = 0; i < 2; i++) {
    sleeping_task_low.WakeUp();
    sleeping_task_low.WaitUntilSleeping();
  }

  // Now there is one L1 file (around 60KB) which exceeds 50KB base by 10KB
  // Given level multiplier 10, estimated pending compaction is around 100KB
  // doesn't trigger soft_pending_compaction_bytes_limit. Another compaction
  // promoting the L1 file to L2 is unscheduled.
  ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // Create 3 L0 files, making score of L0 to be 3, higher than L0.
  for (int i = 0; i < 3; i++) {
    ASSERT_OK(Put(Key(20 + i), std::string(5000, 'x')));
    ASSERT_OK(Put(Key(80 - i), std::string(5000, 'x')));
    // Flush the file. File size is around 30KB.
    ASSERT_OK(Flush());
  }
  // Wake up sleep task to enable compaction to run and waits
  // for it to go to sleep state again to make sure one compaction
  // goes through.

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilSleeping();

  // Now there is one L2 file (around 60KB) which doesn't trigger
  // soft_pending_compaction_bytes_limit but the 3 L0 files do get the delay token
  ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());

  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilSleeping();

  ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());

  // shrink level base so L2 will hit soft limit easier.
  ASSERT_OK(dbfull()->SetOptions({
      {"max_bytes_for_level_base", "5000"},
  }));

  ASSERT_OK(Put("", ""));
  ASSERT_OK(Flush());
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());

  sleeping_task_low.WaitUntilSleeping();
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  sleeping_task_low.WakeUp();
  sleeping_task_low.WaitUntilDone();
}
7147
7148
1
// Filling the last allowed write buffer (with flushes blocked) must engage
// the write controller's delay, and not a moment earlier.
TEST_F(DBTest, LastWriteBufferDelay) {
  Options options;
  options.env = env_;
  options = CurrentOptions(options);
  options.write_buffer_size = 100000;
  options.max_write_buffer_number = 4;
  options.delayed_write_rate = 20000;
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  const int kKeysPerMemtable = 3;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(kKeysPerMemtable));

  Reopen(options);
  test::SleepingBackgroundTask sleeping_task;
  // Block flushes
  env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_task,
                 Env::Priority::HIGH);
  sleeping_task.WaitUntilSleeping();

  // Fill three memtables; while a fourth remains available there must be
  // no write delay.
  for (int table = 0; table < 3; ++table) {
    // Fill one mem table
    for (int key = 0; key < kKeysPerMemtable; ++key) {
      ASSERT_OK(Put(Key(key), ""));
    }
    ASSERT_TRUE(!dbfull()->TEST_write_controler().NeedsDelay());
  }
  // Inserting a new entry would create a new mem table, triggering slow down.
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_TRUE(dbfull()->TEST_write_controler().NeedsDelay());

  sleeping_task.WakeUp();
  sleeping_task.WaitUntilDone();
}
7183
#endif  // ROCKSDB_LITE
7184
7185
1
// If a compression library is not compiled in, both DB::Open and
// CreateColumnFamily must fail rather than silently proceeding.
TEST_F(DBTest, FailWhenCompressionNotSupportedTest) {
  CompressionType compressions[] = {kZlibCompression, kBZip2Compression,
                                    kLZ4Compression,  kLZ4HCCompression};
  for (int iter = 0; iter < 4; ++iter) {
    if (!CompressionTypeSupported(compressions[iter])) {
      // not supported, we should fail the Open()
      Options options = CurrentOptions();
      options.compression = compressions[iter];
      ASSERT_TRUE(!TryReopen(options).ok());
      // Try if CreateColumnFamily also fails
      options.compression = kNoCompression;
      ASSERT_OK(TryReopen(options));
      ColumnFamilyOptions cf_options(options);
      cf_options.compression = compressions[iter];
      // Initialize to nullptr: CreateColumnFamily is expected to fail here,
      // and the original uninitialized pointer would be UB to inspect.
      ColumnFamilyHandle* handle = nullptr;
      ASSERT_TRUE(!db_->CreateColumnFamily(cf_options, "name", &handle).ok());
    }
  }
}
7204
7205
#ifndef ROCKSDB_LITE
7206
1
// Checks row-cache ticker accounting: the first Get after a flush is a miss
// that populates the cache; the second Get for the same key is a hit.
TEST_F(DBTest, RowCache) {
  Options options = CurrentOptions();
  options.statistics = rocksdb::CreateDBStatisticsForTests();
  options.row_cache = NewLRUCache(8192);
  DestroyAndReopen(options);

  ASSERT_OK(Put("foo", "bar"));
  ASSERT_OK(Flush());

  // Nothing read yet: both counters are zero.
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0);
  // First read: miss, inserts the row into the cache.
  ASSERT_EQ(Get("foo"), "bar");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
  // Second read: served from the row cache.
  ASSERT_EQ(Get("foo"), "bar");
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1);
  ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1);
}
7224
#endif  // ROCKSDB_LITE
7225
7226
// TODO(3.13): fix the issue of Seek() + Prev() which might not necessarily
7227
//             return the biggest key which is smaller than the seek key.
7228
1
// After Seek() lands on a merge-produced key, Prev() must step to the
// immediately preceding key.
TEST_F(DBTest, PrevAfterMerge) {
  Options options;
  options.create_if_missing = true;
  options.merge_operator = MergeOperators::CreatePutOperator();
  DestroyAndReopen(options);

  // Insert three entries with distinct keys via Merge().
  WriteOptions write_opts;
  for (const auto& kv : {std::make_pair("1", "data1"),
                         std::make_pair("2", "data2"),
                         std::make_pair("3", "data3")}) {
    ASSERT_OK(db_->Merge(write_opts, kv.first, kv.second));
  }

  std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));

  // Position on "2", then step backwards; the iterator should land on "1".
  iter->Seek("2");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("2", iter->key().ToString());

  iter->Prev();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("1", iter->key().ToString());
}
7250
7251
1
// After dropping the only column family whose data pins the oldest WAL, a
// later write must still roll over to a new WAL file (the old log must not
// be kept alive by flushes that now have nothing to do).
TEST_F(DBTest, DeletingOldWalAfterDrop) {
  // Gate the background flush: it may not start until "Test:AllowFlushes",
  // and the test waits at "Test:WaitForFlush" until the flush completed.
  rocksdb::SyncPoint::GetInstance()->LoadDependency(
      { { "Test:AllowFlushes", "DBImpl::BGWorkFlush" },
        { "DBImpl::BGWorkFlush:done", "Test:WaitForFlush"} });
  rocksdb::SyncPoint::GetInstance()->ClearTrace();

  // Keep sync points off while (re)opening so the dependency above does not
  // interfere with recovery-time flushes.
  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  Options options = CurrentOptions();
  options.max_total_wal_size = 8192;  // small, so two 8KB puts force a WAL switch
  options.compression = kNoCompression;
  options.write_buffer_size = 1 << 20;
  // Effectively disable all compaction/slowdown triggers for this test.
  options.level0_file_num_compaction_trigger = (1<<30);
  options.level0_slowdown_writes_trigger = (1<<30);
  options.level0_stop_writes_trigger = (1<<30);
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  CreateColumnFamilies({"cf1", "cf2"}, options);
  ASSERT_OK(Put(0, "key1", DummyString(8192)));
  ASSERT_OK(Put(0, "key2", DummyString(8192)));
  // the oldest wal should now be getting_flushed
  ASSERT_OK(db_->DropColumnFamily(handles_[0]));
  // all flushes should now do nothing because their CF is dropped
  TEST_SYNC_POINT("Test:AllowFlushes");
  TEST_SYNC_POINT("Test:WaitForFlush");
  uint64_t lognum1 = dbfull()->TEST_LogfileNumber();
  ASSERT_OK(Put(1, "key3", DummyString(8192)));
  ASSERT_OK(Put(1, "key4", DummyString(8192)));
  // new wal should have been created
  uint64_t lognum2 = dbfull()->TEST_LogfileNumber();
  EXPECT_GT(lognum2, lognum1);
}
7284
7285
1
// SyncWAL() must surface NotSupported when the environment reports that its
// WAL sync is not thread-safe.
TEST_F(DBTest, UnsupportedManualSync) {
  DestroyAndReopen(CurrentOptions());
  // Pretend the environment cannot safely sync the WAL from another thread.
  env_->is_wal_sync_thread_safe_.store(false);
  const Status sync_status = db_->SyncWAL();
  ASSERT_TRUE(sync_status.IsNotSupported());
}
7291
7292
#ifndef ROCKSDB_LITE
7293
7294
1
// End-to-end exercise of SstFileWriter + DB::AddFile() across option
// configurations: writer ordering/Finish() invariants, ingestion by path and
// by ExternalSstFileInfo, rejection on key-range overlap and while a
// snapshot is held, and interaction with tombstones left by Delete().
TEST_F(DBTest, AddExternalSstFile) {
  do {
    std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
    ASSERT_OK(env_->CreateDirIfMissing(sst_files_folder));
    Options options = CurrentOptions();
    options.env = env_;
    const ImmutableCFOptions ioptions(options);

    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);

    // file1.sst (0 => 99)
    std::string file1 = sst_files_folder + "file1.sst";
    ASSERT_OK(sst_file_writer.Open(file1));
    for (int k = 0; k < 100; k++) {
      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo file1_info;
    Status s = sst_file_writer.Finish(&file1_info);
    ASSERT_TRUE(s.ok()) << s.ToString();
    ASSERT_EQ(file1_info.file_path, file1);
    ASSERT_EQ(file1_info.num_entries, 100);
    ASSERT_EQ(file1_info.smallest_key, Key(0));
    ASSERT_EQ(file1_info.largest_key, Key(99));
    // sst_file_writer already finished, cannot add this value
    s = sst_file_writer.Add(Key(100), "bad_val");
    ASSERT_FALSE(s.ok()) << s.ToString();

    // file2.sst (100 => 199)
    std::string file2 = sst_files_folder + "file2.sst";
    ASSERT_OK(sst_file_writer.Open(file2));
    for (int k = 100; k < 200; k++) {
      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
    }
    // Cannot add this key because it's not after the last added key
    s = sst_file_writer.Add(Key(99), "bad_val");
    ASSERT_FALSE(s.ok()) << s.ToString();
    ExternalSstFileInfo file2_info;
    s = sst_file_writer.Finish(&file2_info);
    ASSERT_TRUE(s.ok()) << s.ToString();
    ASSERT_EQ(file2_info.file_path, file2);
    ASSERT_EQ(file2_info.num_entries, 100);
    ASSERT_EQ(file2_info.smallest_key, Key(100));
    ASSERT_EQ(file2_info.largest_key, Key(199));

    // file3.sst (195 => 299)
    // This file's values overlap with file2's values
    std::string file3 = sst_files_folder + "file3.sst";
    ASSERT_OK(sst_file_writer.Open(file3));
    for (int k = 195; k < 300; k++) {
      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo file3_info;
    s = sst_file_writer.Finish(&file3_info);
    ASSERT_TRUE(s.ok()) << s.ToString();
    ASSERT_EQ(file3_info.file_path, file3);
    ASSERT_EQ(file3_info.num_entries, 105);
    ASSERT_EQ(file3_info.smallest_key, Key(195));
    ASSERT_EQ(file3_info.largest_key, Key(299));

    // file4.sst (30 => 39)
    // This file's values overlap with file1's values
    std::string file4 = sst_files_folder + "file4.sst";
    ASSERT_OK(sst_file_writer.Open(file4));
    for (int k = 30; k < 40; k++) {
      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
    }
    ExternalSstFileInfo file4_info;
    s = sst_file_writer.Finish(&file4_info);
    ASSERT_TRUE(s.ok()) << s.ToString();
    ASSERT_EQ(file4_info.file_path, file4);
    ASSERT_EQ(file4_info.num_entries, 10);
    ASSERT_EQ(file4_info.smallest_key, Key(30));
    ASSERT_EQ(file4_info.largest_key, Key(39));

    // file5.sst (400 => 499)
    std::string file5 = sst_files_folder + "file5.sst";
    ASSERT_OK(sst_file_writer.Open(file5));
    for (int k = 400; k < 500; k++) {
      ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
    }
    ExternalSstFileInfo file5_info;
    s = sst_file_writer.Finish(&file5_info);
    ASSERT_TRUE(s.ok()) << s.ToString();
    ASSERT_EQ(file5_info.file_path, file5);
    ASSERT_EQ(file5_info.num_entries, 100);
    ASSERT_EQ(file5_info.smallest_key, Key(400));
    ASSERT_EQ(file5_info.largest_key, Key(499));

    // Cannot create an empty sst file
    std::string file_empty = sst_files_folder + "file_empty.sst";
    ExternalSstFileInfo file_empty_info;
    s = sst_file_writer.Finish(&file_empty_info);
    ASSERT_NOK(s);

    DestroyAndReopen(options);
    // Add file using file path
    s = db_->AddFile(file1);
    ASSERT_TRUE(s.ok()) << s.ToString();
    // Ingestion must not consume sequence numbers.
    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 100; k++) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    // Adding a file while holding a snapshot will fail
    const Snapshot* s1 = db_->GetSnapshot();
    if (s1 != nullptr) {
      ASSERT_NOK(db_->AddFile(&file2_info));
      db_->ReleaseSnapshot(s1);
    }
    // We can add the file after releasing the snapshot
    ASSERT_OK(db_->AddFile(&file2_info));

    ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
    for (int k = 0; k < 200; k++) {
      ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
    }

    // This file has overlapping values with the existing data
    s = db_->AddFile(file3);
    ASSERT_FALSE(s.ok()) << s.ToString();

    // This file has overlapping values with the existing data
    s = db_->AddFile(&file4_info);
    ASSERT_FALSE(s.ok()) << s.ToString();

    // Overwrite values of keys divisible by 5
    for (int k = 0; k < 200; k += 5) {
      ASSERT_OK(Put(Key(k), Key(k) + "_val_new"));
    }
    ASSERT_NE(db_->GetLatestSequenceNumber(), 0U);

    // Key range of file5 (400 => 499) doesn't overlap with any keys in DB
    ASSERT_OK(db_->AddFile(file5));

    // Make sure values are correct before and after flush/compaction
    for (int i = 0; i < 2; i++) {
      for (int k = 0; k < 200; k++) {
        std::string value = Key(k) + "_val";
        if (k % 5 == 0) {
          value += "_new";
        }
        ASSERT_EQ(Get(Key(k)), value);
      }
      for (int k = 400; k < 500; k++) {
        std::string value = Key(k) + "_val";
        ASSERT_EQ(Get(Key(k)), value);
      }
      ASSERT_OK(Flush());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    Close();
    options.disable_auto_compactions = true;
    Reopen(options);

    // Delete keys in range (400 => 499)
    for (int k = 400; k < 500; k++) {
      ASSERT_OK(Delete(Key(k)));
    }
    // We deleted range (400 => 499) but cannot add file5 because
    // of the range tombstones
    ASSERT_NOK(db_->AddFile(file5));

    // Compacting the DB will remove the tombstones
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));

    // Now we can add the file
    ASSERT_OK(db_->AddFile(file5));

    // Verify values of file5 in DB
    for (int k = 400; k < 500; k++) {
      std::string value = Key(k) + "_val";
      ASSERT_EQ(Get(Key(k)), value);
    }
  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}
7471
7472
// This test reproduces a bug that can happen in some cases if the DB started
7473
// purging obsolete files when we are adding an external sst file.
7474
// This situation may result in deleting the file while it's being added.
7475
1
TEST_F(DBTest, AddExternalSstFilePurgeObsoleteFilesBug) {
  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
  ASSERT_OK(env_->CreateDir(sst_files_folder));
  Options options = CurrentOptions();
  options.env = env_;
  const ImmutableCFOptions ioptions(options);

  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);

  // file1.sst (0 => 500)
  std::string sst_file_path = sst_files_folder + "file1.sst";
  Status s = sst_file_writer.Open(sst_file_path);
  ASSERT_OK(s);
  for (int i = 0; i < 500; i++) {
    std::string k = Key(i);
    s = sst_file_writer.Add(k, k + "_val");
    ASSERT_OK(s);
  }

  ExternalSstFileInfo sst_file_info;
  s = sst_file_writer.Finish(&sst_file_info);
  ASSERT_OK(s);

  // Make obsolete-file purging as aggressive as possible so it can race
  // with the file ingestion below.
  options.delete_obsolete_files_period_micros = 0;
  options.disable_auto_compactions = true;
  DestroyAndReopen(options);

  // Once AddFile() has copied the external file into the DB directory,
  // trigger flushes and a full compaction, which make older files obsolete
  // and kick off purging while the add is still in progress.
  rocksdb::SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::AddFile:FileCopied", [&](void* arg) {
        ASSERT_OK(Put("aaa", "bbb"));
        ASSERT_OK(Flush());
        ASSERT_OK(Put("aaa", "xxx"));
        ASSERT_OK(Flush());
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      });
  rocksdb::SyncPoint::GetInstance()->EnableProcessing();

  // The add must survive the concurrent purge (the original bug deleted the
  // copied file before ingestion completed).
  s = db_->AddFile(sst_file_path);
  ASSERT_OK(s);

  for (int i = 0; i < 500; i++) {
    std::string k = Key(i);
    std::string v = k + "_val";
    ASSERT_EQ(Get(k), v);
  }

  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
7523
7524
1
// Verifies AddFile()'s move-vs-copy semantics: a moved file disappears from
// its source path, a copied file remains, and a rejected (overlapping) move
// leaves the source file untouched.
TEST_F(DBTest, AddExternalSstFileNoCopy) {
  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
  ASSERT_OK(env_->CreateDir(sst_files_folder));
  Options options = CurrentOptions();
  options.env = env_;
  const ImmutableCFOptions ioptions(options);

  SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);

  // file1.sst (0 => 99)
  std::string file1 = sst_files_folder + "file1.sst";
  ASSERT_OK(sst_file_writer.Open(file1));
  for (int k = 0; k < 100; k++) {
    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file1_info;
  Status s = sst_file_writer.Finish(&file1_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file1_info.file_path, file1);
  ASSERT_EQ(file1_info.num_entries, 100);
  ASSERT_EQ(file1_info.smallest_key, Key(0));
  ASSERT_EQ(file1_info.largest_key, Key(99));

  // file2.sst (100 => 299)
  std::string file2 = sst_files_folder + "file2.sst";
  ASSERT_OK(sst_file_writer.Open(file2));
  for (int k = 100; k < 300; k++) {
    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val"));
  }
  ExternalSstFileInfo file2_info;
  s = sst_file_writer.Finish(&file2_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file2_info.file_path, file2);
  ASSERT_EQ(file2_info.num_entries, 200);
  ASSERT_EQ(file2_info.smallest_key, Key(100));
  ASSERT_EQ(file2_info.largest_key, Key(299));

  // file3.sst (110 => 124) .. overlap with file2.sst
  std::string file3 = sst_files_folder + "file3.sst";
  ASSERT_OK(sst_file_writer.Open(file3));
  for (int k = 110; k < 125; k++) {
    ASSERT_OK(sst_file_writer.Add(Key(k), Key(k) + "_val_overlap"));
  }
  ExternalSstFileInfo file3_info;
  s = sst_file_writer.Finish(&file3_info);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_EQ(file3_info.file_path, file3);
  ASSERT_EQ(file3_info.num_entries, 15);
  ASSERT_EQ(file3_info.smallest_key, Key(110));
  ASSERT_EQ(file3_info.largest_key, Key(124));

  // Moving the file ingests it and removes the original.
  s = db_->AddFile(&file1_info, true /* move file */);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_TRUE(env_->FileExists(file1).IsNotFound());

  // Copying the file ingests it but leaves the original in place.
  s = db_->AddFile(&file2_info, false /* copy file */);
  ASSERT_TRUE(s.ok()) << s.ToString();
  ASSERT_OK(env_->FileExists(file2));

  // This file has overlapping values with the existing data, so the add
  // fails; a failed move must not delete the source file.
  s = db_->AddFile(&file3_info, true /* move file */);
  ASSERT_FALSE(s.ok()) << s.ToString();
  ASSERT_OK(env_->FileExists(file3));

  for (int k = 0; k < 300; k++) {
    ASSERT_EQ(Get(Key(k)), Key(k) + "_val");
  }
}
7592
7593
1
// Writes 10 non-overlapping SST files from 10 threads, then ingests each
// file twice from 20 concurrent threads: exactly one of each pair of adds
// must succeed, and all data must be readable before and after
// flush/compaction.
TEST_F(DBTest, AddExternalSstFileMultiThreaded) {
  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
  // Bulk load 10 files, every file containing 1000 keys
  int num_files = 10;
  int keys_per_file = 1000;

  // Generate file names
  std::vector<std::string> file_names;
  for (int i = 0; i < num_files; i++) {
    std::string file_name = "file_" + ToString(i) + ".sst";
    file_names.push_back(sst_files_folder + file_name);
  }

  do {
    ASSERT_OK(env_->CreateDirIfMissing(sst_files_folder));
    Options options = CurrentOptions();
    const ImmutableCFOptions ioptions(options);

    // Each writer thread claims a distinct file index (and therefore a
    // distinct, non-overlapping key range) via this counter.
    std::atomic<int> thread_num(0);
    std::function<void()> write_file_func = [&]() {
      int file_idx = thread_num.fetch_add(1);
      int range_start = file_idx * keys_per_file;
      int range_end = range_start + keys_per_file;

      SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);

      ASSERT_OK(sst_file_writer.Open(file_names[file_idx]));

      for (int k = range_start; k < range_end; k++) {
        ASSERT_OK(sst_file_writer.Add(Key(k), Key(k)));
      }

      Status s = sst_file_writer.Finish();
      ASSERT_TRUE(s.ok()) << s.ToString();
    };
    // Write num_files files in parallel
    std::vector<std::thread> sst_writer_threads;
    for (int i = 0; i < num_files; ++i) {
      sst_writer_threads.emplace_back(write_file_func);
    }

    for (auto& t : sst_writer_threads) {
      t.join();
    }

    fprintf(stderr, "Wrote %d files (%d keys)\n", num_files,
            num_files * keys_per_file);

    thread_num.store(0);
    std::atomic<int> files_added(0);
    std::function<void()> load_file_func = [&]() {
      // We intentionally add every file twice, and assert that it was added
      // only once and the other add failed
      int thread_id = thread_num.fetch_add(1);
      int file_idx = thread_id / 2;
      // Mix moving and copying (based on thread id) — the resulting DB state
      // should be the same either way.
      bool move_file = (thread_id % 3 == 0);

      Status s = db_->AddFile(file_names[file_idx], move_file);
      if (s.ok()) {
        files_added++;
      }
    };
    // Bulk load num_files files in parallel
    std::vector<std::thread> add_file_threads;
    DestroyAndReopen(options);
    for (int i = 0; i < num_files * 2; ++i) {
      add_file_threads.emplace_back(load_file_func);
    }

    for (auto& t : add_file_threads) {
      t.join();
    }
    // Each file was offered twice; exactly one attempt per file may succeed.
    ASSERT_EQ(files_added.load(), num_files);
    fprintf(stderr, "Loaded %d files (%d keys)\n", num_files,
            num_files * keys_per_file);

    // Overwrite values of keys divisible by 100
    for (int k = 0; k < num_files * keys_per_file; k += 100) {
      std::string key = Key(k);
      Status s = Put(key, key + "_new");
      ASSERT_TRUE(s.ok());
    }

    for (int i = 0; i < 2; i++) {
      // Make sure the values are correct before and after flush/compaction
      for (int k = 0; k < num_files * keys_per_file; ++k) {
        std::string key = Key(k);
        std::string value = (k % 100 == 0) ? (key + "_new") : key;
        ASSERT_EQ(Get(key), value);
      }
      ASSERT_OK(Flush());
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    fprintf(stderr, "Verified %d values\n", num_files * keys_per_file);
  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}
7692
7693
1
// Randomized mix of memtable Puts and AddFile() ingestions over overlapping
// key ranges, interleaved with flushes/compactions. Tracks the expected DB
// contents in `true_data` and verifies that AddFile() succeeds exactly when
// the file's range does not overlap existing data.
TEST_F(DBTest, AddExternalSstFileOverlappingRanges) {
  std::string sst_files_folder = test::TmpDir(env_) + "/sst_files/";
  Random rnd(301);
  do {
    ASSERT_OK(env_->CreateDirIfMissing(sst_files_folder));
    Options options = CurrentOptions();
    DestroyAndReopen(options);
    const ImmutableCFOptions ioptions(options);
    SstFileWriter sst_file_writer(EnvOptions(), ioptions, options.comparator);

    printf("Option config = %d\n", option_config_);
    std::vector<std::pair<int, int>> key_ranges;
    // Lower number of key ranges for tsan due to low perf.
    constexpr int kNumKeyRanges = yb::NonTsanVsTsan(500, 100);
    for (int i = 0; i < kNumKeyRanges; i++) {
      int range_start = rnd.Uniform(20000);
      int keys_per_range = 10 + rnd.Uniform(41);

      key_ranges.emplace_back(range_start, range_start + keys_per_range);
    }

    int memtable_add = 0;
    int success_add_file = 0;
    int failed_add_file = 0;
    // Reference model of what the DB should contain.
    std::map<std::string, std::string> true_data;
    for (size_t i = 0; i < key_ranges.size(); i++) {
      int range_start = key_ranges[i].first;
      int range_end = key_ranges[i].second;

      Status s;
      std::string range_val = "range_" + ToString(i);

      // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
      if (i && i % 5 == 0) {
        // Use DB::Put to insert range (insert into memtable)
        range_val += "_put";
        for (int k = range_start; k <= range_end; k++) {
          s = Put(Key(k), range_val);
          ASSERT_OK(s);
        }
        memtable_add++;
      } else {
        // Use DB::AddFile to insert range
        range_val += "_add_file";

        // Generate the file containing the range
        std::string file_name = sst_files_folder + env_->GenerateUniqueId();
        ASSERT_OK(sst_file_writer.Open(file_name));
        for (int k = range_start; k <= range_end; k++) {
          s = sst_file_writer.Add(Key(k), range_val);
          ASSERT_OK(s);
        }
        ExternalSstFileInfo file_info;
        s = sst_file_writer.Finish(&file_info);
        ASSERT_OK(s);

        // Insert the generated file
        s = db_->AddFile(&file_info);

        // AddFile() must fail iff the range overlaps existing keys.
        auto it = true_data.lower_bound(Key(range_start));
        if (it != true_data.end() && it->first <= Key(range_end)) {
          // This range overlaps with data already existing in the DB
          ASSERT_NOK(s);
          failed_add_file++;
        } else {
          ASSERT_OK(s);
          success_add_file++;
        }
      }

      if (s.ok()) {
        // Update the true_data map to include the newly inserted data
        for (int k = range_start; k <= range_end; k++) {
          true_data[Key(k)] = range_val;
        }
      }

      // Flush / Compact the DB periodically
      if (i && i % 50 == 0) {
        ASSERT_OK(Flush());
      }
      if (i && i % 75 == 0) {
        ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
      }
    }

    printf(
        "Total: %zu ranges\n"
        "AddFile()|Success: %d ranges\n"
        "AddFile()|RangeConflict: %d ranges\n"
        "Put(): %d ranges\n",
        key_ranges.size(), success_add_file, failed_add_file, memtable_add);

    // Verify the correctness of the data
    for (const auto& kv : true_data) {
      ASSERT_EQ(Get(kv.first), kv.second);
    }
    printf("keys/values verified\n");
  } while (ChangeOptions(kSkipPlainTable | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}
7794
7795
#endif  // ROCKSDB_LITE
7796
7797
1
// Randomized test for ReadOptions::pin_data: the key Slices returned by an
// iterator must stay valid (pinned) after the iterator moves on, across
// several DB lifecycles (plain, close/reopen, compact-first, frequent
// flushes).
//
// Fix: the iterator is now held by std::unique_ptr instead of a raw pointer
// with a trailing `delete iter;` — a fatal ASSERT_* failure returns from the
// test function early and would have leaked the iterator (and pinned
// resources). This matches the std::unique_ptr<Iterator> usage elsewhere in
// this file (e.g. PrevAfterMerge).
TEST_F(DBTest, PinnedDataIteratorRandomized) {
  enum TestConfig {
    NORMAL,
    CLOSE_AND_OPEN,
    COMPACT_BEFORE_READ,
    FLUSH_EVERY_1000,
    MAX
  };

  // Generate Random data
  Random rnd(301);

  // Lower number of keys for tsan due to low perf.
  constexpr int kNumPuts = yb::NonTsanVsTsan(100000, 10000);
  int key_pool = static_cast<int>(kNumPuts * 0.7);
  int key_size = 100;
  int val_size = 1000;
  int seeks_percentage = 20;   // 20% of keys will be used to test seek()
  int delete_percentage = 20;  // 20% of keys will be deleted
  int merge_percentage = 20;   // 20% of keys will be added using Merge()

  for (int run_config = 0; run_config < TestConfig::MAX; run_config++) {
    Options options = CurrentOptions();
    BlockBasedTableOptions table_options;
    table_options.use_delta_encoding = false;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    options.merge_operator = MergeOperators::CreatePutOperator();
    DestroyAndReopen(options);

    std::vector<std::string> generated_keys(key_pool);
    for (int i = 0; i < key_pool; i++) {
      generated_keys[i] = RandomString(&rnd, key_size);
    }

    // Reference model of the expected DB contents.
    std::map<std::string, std::string> true_data;
    std::vector<std::string> random_keys;
    for (int i = 0; i < kNumPuts; i++) {
      auto& k = generated_keys[rnd.Next() % key_pool];
      auto v = RandomString(&rnd, val_size);

      // Insert data into the true_data map and into the DB
      true_data[k] = v;
      if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
        ASSERT_OK(db_->Merge(WriteOptions(), k, v));
      } else {
        ASSERT_OK(Put(k, v));
      }

      // Pick random keys to be used to test Seek()
      if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
        random_keys.push_back(k);
      }

      // Delete some random keys
      if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
        true_data.erase(k);
        ASSERT_OK(Delete(k));
      }

      if (run_config == TestConfig::FLUSH_EVERY_1000) {
        if (i && i % 1000 == 0) {
          ASSERT_OK(Flush());
        }
      }
    }

    if (run_config == TestConfig::CLOSE_AND_OPEN) {
      Close();
      Reopen(options);
    } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
    }

    ReadOptions ro;
    ro.pin_data = true;
    // unique_ptr so the iterator is released even if an ASSERT below fails.
    std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

    {
      // Test Seek to random keys
      printf("Testing seek on %zu keys\n", random_keys.size());
      std::vector<Slice> keys_slices;
      std::vector<std::string> true_keys;
      for (auto& k : random_keys) {
        iter->Seek(k);
        if (!iter->Valid()) {
          ASSERT_EQ(true_data.lower_bound(k), true_data.end());
          continue;
        }
        std::string prop_value;
        ASSERT_OK(
            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
        ASSERT_EQ("1", prop_value);
        keys_slices.push_back(iter->key());
        true_keys.push_back(true_data.lower_bound(k)->first);
      }

      // The pinned slices must still match after all the seeks above.
      for (size_t i = 0; i < keys_slices.size(); i++) {
        ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
      }
    }

    {
      // Test iterating over all data forward
      printf("Testing iterating forward on all keys\n");
      std::vector<Slice> all_keys;
      for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
        std::string prop_value;
        ASSERT_OK(
            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
        ASSERT_EQ("1", prop_value);
        all_keys.push_back(iter->key());
      }
      ASSERT_EQ(all_keys.size(), true_data.size());

      // Verify that all key slices are still valid
      auto data_iter = true_data.begin();
      for (size_t i = 0; i < all_keys.size(); i++) {
        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
        data_iter++;
      }
    }

    {
      // Test iterating over all data backward
      printf("Testing iterating backward on all keys\n");
      std::vector<Slice> all_keys;
      for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
        std::string prop_value;
        ASSERT_OK(
            iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
        ASSERT_EQ("1", prop_value);
        all_keys.push_back(iter->key());
      }
      ASSERT_EQ(all_keys.size(), true_data.size());

      // Verify that all key slices are still valid (backward)
      auto data_iter = true_data.rbegin();
      for (size_t i = 0; i < all_keys.size(); i++) {
        ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
        data_iter++;
      }
    }
  }
}
7945
7946
#ifndef ROCKSDB_LITE
7947
1
// pin_data iteration must return valid pinned key slices when the data is
// spread across L2 files, L0 files, and the memtable simultaneously.
//
// Fix: the iterator is now held by std::unique_ptr instead of a raw pointer
// with a trailing `delete iter;` — a fatal ASSERT_* failure returns from the
// test function early and would have leaked the iterator. This matches the
// std::unique_ptr<Iterator> usage elsewhere in this file.
TEST_F(DBTest, PinnedDataIteratorMultipleFiles) {
  Options options = CurrentOptions();
  BlockBasedTableOptions table_options;
  table_options.use_delta_encoding = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.disable_auto_compactions = true;
  options.write_buffer_size = 1024 * 1024 * 10;  // 10 Mb
  DestroyAndReopen(options);

  // Reference model of the expected DB contents.
  std::map<std::string, std::string> true_data;

  // Generate 4 sst files in L2
  Random rnd(301);
  for (int i = 1; i <= 1000; i++) {
    std::string k = Key(i * 3);
    std::string v = RandomString(&rnd, 100);
    ASSERT_OK(Put(k, v));
    true_data[k] = v;
    if (i % 250 == 0) {
      ASSERT_OK(Flush());
    }
  }
  ASSERT_EQ(FilesPerLevel(0), "4");
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  ASSERT_EQ(FilesPerLevel(0), "0,4");

  // Generate 4 sst files in L0
  for (int i = 1; i <= 1000; i++) {
    std::string k = Key(i * 2);
    std::string v = RandomString(&rnd, 100);
    ASSERT_OK(Put(k, v));
    true_data[k] = v;
    if (i % 250 == 0) {
      ASSERT_OK(Flush());
    }
  }
  ASSERT_EQ(FilesPerLevel(0), "4,4");

  // Add some keys/values in memtables
  for (int i = 1; i <= 1000; i++) {
    std::string k = Key(i);
    std::string v = RandomString(&rnd, 100);
    ASSERT_OK(Put(k, v));
    true_data[k] = v;
  }
  ASSERT_EQ(FilesPerLevel(0), "4,4");

  ReadOptions ro;
  ro.pin_data = true;
  // unique_ptr so the iterator is released even if an ASSERT below fails.
  std::unique_ptr<Iterator> iter(db_->NewIterator(ro));

  // Collect pinned key slices (and copied values) for the whole DB.
  std::vector<std::pair<Slice, std::string>> results;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    std::string prop_value;
    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
    ASSERT_EQ("1", prop_value);
    results.emplace_back(iter->key(), iter->value().ToString());
  }

  // The pinned slices must still compare equal to the reference data.
  ASSERT_EQ(results.size(), true_data.size());
  auto data_iter = true_data.begin();
  for (size_t i = 0; i < results.size(); i++, data_iter++) {
    auto& kv = results[i];
    ASSERT_EQ(kv.first, data_iter->first);
    ASSERT_EQ(kv.second, data_iter->second);
  }
}
8016
#endif
8017
8018
1
TEST_F(DBTest, PinnedDataIteratorMergeOperator) {
8019
1
  Options options = CurrentOptions();
8020
1
  BlockBasedTableOptions table_options;
8021
1
  table_options.use_delta_encoding = false;
8022
1
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
8023
1
  options.merge_operator = MergeOperators::CreateUInt64AddOperator();
8024
1
  DestroyAndReopen(options);
8025
8026
1
  std::string numbers[7];
8027
8
  for (int val = 0; val <= 6; val++) {
8028
7
    PutFixed64(numbers + val, val);
8029
7
  }
8030
8031
  // +1 all keys in range [ 0 => 999]
8032
1.00k
  for (int i = 0; i < 1000; i++) {
8033
1.00k
    WriteOptions wo;
8034
1.00k
    ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
8035
1.00k
  }
8036
8037
  // +2 all keys divisible by 2 in range [ 0 => 999]
8038
501
  for (int i = 0; i < 1000; i += 2) {
8039
500
    WriteOptions wo;
8040
500
    ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
8041
500
  }
8042
8043
  // +3 all keys divisible by 5 in range [ 0 => 999]
8044
201
  for (int i = 0; i < 1000; i += 5) {
8045
200
    WriteOptions wo;
8046
200
    ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
8047
200
  }
8048
8049
1
  ReadOptions ro;
8050
1
  ro.pin_data = true;
8051
1
  auto iter = db_->NewIterator(ro);
8052
8053
1
  std::vector<std::pair<Slice, std::string>> results;
8054
1.00k
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
8055
1.00k
    std::string prop_value;
8056
1.00k
    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
8057
1.00k
    ASSERT_EQ("1", prop_value);
8058
1.00k
    results.emplace_back(iter->key(), iter->value().ToString());
8059
1.00k
  }
8060
8061
1
  ASSERT_EQ(results.size(), 1000);
8062
1.00k
  for (size_t i = 0; i < results.size(); i++) {
8063
1.00k
    auto& kv = results[i];
8064
1.00k
    ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
8065
1.00k
    int expected_val = 1;
8066
1.00k
    if (i % 2 == 0) {
8067
500
      expected_val += 2;
8068
500
    }
8069
1.00k
    if (i % 5 == 0) {
8070
200
      expected_val += 3;
8071
200
    }
8072
1.00k
    ASSERT_EQ(kv.second, numbers[expected_val]);
8073
1.00k
  }
8074
8075
1
  delete iter;
8076
1
}
8077
8078
1
TEST_F(DBTest, PinnedDataIteratorReadAfterUpdate) {
8079
1
  Options options = CurrentOptions();
8080
1
  BlockBasedTableOptions table_options;
8081
1
  table_options.use_delta_encoding = false;
8082
1
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
8083
1
  options.write_buffer_size = 100000;
8084
1
  DestroyAndReopen(options);
8085
8086
1
  Random rnd(301);
8087
8088
1
  std::map<std::string, std::string> true_data;
8089
1.00k
  for (int i = 0; i < 1000; i++) {
8090
1.00k
    std::string k = RandomString(&rnd, 10);
8091
1.00k
    std::string v = RandomString(&rnd, 1000);
8092
1.00k
    ASSERT_OK(Put(k, v));
8093
1.00k
    true_data[k] = v;
8094
1.00k
  }
8095
8096
1
  ReadOptions ro;
8097
1
  ro.pin_data = true;
8098
1
  auto iter = db_->NewIterator(ro);
8099
8100
  // Delete 50% of the keys and update the other 50%
8101
1.00k
  for (auto& kv : true_data) {
8102
1.00k
    if (rnd.OneIn(2)) {
8103
512
      ASSERT_OK(Delete(kv.first));
8104
488
    } else {
8105
488
      std::string new_val = RandomString(&rnd, 1000);
8106
488
      ASSERT_OK(Put(kv.first, new_val));
8107
488
    }
8108
1.00k
  }
8109
8110
1
  std::vector<std::pair<Slice, std::string>> results;
8111
1.00k
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
8112
1.00k
    std::string prop_value;
8113
1.00k
    ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
8114
1.00k
    ASSERT_EQ("1", prop_value);
8115
1.00k
    results.emplace_back(iter->key(), iter->value().ToString());
8116
1.00k
  }
8117
8118
1
  auto data_iter = true_data.begin();
8119
1.00k
  for (size_t i = 0; i < results.size(); i++, data_iter++) {
8120
1.00k
    auto& kv = results[i];
8121
1.00k
    ASSERT_EQ(kv.first, data_iter->first);
8122
1.00k
    ASSERT_EQ(kv.second, data_iter->second);
8123
1.00k
  }
8124
8125
1
  delete iter;
8126
1
}
8127
8128
INSTANTIATE_TEST_CASE_P(DBTestWithParam, DBTestWithParam,
8129
                        ::testing::Combine(::testing::Values(1, 4),
8130
                                           ::testing::Bool()));
8131
8132
1
TEST_F(DBTest, PauseBackgroundWorkTest) {
8133
1
  Options options;
8134
1
  options.write_buffer_size = 100000;  // Small write buffer
8135
1
  options = CurrentOptions(options);
8136
1
  Reopen(options);
8137
8138
1
  std::vector<std::thread> threads;
8139
1
  std::atomic<bool> done(false);
8140
1
  ASSERT_OK(db_->PauseBackgroundWork());
8141
1
  threads.emplace_back([&]() {
8142
1
    Random rnd(301);
8143
10.0k
    for (int i = 0; i < 10000; ++i) {
8144
10.0k
      ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
8145
10.0k
    }
8146
1
    done.store(true);
8147
1
  });
8148
1
  env_->SleepForMicroseconds(200000);
8149
  // make sure the thread is not done
8150
1
  ASSERT_EQ(false, done.load());
8151
1
  ASSERT_OK(db_->ContinueBackgroundWork());
8152
1
  for (auto& t : threads) {
8153
1
    t.join();
8154
1
  }
8155
  // now it's done
8156
1
  ASSERT_EQ(true, done.load());
8157
1
}
8158
8159
#ifndef ROCKSDB_LITE
8160
namespace {
8161
void ValidateKeyExistence(DB* db, const std::vector<Slice>& keys_must_exist,
8162
11
                          const std::vector<Slice>& keys_must_not_exist) {
8163
  // Ensure that expected keys exist
8164
11
  std::vector<std::string> values;
8165
11
  if (keys_must_exist.size() > 0) {
8166
11
    std::vector<Status> status_list =
8167
11
        db->MultiGet(ReadOptions(), keys_must_exist, &values);
8168
61
    for (size_t i = 0; i < keys_must_exist.size(); i++) {
8169
50
      ASSERT_OK(status_list[i]);
8170
50
    }
8171
11
  }
8172
8173
  // Ensure that given keys don't exist
8174
11
  if (keys_must_not_exist.size() > 0) {
8175
6
    std::vector<Status> status_list =
8176
6
        db->MultiGet(ReadOptions(), keys_must_not_exist, &values);
8177
22
    for (size_t i = 0; i < keys_must_not_exist.size(); i++) {
8178
16
      ASSERT_TRUE(status_list[i].IsNotFound());
8179
16
    }
8180
6
  }
8181
11
}
8182
8183
}  // namespace
8184
8185
// Exercises every WalFilter::WalProcessingOption during WAL recovery and
// verifies which records survive:
//  - kContinueProcessing / kCorruptedRecord: all keys recovered,
//  - kIgnoreCurrentRecord: only the filtered record's keys are dropped,
//  - kStopReplay: the filtered record and all later records are dropped.
TEST_F(DBTest, WalFilterTest) {
  // WAL filter that returns a configurable processing option for exactly one
  // record (by index) and kContinueProcessing for all others.
  class TestWalFilter : public WalFilter {
   private:
    // Processing option that is requested to be applied at the given index
    WalFilter::WalProcessingOption wal_processing_option_;
    // Index at which to apply wal_processing_option_
    // At other indexes default wal_processing_option::kContinueProcessing is
    // returned.
    size_t apply_option_at_record_index_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilter(WalFilter::WalProcessingOption wal_processing_option,
                  size_t apply_option_for_record_index)
        : wal_processing_option_(wal_processing_option),
          apply_option_at_record_index_(apply_option_for_record_index),
          current_record_index_(0) {}

    virtual WalProcessingOption LogRecord(const WriteBatch& batch,
                                          WriteBatch* new_batch,
                                          bool* batch_changed) const override {
      WalFilter::WalProcessingOption option_to_return;

      if (current_record_index_ == apply_option_at_record_index_) {
        option_to_return = wal_processing_option_;
      } else {
        option_to_return = WalProcessingOption::kContinueProcessing;
      }

      // Filter is passed as a const object for RocksDB to not modify the
      // object, however we modify it for our own purpose here and hence
      // cast the constness away.
      (const_cast<TestWalFilter*>(this)->current_record_index_)++;

      return option_to_return;
    }

    const char* Name() const override { return "TestWalFilter"; }
  };

  // Create 3 batches with two keys each
  std::vector<std::vector<std::string>> batch_keys(3);

  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  // Test with all WAL processing options
  for (int option = 0;
       option < static_cast<int>(
                    WalFilter::WalProcessingOption::kWalProcessingOptionMax);
       option++) {
    Options options = OptionsForLogIterTest();
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // Write given keys in given batches; these land in the WAL and are
    // replayed through the filter on the next reopen.
    for (size_t i = 0; i < batch_keys.size(); i++) {
      WriteBatch batch;
      for (size_t j = 0; j < batch_keys[i].size(); j++) {
        batch.Put(handles_[0], batch_keys[i][j], DummyString(1024));
      }
      ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
    }

    WalFilter::WalProcessingOption wal_processing_option =
        static_cast<WalFilter::WalProcessingOption>(option);

    // Create a test filter that would apply wal_processing_option at the
    // record with index apply_option_for_record_index (the second record).
    size_t apply_option_for_record_index = 1;
    TestWalFilter test_wal_filter(wal_processing_option,
                                  apply_option_for_record_index);

    // Reopen database with option to use WAL filter
    options = OptionsForLogIterTest();
    options.wal_filter = &test_wal_filter;
    Status status =
        TryReopenWithColumnFamilies({"default", "pikachu"}, options);
    if (wal_processing_option ==
        WalFilter::WalProcessingOption::kCorruptedRecord) {
      assert(!status.ok());
      // In case of corruption we can turn off paranoid_checks to reopen
      // database
      options.paranoid_checks = false;
      ReopenWithColumnFamilies({"default", "pikachu"}, options);
    } else {
      assert(status.ok());
    }

    // Compute which keys we expect to be found
    // and which we expect not to be found after recovery.
    std::vector<Slice> keys_must_exist;
    std::vector<Slice> keys_must_not_exist;
    switch (wal_processing_option) {
      case WalFilter::WalProcessingOption::kCorruptedRecord:
      case WalFilter::WalProcessingOption::kContinueProcessing: {
        fprintf(stderr, "Testing with complete WAL processing\n");
        // we expect all records to be processed
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            keys_must_exist.push_back(Slice(batch_keys[i][j]));
          }
        }
        break;
      }
      case WalFilter::WalProcessingOption::kIgnoreCurrentRecord: {
        fprintf(stderr,
                "Testing with ignoring record %" ROCKSDB_PRIszt " only\n",
                apply_option_for_record_index);
        // We expect the record with apply_option_for_record_index to be not
        // found.
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            if (i == apply_option_for_record_index) {
              keys_must_not_exist.push_back(Slice(batch_keys[i][j]));
            } else {
              keys_must_exist.push_back(Slice(batch_keys[i][j]));
            }
          }
        }
        break;
      }
      case WalFilter::WalProcessingOption::kStopReplay: {
        fprintf(stderr,
                "Testing with stopping replay from record %" ROCKSDB_PRIszt
                "\n",
                apply_option_for_record_index);
        // We expect records beyond apply_option_for_record_index to be not
        // found.
        for (size_t i = 0; i < batch_keys.size(); i++) {
          for (size_t j = 0; j < batch_keys[i].size(); j++) {
            if (i >= apply_option_for_record_index) {
              keys_must_not_exist.push_back(Slice(batch_keys[i][j]));
            } else {
              keys_must_exist.push_back(Slice(batch_keys[i][j]));
            }
          }
        }
        break;
      }
      default:
        assert(false);  // unhandled case
    }

    bool checked_after_reopen = false;

    // Validate twice: once right after recovery through the filter, and once
    // more after a plain reopen, to make sure the skipped/dropped records do
    // not resurface from the old log(s).
    while (true) {
      // Ensure that expected keys exist
      // and not expected keys don't exist after recovery
      ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);

      if (checked_after_reopen) {
        break;
      }

      // reopen database again to make sure previous log(s) are not used
      // (even if they were skipped)
      // reopen database without the WAL filter this time
      options = OptionsForLogIterTest();
      ReopenWithColumnFamilies({"default", "pikachu"}, options);

      checked_after_reopen = true;
    }
  }
}
8355
8356
1
// Verifies that a WAL filter may rewrite (truncate) batches during recovery:
// from a given record index onward, each replayed batch is replaced by a new
// batch containing only its first num_keys_to_add_in_new_batch keys.
TEST_F(DBTest, WalFilterTestWithChangeBatch) {
  // WriteBatch::Handler that copies at most a fixed number of Put entries
  // from the source batch into a replacement batch.
  class ChangeBatchHandler : public WriteBatch::Handler {
   private:
    // Batch to insert keys in
    WriteBatch* new_write_batch_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Number of keys added to new batch
    size_t num_keys_added_;

   public:
    ChangeBatchHandler(WriteBatch* new_write_batch,
                       size_t num_keys_to_add_in_new_batch)
        : new_write_batch_(new_write_batch),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          num_keys_added_(0) {}
    // Copies the Put through until the per-batch key budget is exhausted;
    // later Puts in the same batch are silently dropped.
    void Put(const Slice& key, const Slice& value) override {
      if (num_keys_added_ < num_keys_to_add_in_new_batch_) {
        new_write_batch_->Put(key, value);
        ++num_keys_added_;
      }
    }
  };

  // WAL filter that, starting at change_records_from_index_, replaces each
  // record with a truncated copy built by ChangeBatchHandler.
  class TestWalFilterWithChangeBatch : public WalFilter {
   private:
    // Index at which to start changing records
    size_t change_records_from_index_;
    // Number of keys to add in the new batch
    size_t num_keys_to_add_in_new_batch_;
    // Current record index, incremented with each record encountered.
    size_t current_record_index_;

   public:
    TestWalFilterWithChangeBatch(size_t change_records_from_index,
                                 size_t num_keys_to_add_in_new_batch)
        : change_records_from_index_(change_records_from_index),
          num_keys_to_add_in_new_batch_(num_keys_to_add_in_new_batch),
          current_record_index_(0) {}

    virtual WalProcessingOption LogRecord(const WriteBatch& batch,
                                          WriteBatch* new_batch,
                                          bool* batch_changed) const override {
      if (current_record_index_ >= change_records_from_index_) {
        ChangeBatchHandler handler(new_batch, num_keys_to_add_in_new_batch_);
        EXPECT_OK(batch.Iterate(&handler));
        *batch_changed = true;
      }

      // Filter is passed as a const object for RocksDB to not modify the
      // object, however we modify it for our own purpose here and hence
      // cast the constness away.
      (const_cast<TestWalFilterWithChangeBatch*>(this)
           ->current_record_index_)++;

      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override {
      return "TestWalFilterWithChangeBatch";
    }
  };

  // Create 3 batches with two keys each.
  std::vector<std::vector<std::string>> batch_keys(3);

  batch_keys[0].push_back("key1");
  batch_keys[0].push_back("key2");
  batch_keys[1].push_back("key3");
  batch_keys[1].push_back("key4");
  batch_keys[2].push_back("key5");
  batch_keys[2].push_back("key6");

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write given keys in given batches
  for (size_t i = 0; i < batch_keys.size(); i++) {
    WriteBatch batch;
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      batch.Put(handles_[0], batch_keys[i][j], DummyString(1024));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Create a test filter that truncates every record starting at
  // change_records_from_index down to num_keys_to_add_in_new_batch keys.
  size_t change_records_from_index = 1;
  size_t num_keys_to_add_in_new_batch = 1;
  TestWalFilterWithChangeBatch test_wal_filter_with_change_batch(
      change_records_from_index, num_keys_to_add_in_new_batch);

  // Reopen database with option to use WAL filter
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_with_change_batch;
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Ensure that all keys exist before change_records_from_index_
  // And after that index only single key exists
  // as our filter adds only single key for each batch
  std::vector<Slice> keys_must_exist;
  std::vector<Slice> keys_must_not_exist;

  for (size_t i = 0; i < batch_keys.size(); i++) {
    for (size_t j = 0; j < batch_keys[i].size(); j++) {
      if (i >= change_records_from_index && j >= num_keys_to_add_in_new_batch) {
        keys_must_not_exist.push_back(Slice(batch_keys[i][j]));
      } else {
        keys_must_exist.push_back(Slice(batch_keys[i][j]));
      }
    }
  }

  bool checked_after_reopen = false;

  // Validate twice: after recovery through the filter and again after a
  // plain reopen, so that dropped keys cannot come back from old log(s).
  while (true) {
    // Ensure that expected keys exist
    // and not expected keys don't exist after recovery
    ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);

    if (checked_after_reopen) {
      break;
    }

    // reopen database again to make sure previous log(s) are not used
    // (even if they were skipped)
    // reopen database without the WAL filter this time
    options = OptionsForLogIterTest();
    ReopenWithColumnFamilies({"default", "pikachu"}, options);

    checked_after_reopen = true;
  }
}
8489
8490
1
// Verifies that a WAL filter which ADDS keys to a batch is rejected during
// recovery (NotSupported), and that the failed open leaves the DB intact.
TEST_F(DBTest, WalFilterTestWithChangeBatchExtraKeys) {
  // WAL filter that copies every record and appends one extra key to it.
  class TestWalFilterWithChangeBatchAddExtraKeys : public WalFilter {
   public:
    virtual WalProcessingOption LogRecord(const WriteBatch& batch,
                                          WriteBatch* new_batch,
                                          bool* batch_changed) const override {
      *new_batch = batch;
      new_batch->Put("key_extra", "value_extra");
      *batch_changed = true;
      return WalProcessingOption::kContinueProcessing;
    }

    const char* Name() const override {
      return "WalFilterTestWithChangeBatchExtraKeys";
    }
  };

  // Three batches with two keys each.
  const std::vector<std::vector<std::string>> batch_keys = {
      {"key1", "key2"}, {"key3", "key4"}, {"key5", "key6"}};

  Options options = OptionsForLogIterTest();
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Write the keys, one WriteBatch per group.
  for (const auto& keys : batch_keys) {
    WriteBatch batch;
    for (const auto& key : keys) {
      batch.Put(handles_[0], key, DummyString(1024));
    }
    ASSERT_OK(dbfull()->Write(WriteOptions(), &batch));
  }

  // Create a test filter that would add extra keys.
  TestWalFilterWithChangeBatchAddExtraKeys test_wal_filter_extra_keys;

  // Reopening with a key-adding filter must fail with NotSupported.
  options = OptionsForLogIterTest();
  options.wal_filter = &test_wal_filter_extra_keys;
  Status status = TryReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_TRUE(status.IsNotSupported());

  // Reopen without filter, now reopen should succeed - previous
  // attempt to open must not have altered the db.
  options = OptionsForLogIterTest();
  ReopenWithColumnFamilies({"default", "pikachu"}, options);

  // Every originally written key must still be present.
  std::vector<Slice> keys_must_exist;
  std::vector<Slice> keys_must_not_exist;  // empty vector
  for (const auto& keys : batch_keys) {
    for (const auto& key : keys) {
      keys_must_exist.push_back(Slice(key));
    }
  }

  ValidateKeyExistence(db_, keys_must_exist, keys_must_not_exist);
}
8554
#endif  // ROCKSDB_LITE
8555
8556
// Test for https://github.com/yugabyte/yugabyte-db/issues/8919.
// Schedules flush after CancelAllBackgroundWork call.
// Runs the scenario with the priority thread pool both disabled and enabled
// for flushes; the test passes if neither iteration crashes or hangs.
TEST_F(DBTest, CancelBackgroundWorkWithFlush) {
  FLAGS_use_priority_thread_pool_for_compactions = true;
  constexpr auto kMaxBackgroundCompactions = 1;
  constexpr auto kWriteBufferSize = 64_KB;
  constexpr auto kValueSize = 2_KB;

  for (const auto use_priority_thread_pool_for_flushes : {false, true}) {
    LOG(INFO) << "use_priority_thread_pool_for_flushes: " << use_priority_thread_pool_for_flushes;
    FLAGS_use_priority_thread_pool_for_flushes = use_priority_thread_pool_for_flushes;

    // Single-worker pool shared by compactions and (optionally) flushes.
    yb::PriorityThreadPool thread_pool(kMaxBackgroundCompactions);
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.priority_thread_pool_for_compactions_and_flushes = &thread_pool;
    options.compression = kNoCompression;
    // Small write buffer so one more write after filling it triggers a flush.
    options.write_buffer_size = kWriteBufferSize;
    options.arena_block_size = kValueSize;
    // Prefix log lines with the current iteration's flag value for debugging.
    options.log_prefix = yb::Format(
        "TEST_use_priority_thread_pool_for_flushes_$0: ", use_priority_thread_pool_for_flushes);
    options.info_log_level = InfoLogLevel::INFO_LEVEL;
    options.info_log = std::make_shared<yb::YBRocksDBLogger>(options.log_prefix);

    DestroyAndReopen(options);

    WriteOptions wo;
    wo.disableWAL = true;

    // Fill the memtable up to (just at) the write buffer size.
    LOG(INFO) << "Writing data...";
    Random rnd(301);
    int key = 0;
    while (key * kValueSize < kWriteBufferSize) {
      ASSERT_OK(Put(Key(++key), RandomString(&rnd, kValueSize), wo));
    }

    db_->SetDisableFlushOnShutdown(true);
    CancelAllBackgroundWork(db_);

    // Write one more key, that should trigger scheduling flush.
    ASSERT_OK(Put(Key(++key), RandomString(&rnd, kValueSize), wo));
    LOG(INFO) << "Writing data - done";

    Close();
  }
}
8602
8603
}  // namespace rocksdb
8604
8605
8606
13.2k
// Test entry point: installs crash diagnostics, then hands argv first to
// gtest (which consumes --gtest_* flags) and then to gflags for the rest.
int main(int argc, char** argv) {
  // Print a stack trace if a test crashes on a fatal signal.
  rocksdb::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  google::ParseCommandLineNonHelpFlags(&argc, &argv, /* remove_flags */ true);
  return RUN_ALL_TESTS();
}