YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/options_builder.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
21
#include <cmath>
22
23
#include "yb/rocksdb/options.h"
24
25
namespace rocksdb {
26
27
namespace {
28
29
// For now, always use 10 as the level bytes multiplier.
30
const int kBytesForLevelMultiplier = 10;
31
const size_t kBytesForOneMb = 1024 * 1024;
32
33
// Pick compaction style
34
CompactionStyle PickCompactionStyle(size_t write_buffer_size,
35
                                    int read_amp_threshold,
36
                                    int write_amp_threshold,
37
5
                                    uint64_t target_db_size) {
38
5
#ifndef ROCKSDB_LITE
39
  // Estimate read amplification and write amplification of two compaction
40
  // styles. If there is hard limit to force a choice, make the choice.
41
  // Otherwise, calculate a score based on threshold and expected value of
42
  // two styles, weighing reads 4X important than writes.
43
5
  int expected_levels = static_cast<int>(ceil(
44
5
      std::log(target_db_size / write_buffer_size) / std::log(kBytesForLevelMultiplier)));
45
46
5
  int expected_max_files_universal =
47
5
      static_cast<int>(ceil(log2(target_db_size / write_buffer_size)));
48
49
5
  const int kEstimatedLevel0FilesInLevelStyle = 2;
50
  // Estimate write amplification:
51
  // (1) 1 for every L0 file
52
  // (2) 2 for L1
53
  // (3) kBytesForLevelMultiplier for the last level. It's really hard to
54
  //     predict.
55
  // (3) kBytesForLevelMultiplier for other levels.
56
5
  int expected_write_amp_level = kEstimatedLevel0FilesInLevelStyle + 2
57
5
      + (expected_levels - 2) * kBytesForLevelMultiplier
58
5
      + kBytesForLevelMultiplier;
59
5
  int expected_read_amp_level =
60
5
      kEstimatedLevel0FilesInLevelStyle + expected_levels;
61
62
5
  int max_read_amp_uni = expected_max_files_universal;
63
5
  if (read_amp_threshold <= max_read_amp_uni) {
64
2
    return kCompactionStyleLevel;
65
3
  } else if (write_amp_threshold <= expected_write_amp_level) {
66
1
    return kCompactionStyleUniversal;
67
1
  }
68
69
2
  const double kReadWriteWeight = 4;
70
71
2
  double level_ratio =
72
2
      static_cast<double>(read_amp_threshold) / expected_read_amp_level *
73
2
          kReadWriteWeight +
74
2
      static_cast<double>(write_amp_threshold) / expected_write_amp_level;
75
76
2
  int expected_write_amp_uni = expected_max_files_universal / 2 + 2;
77
2
  int expected_read_amp_uni = expected_max_files_universal / 2 + 1;
78
79
2
  double uni_ratio =
80
2
      static_cast<double>(read_amp_threshold) / expected_read_amp_uni *
81
2
          kReadWriteWeight +
82
2
      static_cast<double>(write_amp_threshold) / expected_write_amp_uni;
83
84
2
  if (level_ratio > uni_ratio) {
85
0
    return kCompactionStyleLevel;
86
2
  } else {
87
2
    return kCompactionStyleUniversal;
88
2
  }
89
#else
90
  return kCompactionStyleLevel;
91
#endif  // !ROCKSDB_LITE
92
2
}
93
94
// Pick mem table size
95
5
void PickWriteBufferSize(size_t total_write_buffer_limit, Options* options) {
96
5
  const size_t kMaxWriteBufferSize = 128 * kBytesForOneMb;
97
5
  const size_t kMinWriteBufferSize = 4 * kBytesForOneMb;
98
99
  // Try to pick up a buffer size between 4MB and 128MB.
100
  // And try to pick 4 as the total number of write buffers.
101
5
  size_t write_buffer_size = total_write_buffer_limit / 4;
102
5
  if (write_buffer_size > kMaxWriteBufferSize) {
103
1
    write_buffer_size = kMaxWriteBufferSize;
104
4
  } else if (write_buffer_size < kMinWriteBufferSize) {
105
0
    write_buffer_size = std::min(static_cast<size_t>(kMinWriteBufferSize),
106
0
                                 total_write_buffer_limit / 2);
107
0
  }
108
109
  // Truncate to multiple of 1MB.
110
5
  if (write_buffer_size % kBytesForOneMb != 0) {
111
0
    write_buffer_size =
112
0
        (write_buffer_size / kBytesForOneMb + 1) * kBytesForOneMb;
113
0
  }
114
115
5
  options->write_buffer_size = write_buffer_size;
116
5
  options->max_write_buffer_number =
117
5
      static_cast<int>(total_write_buffer_limit / write_buffer_size);
118
5
  options->min_write_buffer_number_to_merge = 1;
119
5
}
120
121
#ifndef ROCKSDB_LITE
122
3
void OptimizeForUniversal(Options* options) {
123
3
  options->level0_file_num_compaction_trigger = 2;
124
3
  options->level0_slowdown_writes_trigger = 30;
125
3
  options->level0_stop_writes_trigger = 40;
126
3
  options->max_open_files = -1;
127
3
}
128
#endif
129
130
// Optimize parameters for level-based compaction
131
void OptimizeForLevel(int read_amplification_threshold,
132
                      int write_amplification_threshold,
133
2
                      uint64_t target_db_size, Options* options) {
134
2
  int expected_levels_one_level0_file =
135
2
      static_cast<int>(ceil(std::log(target_db_size / options->write_buffer_size) /
136
2
                            std::log(kBytesForLevelMultiplier)));
137
138
2
  int level0_stop_writes_trigger =
139
2
      read_amplification_threshold - expected_levels_one_level0_file;
140
141
2
  const size_t kInitialLevel0TotalSize = 128 * kBytesForOneMb;
142
2
  const int kMaxFileNumCompactionTrigger = 4;
143
2
  const int kMinLevel0StopTrigger = 3;
144
145
2
  int file_num_buffer = static_cast<int>(
146
2
      kInitialLevel0TotalSize / options->write_buffer_size + 1);
147
148
2
  if (level0_stop_writes_trigger > file_num_buffer) {
149
    // Have sufficient room for multiple level 0 files
150
    // Try enlarge the buffer up to 1GB
151
152
    // Try to enlarge the buffer up to 1GB, if still have sufficient headroom.
153
0
    file_num_buffer *=
154
0
        1 << std::max(0, std::min(3, level0_stop_writes_trigger -
155
0
                                       file_num_buffer - 2));
156
157
0
    options->level0_stop_writes_trigger = level0_stop_writes_trigger;
158
0
    options->level0_slowdown_writes_trigger = level0_stop_writes_trigger - 2;
159
0
    options->level0_file_num_compaction_trigger =
160
0
        std::min(kMaxFileNumCompactionTrigger, file_num_buffer / 2);
161
2
  } else {
162
2
    options->level0_stop_writes_trigger =
163
2
        std::max(kMinLevel0StopTrigger, file_num_buffer);
164
2
    options->level0_slowdown_writes_trigger =
165
2
        options->level0_stop_writes_trigger - 1;
166
2
    options->level0_file_num_compaction_trigger = 1;
167
2
  }
168
169
  // This doesn't consider compaction and overheads of mem tables. But usually
170
  // it is in the same order of magnitude.
171
2
  size_t expected_level0_compaction_size =
172
2
      options->level0_file_num_compaction_trigger * options->write_buffer_size;
173
  // Enlarge level1 target file size if level0 compaction size is larger.
174
2
  uint64_t max_bytes_for_level_base = 10 * kBytesForOneMb;
175
2
  if (expected_level0_compaction_size > max_bytes_for_level_base) {
176
2
    max_bytes_for_level_base = expected_level0_compaction_size;
177
2
  }
178
2
  options->max_bytes_for_level_base = max_bytes_for_level_base;
179
  // Now always set level multiplier to be 10
180
2
  options->max_bytes_for_level_multiplier = kBytesForLevelMultiplier;
181
182
2
  const uint64_t kMinFileSize = 2 * kBytesForOneMb;
183
  // Allow at least 3-way parallelism for compaction between level 1 and 2.
184
2
  uint64_t max_file_size = max_bytes_for_level_base / 3;
185
2
  if (max_file_size < kMinFileSize) {
186
0
    options->target_file_size_base = kMinFileSize;
187
2
  } else {
188
2
    if (max_file_size % kBytesForOneMb != 0) {
189
2
      max_file_size = (max_file_size / kBytesForOneMb + 1) * kBytesForOneMb;
190
2
    }
191
2
    options->target_file_size_base = max_file_size;
192
2
  }
193
194
  // TODO: consider to tune num_levels too.
195
2
}
196
197
}  // namespace
198
199
// Build an Options object tuned to the given mem-table budget, amplification
// thresholds and expected database size.
//
// total_write_buffer_limit: total bytes allowed across all write buffers.
// read_amplification_threshold / write_amplification_threshold: budgets used
//     to choose between level and universal compaction and to tune it.
// target_db_size: expected total database size in bytes.
Options GetOptions(size_t total_write_buffer_limit,
                   int read_amplification_threshold,
                   int write_amplification_threshold, uint64_t target_db_size) {
  Options options;
  // Size the mem tables first: the chosen write buffer size feeds into the
  // compaction-style decision and the level tuning below.
  PickWriteBufferSize(total_write_buffer_limit, &options);
  size_t write_buffer_size = options.write_buffer_size;
  options.compaction_style =
      PickCompactionStyle(write_buffer_size, read_amplification_threshold,
                          write_amplification_threshold, target_db_size);
#ifndef ROCKSDB_LITE
  if (options.compaction_style == kCompactionStyleUniversal) {
    OptimizeForUniversal(&options);
  } else {
#else
  // ROCKSDB_LITE builds only support level compaction.
  {
#endif  // !ROCKSDB_LITE
    OptimizeForLevel(read_amplification_threshold,
                     write_amplification_threshold, target_db_size, &options);
  }
  return options;
}
220
221
}  // namespace rocksdb