/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/compaction.h
Line | Count | Source |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #ifndef YB_ROCKSDB_DB_COMPACTION_H |
25 | | #define YB_ROCKSDB_DB_COMPACTION_H |
26 | | |
27 | | #pragma once |
28 | | |
29 | | #include <atomic> |
30 | | |
31 | | #include "yb/rocksdb/util/arena.h" |
32 | | #include "yb/rocksdb/util/autovector.h" |
33 | | #include "yb/rocksdb/util/mutable_cf_options.h" |
34 | | #include "yb/rocksdb/db/version_edit.h" |
35 | | |
36 | | namespace yb { |
37 | | |
38 | | class PriorityThreadPoolSuspender; |
39 | | |
40 | | } |
41 | | |
42 | | namespace rocksdb { |
43 | | |
44 | | using yb::Result; |
45 | | |
46 | | // The structure that manages compaction input files associated |
47 | | // with the same physical level. |
48 | | struct CompactionInputFiles { |
49 | | int level; |
50 | | std::vector<FileMetaData*> files; |
51 | 421k | inline bool empty() const { return files.empty(); } |
52 | 732k | inline size_t size() const { return files.size(); } |
53 | 30.2k | inline void clear() { files.clear(); } |
54 | 610k | inline FileMetaData* operator[](size_t i) const { return files[i]; } |
55 | | }; |
56 | | |
57 | | class Version; |
58 | | class ColumnFamilyData; |
59 | | class VersionStorageInfo; |
60 | | class CompactionFilter; |
61 | | |
62 | | struct LightweightBoundaries { |
63 | | Slice key; |
64 | | size_t num_user_values; |
65 | | UserBoundaryTag* user_tags; |
66 | | Slice* user_values; |
67 | | |
68 | 9.80M | Slice user_key() const { return ExtractUserKey(key); } |
69 | | |
70 | | // Allocates appropriate objects in provided arena, so arena should live longer than this object. |
71 | | LightweightBoundaries(Arena* arena, const FileMetaData::BoundaryValues& source); |
72 | | |
73 | 12.9M | const Slice* user_value_with_tag(UserBoundaryTag tag) const { |
74 | 28.4M | for (size_t i = 0; i != num_user_values; ++i) { |
75 | 28.2M | if (user_tags[i] == tag) { |
76 | 12.6M | return user_values + i; |
77 | 12.6M | } |
78 | 28.2M | } |
79 | 265k | return nullptr; |
80 | 12.9M | } |
81 | | }; |
82 | | |
83 | | // A compressed copy of file meta data that just contains smallest and largest key's slice. |
84 | | struct FdWithBoundaries { |
85 | | FileDescriptor fd; |
86 | | LightweightBoundaries smallest; // smallest boundaries in this file |
87 | | LightweightBoundaries largest; // largest boundaries in this file |
88 | | Slice user_filter_data; // Data provided by user that will be passed to iterator replacer. |
89 | | |
90 | | FdWithBoundaries(Arena* arena, const FileMetaData& source); |
91 | | }; |
92 | | |
93 | | // Data structure to store an array of FdWithKeyRange in one level |
94 | | // Actual data is guaranteed to be stored closely |
95 | | struct LevelFilesBrief { |
96 | | size_t num_files; |
97 | | FdWithBoundaries* files; |
98 | | |
99 | 10.8M | LevelFilesBrief() { |
100 | 10.8M | num_files = 0; |
101 | 10.8M | files = nullptr; |
102 | 10.8M | } |
103 | | }; |
104 | | |
105 | | // A Compaction encapsulates information about a compaction. |
106 | | class Compaction { |
107 | | public: |
108 | | static std::unique_ptr<Compaction> Create( |
109 | | VersionStorageInfo* input_version, const MutableCFOptions& mutable_cf_options, |
110 | | std::vector<CompactionInputFiles> inputs, int output_level, uint64_t target_file_size, |
111 | | uint64_t max_grandparent_overlap_bytes, uint32_t output_path_id, CompressionType compression, |
112 | | std::vector<FileMetaData*> grandparents, |
113 | | Logger* info_log, |
114 | | bool manual_compaction = false, double score = -1, |
115 | | bool deletion_compaction = false, |
116 | | CompactionReason compaction_reason = CompactionReason::kUnknown); |
117 | | |
118 | | // No copying allowed |
119 | | Compaction(const Compaction&) = delete; |
120 | | void operator=(const Compaction&) = delete; |
121 | | |
122 | | ~Compaction(); |
123 | | |
124 | | // Returns the level associated to the specified compaction input level. |
125 | | // If compaction_input_level is not specified, then input_level is set to 0. |
126 | 12.4M | int level(size_t compaction_input_level = 0) const { |
127 | 12.4M | return inputs_[compaction_input_level].level; |
128 | 12.4M | } |
129 | | |
130 | 62.7k | int start_level() const { return start_level_; } |
131 | | |
132 | | // Outputs will go to this level |
133 | 231k | int output_level() const { return output_level_; } |
134 | | |
135 | | // Returns the number of input levels in this compaction. |
136 | 663k | size_t num_input_levels() const { return inputs_.size(); } |
137 | | |
138 | | // Return the object that holds the edits to the descriptor done |
139 | | // by this compaction. |
140 | 83.9k | VersionEdit* edit() { return &edit_; } |
141 | | |
142 | | // Returns the number of input files associated to the specified |
143 | | // compaction input level. |
144 | | // The function will return 0 if when "compaction_input_level" < 0 |
145 | | // or "compaction_input_level" >= "num_input_levels()". |
146 | 171k | size_t num_input_files(size_t compaction_input_level) const { |
147 | 171k | if (compaction_input_level < inputs_.size()) { |
148 | 171k | return inputs_[compaction_input_level].size(); |
149 | 171k | } |
150 | 11 | return 0; |
151 | 11 | } |
152 | | |
153 | 15.0k | uint64_t input_version_number() const { return input_version_number_; } |
154 | | |
155 | | // Returns the ColumnFamilyData associated with the compaction. |
156 | 392k | ColumnFamilyData* column_family_data() const { return cfd_; } |
157 | | |
158 | | // Returns the file meta data of the 'i'th input file at the |
159 | | // specified compaction input level. |
160 | | // REQUIREMENT: "compaction_input_level" must be >= 0 and |
161 | | // < "input_levels()" |
162 | 172k | FileMetaData* input(size_t compaction_input_level, size_t i) const { |
163 | 172k | assert(compaction_input_level < inputs_.size()); |
164 | 172k | return inputs_[compaction_input_level][i]; |
165 | 172k | } |
166 | | |
167 | | // Returns the list of file meta data of the specified compaction |
168 | | // input level. |
169 | | // REQUIREMENT: "compaction_input_level" must be >= 0 and |
170 | | // < "input_levels()" |
171 | 124k | const std::vector<FileMetaData*>* inputs(size_t compaction_input_level) { |
172 | 124k | assert(compaction_input_level < inputs_.size()); |
173 | 124k | return &inputs_[compaction_input_level].files; |
174 | 124k | } |
175 | | |
176 | | // Returns the LevelFilesBrief of the specified compaction input level. |
177 | 64.3k | LevelFilesBrief* input_levels(size_t compaction_input_level) { |
178 | 64.3k | return &input_levels_[compaction_input_level]; |
179 | 64.3k | } |
180 | | |
181 | | // Maximum size of files to build during this compaction. |
182 | 33.1M | uint64_t max_output_file_size() const { return max_output_file_size_; } |
183 | | |
184 | | // What compression for output |
185 | 22.1k | CompressionType output_compression() const { return output_compression_; } |
186 | | |
187 | | // Whether need to write output file to second DB path. |
188 | 67.3k | uint32_t output_path_id() const { return output_path_id_; } |
189 | | |
190 | | // Is this a trivial compaction that can be implemented by just |
191 | | // moving a single input file to the next level (no merging or splitting) |
192 | | bool IsTrivialMove() const; |
193 | | |
194 | | // If true, then the compaction can be done by simply deleting input files. |
195 | 22.9k | bool deletion_compaction() const { return deletion_compaction_; } |
196 | | |
197 | | // Add all inputs to this compaction as delete operations to *edit. |
198 | | void AddInputDeletions(VersionEdit* edit); |
199 | | |
200 | | // Returns true if the available information we have guarantees that |
201 | | // the input "user_key" does not exist in any level beyond "output_level()". |
202 | | bool KeyNotExistsBeyondOutputLevel(const Slice& user_key, |
203 | | std::vector<size_t>* level_ptrs) const; |
204 | | |
205 | | // Returns true iff we should stop building the current output |
206 | | // before processing "internal_key". |
207 | | bool ShouldStopBefore(const Slice& internal_key); |
208 | | |
209 | | // Clear all files to indicate that they are not being compacted |
210 | | // Delete this compaction from the list of running compactions. |
211 | | // |
212 | | // Requirement: DB mutex held |
213 | | void ReleaseCompactionFiles(Status status); |
214 | | |
215 | | // Returns the summary of the compaction in "output" with maximum "len" |
216 | | // in bytes. The caller is responsible for the memory management of |
217 | | // "output". |
218 | | void Summary(char* output, int len); |
219 | | |
220 | | // Return the score that was used to pick this compaction run. |
221 | 21.3k | double score() const { return score_; } |
222 | | |
223 | | // Is this compaction creating a file in the bottom most level? |
224 | 21.3k | bool bottommost_level() { return bottommost_level_; } |
225 | | |
226 | | // Does this compaction include all sst files? |
227 | 641 | bool is_full_compaction() { return is_full_compaction_; } |
228 | | |
229 | | // Was this compaction triggered manually by the client? |
230 | 10.7k | bool is_manual_compaction() { return is_manual_compaction_; } |
231 | | |
232 | | // Used when allow_trivial_move option is set in |
233 | | // Universal compaction. If all the input files are |
234 | | // non overlapping, then is_trivial_move_ variable |
235 | | // will be set true, else false |
236 | 603 | void set_is_trivial_move(bool trivial_move) { |
237 | 603 | is_trivial_move_ = trivial_move; |
238 | 603 | } |
239 | | |
240 | | // Used when allow_trivial_move option is set in |
241 | | // Universal compaction. Returns true, if the input files |
242 | | // are non-overlapping and can be trivially moved. |
243 | 2 | bool is_trivial_move() { return is_trivial_move_; } |
244 | | |
245 | | // How many total levels are there? |
246 | 10.7k | int number_levels() const { return number_levels_; } |
247 | | |
248 | | // Return the MutableCFOptions that should be used throughout the compaction |
249 | | // procedure |
250 | 77.9k | const MutableCFOptions* mutable_cf_options() { return &mutable_cf_options_; } |
251 | | |
252 | | // Returns the size in bytes that the output file should be preallocated to. |
253 | | // In level compaction, that is max_file_size_. In universal compaction, that |
254 | | // is the sum of all input file sizes. |
255 | | uint64_t OutputFilePreallocationSize(); |
256 | | |
257 | | void SetInputVersion(Version* input_version); |
258 | | |
259 | | struct InputLevelSummaryBuffer { |
260 | | char buffer[128]; |
261 | | }; |
262 | | |
263 | | const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const; |
264 | | |
265 | | uint64_t CalculateTotalInputSize() const; |
266 | | |
267 | | // In case of compaction error, reset the nextIndex that is used to pick up the next file to be |
268 | | // compacted from files_by_size_. Does nothing for universal compaction, since nextIndex is not |
269 | | // used in this case. |
270 | | void ResetNextCompactionIndex(); |
271 | | |
272 | | // Create a CompactionFilter from compaction_filter_factory |
273 | | std::unique_ptr<CompactionFilter> CreateCompactionFilter() const; |
274 | | |
275 | | // Is the input level corresponding to output_level_ empty? |
276 | | bool IsOutputLevelEmpty() const; |
277 | | |
278 | | // Should this compaction be broken up into smaller ones run in parallel? |
279 | | bool ShouldFormSubcompactions() const; |
280 | | |
281 | | // test function to validate the functionality of IsBottommostLevel() |
282 | | // function -- determines if compaction with inputs and storage is bottommost |
283 | | static bool TEST_IsBottommostLevel( |
284 | | int output_level, VersionStorageInfo* vstorage, |
285 | | const std::vector<CompactionInputFiles>& inputs); |
286 | | |
287 | 641 | TablePropertiesCollection GetOutputTableProperties() const { |
288 | 641 | return output_table_properties_; |
289 | 641 | } |
290 | | |
291 | 10.6k | void SetOutputTableProperties(TablePropertiesCollection tp) { |
292 | 10.6k | output_table_properties_ = std::move(tp); |
293 | 10.6k | } |
294 | | |
295 | 21.8M | Slice GetLargestUserKey() const { return largest_user_key_; } |
296 | | |
297 | 643 | CompactionReason compaction_reason() { return compaction_reason_; } |
298 | | |
299 | 43.9k | yb::PriorityThreadPoolSuspender* suspender() { return suspender_; } |
300 | 439 | void SetSuspender(yb::PriorityThreadPoolSuspender* value) { suspender_ = value; } |
301 | | |
302 | | private: |
303 | | Compaction(VersionStorageInfo* input_version, |
304 | | const MutableCFOptions& mutable_cf_options, |
305 | | std::vector<CompactionInputFiles> inputs, int output_level, |
306 | | uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes, |
307 | | uint32_t output_path_id, CompressionType compression, |
308 | | std::vector<FileMetaData*> grandparents, |
309 | | bool manual_compaction, double score, |
310 | | bool deletion_compaction, |
311 | | CompactionReason compaction_reason); |
312 | | |
313 | | // mark (or clear) all files that are being compacted |
314 | | void MarkFilesBeingCompacted(bool mark_as_compacted); |
315 | | |
316 | | // get the smallest and largest key present in files to be compacted |
317 | | static void GetBoundaryKeys(VersionStorageInfo* vstorage, |
318 | | const std::vector<CompactionInputFiles>& inputs, |
319 | | Slice* smallest_key, Slice* largest_key); |
320 | | |
321 | | // helper function to determine if compaction with inputs and storage is |
322 | | // bottommost |
323 | | static bool IsBottommostLevel( |
324 | | int output_level, VersionStorageInfo* vstorage, |
325 | | const std::vector<CompactionInputFiles>& inputs); |
326 | | |
327 | | static bool IsFullCompaction(VersionStorageInfo* vstorage, |
328 | | const std::vector<CompactionInputFiles>& inputs); |
329 | | |
330 | | const int start_level_; // the lowest level to be compacted |
331 | | const int output_level_; // levels to which output files are stored |
332 | | uint64_t max_output_file_size_ = 0; |
333 | | uint64_t max_grandparent_overlap_bytes_ = 0; |
334 | | MutableCFOptions mutable_cf_options_; |
335 | | Version* input_version_ = nullptr; |
336 | | uint64_t input_version_number_ = 0; |
337 | | bool input_version_level0_non_overlapping_ = false; |
338 | | VersionSet* vset_ = nullptr; |
339 | | VersionEdit edit_; |
340 | | const int number_levels_; |
341 | | ColumnFamilyData* cfd_; |
342 | | Arena arena_; // Arena used to allocate space for file_levels_ |
343 | | |
344 | | const uint32_t output_path_id_; |
345 | | CompressionType output_compression_; |
346 | | // If true, then the comaction can be done by simply deleting input files. |
347 | | const bool deletion_compaction_; |
348 | | |
349 | | // Compaction input files organized by level. Constant after construction |
350 | | const std::vector<CompactionInputFiles> inputs_; |
351 | | |
352 | | // A copy of inputs_, organized more closely in memory |
353 | | autovector<LevelFilesBrief, 2> input_levels_; |
354 | | |
355 | | // State used to check for number of of overlapping grandparent files |
356 | | // (grandparent == "output_level_ + 1") |
357 | | std::vector<FileMetaData*> grandparents_; |
358 | | size_t grandparent_index_; // Index in grandparent_starts_ |
359 | | std::atomic<bool> seen_key_; // Some output key has been seen |
360 | | uint64_t overlapped_bytes_; // Bytes of overlap between current output |
361 | | // and grandparent files |
362 | | const double score_; // score that was used to pick this compaction. |
363 | | |
364 | | // Is this compaction creating a file in the bottom most level? |
365 | | const bool bottommost_level_; |
366 | | // Does this compaction include all sst files? |
367 | | const bool is_full_compaction_; |
368 | | |
369 | | // Is this compaction requested by the client? |
370 | | const bool is_manual_compaction_; |
371 | | |
372 | | // True if we can do trivial move in Universal multi level |
373 | | // compaction |
374 | | bool is_trivial_move_ = false; |
375 | | |
376 | | bool IsCompactionStyleUniversal() const; |
377 | | |
378 | | // Does input compression match the output compression? |
379 | | bool InputCompressionMatchesOutput() const; |
380 | | |
381 | | // table properties of output files |
382 | | TablePropertiesCollection output_table_properties_; |
383 | | |
384 | | // largest user keys in compaction |
385 | | Slice largest_user_key_; |
386 | | |
387 | | // Reason for compaction |
388 | | CompactionReason compaction_reason_; |
389 | | |
390 | | yb::PriorityThreadPoolSuspender* suspender_ = nullptr; |
391 | | }; |
392 | | |
393 | | // Utility function |
394 | | extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files); |
395 | | |
396 | | } // namespace rocksdb |
397 | | |
398 | | #endif // YB_ROCKSDB_DB_COMPACTION_H |