/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/version_edit.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #ifndef YB_ROCKSDB_DB_VERSION_EDIT_H |
25 | | #define YB_ROCKSDB_DB_VERSION_EDIT_H |
26 | | |
27 | | #include <stddef.h> |
28 | | #include <stdint.h> |
29 | | #include <stdio.h> |
30 | | #include <string.h> |
31 | | |
32 | | #include <algorithm> |
33 | | #include <limits> |
34 | | #include <memory> |
35 | | #include <set> |
36 | | #include <stack> |
37 | | #include <string> |
38 | | #include <unordered_map> |
39 | | #include <utility> |
40 | | #include <vector> |
41 | | |
42 | | #include <boost/optional.hpp> |
43 | | |
44 | | #include "yb/rocksdb/cache.h" |
45 | | #include "yb/rocksdb/db/dbformat.h" |
46 | | #include "yb/rocksdb/listener.h" |
47 | | #include "yb/rocksdb/options.h" |
48 | | #include "yb/rocksdb/status.h" |
49 | | #include "yb/rocksdb/types.h" |
50 | | |
51 | | namespace rocksdb { |
52 | | |
53 | | class TableCache; |
54 | | class VersionSet; |
55 | | class VersionEditPB; |
56 | | |
57 | | const uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF; |
58 | | |
59 | | extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id); |
60 | | |
61 | | // A copyable structure contains information needed to read data from an SST |
62 | | // file. It can contain a pointer to a table reader opened for the file, or |
63 | | // file number and size, which can be used to create a new table reader for it. |
64 | | // The behavior is undefined when a copy of the structure is used when the |
65 | | // file is not in any live version any more. |
66 | | // SST can be either one file containing both meta data and data or it can be split into |
67 | | // multiple files: one metadata file and number of data files (S-Blocks aka storage-blocks). |
68 | | // As of 2017-03-10 there is at most one data file. |
69 | | // Base file is a file which contains SST metadata. So, if SST is either one base file, or |
70 | | // in case SST is split into multiple files, base file is a metadata file. |
71 | | struct FileDescriptor { |
72 | | // Table reader in table_reader_handle |
73 | | TableReader* table_reader; |
74 | | uint64_t packed_number_and_path_id; |
75 | | uint64_t total_file_size; // total file(s) size in bytes |
76 | | uint64_t base_file_size; // base file size in bytes |
77 | | |
78 | 210k | FileDescriptor() : FileDescriptor(0, 0, 0, 0) {} Unexecuted instantiation: _ZN7rocksdb14FileDescriptorC2Ev _ZN7rocksdb14FileDescriptorC1Ev Line | Count | Source | 78 | 210k | FileDescriptor() : FileDescriptor(0, 0, 0, 0) {} |
|
79 | | |
80 | | FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _total_file_size, |
81 | | uint64_t _base_file_size) |
82 | | : table_reader(nullptr), |
83 | | packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)), |
84 | | total_file_size(_total_file_size), |
85 | 337k | base_file_size(_base_file_size) {} |
86 | | |
87 | 20.3M | uint64_t GetNumber() const { |
88 | 20.3M | return packed_number_and_path_id & kFileNumberMask; |
89 | 20.3M | } |
90 | 616k | uint32_t GetPathId() const { |
91 | 616k | return static_cast<uint32_t>( |
92 | 616k | packed_number_and_path_id / (kFileNumberMask + 1)); |
93 | 616k | } |
94 | 2.77M | uint64_t GetTotalFileSize() const { return total_file_size; } |
95 | 362k | uint64_t GetBaseFileSize() const { return base_file_size; } |
96 | | |
97 | | std::string ToString() const; |
98 | | }; |
99 | | |
100 | | YB_DEFINE_ENUM(UpdateBoundariesType, (kAll)(kSmallest)(kLargest)); |
101 | | |
102 | | struct FileMetaData { |
103 | | typedef FileBoundaryValues<InternalKey> BoundaryValues; |
104 | | |
105 | | int refs; |
106 | | FileDescriptor fd; |
107 | | bool being_compacted; // Is this file undergoing compaction? |
108 | | bool being_deleted = false; // Updated by DB::DeleteFile |
109 | | BoundaryValues smallest; // The smallest values in this file |
110 | | BoundaryValues largest; // The largest values in this file |
111 | | bool imported = false; // Was this file imported from another DB. |
112 | | |
113 | | // Needs to be disposed when refs becomes 0. |
114 | | Cache::Handle* table_reader_handle; |
115 | | |
116 | | // Stats for compensating deletion entries during compaction |
117 | | |
118 | | // File size compensated by deletion entry. |
119 | | // This is updated in Version::UpdateAccumulatedStats() first time when the |
120 | | // file is created or loaded. After it is updated (!= 0), it is immutable. |
121 | | uint64_t compensated_file_size; |
122 | | // These values can mutate, but they can only be read or written from |
123 | | // single-threaded LogAndApply thread |
124 | | uint64_t num_entries; // the number of entries. |
125 | | uint64_t num_deletions; // the number of deletion entries. |
126 | | uint64_t raw_key_size; // total uncompressed key size. |
127 | | uint64_t raw_value_size; // total uncompressed value size. |
128 | | bool init_stats_from_file; // true if the data-entry stats of this file |
129 | | // has initialized from file. |
130 | | |
131 | | bool marked_for_compaction; // True if client asked us nicely to compact this |
132 | | // file. |
133 | | |
134 | | bool delete_after_compaction = false; // True if file has been marked for |
135 | | // direct deletion. |
136 | | |
137 | | FileMetaData(); |
138 | | |
139 | | // REQUIRED: Keys must be given to the function in sorted order (it expects |
140 | | // the last key to be the largest). |
141 | | void UpdateBoundaries(InternalKey key, const FileBoundaryValuesBase& source); |
142 | | |
143 | | // Update all boundaries except key. |
144 | | void UpdateBoundariesExceptKey(const FileBoundaryValuesBase& source, UpdateBoundariesType type); |
145 | | |
146 | | bool Unref(TableCache* table_cache); |
147 | | |
148 | | Slice UserFilter() const; // Extracts user filter from largest boundary value if present. |
149 | | |
150 | | // Outputs smallest and largest user frontiers to string, if they exist. |
151 | | std::string FrontiersToString() const; |
152 | | |
153 | | std::string ToString() const; |
154 | | }; |
155 | | |
156 | | class VersionEdit { |
157 | | public: |
158 | 2.20M | VersionEdit() { Clear(); } |
159 | 2.20M | ~VersionEdit() { } |
160 | | |
161 | | void Clear(); |
162 | | |
163 | 260k | void SetComparatorName(const Slice& name) { |
164 | 260k | comparator_ = name.ToString(); |
165 | 260k | } |
166 | 295k | void SetLogNumber(uint64_t num) { |
167 | 295k | log_number_ = num; |
168 | 295k | } |
169 | 307k | void SetPrevLogNumber(uint64_t num) { |
170 | 307k | prev_log_number_ = num; |
171 | 307k | } |
172 | 563k | void SetNextFile(uint64_t num) { |
173 | 563k | next_file_number_ = num; |
174 | 563k | } |
175 | 642k | void SetLastSequence(SequenceNumber seq) { |
176 | 642k | last_sequence_ = seq; |
177 | 642k | } |
178 | | void UpdateFlushedFrontier(UserFrontierPtr value); |
179 | | void ModifyFlushedFrontier(UserFrontierPtr value, FrontierModificationMode mode); |
180 | 4.26k | void SetMaxColumnFamily(uint32_t max_column_family) { |
181 | 4.26k | max_column_family_ = max_column_family; |
182 | 4.26k | } |
183 | | |
184 | | void InitNewDB(); |
185 | | |
186 | | // Add the specified file at the specified number. |
187 | | // REQUIRES: This version has not been saved (see VersionSet::SaveTo) |
188 | | // REQUIRES: "smallest" and "largest" are smallest and largest keys in file |
189 | | void AddTestFile(int level, |
190 | | const FileDescriptor& fd, |
191 | | const FileMetaData::BoundaryValues& smallest, |
192 | | const FileMetaData::BoundaryValues& largest, |
193 | 69 | bool marked_for_compaction) { |
194 | 69 | DCHECK_LE(smallest.seqno, largest.seqno); |
195 | 69 | FileMetaData f; |
196 | 69 | f.fd = fd; |
197 | 69 | f.fd.table_reader = nullptr; |
198 | 69 | f.smallest = smallest; |
199 | 69 | f.largest = largest; |
200 | 69 | f.marked_for_compaction = marked_for_compaction; |
201 | 69 | new_files_.emplace_back(level, f); |
202 | 69 | } |
203 | | |
204 | 21.8k | void AddFile(int level, const FileMetaData& f) { |
205 | 21.8k | DCHECK_LE(f.smallest.seqno, f.largest.seqno); |
206 | 21.8k | new_files_.emplace_back(level, f); |
207 | 21.8k | } |
208 | | |
209 | 73.5k | void AddCleanedFile(int level, const FileMetaData& f) { |
210 | 73.5k | DCHECK_LE(f.smallest.seqno, f.largest.seqno); |
211 | 73.5k | FileMetaData nf; |
212 | 73.5k | nf.fd = f.fd; |
213 | 73.5k | nf.fd.table_reader = nullptr; |
214 | 73.5k | nf.smallest = f.smallest; |
215 | 73.5k | nf.largest = f.largest; |
216 | 73.5k | nf.marked_for_compaction = f.marked_for_compaction; |
217 | 73.5k | nf.imported = f.imported; |
218 | 73.5k | new_files_.emplace_back(level, std::move(nf)); |
219 | 73.5k | } |
220 | | |
221 | | // Delete the specified "file" from the specified "level". |
222 | 60.7k | void DeleteFile(int level, uint64_t file) { |
223 | 60.7k | deleted_files_.insert({level, file}); |
224 | 60.7k | } |
225 | | |
226 | | // Number of edits |
227 | 348k | size_t NumEntries() { return new_files_.size() + deleted_files_.size(); } |
228 | | |
229 | 1.54M | bool IsColumnFamilyAdd() { |
230 | 1.53M | return column_family_name_ ? true : false; |
231 | 1.54M | } |
232 | | |
233 | 1.54M | bool IsColumnFamilyManipulation() { |
234 | 1.54M | return IsColumnFamilyAdd() || is_column_family_drop_; |
235 | 1.54M | } |
236 | | |
237 | 671k | void SetColumnFamily(uint32_t column_family_id) { |
238 | 671k | column_family_ = column_family_id; |
239 | 671k | } |
240 | | |
241 | | // set column family ID by calling SetColumnFamily() |
242 | 348k | void AddColumnFamily(const std::string& name) { |
243 | 348k | DCHECK(!is_column_family_drop_); |
244 | 348k | DCHECK(!column_family_name_); |
245 | 348k | DCHECK_EQ(NumEntries(), 0); |
246 | 348k | column_family_name_ = name; |
247 | 348k | } |
248 | | |
249 | | // set column family ID by calling SetColumnFamily() |
250 | 27 | void DropColumnFamily() { |
251 | 27 | DCHECK(!is_column_family_drop_); |
252 | 27 | DCHECK(!column_family_name_); |
253 | 27 | DCHECK_EQ(NumEntries(), 0); |
254 | 27 | is_column_family_drop_ = true; |
255 | 27 | } |
256 | | |
257 | | // return true on success. |
258 | | bool AppendEncodedTo(std::string* dst) const; |
259 | | Status DecodeFrom(BoundaryValuesExtractor* extractor, const Slice& src); |
260 | | |
261 | | typedef std::set<std::pair<int, uint64_t>> DeletedFileSet; |
262 | | |
263 | 680k | const DeletedFileSet& GetDeletedFiles() { return deleted_files_; } |
264 | 681k | const std::vector<std::pair<int, FileMetaData>>& GetNewFiles() { |
265 | 681k | return new_files_; |
266 | 681k | } |
267 | | |
268 | | std::string DebugString(bool hex_key = false) const; |
269 | | |
270 | 0 | std::string ToString() const { |
271 | 0 | return DebugString(); |
272 | 0 | } |
273 | | |
274 | | private: |
275 | | friend class VersionSet; |
276 | | friend class Version; |
277 | | |
278 | | bool EncodeTo(VersionEditPB* out) const; |
279 | | |
280 | | int max_level_; |
281 | | boost::optional<std::string> comparator_; |
282 | | boost::optional<uint64_t> log_number_; |
283 | | boost::optional<uint64_t> prev_log_number_; |
284 | | boost::optional<uint64_t> next_file_number_; |
285 | | boost::optional<uint32_t> max_column_family_; |
286 | | boost::optional<SequenceNumber> last_sequence_; |
287 | | UserFrontierPtr flushed_frontier_; |
288 | | |
289 | | // Used when we're resetting the flushed frontier to a potentially lower value. This is needed |
290 | | // when restoring from a backup into a new Raft group with an unrelated sequence of OpIds. |
291 | | bool force_flushed_frontier_ = false; |
292 | | |
293 | | DeletedFileSet deleted_files_; |
294 | | std::vector<std::pair<int, FileMetaData>> new_files_; |
295 | | |
296 | | // Each version edit record should have column_family_id set |
297 | | // If it's not set, it is default (0) |
298 | | uint32_t column_family_; |
299 | | // a version edit can be either column_family add or |
300 | | // column_family drop. If it's column family add, |
301 | | // it also includes column family name. |
302 | | bool is_column_family_drop_; |
303 | | boost::optional<std::string> column_family_name_; |
304 | | }; |
305 | | |
306 | | } // namespace rocksdb |
307 | | |
308 | | #endif // YB_ROCKSDB_DB_VERSION_EDIT_H |