/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/format.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #ifndef YB_ROCKSDB_TABLE_FORMAT_H |
25 | | #define YB_ROCKSDB_TABLE_FORMAT_H |
26 | | |
27 | | #include <stdint.h> |
28 | | #include <string> |
29 | | #include "yb/util/slice.h" |
30 | | #include "yb/rocksdb/status.h" |
31 | | #include "yb/rocksdb/options.h" |
32 | | #include "yb/rocksdb/table.h" |
33 | | |
34 | | namespace yb { |
35 | | |
36 | | class MemTracker; |
37 | | |
38 | | } |
39 | | |
40 | | namespace rocksdb { |
41 | | |
42 | | class Block; |
43 | | struct ReadOptions; |
44 | | |
45 | | // the length of the magic number in bytes. |
46 | | const int kMagicNumberLengthByte = 8; |
47 | | |
48 | | // Even though we use kKeyDeltaEncodingSharedPrefix format for index blocks, by default every key in |
49 | | // the index will still have zero shared prefix length and will be stored fully, because |
50 | | // index_block_restart_interval default value is 1 (see BlockBasedTableOptions). |
51 | | constexpr auto kIndexBlockKeyValueEncodingFormat = |
52 | | KeyValueEncodingFormat::kKeyDeltaEncodingSharedPrefix; |
53 | | |
54 | | // BlockHandle is a pointer to the extent of a file that stores a data |
55 | | // block or a meta block. |
56 | | class BlockHandle { |
57 | | constexpr static uint64_t kUint64FieldNotSet = ~static_cast<uint64_t>(0); |
58 | | |
59 | | public: |
60 | | BlockHandle(); |
61 | | BlockHandle(uint64_t offset, uint64_t size); |
62 | | |
63 | | // The offset of the block in the file. |
64 | 81.2M | uint64_t offset() const { return offset_; } |
65 | 3.30M | void set_offset(uint64_t _offset) { offset_ = _offset; } |
66 | | |
67 | | // The size of the stored block |
68 | 12.8M | uint64_t size() const { return size_; } |
69 | 3.30M | void set_size(uint64_t _size) { size_ = _size; } |
70 | | |
71 | | void AppendEncodedTo(std::string* dst) const; |
72 | | Status DecodeFrom(Slice* input); |
73 | | |
74 | | // Return a string that contains the copy of handle. |
75 | | std::string ToString(bool hex = true) const; |
76 | | |
77 | | std::string ToDebugString() const; |
78 | | |
79 | | // if the block handle's offset and size are both "0", we will view it |
80 | | // as a null block handle that points nowhere. |
81 | 13.1M | bool IsNull() const { |
82 | 13.1M | return offset_ == 0 && size_ == 01.41M ; |
83 | 13.1M | } |
84 | | |
85 | 64.0k | bool IsSet() const { |
86 | 64.0k | return offset_ != kUint64FieldNotSet && size_ != kUint64FieldNotSet64.0k ; |
87 | 64.0k | } |
88 | | |
89 | 2.05k | static const BlockHandle& NullBlockHandle() { |
90 | 2.05k | return kNullBlockHandle; |
91 | 2.05k | } |
92 | | |
93 | | // Maximum encoding length of a BlockHandle |
94 | | enum { kMaxEncodedLength = 10 + 10 }; |
95 | | |
96 | | private: |
97 | | uint64_t offset_ = 0; |
98 | | uint64_t size_ = 0; |
99 | | |
100 | | static const BlockHandle kNullBlockHandle; |
101 | | }; |
102 | | |
103 | | inline uint32_t GetCompressFormatForVersion(CompressionType compression_type, |
104 | 99.7k | uint32_t version) { |
105 | | // snappy is not versioned |
106 | 99.7k | assert(compression_type != kSnappyCompression && |
107 | 99.7k | compression_type != kNoCompression); |
108 | | // As of version 2, we encode compressed block with |
109 | | // compress_format_version == 2. Before that, the version is 1. |
110 | | // DO NOT CHANGE THIS FUNCTION, it affects disk format |
111 | 99.7k | return version >= 2 ? 251.9k : 147.8k ; |
112 | 99.7k | } |
113 | | |
114 | 515k | inline bool BlockBasedTableSupportedVersion(uint32_t version) { |
115 | 515k | return version <= 2; |
116 | 515k | } |
117 | | |
118 | | // Footer encapsulates the fixed information stored at the tail |
119 | | // end of every table file. |
120 | | class Footer { |
121 | | public: |
122 | | // Constructs a footer without specifying its table magic number. |
123 | | // In such case, the table magic number of such footer should be |
124 | | // initialized via @ReadFooterFromFile(). |
125 | | // Use this when you plan to load Footer with DecodeFrom(). Never use this |
126 | | // when you plan to AppendEncodedTo. |
127 | 315k | Footer() : Footer(kInvalidTableMagicNumber, /* version= */ 0) {} rocksdb::Footer::Footer() Line | Count | Source | 127 | 315k | Footer() : Footer(kInvalidTableMagicNumber, /* version= */ 0) {} |
Unexecuted instantiation: rocksdb::Footer::Footer() |
128 | | |
129 | | // Use this constructor when you plan to write out the footer using |
130 | | // AppendEncodedTo(). Never use this constructor with DecodeFrom(). |
131 | | Footer(uint64_t table_magic_number, uint32_t version); |
132 | | |
133 | | // The version of the footer in this file |
134 | 1.96M | uint32_t version() const { return version_; } |
135 | | |
136 | | // The checksum type used in this file |
137 | 4.17M | ChecksumType checksum() const { return checksum_; } |
138 | 65.5k | void set_checksum(const ChecksumType c) { checksum_ = c; } |
139 | | |
140 | | // The block handle for the metaindex block of the table |
141 | 120k | const BlockHandle& metaindex_handle() const { return metaindex_handle_; } |
142 | 67.5k | void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } |
143 | | |
144 | | // The block handle for the index block of the table |
145 | 14.8M | const BlockHandle& index_handle() const { return data_index_handle_; } |
146 | | |
147 | 67.5k | void set_index_handle(const BlockHandle& h) { data_index_handle_ = h; } |
148 | | |
149 | 290k | uint64_t table_magic_number() const { return table_magic_number_; } |
150 | | |
151 | | void AppendEncodedTo(std::string* dst) const; |
152 | | |
153 | | // Set the current footer based on the input slice. |
154 | | // |
155 | | // REQUIRES: table_magic_number_ is not set (i.e., |
156 | | // HasInitializedTableMagicNumber() is false). The function will initialize the |
157 | | // magic number. |
158 | | Status DecodeFrom(Slice* input); |
159 | | |
160 | | // Encoded length of a Footer. Note that the serialization of a Footer will |
161 | | // always occupy at least kMinEncodedLength bytes. If fields are changed |
162 | | // the version number should be incremented and kMaxEncodedLength should be |
163 | | // increased accordingly. |
164 | | enum { |
165 | | // Footer version 0 (legacy) will always occupy exactly this many bytes. |
166 | | // It consists of two block handles, padding, and a magic number. |
167 | | kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8, |
168 | | // Footer of versions 1 and higher will always occupy exactly this many |
169 | | // bytes. It consists of the checksum type, two block handles, padding, |
170 | | // a version number (1 or greater), and a magic number |
171 | | kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8, |
172 | | kMinEncodedLength = kVersion0EncodedLength, |
173 | | kMaxEncodedLength = kNewVersionsEncodedLength, |
174 | | }; |
175 | | |
176 | | static const uint64_t kInvalidTableMagicNumber = 0; |
177 | | |
178 | | // convert this object to a human readable form |
179 | | std::string ToString() const; |
180 | | |
181 | | private: |
182 | | // REQUIRES: magic number wasn't initialized. |
183 | 118k | void set_table_magic_number(uint64_t magic_number) { |
184 | 118k | assert(!HasInitializedTableMagicNumber()); |
185 | 0 | table_magic_number_ = magic_number; |
186 | 118k | } |
187 | | |
188 | | // return true if @table_magic_number_ is set to a value different |
189 | | // from @kInvalidTableMagicNumber. |
190 | 304k | bool HasInitializedTableMagicNumber() const { |
191 | 304k | return (table_magic_number_ != kInvalidTableMagicNumber); |
192 | 304k | } |
193 | | |
194 | | uint32_t version_; |
195 | | ChecksumType checksum_; |
196 | | BlockHandle metaindex_handle_; |
197 | | BlockHandle data_index_handle_; |
198 | | uint64_t table_magic_number_ = kInvalidTableMagicNumber; |
199 | | }; |
200 | | |
201 | | // Read the footer from file |
202 | | // If enforce_table_magic_number != 0, ReadFooterFromFile() will return |
203 | | // corruption if table_magic number is not equal to enforce_table_magic_number |
204 | | Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size, |
205 | | Footer* footer, |
206 | | uint64_t enforce_table_magic_number = 0); |
207 | | |
208 | | // 1-byte type + 32-bit crc |
209 | | static const size_t kBlockTrailerSize = 5; |
210 | | |
211 | | class TrackedAllocation { |
212 | | public: |
213 | | TrackedAllocation(); |
214 | | TrackedAllocation(std::unique_ptr<char[]>&& data, size_t size, |
215 | | std::shared_ptr<yb::MemTracker> mem_tracker); |
216 | 4.33M | TrackedAllocation(TrackedAllocation&& other) = default; |
217 | | |
218 | | TrackedAllocation& operator=(TrackedAllocation&& other); |
219 | | |
220 | | ~TrackedAllocation(); |
221 | | |
222 | 0 | char* get() const { |
223 | 0 | return holder_.get(); |
224 | 0 | } |
225 | | private: |
226 | | std::unique_ptr<char[]> holder_; |
227 | | size_t size_; |
228 | | std::shared_ptr<yb::MemTracker> mem_tracker_; |
229 | | }; |
230 | | |
231 | | struct BlockContents { |
232 | | Slice data; // Actual contents of data |
233 | | bool cachable; // True iff data can be cached |
234 | | CompressionType compression_type; |
235 | | TrackedAllocation allocation; |
236 | | |
237 | 6.80M | BlockContents() : cachable(false), compression_type(kNoCompression) {} |
238 | | |
239 | | BlockContents(const Slice& _data, bool _cachable, |
240 | | CompressionType _compression_type) |
241 | 15.6k | : data(_data), cachable(_cachable), compression_type(_compression_type) {} |
242 | | |
243 | | BlockContents(std::unique_ptr<char[]>&& _data, size_t _size, bool _cachable, |
244 | | CompressionType _compression_type, std::shared_ptr<yb::MemTracker> _mem_tracker); |
245 | | |
246 | 4.33M | BlockContents(BlockContents&& other) = default; |
247 | | |
248 | 4.34M | BlockContents& operator=(BlockContents&& other) = default; |
249 | | }; |
250 | | |
251 | | // Read the block identified by "handle" from "file". On failure |
252 | | // return non-OK. On success fill *contents and return OK. |
253 | | extern Status ReadBlockContents(RandomAccessFileReader* file, |
254 | | const Footer& footer, |
255 | | const ReadOptions& options, |
256 | | const BlockHandle& handle, |
257 | | BlockContents* contents, Env* env, |
258 | | const std::shared_ptr<yb::MemTracker>& mem_tracker, |
259 | | bool do_uncompress); |
260 | | |
261 | | // The 'data' points to the raw block contents read in from file. |
262 | | // This method allocates a new heap buffer and the raw block |
263 | | // contents are uncompressed into this buffer. This buffer is |
264 | | // returned via 'contents' and it is up to the caller to |
265 | | // free this buffer. |
266 | | // For description of compress_format_version and possible values, see |
267 | | // util/compression.h |
268 | | extern Status UncompressBlockContents(const char* data, size_t n, |
269 | | BlockContents* contents, |
270 | | uint32_t compress_format_version, |
271 | | const std::shared_ptr<yb::MemTracker>& mem_tracker); |
272 | | |
273 | | // Implementation details follow. Clients should ignore. |
274 | | |
275 | 63.6M | inline BlockHandle::BlockHandle() : BlockHandle(kUint64FieldNotSet, kUint64FieldNotSet) {} |
276 | | |
277 | | inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size) |
278 | 63.7M | : offset_(_offset), size_(_size) {} |
279 | | |
280 | | } // namespace rocksdb |
281 | | |
282 | | #endif // YB_ROCKSDB_TABLE_FORMAT_H |