/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/format.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved. | 
| 2 |  | //  This source code is licensed under the BSD-style license found in the | 
| 3 |  | //  LICENSE file in the root directory of this source tree. An additional grant | 
| 4 |  | //  of patent rights can be found in the PATENTS file in the same directory. | 
| 5 |  | // | 
| 6 |  | // The following only applies to changes made to this file as part of YugaByte development. | 
| 7 |  | // | 
| 8 |  | // Portions Copyright (c) YugaByte, Inc. | 
| 9 |  | // | 
| 10 |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | 
| 11 |  | // in compliance with the License.  You may obtain a copy of the License at | 
| 12 |  | // | 
| 13 |  | // http://www.apache.org/licenses/LICENSE-2.0 | 
| 14 |  | // | 
| 15 |  | // Unless required by applicable law or agreed to in writing, software distributed under the License | 
| 16 |  | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | 
| 17 |  | // or implied.  See the License for the specific language governing permissions and limitations | 
| 18 |  | // under the License. | 
| 19 |  | // | 
| 20 |  | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | 
| 21 |  | // Use of this source code is governed by a BSD-style license that can be | 
| 22 |  | // found in the LICENSE file. See the AUTHORS file for names of contributors. | 
| 23 |  |  | 
| 24 |  | #ifndef YB_ROCKSDB_TABLE_FORMAT_H | 
| 25 |  | #define YB_ROCKSDB_TABLE_FORMAT_H | 
| 26 |  |  | 
| 27 |  | #include <stdint.h> | 
| 28 |  | #include <string> | 
| 29 |  | #include "yb/util/slice.h" | 
| 30 |  | #include "yb/rocksdb/status.h" | 
| 31 |  | #include "yb/rocksdb/options.h" | 
| 32 |  | #include "yb/rocksdb/table.h" | 
| 33 |  |  | 
| 34 |  | namespace yb { | 
| 35 |  |  | 
| 36 |  | class MemTracker; | 
| 37 |  |  | 
| 38 |  | } | 
| 39 |  |  | 
| 40 |  | namespace rocksdb { | 
| 41 |  |  | 
| 42 |  | class Block; | 
| 43 |  | struct ReadOptions; | 
| 44 |  |  | 
| 45 |  | // the length of the magic number in bytes. | 
| 46 |  | const int kMagicNumberLengthByte = 8; | 
| 47 |  |  | 
| 48 |  | // Even though we use the kKeyDeltaEncodingSharedPrefix format for index blocks, by default every | 
| 49 |  | // key in the index will still have zero shared prefix length and will be stored in full, because | 
| 50 |  | // the default value of index_block_restart_interval is 1 (see BlockBasedTableOptions). | 
| 51 |  | constexpr auto kIndexBlockKeyValueEncodingFormat = | 
| 52 |  |     KeyValueEncodingFormat::kKeyDeltaEncodingSharedPrefix; | 
| 53 |  |  | 
| 54 |  | // BlockHandle is a pointer to the extent of a file that stores a data | 
| 55 |  | // block or a meta block. | 
| 56 |  | class BlockHandle { | 
| 57 |  |   constexpr static uint64_t kUint64FieldNotSet = ~static_cast<uint64_t>(0); | 
| 58 |  |  | 
| 59 |  |  public: | 
| 60 |  |   BlockHandle(); | 
| 61 |  |   BlockHandle(uint64_t offset, uint64_t size); | 
| 62 |  |  | 
| 63 |  |   // The offset of the block in the file. | 
| 64 | 81.2M |   uint64_t offset() const { return offset_; } | 
| 65 | 3.30M |   void set_offset(uint64_t _offset) { offset_ = _offset; } | 
| 66 |  |  | 
| 67 |  |   // The size of the stored block | 
| 68 | 12.8M |   uint64_t size() const { return size_; } | 
| 69 | 3.30M |   void set_size(uint64_t _size) { size_ = _size; } | 
| 70 |  |  | 
| 71 |  |   void AppendEncodedTo(std::string* dst) const; | 
| 72 |  |   Status DecodeFrom(Slice* input); | 
| 73 |  |  | 
| 74 |  |   // Return a string that contains the copy of handle. | 
| 75 |  |   std::string ToString(bool hex = true) const; | 
| 76 |  |  | 
| 77 |  |   std::string ToDebugString() const; | 
| 78 |  |  | 
| 79 |  |   // If the block handle's offset and size are both "0", we view it | 
| 80 |  |   // as a null block handle that points nowhere. | 
| 81 | 13.1M |   bool IsNull() const { | 
| 82 | 13.1M (size_ == 0: 1.41M) |     return offset_ == 0 && size_ == 0; | 
| 83 | 13.1M |   } | 
| 84 |  |  | 
| 85 | 64.0k |   bool IsSet() const { | 
| 86 | 64.0k (size_ != kUint64FieldNotSet: 64.0k) |     return offset_ != kUint64FieldNotSet && size_ != kUint64FieldNotSet; | 
| 87 | 64.0k |   } | 
| 88 |  |  | 
| 89 | 2.05k |   static const BlockHandle& NullBlockHandle() { | 
| 90 | 2.05k |     return kNullBlockHandle; | 
| 91 | 2.05k |   } | 
| 92 |  |  | 
| 93 |  |   // Maximum encoding length of a BlockHandle | 
| 94 |  |   enum { kMaxEncodedLength = 10 + 10 }; | 
| 95 |  |  | 
| 96 |  |  private: | 
| 97 |  |   uint64_t offset_ = 0; | 
| 98 |  |   uint64_t size_ = 0; | 
| 99 |  |  | 
| 100 |  |   static const BlockHandle kNullBlockHandle; | 
| 101 |  | }; | 
| 102 |  |  | 
| 103 |  | inline uint32_t GetCompressFormatForVersion(CompressionType compression_type, | 
| 104 | 99.7k |                                             uint32_t version) { | 
| 105 |  |   // snappy is not versioned | 
| 106 | 99.7k |   assert(compression_type != kSnappyCompression && | 
| 107 | 99.7k |          compression_type != kNoCompression); | 
| 108 |  |   // As of version 2, we encode compressed block with | 
| 109 |  |   // compress_format_version == 2. Before that, the version is 1. | 
| 110 |  |   // DO NOT CHANGE THIS FUNCTION, it affects disk format | 
| 111 | 99.7k (2: 51.9k, 1: 47.8k) |   return version >= 2 ? 2 : 1; | 
| 112 | 99.7k | } | 
| 113 |  |  | 
| 114 | 515k | inline bool BlockBasedTableSupportedVersion(uint32_t version) { | 
| 115 | 515k |   return version <= 2; | 
| 116 | 515k | } | 
| 117 |  |  | 
| 118 |  | // Footer encapsulates the fixed information stored at the tail | 
| 119 |  | // end of every table file. | 
| 120 |  | class Footer { | 
| 121 |  |  public: | 
| 122 |  |   // Constructs a footer without specifying its table magic number. | 
| 123 |  |   // In such case, the table magic number of such footer should be | 
| 124 |  |   // initialized via @ReadFooterFromFile(). | 
| 125 |  |   // Use this when you plan to load Footer with DecodeFrom(). Never use this | 
| 126 |  |   // when you plan to AppendEncodedTo. | 
| 127 | 315k |   Footer() : Footer(kInvalidTableMagicNumber, /* version= */ 0) {} | 
|  |  | Instantiation rocksdb::Footer::Footer(): line 127, count 315k | 
|  |  | Unexecuted instantiation: rocksdb::Footer::Footer() | 
| 128 |  |  | 
| 129 |  |   // Use this constructor when you plan to write out the footer using | 
| 130 |  |   // AppendEncodedTo(). Never use this constructor with DecodeFrom(). | 
| 131 |  |   Footer(uint64_t table_magic_number, uint32_t version); | 
| 132 |  |  | 
| 133 |  |   // The version of the footer in this file | 
| 134 | 1.96M |   uint32_t version() const { return version_; } | 
| 135 |  |  | 
| 136 |  |   // The checksum type used in this file | 
| 137 | 4.17M |   ChecksumType checksum() const { return checksum_; } | 
| 138 | 65.5k |   void set_checksum(const ChecksumType c) { checksum_ = c; } | 
| 139 |  |  | 
| 140 |  |   // The block handle for the metaindex block of the table | 
| 141 | 120k |   const BlockHandle& metaindex_handle() const { return metaindex_handle_; } | 
| 142 | 67.5k |   void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } | 
| 143 |  |  | 
| 144 |  |   // The block handle for the index block of the table | 
| 145 | 14.8M |   const BlockHandle& index_handle() const { return data_index_handle_; } | 
| 146 |  |  | 
| 147 | 67.5k |   void set_index_handle(const BlockHandle& h) { data_index_handle_ = h; } | 
| 148 |  |  | 
| 149 | 290k |   uint64_t table_magic_number() const { return table_magic_number_; } | 
| 150 |  |  | 
| 151 |  |   void AppendEncodedTo(std::string* dst) const; | 
| 152 |  |  | 
| 153 |  |   // Set the current footer based on the input slice. | 
| 154 |  |   // | 
| 155 |  |   // REQUIRES: table_magic_number_ is not set (i.e., | 
| 156 |  |   // HasInitializedTableMagicNumber() is false). The function will initialize the | 
| 157 |  |   // magic number. | 
| 158 |  |   Status DecodeFrom(Slice* input); | 
| 159 |  |  | 
| 160 |  |   // Encoded length of a Footer.  Note that the serialization of a Footer will | 
| 161 |  |   // always occupy at least kMinEncodedLength bytes.  If fields are changed | 
| 162 |  |   // the version number should be incremented and kMaxEncodedLength should be | 
| 163 |  |   // increased accordingly. | 
| 164 |  |   enum { | 
| 165 |  |     // Footer version 0 (legacy) will always occupy exactly this many bytes. | 
| 166 |  |     // It consists of two block handles, padding, and a magic number. | 
| 167 |  |     kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8, | 
| 168 |  |     // Footers of version 1 and higher will always occupy exactly this many | 
| 169 |  |     // bytes. They consist of the checksum type, two block handles, padding, | 
| 170 |  |     // a version number (1 or higher), and a magic number. | 
| 171 |  |     kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8, | 
| 172 |  |     kMinEncodedLength = kVersion0EncodedLength, | 
| 173 |  |     kMaxEncodedLength = kNewVersionsEncodedLength, | 
| 174 |  |   }; | 
| 175 |  |  | 
| 176 |  |   static const uint64_t kInvalidTableMagicNumber = 0; | 
| 177 |  |  | 
| 178 |  |   // convert this object to a human readable form | 
| 179 |  |   std::string ToString() const; | 
| 180 |  |  | 
| 181 |  |  private: | 
| 182 |  |   // REQUIRES: magic number wasn't initialized. | 
| 183 | 118k |   void set_table_magic_number(uint64_t magic_number) { | 
| 184 | 118k |     assert(!HasInitializedTableMagicNumber()); | 
| 185 | 0 |     table_magic_number_ = magic_number; | 
| 186 | 118k |   } | 
| 187 |  |  | 
| 188 |  |   // return true if @table_magic_number_ is set to a value different | 
| 189 |  |   // from @kInvalidTableMagicNumber. | 
| 190 | 304k |   bool HasInitializedTableMagicNumber() const { | 
| 191 | 304k |     return (table_magic_number_ != kInvalidTableMagicNumber); | 
| 192 | 304k |   } | 
| 193 |  |  | 
| 194 |  |   uint32_t version_; | 
| 195 |  |   ChecksumType checksum_; | 
| 196 |  |   BlockHandle metaindex_handle_; | 
| 197 |  |   BlockHandle data_index_handle_; | 
| 198 |  |   uint64_t table_magic_number_ = kInvalidTableMagicNumber; | 
| 199 |  | }; | 
| 200 |  |  | 
| 201 |  | // Read the footer from file | 
| 202 |  | // If enforce_table_magic_number != 0, ReadFooterFromFile() will return | 
| 203 |  | // corruption if the table magic number is not equal to enforce_table_magic_number. | 
| 204 |  | Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size, | 
| 205 |  |                           Footer* footer, | 
| 206 |  |                           uint64_t enforce_table_magic_number = 0); | 
| 207 |  |  | 
| 208 |  | // 1-byte type + 32-bit crc | 
| 209 |  | static const size_t kBlockTrailerSize = 5; | 
| 210 |  |  | 
| 211 |  | class TrackedAllocation { | 
| 212 |  |  public: | 
| 213 |  |   TrackedAllocation(); | 
| 214 |  |   TrackedAllocation(std::unique_ptr<char[]>&& data, size_t size, | 
| 215 |  |                     std::shared_ptr<yb::MemTracker> mem_tracker); | 
| 216 | 4.33M |   TrackedAllocation(TrackedAllocation&& other) = default; | 
| 217 |  |  | 
| 218 |  |   TrackedAllocation& operator=(TrackedAllocation&& other); | 
| 219 |  |  | 
| 220 |  |   ~TrackedAllocation(); | 
| 221 |  |  | 
| 222 | 0 |   char* get() const { | 
| 223 | 0 |     return holder_.get(); | 
| 224 | 0 |   } | 
| 225 |  |  private: | 
| 226 |  |   std::unique_ptr<char[]> holder_; | 
| 227 |  |   size_t size_; | 
| 228 |  |   std::shared_ptr<yb::MemTracker> mem_tracker_; | 
| 229 |  | }; | 
| 230 |  |  | 
| 231 |  | struct BlockContents { | 
| 232 |  |   Slice data;           // Actual contents of data | 
| 233 |  |   bool cachable;        // True iff data can be cached | 
| 234 |  |   CompressionType compression_type; | 
| 235 |  |   TrackedAllocation allocation; | 
| 236 |  |  | 
| 237 | 6.80M |   BlockContents() : cachable(false), compression_type(kNoCompression) {} | 
| 238 |  |  | 
| 239 |  |   BlockContents(const Slice& _data, bool _cachable, | 
| 240 |  |                 CompressionType _compression_type) | 
| 241 | 15.6k |       : data(_data), cachable(_cachable), compression_type(_compression_type) {} | 
| 242 |  |  | 
| 243 |  |   BlockContents(std::unique_ptr<char[]>&& _data, size_t _size, bool _cachable, | 
| 244 |  |                 CompressionType _compression_type, std::shared_ptr<yb::MemTracker> _mem_tracker); | 
| 245 |  |  | 
| 246 | 4.33M |   BlockContents(BlockContents&& other) = default; | 
| 247 |  |  | 
| 248 | 4.34M |   BlockContents& operator=(BlockContents&& other) = default; | 
| 249 |  | }; | 
| 250 |  |  | 
| 251 |  | // Read the block identified by "handle" from "file".  On failure | 
| 252 |  | // return non-OK.  On success fill *contents and return OK. | 
| 253 |  | extern Status ReadBlockContents(RandomAccessFileReader* file, | 
| 254 |  |                                 const Footer& footer, | 
| 255 |  |                                 const ReadOptions& options, | 
| 256 |  |                                 const BlockHandle& handle, | 
| 257 |  |                                 BlockContents* contents, Env* env, | 
| 258 |  |                                 const std::shared_ptr<yb::MemTracker>& mem_tracker, | 
| 259 |  |                                 bool do_uncompress); | 
| 260 |  |  | 
| 261 |  | // The 'data' points to the raw block contents read in from file. | 
| 262 |  | // This method allocates a new heap buffer and the raw block | 
| 263 |  | // contents are uncompressed into this buffer. This buffer is | 
| 264 |  | // returned via 'contents' and it is up to the caller to | 
| 265 |  | // free this buffer. | 
| 266 |  | // For description of compress_format_version and possible values, see | 
| 267 |  | // util/compression.h | 
| 268 |  | extern Status UncompressBlockContents(const char* data, size_t n, | 
| 269 |  |                                       BlockContents* contents, | 
| 270 |  |                                       uint32_t compress_format_version, | 
| 271 |  |                                       const std::shared_ptr<yb::MemTracker>& mem_tracker); | 
| 272 |  |  | 
| 273 |  | // Implementation details follow.  Clients should ignore. | 
| 274 |  |  | 
| 275 | 63.6M | inline BlockHandle::BlockHandle() : BlockHandle(kUint64FieldNotSet, kUint64FieldNotSet) {} | 
| 276 |  |  | 
| 277 |  | inline BlockHandle::BlockHandle(uint64_t _offset, uint64_t _size) | 
| 278 | 63.7M |     : offset_(_offset), size_(_size) {} | 
| 279 |  |  | 
| 280 |  | }  // namespace rocksdb | 
| 281 |  |  | 
| 282 |  | #endif // YB_ROCKSDB_TABLE_FORMAT_H |