/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_key_coding.h

Source (jump to first uncovered line)
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// The following only applies to changes made to this file as part of YugaByte development.
//
// Portions Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License.  You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied.  See the License for the specific language governing permissions and limitations
// under the License.
//

#pragma once
#ifndef ROCKSDB_LITE

#include <array>
#include <memory>
#include "yb/util/slice.h"
#include "yb/rocksdb/db/dbformat.h"
#include "yb/rocksdb/table/plain_table_reader.h"

namespace rocksdb {

class WritableFile;
struct ParsedInternalKey;
struct PlainTableReaderFileInfo;
enum PlainTableEntryType : unsigned char;

// Helper class to write out a key to an output file.
// Actual data format of the key is documented in plain_table_factory.h
class PlainTableKeyEncoder {
 public:
  // encoding_type: the requested encoding. It is only honored when a prefix
  //                extractor is supplied; with a null prefix_extractor the
  //                encoder falls back to kPlain.
  // user_key_len: stored as the fixed user key length.
  // prefix_extractor: may be nullptr. Only the pointer is stored.
  // index_sparseness: values below 1 are raised to 1.
  explicit PlainTableKeyEncoder(EncodingType encoding_type,
                                uint32_t user_key_len,
                                const SliceTransform* prefix_extractor,
                                size_t index_sparseness)
      : encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain),
        fixed_user_key_len_(user_key_len),
        prefix_extractor_(prefix_extractor),
        index_sparseness_((index_sparseness > 1) ? index_sparseness : 1),
        key_count_for_prefix_(0) {}

  // key: the key to write out, in the format of internal key.
  // file: the output file to write out
  // offset: offset in the file. Needs to be updated after appending bytes
  //         for the key
  // meta_bytes_buf: buffer for extra meta bytes
  // meta_bytes_buf_size: offset to append extra meta bytes. Will be updated
  //                      if meta_bytes_buf is updated.
  Status AppendKey(const Slice& key, WritableFileWriter* file, uint64_t* offset,
                   char* meta_bytes_buf, size_t* meta_bytes_buf_size);

  // Returns the encoding type actually in effect, which may differ from the
  // one passed to the constructor (see the constructor's fallback to kPlain).
  // Const so that it can be queried through a const encoder.
  EncodingType GetEncodingType() const { return encoding_type_; }

 private:
  EncodingType encoding_type_;
  uint32_t fixed_user_key_len_;
  const SliceTransform* prefix_extractor_;
  const size_t index_sparseness_;
  size_t key_count_for_prefix_;
  IterKey pre_prefix_;
};

// Reads byte ranges out of a plain table file, either straight from the
// mmaped file contents or, in non-mmap mode, through ReadNonMmap().
class PlainTableFileReader {
 public:
  explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info)
      : file_info_(_file_info), num_buf_(0) {}

  // In mmaped mode, the results point to mmaped area of the file, which
  // means it is always valid before closing the file.
  // In non-mmap mode, the results point to an internal buffer. If the caller
  // makes another read call, the results may not be valid. So callers should
  // make a copy when needed.
  // In order to save read calls to files, we keep two internal buffers:
  // the first read and the most recent read. This is efficient because it
  // covers these two common use cases:
  // (1) hash index only identify one location, we read the key to verify
  //     the location, and read key and value if it is the right location.
  // (2) after hash index checking, we identify two locations (because of
  //     hash bucket conflicts), we binary search the two location to see
  //     which one is what we need and start to read from the location.
  // These two most common use cases will be covered by the two buffers
  // so that we don't need to re-read the same location.
  // Currently we keep a fixed size buffer. If a read doesn't exactly fit
  // the buffer, we replace the second buffer with the location user reads.
  //
  // If return false, status code is stored in status_.
  bool Read(uint32_t file_offset, uint32_t len, Slice* out) {
    if (file_info_->is_mmap_mode) {
      // Widen before adding: a 32-bit sum could wrap around and make an
      // out-of-range request look like it is inside the data area.
      assert(static_cast<uint64_t>(file_offset) + len <=
             file_info_->data_end_offset);
      *out = Slice(file_info_->file_data.data() + file_offset, len);
      return true;
    }
    return ReadNonMmap(file_offset, len, out);
  }

  // If return false, status code is stored in status_.
  bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output);

  // *bytes_read = 0 means eof. false means failure and status is saved
  // in status_. Not directly returning Status to save copying status
  // object to match previous performance of mmap mode.
  inline bool ReadVarint32(uint32_t offset, uint32_t* output,
                           uint32_t* bytes_read);

  bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output,
                           uint32_t* bytes_read);

  // Returns a copy of the status saved by the read calls above.
  Status status() const { return status_; }

  // Returns the file info pointer this reader was constructed with.
  const PlainTableReaderFileInfo* file_info() const { return file_info_; }

 private:
  const PlainTableReaderFileInfo* file_info_;

  // One read buffer together with the bookkeeping for the range it holds.
  struct Buffer {
    Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {}
    std::unique_ptr<char[]> buf;
    uint32_t buf_start_offset;
    uint32_t buf_len;
    uint32_t buf_capacity;
  };

  // Keep buffers for two recent reads.
  // Spelled with std:: so this header does not depend on a using-declaration
  // leaking in from another include.
  std::array<std::unique_ptr<Buffer>, 2> buffers_;
  uint32_t num_buf_;
  Status status_;

  Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len);
};

// A helper class to decode keys from a plain table file.
// Actual data format of the key is documented in plain_table_factory.h
class PlainTableKeyDecoder {
 public:
  // file_info: handed to the embedded PlainTableFileReader.
  // encoding_type: stored as-is (unlike the encoder, no fallback is applied
  //                here).
  // user_key_len: stored as the fixed user key length.
  // prefix_extractor: only the pointer is stored.
  // prefix_len_ starts at 0 and in_prefix_ starts as false.
  explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info,
                                EncodingType encoding_type,
                                uint32_t user_key_len,
                                const SliceTransform* prefix_extractor)
      : file_reader_(file_info),
        encoding_type_(encoding_type),
        prefix_len_(0),
        fixed_user_key_len_(user_key_len),
        prefix_extractor_(prefix_extractor),
        in_prefix_(false) {}
  // Find the next key.
  // start_offset: presumably the file offset at which the entry starts
  //               (TODO confirm against the .cc implementation).
  // parsed_key: the output of the result key
  // internal_key: if not null, fill with the output of the result key in
  //               un-parsed format
  // value: presumably receives the value stored with the key (TODO confirm
  //        against the .cc implementation).
  // bytes_read: how many bytes read from start. Output
  // seekable: whether key can be read from this place. Used when building
  //           indexes. Output.
  Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key,
                 Slice* internal_key, Slice* value, uint32_t* bytes_read,
                 bool* seekable = nullptr);

  // Same parameter list as NextKey() minus the value output.
  Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key,
                        Slice* internal_key, uint32_t* bytes_read,
                        bool* seekable = nullptr);

  // NOTE(review): the data members below are public, so code outside this
  // class may read or modify them directly.
  PlainTableFileReader file_reader_;
  EncodingType encoding_type_;
  uint32_t prefix_len_;
  uint32_t fixed_user_key_len_;
  Slice saved_user_key_;
  IterKey cur_key_;
  const SliceTransform* prefix_extractor_;
  bool in_prefix_;

 private:
  // Per-encoding helpers; presumably selected by encoding_type_ (TODO
  // confirm against the .cc implementation).
  Status NextPlainEncodingKey(uint32_t start_offset,
                              ParsedInternalKey* parsed_key,
                              Slice* internal_key, uint32_t* bytes_read,
                              bool* seekable = nullptr);
  Status NextPrefixEncodingKey(uint32_t start_offset,
                               ParsedInternalKey* parsed_key,
                               Slice* internal_key, uint32_t* bytes_read,
                               bool* seekable = nullptr);
  Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size,
                         ParsedInternalKey* parsed_key, uint32_t* bytes_read,
                         bool* internal_key_valid, Slice* internal_key);
  inline Status DecodeSize(uint32_t start_offset,
                           PlainTableEntryType* entry_type, uint32_t* key_size,
                           uint32_t* bytes_read);
};

}  // namespace rocksdb

#endif  // ROCKSDB_LITE

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2		// This source code is licensed under the BSD-style license found in the
3		// LICENSE file in the root directory of this source tree. An additional grant
4		// of patent rights can be found in the PATENTS file in the same directory.
5		//
6		// The following only applies to changes made to this file as part of YugaByte development.
7		//
8		// Portions Copyright (c) YugaByte, Inc.
9		//
10		// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11		// in compliance with the License. You may obtain a copy of the License at
12		//
13		// http://www.apache.org/licenses/LICENSE-2.0
14		//
15		// Unless required by applicable law or agreed to in writing, software distributed under the License
16		// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17		// or implied. See the License for the specific language governing permissions and limitations
18		// under the License.
19		//
20
21		#pragma once
22		#ifndef ROCKSDB_LITE
23
24		#include <array>
25		#include "yb/util/slice.h"
26		#include "yb/rocksdb/db/dbformat.h"
27		#include "yb/rocksdb/table/plain_table_reader.h"
28
29		namespace rocksdb {
30
31		class WritableFile;
32		struct ParsedInternalKey;
33		struct PlainTableReaderFileInfo;
34		enum PlainTableEntryType : unsigned char;
35
36		// Helper class to write out a key to an output file
37		// Actual data format of the key is documented in plain_table_factory.h
38		class PlainTableKeyEncoder {
39		public:
40		explicit PlainTableKeyEncoder(EncodingType encoding_type,
41		uint32_t user_key_len,
42		const SliceTransform* prefix_extractor,
43		size_t index_sparseness)
44		: encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain),
45		fixed_user_key_len_(user_key_len),
46		prefix_extractor_(prefix_extractor),
47		index_sparseness_((index_sparseness > 1) ? index_sparseness : 1),
48	2.06k	key_count_for_prefix_(0) {}
49		// key: the key to write out, in the format of internal key.
50		// file: the output file to write out
51		// offset: offset in the file. Needs to be updated after appending bytes
52		// for the key
53		// meta_bytes_buf: buffer for extra meta bytes
54		// meta_bytes_buf_size: offset to append extra meta bytes. Will be updated
55		// if meta_bytes_buf is updated.
56		Status AppendKey(const Slice& key, WritableFileWriter* file, uint64_t* offset,
57		char* meta_bytes_buf, size_t* meta_bytes_buf_size);
58
59		// Return actual encoding type to be picked
60	2.06k	EncodingType GetEncodingType() { return encoding_type_; }
61
62		private:
63		EncodingType encoding_type_;
64		uint32_t fixed_user_key_len_;
65		const SliceTransform* prefix_extractor_;
66		const size_t index_sparseness_;
67		size_t key_count_for_prefix_;
68		IterKey pre_prefix_;
69		};
70
71		class PlainTableFileReader {
72		public:
73		explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info)
74	89.2k	: file_info_(_file_info), num_buf_(0) {}
75		// In mmaped mode, the results point to mmaped area of the file, which
76		// means it is always valid before closing the file.
77		// In non-mmap mode, the results point to an internal buffer. If the caller
78		// makes another read call, the results may not be valid. So callers should
79		// make a copy when needed.
80		// In order to save read calls to files, we keep two internal buffers:
81		// the first read and the most recent read. This is efficient because it
82		// covers these two common use cases:
83		// (1) hash index only identify one location, we read the key to verify
84		// the location, and read key and value if it is the right location.
85		// (2) after hash index checking, we identify two locations (because of
86		// hash bucket conflicts), we binary search the two location to see
87		// which one is what we need and start to read from the location.
88		// These two most common use cases will be covered by the two buffers
89		// so that we don't need to re-read the same location.
90		// Currently we keep a fixed size buffer. If a read doesn't exactly fit
91		// the buffer, we replace the second buffer with the location user reads.
92		//
93		// If return false, status code is stored in status_.
94	12.1M	bool Read(uint32_t file_offset, uint32_t len, Slice* out) {
95	12.1M	if (file_info_->is_mmap_mode) {
96	6.95M	assert(file_offset + len <= file_info_->data_end_offset);
97	6.95M	*out = Slice(file_info_->file_data.data() + file_offset, len);
98	6.95M	return true;
99	5.21M	} else {
100	5.21M	return ReadNonMmap(file_offset, len, out);
101	5.21M	}
102	12.1M	}
103
104		// If return false, status code is stored in status_.
105		bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output);
106
107		// *bytes_read = 0 means eof. false means failure and status is saved
108		// in status_. Not directly returning Status to save copying status
109		// object to match previous performance of mmap mode.
110		inline bool ReadVarint32(uint32_t offset, uint32_t* output,
111		uint32_t* bytes_read);
112
113		bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output,
114		uint32_t* bytes_read);
115
116	0	Status status() const { return status_; }
117
118	4.30M	const PlainTableReaderFileInfo* file_info() { return file_info_; }
119
120		private:
121		const PlainTableReaderFileInfo* file_info_;
122
123		struct Buffer {
124	42.7k	Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {}
125		std::unique_ptr<char[]> buf;
126		uint32_t buf_start_offset;
127		uint32_t buf_len;
128		uint32_t buf_capacity;
129		};
130
131		// Keep buffers for two recent reads.
132		std::array<unique_ptr<Buffer>, 2> buffers_;
133		uint32_t num_buf_;
134		Status status_;
135
136		Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len);
137		};
138
139		// A helper class to decode keys from input buffer
140		// Actual data format of the key is documented in plain_table_factory.h
141		class PlainTableKeyDecoder {
142		public:
143		explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info,
144		EncodingType encoding_type,
145		uint32_t user_key_len,
146		const SliceTransform* prefix_extractor)
147		: file_reader_(file_info),
148		encoding_type_(encoding_type),
149		prefix_len_(0),
150		fixed_user_key_len_(user_key_len),
151		prefix_extractor_(prefix_extractor),
152	89.2k	in_prefix_(false) {}
153		// Find the next key.
154		// start: char array where the key starts.
155		// limit: boundary of the char array
156		// parsed_key: the output of the result key
157		// internal_key: if not null, fill with the output of the result key in
158		// un-parsed format
159		// bytes_read: how many bytes read from start. Output
160		// seekable: whether key can be read from this place. Used when building
161		// indexes. Output.
162		Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key,
163		Slice* internal_key, Slice* value, uint32_t* bytes_read,
164		bool* seekable = nullptr);
165
166		Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key,
167		Slice* internal_key, uint32_t* bytes_read,
168		bool* seekable = nullptr);
169
170		PlainTableFileReader file_reader_;
171		EncodingType encoding_type_;
172		uint32_t prefix_len_;
173		uint32_t fixed_user_key_len_;
174		Slice saved_user_key_;
175		IterKey cur_key_;
176		const SliceTransform* prefix_extractor_;
177		bool in_prefix_;
178
179		private:
180		Status NextPlainEncodingKey(uint32_t start_offset,
181		ParsedInternalKey* parsed_key,
182		Slice* internal_key, uint32_t* bytes_read,
183		bool* seekable = nullptr);
184		Status NextPrefixEncodingKey(uint32_t start_offset,
185		ParsedInternalKey* parsed_key,
186		Slice* internal_key, uint32_t* bytes_read,
187		bool* seekable = nullptr);
188		Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size,
189		ParsedInternalKey* parsed_key, uint32_t* bytes_read,
190		bool* internal_key_valid, Slice* internal_key);
191		inline Status DecodeSize(uint32_t start_offset,
192		PlainTableEntryType* entry_type, uint32_t* key_size,
193		uint32_t* bytes_read);
194		};
195
196		} // namespace rocksdb
197
198		#endif // ROCKSDB_LITE

YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30