YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_reader.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
//
5
// The following only applies to changes made to this file as part of YugaByte development.
6
//
7
// Portions Copyright (c) YugaByte, Inc.
8
//
9
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
10
// in compliance with the License.  You may obtain a copy of the License at
11
//
12
// http://www.apache.org/licenses/LICENSE-2.0
13
//
14
// Unless required by applicable law or agreed to in writing, software distributed under the License
15
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
16
// or implied.  See the License for the specific language governing permissions and limitations
17
// under the License.
18
//
19
20
#ifndef YB_ROCKSDB_TABLE_PLAIN_TABLE_READER_H
21
#define YB_ROCKSDB_TABLE_PLAIN_TABLE_READER_H
22
23
#ifndef ROCKSDB_LITE
24
#include <stdint.h>
25
#include <unordered_map>
26
#include <memory>
27
#include <vector>
28
#include <string>
29
30
#include "yb/rocksdb/db/dbformat.h"
31
#include "yb/rocksdb/env.h"
32
#include "yb/rocksdb/iterator.h"
33
#include "yb/rocksdb/slice_transform.h"
34
#include "yb/rocksdb/table.h"
35
#include "yb/rocksdb/table_properties.h"
36
37
#include "yb/rocksdb/table/format.h"
38
#include "yb/rocksdb/table/table_reader.h"
39
#include "yb/rocksdb/table/plain_table_factory.h"
40
#include "yb/rocksdb/table/plain_table_index.h"
41
42
#include "yb/rocksdb/util/arena.h"
43
#include "yb/rocksdb/util/dynamic_bloom.h"
44
#include "yb/rocksdb/util/file_reader_writer.h"
45
46
namespace rocksdb {
47
48
class Block;
49
struct BlockContents;
50
class BlockHandle;
51
class Footer;
52
struct Options;
53
struct ReadOptions;
54
class TableCache;
55
class TableReader;
56
class InternalKeyComparator;
57
class PlainTableKeyDecoder;
58
class GetContext;
59
class InternalIterator;
60
61
using std::unique_ptr;
62
using std::unordered_map;
63
using std::vector;
64
extern const uint32_t kPlainTableVariableLength;
65
66
struct PlainTableReaderFileInfo {
67
  bool is_mmap_mode;
68
  Slice file_data;
69
  uint32_t data_end_offset;
70
  unique_ptr<RandomAccessFileReader> file;
71
72
  PlainTableReaderFileInfo(unique_ptr<RandomAccessFileReader>&& _file,
73
                           const EnvOptions& storage_options,
74
                           uint32_t _data_size_offset)
75
      : is_mmap_mode(storage_options.use_mmap_reads),
76
        data_end_offset(_data_size_offset),
77
3.10k
        file(std::move(_file)) {}
78
};
79
80
// Based on following output file format shown in plain_table_factory.h
81
// When opening the output file, IndexedTableReader creates a hash table
82
// from key prefixes to offset of the output file. IndexedTable will decide
83
// whether it points to the data offset of the first key with the key prefix
84
// or the offset of it. If there are too many keys share this prefix, it will
85
// create a binary search-able index from the suffix to offset on disk.
86
//
87
// The implementation of IndexedTableReader requires output file is mmaped
88
class PlainTableReader: public TableReader {
89
 public:
90
  static Status Open(const ImmutableCFOptions& ioptions,
91
                     const EnvOptions& env_options,
92
                     const InternalKeyComparatorPtr& internal_comparator,
93
                     unique_ptr<RandomAccessFileReader>&& file,
94
                     uint64_t file_size, unique_ptr<TableReader>* table,
95
                     const int bloom_bits_per_key, double hash_table_ratio,
96
                     size_t index_sparseness, size_t huge_page_tlb_size,
97
                     bool full_scan_mode);
98
99
2.33k
  bool IsSplitSst() const override { return false; }
100
101
0
  void SetDataFileReader(unique_ptr<RandomAccessFileReader>&& data_file) override { assert(false); }
102
103
  InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr,
104
                                bool skip_filters = false) override;
105
106
  void Prepare(const Slice& target) override;
107
108
  Status Get(const ReadOptions&, const Slice& key, GetContext* get_context,
109
             bool skip_filters = false) override;
110
111
  uint64_t ApproximateOffsetOf(const Slice& key) override;
112
113
17.0k
  uint32_t GetIndexSize() const { return index_.GetIndexSize(); }
114
  void SetupForCompaction() override;
115
116
1.58k
  std::shared_ptr<const TableProperties> GetTableProperties() const override {
117
1.58k
    return table_properties_;
118
1.58k
  }
119
120
48
  virtual size_t ApproximateMemoryUsage() const override {
121
48
    return arena_.MemoryAllocatedBytes();
122
48
  }
123
124
  PlainTableReader(const ImmutableCFOptions& ioptions,
125
                   unique_ptr<RandomAccessFileReader>&& file,
126
                   const EnvOptions& env_options,
127
                   const InternalKeyComparatorPtr& internal_comparator,
128
                   EncodingType encoding_type, uint64_t file_size,
129
                   const TableProperties* table_properties);
130
  virtual ~PlainTableReader();
131
132
 protected:
133
  // Check bloom filter to see whether it might contain this prefix.
134
  // The hash of the prefix is given, since it can be reused for index lookup
135
  // too.
136
  virtual bool MatchBloom(uint32_t hash) const;
137
138
  // PopulateIndex() builds index of keys. It must be called before any query
139
  // to the table.
140
  //
141
  // props: the table properties object that need to be stored. Ownership of
142
  //        the object will be passed.
143
  //
144
145
  Status PopulateIndex(TableProperties* props, int bloom_bits_per_key,
146
                       double hash_table_ratio, size_t index_sparseness,
147
                       size_t huge_page_tlb_size);
148
149
  Status MmapDataIfNeeded();
150
151
 private:
152
  InternalKeyComparatorPtr internal_comparator_;
153
  EncodingType encoding_type_;
154
  // represents plain table's current status.
155
  Status status_;
156
157
  PlainTableIndex index_;
158
  bool full_scan_mode_;
159
160
  // data_start_offset_ and data_end_offset_ defines the range of the
161
  // sst file that stores data.
162
  const uint32_t data_start_offset_ = 0;
163
  const uint32_t user_key_len_;
164
  const SliceTransform* prefix_extractor_;
165
166
  static const size_t kNumInternalBytes = 8;
167
168
  // Bloom filter is used to rule out non-existent key
169
  bool enable_bloom_;
170
  DynamicBloom bloom_;
171
  PlainTableReaderFileInfo file_info_;
172
  Arena arena_;
173
  TrackedAllocation index_block_alloc_;
174
  TrackedAllocation bloom_block_alloc_;
175
176
  const ImmutableCFOptions& ioptions_;
177
  uint64_t file_size_;
178
  std::shared_ptr<const TableProperties> table_properties_;
179
  std::shared_ptr<yb::MemTracker> mem_tracker_;
180
  size_t tracked_consumption_ = 0;
181
182
0
  bool IsFixedLength() const {
183
0
    return user_key_len_ != kPlainTableVariableLength;
184
0
  }
185
186
0
  size_t GetFixedInternalKeyLength() const {
187
0
    return user_key_len_ + kNumInternalBytes;
188
0
  }
189
190
157k
  Slice GetPrefix(const Slice& target) const {
191
157k
    assert(target.size() >= 8);  // target is internal key
192
0
    return GetPrefixFromUserKey(GetUserKey(target));
193
157k
  }
194
195
2.04M
  Slice GetPrefix(const ParsedInternalKey& target) const {
196
2.04M
    return GetPrefixFromUserKey(target.user_key);
197
2.04M
  }
198
199
157k
  Slice GetUserKey(const Slice& key) const {
200
157k
    return Slice(key.data(), key.size() - 8);
201
157k
  }
202
203
2.20M
  Slice GetPrefixFromUserKey(const Slice& user_key) const {
204
2.20M
    if (!IsTotalOrderMode()) {
205
2.12M
      return prefix_extractor_->Transform(user_key);
206
2.12M
    } else {
207
      // Use empty slice as prefix if prefix_extractor is not set.
208
      // In that case,
209
      // it falls back to pure binary search and
210
      // total iterator seek is supported.
211
71.7k
      return Slice();
212
71.7k
    }
213
2.20M
  }
214
215
  friend class TableCache;
216
  friend class PlainTableIterator;
217
218
  // Internal helper function to generate an IndexRecordList object from all
219
  // the rows, which contains index records as a list.
220
  // If bloom_ is not null, all the keys' full-key hash will be added to the
221
  // bloom filter.
222
  Status PopulateIndexRecordList(PlainTableIndexBuilder* index_builder,
223
                                 vector<uint32_t>* prefix_hashes);
224
225
  // Internal helper function to allocate memory for bloom filter and fill it
226
  void AllocateAndFillBloom(int bloom_bits_per_key, int num_prefixes,
227
                            size_t huge_page_tlb_size,
228
                            vector<uint32_t>* prefix_hashes);
229
230
  void FillBloom(vector<uint32_t>* prefix_hashes);
231
232
  // Read the key and value at `offset` to parameters for keys, the and
233
  // `seekable`.
234
  // On success, `offset` will be updated as the offset for the next key.
235
  // `parsed_key` will be key in parsed format.
236
  // if `internal_key` is not empty, it will be filled with key with slice
237
  // format.
238
  // if `seekable` is not null, it will return whether we can directly read
239
  // data using this offset.
240
  Status Next(PlainTableKeyDecoder* decoder, uint32_t* offset,
241
              ParsedInternalKey* parsed_key, Slice* internal_key, Slice* value,
242
              bool* seekable = nullptr) const;
243
  // Get file offset for key target.
244
  // return value prefix_matched is set to true if the offset is confirmed
245
  // for a key with the same prefix as target.
246
  Status GetOffset(PlainTableKeyDecoder* decoder, const Slice& target,
247
                   const Slice& prefix, uint32_t prefix_hash,
248
                   bool* prefix_matched, uint32_t* offset) const;
249
250
2.33M
  bool IsTotalOrderMode() const { return (prefix_extractor_ == nullptr); }
251
252
  // No copying allowed
253
  explicit PlainTableReader(const TableReader&) = delete;
254
  void operator=(const TableReader&) = delete;
255
};
256
}  // namespace rocksdb
257
#endif  // ROCKSDB_LITE
258
259
#endif  // YB_ROCKSDB_TABLE_PLAIN_TABLE_READER_H