YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_based_table_reader.h
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file. See the AUTHORS file for names of contributors.
23
24
#ifndef YB_ROCKSDB_TABLE_BLOCK_BASED_TABLE_READER_H
25
#define YB_ROCKSDB_TABLE_BLOCK_BASED_TABLE_READER_H
26
27
#include <stdint.h>
28
29
#include <memory>
30
#include <string>
31
#include <utility>
32
33
#include "yb/rocksdb/immutable_options.h"
34
#include "yb/rocksdb/options.h"
35
#include "yb/rocksdb/statistics.h"
36
#include "yb/rocksdb/status.h"
37
#include "yb/rocksdb/table/table_reader.h"
38
39
#include "yb/util/strongly_typed_bool.h"
40
41
namespace rocksdb {
42
43
class Block;
44
class BlockIter;
45
class BlockHandle;
46
class Cache;
47
class FilterBlockReader;
48
class BlockBasedFilterBlockReader;
49
class FullFilterBlockReader;
50
class Footer;
51
class InternalKeyComparator;
52
class Iterator;
53
class TableCache;
54
class TableReader;
55
class WritableFile;
56
struct BlockBasedTableOptions;
57
struct EnvOptions;
58
struct ReadOptions;
59
class GetContext;
60
class InternalIterator;
61
class IndexReader;
62
63
using std::unique_ptr;
64
65
enum class DataIndexLoadMode {
66
  // Preload on Open, store in block cache or in table reader depending on
67
  // BlockBasedTableOptions::cache_index_and_filter_blocks.
68
  PRELOAD_ON_OPEN,
69
  // Load on first data index access, store in block cache or in table reader depending on
70
  // BlockBasedTableOptions::cache_index_and_filter_blocks.
71
  LAZY,
72
  // Don't preload data index, access as needed, use block cache if available.
73
  USE_CACHE
74
};
75
76
enum class PrefetchFilter {
77
  YES,
78
  NO
79
};
80
81
YB_DEFINE_ENUM(BlockType, (kData)(kIndex));
82
83
// BloomFilterAwareFileFilter should only be used when scanning within the same hashed components of
84
// the key and it should be used together with DocDbAwareFilterPolicy which only takes into account
85
// hashed components of key for filtering.
86
// BloomFilterAwareFileFilter ignores an SST file completely if there are no keys with the same
87
// hashed components as the key specified in constructor.
88
class BloomFilterAwareFileFilter : public TableAwareReadFileFilter {
89
 public:
90
  BloomFilterAwareFileFilter(const ReadOptions& read_options, const Slice& user_key);
91
92
  bool Filter(TableReader* reader) const override;
93
94
 private:
95
  const ReadOptions read_options_;
96
  const std::string user_key_;
97
};
98
99
// A Table is a sorted map from strings to strings.  Tables are
100
// immutable and persistent.  A Table may be safely accessed from
101
// multiple threads without external synchronization.
102
class BlockBasedTable : public TableReader {
103
 public:
104
  // No copying allowed
105
  explicit BlockBasedTable(const TableReader&) = delete;
106
  void operator=(const TableReader&) = delete;
107
108
  // Attempt to open the table that is stored in bytes [0..base_file_size) of "base_file" (may be
109
  // only metadata and data will be read from separate file passed via SetDataFileReader), and read
110
  // the metadata entries necessary to allow retrieving data from the table.
111
  //
112
  // If successful, returns ok and sets "*table_reader" to the newly opened table. The client
113
  // should delete "*table_reader" when no longer needed. If there was an error while initializing
114
  // the table, sets "*table_reader" to nullptr and returns a non-ok status.
115
  //
116
  // base_file must remain live while this Table is in use.
117
  // data_index_load_mode can be used to control loading of data index (see DataIndexLoadMode
118
  // description).
119
  // prefetch_filter can be used to disable prefetching of filter blocks at startup. For fixed-size
120
  // bloom filter only filter index could be prefetched.
121
  // skip_filters Disables loading/accessing the filter block. Overrides prefetch_filter, so filter
122
  //   will be skipped if both are set.
123
  static CHECKED_STATUS Open(
124
      const ImmutableCFOptions& ioptions,
125
      const EnvOptions& env_options,
126
      const BlockBasedTableOptions& table_options,
127
      const InternalKeyComparatorPtr& internal_key_comparator,
128
      unique_ptr<RandomAccessFileReader>&& base_file,
129
      uint64_t base_file_size,
130
      unique_ptr<TableReader>* table_reader,
131
      DataIndexLoadMode data_index_load_mode = DataIndexLoadMode::LAZY,
132
      PrefetchFilter prefetch_filter = PrefetchFilter::YES,
133
      bool skip_filters = false);
134
135
69.4k
  bool IsSplitSst() const override { return true; }
136
137
  void SetDataFileReader(unique_ptr<RandomAccessFileReader>&& data_file) override;
138
139
  bool PrefixMayMatch(const Slice& internal_key);
140
141
  // Returns a new iterator over the table contents.
142
  // The result of NewIterator() is initially invalid (caller must
143
  // call one of the Seek methods on the iterator before using it).
144
  // @param skip_filters Disables loading/accessing the filter block
145
  InternalIterator* NewIterator(const ReadOptions&, Arena* arena = nullptr,
146
                                bool skip_filters = false) override;
147
148
  // @param skip_filters Disables loading/accessing the filter block.
149
  // key should be internal key in case bloom filters are used.
150
  CHECKED_STATUS Get(
151
      const ReadOptions& readOptions, const Slice& key, GetContext* get_context,
152
      bool skip_filters = false) override;
153
154
  // Pre-fetch the disk blocks that correspond to the key range specified by
155
  // (kbegin, kend). The call will return return error status in the event of
156
  // IO or iteration error.
157
  CHECKED_STATUS Prefetch(const Slice* begin, const Slice* end) override;
158
159
  // Given a key, return an approximate byte offset in the file where
160
  // the data for that key begins (or would begin if the key were
161
  // present in the file).  The returned value is in terms of file
162
  // bytes, and so includes effects like compression of the underlying data.
163
  // E.g., the approximate offset of the last key in the table will
164
  // be close to the file length.
165
  uint64_t ApproximateOffsetOf(const Slice& key) override;
166
167
  // Returns true if the block for the specified key is in cache.
168
  // REQUIRES: key is in this table && block cache enabled
169
  bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);
170
171
  // Set up the table for Compaction. Might change some parameters with
172
  // posix_fadvise
173
  void SetupForCompaction() override;
174
175
  std::shared_ptr<const TableProperties> GetTableProperties() const override;
176
177
  size_t ApproximateMemoryUsage() const override;
178
179
  // convert SST file to a human readable form
180
  CHECKED_STATUS DumpTable(WritableFile* out_file) override;
181
182
  // input_iter: if it is not null, update this one and return it as Iterator
183
  InternalIterator* NewDataBlockIterator(
184
      const ReadOptions& ro, const Slice& index_value, BlockType block_type,
185
      BlockIter* input_iter = nullptr);
186
187
  const ImmutableCFOptions& ioptions();
188
189
  yb::Result<std::string> GetMiddleKey() override;
190
191
  ~BlockBasedTable();
192
193
  bool TEST_filter_block_preloaded() const;
194
  bool TEST_index_reader_loaded() const;
195
196
 private:
197
  template <class TValue>
198
  struct CachableEntry;
199
200
  struct FileReaderWithCachePrefix;
201
202
  struct Rep;
203
  Rep* rep_;
204
205
  class BlockEntryIteratorState;
206
  class IndexIteratorHolder;
207
208
  // Returns filter block handle for fixed-size bloom filter using filter index and filter key.
209
  CHECKED_STATUS GetFixedSizeFilterBlockHandle(const Slice& filter_key,
210
      BlockHandle* filter_block_handle) const;
211
212
  // Returns key to be added to filter or verified against filter based on internal_key.
213
  Slice GetFilterKeyFromInternalKey(const Slice &internal_key) const;
214
215
  // Returns key to be added to filter or verified against filter based on user_key.
216
  Slice GetFilterKeyFromUserKey(const Slice& user_key) const;
217
218
  // If `no_io == true`, we will not try to read filter/index from sst file (except fixed-size
219
  // filter blocks) were they not present in cache yet.
220
  // filter_key is only required when using fixed-size bloom filter in order to use the filter index
221
  // to get the correct filter block.
222
  // Note: even if we check prefix match we still need to get filter based on filter_key, not its
223
  // prefix, because prefix for the key goes to the same filter block as key itself.
224
  CachableEntry<FilterBlockReader> GetFilter(const QueryId query_id,
225
                                             bool no_io = false,
226
                                             const Slice* filter_key = nullptr) const;
227
228
  // Returns index reader.
229
  // If index reader is not stored in either block or internal cache:
230
  // - If read_options.read_tier == kBlockCacheTier: Status::Incomplete error will be returned.
231
  // - If read_options.read_tier != kBlockCacheTier: new index reader will be created and cached.
232
  yb::Result<CachableEntry<IndexReader>> GetIndexReader(const ReadOptions& read_options);
233
234
  // Get the iterator from the index reader.
235
  // If input_iter is not set, return new Iterator
236
  // If input_iter is set, update it and return:
237
  //  - newly created data index iterator in case it was created (if we use multi-level data index,
238
  //    input_iter is an iterator of the top level index, but not the whole index iterator).
239
  //  - nullptr if input_iter is a data index iterator and no new iterators were created.
240
  //
241
  // Note: ErrorIterator with error will be returned if GetIndexReader returned an error.
242
  InternalIterator* NewIndexIterator(const ReadOptions& read_options,
243
                                     BlockIter* input_iter = nullptr);
244
245
  // Read block cache from block caches (if set): block_cache and
246
  // block_cache_compressed.
247
  // On success, Status::OK with be returned and @block will be populated with
248
  // pointer to the block as well as its block handle.
249
  static CHECKED_STATUS GetDataBlockFromCache(
250
      const Slice& block_cache_key, const Slice& compressed_block_cache_key,
251
      Cache* block_cache, Cache* block_cache_compressed, Statistics* statistics,
252
      const ReadOptions& read_options, BlockBasedTable::CachableEntry<Block>* block,
253
      uint32_t format_version, BlockType block_type,
254
      const std::shared_ptr<yb::MemTracker>& mem_tracker);
255
256
  // Put a raw block (maybe compressed) to the corresponding block caches.
257
  // This method will perform decompression against raw_block if needed and then
258
  // populate the block caches.
259
  // On success, Status::OK will be returned; also @block will be populated with
260
  // uncompressed block and its cache handle.
261
  //
262
  // REQUIRES: raw_block is heap-allocated. PutDataBlockToCache() will be
263
  // responsible for releasing its memory if error occurs.
264
  static CHECKED_STATUS PutDataBlockToCache(
265
      const Slice& block_cache_key, const Slice& compressed_block_cache_key,
266
      Cache* block_cache, Cache* block_cache_compressed,
267
      const ReadOptions& read_options, Statistics* statistics,
268
      CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
269
      const std::shared_ptr<yb::MemTracker>& mem_tracker);
270
271
  // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
272
  // after a call to Seek(key), until handle_result returns false.
273
  // May not make such a call if filter policy says that key is not present.
274
  friend class TableCache;
275
  friend class BlockBasedTableBuilder;
276
  friend class BloomFilterAwareFileFilter;
277
278
  void ReadMeta(const Footer& footer);
279
280
  // Create a index reader based on the index type stored in the table.
281
  // Optionally, user can pass a preloaded meta_index_iter for the index that
282
  // need to access extra meta blocks for index construction. This parameter
283
  // helps avoid re-reading meta index block if caller already created one.
284
  CHECKED_STATUS CreateDataBlockIndexReader(
285
      std::unique_ptr<IndexReader>* index_reader,
286
      InternalIterator* preloaded_meta_index_iter = nullptr);
287
288
  bool NonBlockBasedFilterKeyMayMatch(FilterBlockReader* filter, const Slice& filter_key) const;
289
290
  CHECKED_STATUS ReadPropertiesBlock(InternalIterator* meta_iter);
291
292
  CHECKED_STATUS SetupFilter(InternalIterator* meta_iter);
293
294
  // Read the meta block from sst.
295
  static CHECKED_STATUS ReadMetaBlock(
296
      Rep* rep, std::unique_ptr<Block>* meta_block, std::unique_ptr<InternalIterator>* iter);
297
298
  // Create the filter from the filter block.
299
  static FilterBlockReader* ReadFilterBlock(const BlockHandle& filter_block, Rep* rep,
300
      size_t* filter_size = nullptr);
301
302
  // CreateFilterIndexReader from sst
303
  CHECKED_STATUS CreateFilterIndexReader(std::unique_ptr<IndexReader>* filter_index_reader);
304
305
  // Helper function to setup the cache key's prefix for block of file passed within a reader
306
  // instance. Used for both data and metadata files.
307
  static void SetupCacheKeyPrefix(Rep* rep, FileReaderWithCachePrefix* reader_with_cache_prefix);
308
309
  FileReaderWithCachePrefix* GetBlockReader(BlockType block_type);
310
  KeyValueEncodingFormat GetKeyValueEncodingFormat(BlockType block_type);
311
312
72.6k
  explicit BlockBasedTable(Rep* rep) : rep_(rep) {}
313
314
  // Helper functions for DumpTable()
315
  CHECKED_STATUS DumpIndexBlock(WritableFile* out_file);
316
  CHECKED_STATUS DumpDataBlocks(WritableFile* out_file);
317
};
318
319
}  // namespace rocksdb
320
321
#endif  // YB_ROCKSDB_TABLE_BLOCK_BASED_TABLE_READER_H