YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_based_table_reader.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file. See the AUTHORS file for names of contributors.
23
24
#include "yb/rocksdb/table/block_based_table_reader.h"
25
26
#include <string>
27
#include <utility>
28
29
#include "yb/gutil/macros.h"
30
31
#include "yb/rocksdb/cache.h"
32
#include "yb/rocksdb/comparator.h"
33
#include "yb/rocksdb/db/dbformat.h"
34
#include "yb/rocksdb/env.h"
35
#include "yb/rocksdb/filter_policy.h"
36
#include "yb/rocksdb/iterator.h"
37
#include "yb/rocksdb/options.h"
38
#include "yb/rocksdb/statistics.h"
39
#include "yb/rocksdb/table.h"
40
#include "yb/rocksdb/table/block.h"
41
#include "yb/rocksdb/table/block_based_filter_block.h"
42
#include "yb/rocksdb/table/block_based_table_factory.h"
43
#include "yb/rocksdb/table/block_based_table_internal.h"
44
#include "yb/rocksdb/table/block_hash_index.h"
45
#include "yb/rocksdb/table/block_prefix_index.h"
46
#include "yb/rocksdb/table/filter_block.h"
47
#include "yb/rocksdb/table/fixed_size_filter_block.h"
48
#include "yb/rocksdb/table/format.h"
49
#include "yb/rocksdb/table/full_filter_block.h"
50
#include "yb/rocksdb/table/get_context.h"
51
#include "yb/rocksdb/table/index_reader.h"
52
#include "yb/rocksdb/table/internal_iterator.h"
53
#include "yb/rocksdb/table/meta_blocks.h"
54
#include "yb/rocksdb/table/table_properties_internal.h"
55
#include "yb/rocksdb/table/two_level_iterator.h"
56
#include "yb/rocksdb/table_properties.h"
57
#include "yb/rocksdb/util/coding.h"
58
#include "yb/rocksdb/util/file_reader_writer.h"
59
#include "yb/rocksdb/util/perf_context_imp.h"
60
#include "yb/rocksdb/util/statistics.h"
61
#include "yb/rocksdb/util/stop_watch.h"
62
63
#include "yb/util/atomic.h"
64
#include "yb/util/logging.h"
65
#include "yb/util/mem_tracker.h"
66
#include "yb/util/scope_exit.h"
67
#include "yb/util/stats/perf_step_timer.h"
68
#include "yb/util/status_format.h"
69
#include "yb/util/string_util.h"
70
71
namespace rocksdb {
72
73
extern const uint64_t kBlockBasedTableMagicNumber;
74
extern const char kHashIndexPrefixesBlock[];
75
extern const char kHashIndexPrefixesMetadataBlock[];
76
using std::unique_ptr;
77
78
typedef FilterPolicy::FilterType FilterType;
79
80
namespace {
81
82
// Delete the resource that is held by the iterator.
83
template <class ResourceType>
84
1.45M
void DeleteHeldResource(void* arg, void* ignored) {
85
1.45M
  delete reinterpret_cast<ResourceType*>(arg);
86
1.45M
}
87
88
// Delete the entry resided in the cache.
89
template <class Entry>
90
1.99M
void DeleteCachedEntry(const Slice& key, void* value) {
91
1.99M
  auto entry = reinterpret_cast<Entry*>(value);
92
1.99M
  delete entry;
93
1.99M
}
block_based_table_reader.cc:_ZN7rocksdb12_GLOBAL__N_117DeleteCachedEntryINS_5BlockEEEvRKN2yb5SliceEPv
Line
Count
Source
90
1.99M
void DeleteCachedEntry(const Slice& key, void* value) {
91
1.99M
  auto entry = reinterpret_cast<Entry*>(value);
92
1.99M
  delete entry;
93
1.99M
}
block_based_table_reader.cc:_ZN7rocksdb12_GLOBAL__N_117DeleteCachedEntryINS_17FilterBlockReaderEEEvRKN2yb5SliceEPv
Line
Count
Source
90
319
void DeleteCachedEntry(const Slice& key, void* value) {
91
319
  auto entry = reinterpret_cast<Entry*>(value);
92
319
  delete entry;
93
319
}
block_based_table_reader.cc:_ZN7rocksdb12_GLOBAL__N_117DeleteCachedEntryINS_11IndexReaderEEEvRKN2yb5SliceEPv
Line
Count
Source
90
500
void DeleteCachedEntry(const Slice& key, void* value) {
91
500
  auto entry = reinterpret_cast<Entry*>(value);
92
500
  delete entry;
93
500
}
94
95
// Release the cached entry and decrement its ref count.
96
20.8M
void ReleaseCachedEntry(void* arg, void* h) {
97
20.8M
  Cache* cache = reinterpret_cast<Cache*>(arg);
98
20.8M
  Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
99
20.8M
  cache->Release(handle);
100
20.8M
}
101
102
Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
103
                                 Tickers block_cache_miss_ticker,
104
                                 Tickers block_cache_hit_ticker,
105
                                 Statistics* statistics,
106
27.1M
                                 const QueryId query_id) {
107
27.1M
  auto cache_handle = block_cache->Lookup(key, query_id, statistics);
108
27.1M
  if (cache_handle != nullptr) {
109
23.6M
    PERF_COUNTER_ADD(block_cache_hit_count, 1);
110
    // block-type specific cache hit
111
23.6M
    RecordTick(statistics, block_cache_hit_ticker);
112
3.52M
  } else {
113
    // block-type specific cache miss
114
3.52M
    RecordTick(statistics, block_cache_miss_ticker);
115
3.52M
  }
116
117
27.1M
  return cache_handle;
118
27.1M
}
119
120
class NotMatchingFilterBlockReader : public FilterBlockReader {
121
 public:
122
72.6k
  NotMatchingFilterBlockReader() {}
123
  NotMatchingFilterBlockReader(const NotMatchingFilterBlockReader&) = delete;
124
  void operator=(const NotMatchingFilterBlockReader&) = delete;
125
10
  virtual bool KeyMayMatch(const Slice& key, uint64_t block_offset = 0) override {
126
10
    return false; }
127
0
  virtual bool PrefixMayMatch(const Slice& prefix, uint64_t block_offset = 0) override {
128
0
    return false; }
129
0
  virtual size_t ApproximateMemoryUsage() const override { return 0; }
130
};
131
132
}  // namespace
133
134
// Originally following data was stored in BlockBasedTable::Rep and related to a single SST file.
135
// Since SST file is now split into two files - data file and metadata file, all file-related data
136
// was moved into dedicated structure for each file.
137
struct BlockBasedTable::FileReaderWithCachePrefix {
138
  // Pointer to file reader.
139
  unique_ptr<RandomAccessFileReader> reader;
140
141
  // BlockBasedTableReader uses the block cache passed to BlockBasedTableReader::Open inside
142
  // a BlockBasedTableOptions instance to reduce the number of file read requests. If block cache
143
  // pointer in options is nullptr, cache is not used. File blocks are referred in cache by keys,
144
  // which are composed from the following data (see GetCacheKey helper function):
145
  // - cache key prefix (unique for each file), generated by BlockBasedTable::GenerateCachePrefix
146
  // - block offset within a file.
147
  block_based_table::CacheKeyPrefixBuffer cache_key_prefix;
148
149
  // Similar prefix, but for compressed blocks cache:
150
  block_based_table::CacheKeyPrefixBuffer compressed_cache_key_prefix;
151
152
  explicit FileReaderWithCachePrefix(unique_ptr<RandomAccessFileReader>&& _reader) :
153
142k
      reader(std::move(_reader)) {}
154
};
155
156
// CachableEntry represents the entries that *may* be fetched from block cache.
157
//  field `value` is the item we want to get.
158
//  field `cache_handle` is the cache handle to the block cache. If the value
159
//    was not read from cache, `cache_handle` will be nullptr.
160
template <class TValue>
161
struct BlockBasedTable::CachableEntry {
162
  CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
163
50.5M
      : value(_value), cache_handle(_cache_handle) {}
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_17FilterBlockReaderEEC2EPS2_PNS_5Cache6HandleE
Line
Count
Source
163
20.3M
      : value(_value), cache_handle(_cache_handle) {}
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_11IndexReaderEEC2EPS2_PNS_5Cache6HandleE
Line
Count
Source
163
11.7M
      : value(_value), cache_handle(_cache_handle) {}
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_5BlockEEC2EPS2_PNS_5Cache6HandleE
Line
Count
Source
163
18.3M
      : value(_value), cache_handle(_cache_handle) {}
164
26.5M
  CachableEntry() : CachableEntry(nullptr, nullptr) {}
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_17FilterBlockReaderEEC1Ev
Line
Count
Source
164
8.20M
  CachableEntry() : CachableEntry(nullptr, nullptr) {}
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_5BlockEEC1Ev
Line
Count
Source
164
18.3M
  CachableEntry() : CachableEntry(nullptr, nullptr) {}
165
12.5M
  void Release(Cache* cache) {
166
12.5M
    if (cache_handle) {
167
4.87M
      cache->Release(cache_handle);
168
4.87M
      value = nullptr;
169
4.87M
      cache_handle = nullptr;
170
4.87M
    }
171
12.5M
  }
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_17FilterBlockReaderEE7ReleaseEPNS_5CacheE
Line
Count
Source
165
12.5M
  void Release(Cache* cache) {
166
12.5M
    if (cache_handle) {
167
4.87M
      cache->Release(cache_handle);
168
4.87M
      value = nullptr;
169
4.87M
      cache_handle = nullptr;
170
4.87M
    }
171
12.5M
  }
_ZN7rocksdb15BlockBasedTable13CachableEntryINS_11IndexReaderEE7ReleaseEPNS_5CacheE
Line
Count
Source
165
46
  void Release(Cache* cache) {
166
46
    if (cache_handle) {
167
46
      cache->Release(cache_handle);
168
46
      value = nullptr;
169
46
      cache_handle = nullptr;
170
46
    }
171
46
  }
172
173
  TValue* value = nullptr;
174
  // if the entry is from the cache, cache_handle will be populated.
175
  Cache::Handle* cache_handle = nullptr;
176
};
177
178
struct BlockBasedTable::Rep {
179
  struct NotMatchingFilterEntry : public CachableEntry<FilterBlockReader> {
180
72.6k
    NotMatchingFilterEntry() : CachableEntry(&filter, nullptr) {}
181
    NotMatchingFilterBlockReader filter;
182
  };
183
184
  Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
185
      const BlockBasedTableOptions& _table_opt,
186
      const InternalKeyComparatorPtr& _internal_comparator, bool skip_filters,
187
      const DataIndexLoadMode data_index_load_mode_)
188
      : ioptions(_ioptions),
189
        env_options(_env_options),
190
        table_options(_table_opt),
191
        filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()),
192
        filter_key_transformer(filter_policy ? filter_policy->GetKeyTransformer() : nullptr),
193
        comparator(_internal_comparator),
194
        filter_type(FilterType::kNoFilter),
195
        whole_key_filtering(_table_opt.whole_key_filtering),
196
        prefix_filtering(true),
197
72.6k
        data_index_load_mode(data_index_load_mode_) {
198
72.6k
    if (ioptions.block_based_table_mem_tracker) {
199
4.48k
      mem_tracker = ioptions.block_based_table_mem_tracker;
200
68.1k
    } else if (ioptions.mem_tracker) {
201
0
      mem_tracker = yb::MemTracker::FindOrCreateTracker("BlockBasedTable", ioptions.mem_tracker);
202
0
    }
203
72.6k
  }
204
205
  const ImmutableCFOptions& ioptions;
206
  const EnvOptions& env_options;
207
  const BlockBasedTableOptions& table_options;
208
  const FilterPolicy* filter_policy;
209
  const FilterPolicy::KeyTransformer* filter_key_transformer;
210
  InternalKeyComparatorPtr comparator;
211
  const NotMatchingFilterEntry not_matching_filter_entry;
212
  Status status;
213
  std::shared_ptr<FileReaderWithCachePrefix> base_reader_with_cache_prefix;
214
  std::shared_ptr<FileReaderWithCachePrefix> data_reader_with_cache_prefix;
215
216
  // Footer contains the fixed table information
217
  Footer footer;
218
  std::mutex data_index_reader_mutex;
219
  yb::AtomicUniquePtr<IndexReader> data_index_reader;
220
  unique_ptr<BlockEntryIteratorState> data_index_iterator_state;
221
  unique_ptr<IndexReader> filter_index_reader;
222
  unique_ptr<FilterBlockReader> filter;
223
224
  FilterType filter_type;
225
226
  // Handle of fixed-size bloom filter index block or simply filter block for filters of other
227
  // types.
228
  BlockHandle filter_handle;
229
230
  std::shared_ptr<const TableProperties> table_properties;
231
  IndexType index_type = IndexType::kBinarySearch;
232
  bool hash_index_allow_collision = false;
233
  bool whole_key_filtering = false;
234
  bool prefix_filtering = false;
235
  KeyValueEncodingFormat data_block_key_value_encoding_format =
236
      KeyValueEncodingFormat::kKeyDeltaEncodingSharedPrefix;
237
  // TODO(kailiu) It is very ugly to use internal key in table, since table
238
  // module should not be relying on db module. However to make things easier
239
  // and compatible with existing code, we introduce a wrapper that allows
240
  // block to extract prefix without knowing if a key is internal or not.
241
  unique_ptr<SliceTransform> internal_prefix_transform;
242
243
  DataIndexLoadMode data_index_load_mode = static_cast<DataIndexLoadMode>(0);
244
  yb::MemTrackerPtr mem_tracker;
245
};
246
247
// BlockEntryIteratorState doesn't actually store any iterator state and is only used as an adapter
248
// to BlockBasedTable. It is used by TwoLevelIterator and MultiLevelIterator to call BlockBasedTable
249
// functions in order to check if prefix may match or to create a secondary iterator.
250
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
251
 public:
252
  BlockEntryIteratorState(
253
      BlockBasedTable* table, const ReadOptions& read_options, bool skip_filters,
254
      BlockType block_type)
255
      : TwoLevelIteratorState(table->rep_->ioptions.prefix_extractor != nullptr),
256
        table_(table),
257
        read_options_(read_options),
258
        skip_filters_(skip_filters),
259
3.84M
        block_type_(block_type) {}
260
261
11.1M
  InternalIterator* NewSecondaryIterator(const Slice& index_value) override {
262
11.1M
    return table_->NewDataBlockIterator(read_options_, index_value, block_type_);
263
11.1M
  }
264
265
98.2k
  bool PrefixMayMatch(const Slice& internal_key) override {
266
98.2k
    if (read_options_.total_order_seek || skip_filters_) {
267
97.9k
      return true;
268
97.9k
    }
269
324
    return table_->PrefixMayMatch(internal_key);
270
324
  }
271
272
 private:
273
  // Don't own table_. BlockEntryIteratorState should only be stored in iterators or in
274
  // corresponding BlockBasedTable. TableReader (superclass of BlockBasedTable) is only destroyed
275
  // after iterator is deleted.
276
  BlockBasedTable* const table_;
277
  const ReadOptions read_options_;
278
  const bool skip_filters_;
279
  const BlockType block_type_;
280
};
281
282
283
class BlockBasedTable::IndexIteratorHolder {
284
 public:
285
  IndexIteratorHolder(BlockBasedTable* table_reader, ReadOptions read_options)
286
      : iter_holder_(table_reader->NewIndexIterator(read_options, &iter_)),
287
7.94M
        iter_ptr_(iter_holder_ ? iter_holder_.get() : implicit_cast<InternalIterator*>(&iter_)) {}
288
289
7.94M
  InternalIterator* iter() const { return iter_ptr_; }
290
291
 private:
292
  BlockIter iter_;
293
  std::unique_ptr<InternalIterator> iter_holder_;
294
  InternalIterator* iter_ptr_;
295
};
296
297
71.4k
BlockBasedTable::~BlockBasedTable() {
298
71.4k
  delete rep_;
299
71.4k
}
300
301
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep,
302
142k
    FileReaderWithCachePrefix* reader_with_cache_prefix) {
303
142k
  reader_with_cache_prefix->cache_key_prefix.size = 0;
304
142k
  reader_with_cache_prefix->compressed_cache_key_prefix.size = 0;
305
142k
  if (rep->table_options.block_cache != nullptr) {
306
140k
    GenerateCachePrefix(rep->table_options.block_cache.get(),
307
140k
        reader_with_cache_prefix->reader->file(),
308
140k
        &reader_with_cache_prefix->cache_key_prefix);
309
140k
  }
310
142k
  if (rep->table_options.block_cache_compressed != nullptr) {
311
1.66k
    GenerateCachePrefix(rep->table_options.block_cache_compressed.get(),
312
1.66k
        reader_with_cache_prefix->reader->file(),
313
1.66k
        &reader_with_cache_prefix->compressed_cache_key_prefix);
314
1.66k
  }
315
142k
}
316
317
18.3M
KeyValueEncodingFormat BlockBasedTable::GetKeyValueEncodingFormat(const BlockType block_type) {
318
18.3M
  switch (block_type) {
319
14.4M
    case BlockType::kData:
320
14.4M
      return rep_->data_block_key_value_encoding_format;
321
3.89M
    case BlockType::kIndex:
322
3.89M
      return kIndexBlockKeyValueEncodingFormat;
323
0
  }
324
0
  FATAL_INVALID_ENUM_VALUE(BlockType, block_type);
325
0
}
326
327
18.3M
BlockBasedTable::FileReaderWithCachePrefix* BlockBasedTable::GetBlockReader(BlockType block_type) {
328
18.3M
  switch (block_type) {
329
14.4M
    case BlockType::kData:
330
14.4M
      return rep_->data_reader_with_cache_prefix.get();
331
3.89M
    case BlockType::kIndex:
332
3.89M
      return rep_->base_reader_with_cache_prefix.get();
333
0
  }
334
0
  FATAL_INVALID_ENUM_VALUE(BlockType, block_type);
335
0
}
336
337
BloomFilterAwareFileFilter::BloomFilterAwareFileFilter(
338
    const ReadOptions& read_options, const Slice& user_key)
339
8.65M
    : read_options_(read_options), user_key_(user_key.ToBuffer()) {}
340
341
4.34M
bool BloomFilterAwareFileFilter::Filter(TableReader* reader) const {
342
4.34M
  auto table = down_cast<BlockBasedTable*>(reader);
343
4.34M
  if (table->rep_->filter_type == FilterType::kFixedSizeFilter) {
344
4.33M
    const auto filter_key = table->GetFilterKeyFromUserKey(user_key_);
345
4.33M
    if (filter_key.empty()) {
346
0
      return true;
347
0
    }
348
4.33M
    auto filter_entry = table->GetFilter(read_options_.query_id,
349
4.33M
        read_options_.read_tier == kBlockCacheTier /* no_io */, &filter_key);
350
4.33M
    FilterBlockReader* filter = filter_entry.value;
351
    // If bloom filter was not useful, then take this file into account.
352
4.33M
    const bool use_file = table->NonBlockBasedFilterKeyMayMatch(filter, filter_key);
353
4.33M
    if (!use_file) {
354
      // Record that the bloom filter was useful.
355
2.68M
      RecordTick(table->rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
356
2.68M
    }
357
4.33M
    filter_entry.Release(table->rep_->table_options.block_cache.get());
358
4.33M
    return use_file;
359
2.28k
  } else {
360
    // For non fixed-size filters - take file into account. We are only using fixed-size bloom
361
    // filters for DocDB, so not need to support others.
362
2.28k
    return true;
363
2.28k
  }
364
4.34M
}
365
366
namespace {
367
// Return True if table_properties has `user_prop_name` has a `true` value
368
// or it doesn't contain this property (for backward compatible).
369
bool IsFeatureSupported(const TableProperties& table_properties,
370
145k
                        const std::string& user_prop_name, Logger* info_log) {
371
145k
  auto& props = table_properties.user_collected_properties;
372
145k
  auto pos = props.find(user_prop_name);
373
  // Older version doesn't have this value set. Skip this check.
374
145k
  if (pos != props.end()) {
375
145k
    if (pos->second == kPropFalse) {
376
70.1k
      return false;
377
75.1k
    } else if (pos->second != kPropTrue) {
378
0
      RLOG(InfoLogLevel::WARN_LEVEL, info_log,
379
0
          "Property %s has invalidate value %s", user_prop_name.c_str(),
380
0
          pos->second.c_str());
381
0
    }
382
145k
  }
383
75.1k
  return true;
384
145k
}
385
}  // namespace
386
387
Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
388
                             const EnvOptions& env_options,
389
                             const BlockBasedTableOptions& table_options,
390
                             const InternalKeyComparatorPtr& internal_comparator,
391
                             unique_ptr<RandomAccessFileReader>&& base_file,
392
                             uint64_t base_file_size,
393
                             unique_ptr<TableReader>* table_reader,
394
                             DataIndexLoadMode data_index_load_mode,
395
                             PrefetchFilter prefetch_filter,
396
72.6k
                             const bool skip_filters) {
397
72.6k
  table_reader->reset();
398
399
72.6k
  Footer footer;
400
72.6k
  RETURN_NOT_OK(ReadFooterFromFile(
401
72.6k
      base_file.get(), base_file_size, &footer, kBlockBasedTableMagicNumber));
402
72.6k
  if (!BlockBasedTableSupportedVersion(footer.version())) {
403
0
    return STATUS(Corruption,
404
0
        "Unknown Footer version. Maybe this file was created with newer "
405
0
        "version of RocksDB?");
406
0
  }
407
408
  // We've successfully read the footer and the index block: we're
409
  // ready to serve requests.
410
72.6k
  Rep* rep = new BlockBasedTable::Rep(ioptions, env_options, table_options,
411
72.6k
                                      internal_comparator, skip_filters, data_index_load_mode);
412
72.6k
  rep->base_reader_with_cache_prefix =
413
72.6k
      std::make_shared<FileReaderWithCachePrefix>(std::move(base_file));
414
72.6k
  rep->data_reader_with_cache_prefix = rep->base_reader_with_cache_prefix;
415
72.6k
  rep->footer = footer;
416
72.6k
  rep->index_type = table_options.index_type;
417
72.6k
  rep->hash_index_allow_collision = table_options.hash_index_allow_collision;
418
72.6k
  SetupCacheKeyPrefix(rep, rep->base_reader_with_cache_prefix.get());
419
72.6k
  unique_ptr<BlockBasedTable> new_table(new BlockBasedTable(rep));
420
421
  // Read meta index
422
72.6k
  std::unique_ptr<Block> meta;
423
72.6k
  std::unique_ptr<InternalIterator> meta_iter;
424
72.6k
  RETURN_NOT_OK(ReadMetaBlock(rep, &meta, &meta_iter));
425
426
72.6k
  RETURN_NOT_OK(new_table->ReadPropertiesBlock(meta_iter.get()));
427
428
72.6k
  RETURN_NOT_OK(new_table->SetupFilter(meta_iter.get()));
429
430
72.6k
  if (data_index_load_mode == DataIndexLoadMode::PRELOAD_ON_OPEN) {
431
    // Will use block cache for data index access?
432
4
    if (table_options.cache_index_and_filter_blocks) {
433
0
      DCHECK_ONLY_NOTNULL(table_options.block_cache.get());
434
      // Hack: Call NewIndexIterator() to implicitly add index to the
435
      // block_cache
436
0
      unique_ptr<InternalIterator> iter(new_table->NewIndexIterator(ReadOptions::kDefault));
437
0
      RETURN_NOT_OK(iter->status());
438
4
    } else {
439
      // If we don't use block cache for data index access, we'll pre-load it, which will kept in
440
      // member variables in Rep and with a same life-time as this table object.
441
      // NOTE: Table reader objects are cached in table cache (table_cache.cc).
442
4
      std::unique_ptr<IndexReader> index_reader;
443
4
      RETURN_NOT_OK(new_table->CreateDataBlockIndexReader(&index_reader, meta_iter.get()));
444
4
      rep->data_index_reader.reset(index_reader.release());
445
4
    }
446
4
  }
447
448
72.6k
  if (prefetch_filter == PrefetchFilter::YES) {
449
    // pre-fetching of blocks is turned on
450
    // NOTE: Table reader objects are cached in table cache (table_cache.cc).
451
72.6k
    if (rep->filter_policy && rep->filter_type == FilterType::kFixedSizeFilter) {
452
      // TODO: may be put it in block cache instead of table reader in case
453
      // table_options.cache_index_and_filter_blocks is set?
454
3.70k
      RETURN_NOT_OK(new_table->CreateFilterIndexReader(&rep->filter_index_reader));
455
3.70k
    }
456
457
    // Will use block cache for filter blocks access?
458
72.6k
    if (table_options.cache_index_and_filter_blocks) {
459
4.26k
      assert(table_options.block_cache != nullptr);
460
4.26k
      bool corrupted_filter_type = true;
461
4.26k
      switch (rep->filter_type) {
462
25
        case FilterType::kFullFilter:
463
25
          FALLTHROUGH_INTENDED;
464
184
        case FilterType::kBlockBasedFilter: {
465
          // Hack: Call GetFilter() to implicitly add filter to the block_cache
466
184
          auto filter_entry = new_table->GetFilter(kDefaultQueryId);
467
184
          filter_entry.Release(table_options.block_cache.get());
468
184
          corrupted_filter_type = false;
469
184
          break;
470
25
        }
471
3.06k
        case FilterType::kFixedSizeFilter:
472
          // We never pre-cache fixed-size bloom filters.
473
3.06k
          FALLTHROUGH_INTENDED;
474
4.08k
        case FilterType::kNoFilter:
475
4.08k
          corrupted_filter_type = false;
476
4.08k
          break;
477
4.26k
      }
478
4.26k
      if (corrupted_filter_type) {
479
0
        RLOG(InfoLogLevel::FATAL_LEVEL, rep->ioptions.info_log, "Corrupted bloom filter type: %d",
480
0
            rep->filter_type);
481
0
        assert(false);
482
0
        return STATUS_SUBSTITUTE(Corruption, "Corrupted bloom filter type: $0", rep->filter_type);
483
0
      }
484
68.3k
    } else {
485
      // If we don't use block cache for filter access, we'll pre-load these blocks, which will
486
      // kept in member variables in Rep and with a same life-time as this table object.
487
68.3k
      bool corrupted_filter_type = true;
488
68.3k
      switch (rep->filter_type) {
489
3.97k
        case FilterType::kFullFilter:
490
3.97k
          FALLTHROUGH_INTENDED;
491
4.90k
        case FilterType::kBlockBasedFilter:
492
4.90k
          rep->filter.reset(ReadFilterBlock(rep->filter_handle, rep, nullptr));
493
4.90k
          corrupted_filter_type = false;
494
4.90k
          break;
495
644
        case FilterType::kFixedSizeFilter:
496
          // We never pre-load fixed-size bloom filters.
497
644
          FALLTHROUGH_INTENDED;
498
63.4k
        case FilterType::kNoFilter:
499
63.4k
          corrupted_filter_type = false;
500
63.4k
          break;
501
68.3k
      }
502
68.3k
      if (corrupted_filter_type) {
503
0
        RLOG(InfoLogLevel::FATAL_LEVEL, rep->ioptions.info_log, "Corrupted bloom filter type: %d",
504
0
            rep->filter_type);
505
0
        assert(false);
506
0
        return STATUS_SUBSTITUTE(Corruption, "Corrupted bloom filter type: $0", rep->filter_type);
507
0
      }
508
72.6k
    }
509
72.6k
  }
510
511
  // Filters are checked before seeking the index.
512
72.6k
  const bool skip_filters_for_index = true;
513
72.6k
  rep->data_index_iterator_state = std::make_unique<BlockEntryIteratorState>(
514
72.6k
      new_table.get(), ReadOptions::kDefault, skip_filters_for_index, BlockType::kIndex);
515
516
72.6k
  *table_reader = std::move(new_table);
517
518
72.6k
  return Status::OK();
519
72.6k
}
520
521
72.6k
Status BlockBasedTable::ReadPropertiesBlock(InternalIterator* meta_iter) {
522
  // Read the properties
523
72.6k
  bool found_properties_block = true;
524
72.6k
  auto s = SeekToPropertiesBlock(meta_iter, &found_properties_block);
525
526
72.6k
  if (!s.ok()) {
527
0
    RLOG(InfoLogLevel::WARN_LEVEL, rep_->ioptions.info_log,
528
0
        "Cannot seek to properties block from file: %s",
529
0
        s.ToString().c_str());
530
0
    return s;
531
0
  }
532
533
72.6k
  if (found_properties_block) {
534
72.6k
    s = meta_iter->status();
535
72.6k
    TableProperties* table_properties = nullptr;
536
72.6k
    if (s.ok()) {
537
72.6k
      s = ReadProperties(
538
72.6k
            meta_iter->value(), rep_->base_reader_with_cache_prefix->reader.get(),
539
72.6k
            rep_->footer, rep_->ioptions.env, rep_->ioptions.info_log, &table_properties);
540
72.6k
    }
541
542
72.6k
    if (!s.ok()) {
543
0
      RLOG(InfoLogLevel::WARN_LEVEL, rep_->ioptions.info_log,
544
0
        "Encountered error while reading data from properties "
545
0
        "block %s", s.ToString().c_str());
546
0
      return s;
547
0
    }
548
72.6k
    rep_->table_properties.reset(table_properties);
549
18.4E
  } else {
550
18.4E
    RLOG(InfoLogLevel::ERROR_LEVEL, rep_->ioptions.info_log,
551
18.4E
        "Cannot find Properties block from file.");
552
18.4E
  }
553
554
  // Determine whether whole key filtering is supported.
555
72.6k
  if (rep_->table_properties) {
556
72.6k
    rep_->whole_key_filtering &=
557
72.6k
        IsFeatureSupported(*(rep_->table_properties),
558
72.6k
                           BlockBasedTablePropertyNames::kWholeKeyFiltering,
559
72.6k
                           rep_->ioptions.info_log);
560
72.6k
    rep_->prefix_filtering &= IsFeatureSupported(
561
72.6k
        *(rep_->table_properties),
562
72.6k
        BlockBasedTablePropertyNames::kPrefixFiltering, rep_->ioptions.info_log);
563
564
72.6k
    auto& props = rep_->table_properties->user_collected_properties;
565
72.6k
    auto it = props.find(BlockBasedTablePropertyNames::kDataBlockKeyValueEncodingFormat);
566
72.6k
    if (it != props.end()) {
567
72.6k
      rep_->data_block_key_value_encoding_format =
568
72.6k
          static_cast<KeyValueEncodingFormat>(DecodeFixed8(it->second.c_str()));
569
72.6k
    }
570
72.6k
  }
571
572
72.6k
  return Status::OK();
573
72.6k
}
574
575
72.6k
Status BlockBasedTable::SetupFilter(InternalIterator* meta_iter) {
576
  // Find filter handle and filter type.
577
72.6k
  if (!rep_->filter_policy) {
578
63.6k
    return Status::OK();
579
63.6k
  }
580
9.00k
  const auto& table_filter_policy_name = rep_->table_properties->filter_policy_name;
581
9.00k
  if (rep_->filter_policy->Name() != table_filter_policy_name &&
582
5
      !table_filter_policy_name.empty()) {
583
    // SST file has been written using another filter policy - use it for reading if it is still
584
    // supported.
585
4
    const FilterPolicy* table_filter_policy = nullptr;
586
4
    const auto& policies = rep_->table_options.supported_filter_policies;
587
4
    if (policies) {
588
4
      const auto it = policies->find(table_filter_policy_name);
589
4
      if (it != policies->end()) {
590
4
        table_filter_policy = it->second.get();
591
4
      }
592
4
    }
593
4
    if (!table_filter_policy) {
594
0
      rep_->filter_policy = nullptr;
595
0
      rep_->filter_key_transformer = nullptr;
596
0
      const auto error_message = yb::Format(
597
0
        "Filter policy '$0' is not supported, not using use bloom filters for reading '$1'",
598
0
          table_filter_policy_name,
599
0
          rep_->base_reader_with_cache_prefix->reader->file()->filename());
600
0
      RLOG(InfoLogLevel::ERROR_LEVEL, rep_->ioptions.info_log, error_message.c_str());
601
      // For testing in debug build we want to fail in case some filter policy is not supported, but
602
      // for production we prefer to continue operation with lower performance due to lack of
603
      // supported bloom filters for this file. And eventually during compaction this file will
604
      // be replaced and latest version of filter policy will be used.
605
0
#ifndef NDEBUG
606
0
      return STATUS(IllegalState, error_message);
607
#else
608
      return Status::OK();
609
#endif
610
0
    }
611
4
    rep_->filter_policy = table_filter_policy;
612
4
    rep_->filter_key_transformer = table_filter_policy->GetKeyTransformer();
613
4
  }
614
615
9.00k
  for (const auto& prefix : {block_based_table::kFullFilterBlockPrefix,
616
9.00k
                             block_based_table::kFilterBlockPrefix,
617
17.9k
                             block_based_table::kFixedSizeFilterBlockPrefix}) {
618
    // Unsuccessful read implies we should not use filter.
619
17.9k
    std::string filter_block_key = prefix;
620
17.9k
    filter_block_key.append(rep_->filter_policy->Name());
621
17.9k
    if (FindMetaBlock(meta_iter, filter_block_key, &rep_->filter_handle).ok()) {
622
8.79k
      if (prefix == block_based_table::kFullFilterBlockPrefix) {
623
4.00k
        rep_->filter_type = FilterType::kFullFilter;
624
4.79k
      } else if (prefix == block_based_table::kFilterBlockPrefix) {
625
1.08k
        rep_->filter_type = FilterType::kBlockBasedFilter;
626
3.70k
      } else if (prefix == block_based_table::kFixedSizeFilterBlockPrefix) {
627
3.70k
        rep_->filter_type = FilterType::kFixedSizeFilter;
628
1
      } else {
629
        // That means we have memory corruption, so we should fail.
630
1
        RLOG(
631
1
            InfoLogLevel::FATAL_LEVEL, rep_->ioptions.info_log, "Invalid filter block prefix: %s",
632
1
            prefix);
633
1
        assert(false);
634
1
        return STATUS(Corruption, "Invalid filter block prefix", prefix);
635
1
      }
636
8.79k
      break;
637
8.79k
    }
638
17.9k
  }
639
640
9.00k
  return Status::OK();
641
9.00k
}
642
643
69.4k
void BlockBasedTable::SetDataFileReader(unique_ptr<RandomAccessFileReader> &&data_file) {
644
69.4k
  rep_->data_reader_with_cache_prefix =
645
69.4k
      std::make_shared<FileReaderWithCachePrefix>(std::move(data_file));
646
69.4k
  SetupCacheKeyPrefix(rep_, rep_->data_reader_with_cache_prefix.get());
647
69.4k
}
648
649
namespace {
650
void SetupFileReaderForCompaction(const Options::AccessHint &access_hint,
651
89.3k
    RandomAccessFileReader *reader) {
652
89.3k
  if (reader != nullptr) {
653
89.3k
    switch (access_hint) {
654
0
      case Options::NONE:
655
0
        break;
656
89.3k
      case Options::NORMAL:
657
89.3k
        reader->file()->Hint(RandomAccessFile::NORMAL);
658
89.3k
        break;
659
0
      case Options::SEQUENTIAL:
660
0
        reader->file()->Hint(RandomAccessFile::SEQUENTIAL);
661
0
        break;
662
0
      case Options::WILLNEED:
663
0
        reader->file()->Hint(RandomAccessFile::WILLNEED);
664
0
        break;
665
0
      default:
666
0
        assert(false);
667
89.3k
    }
668
89.3k
  }
669
89.3k
}
670
} // anonymous namespace
671
672
44.6k
void BlockBasedTable::SetupForCompaction() {
673
44.6k
  auto access_hint = rep_->ioptions.access_hint_on_compaction_start;
674
44.6k
  ::rocksdb::SetupFileReaderForCompaction(access_hint,
675
44.6k
      rep_->base_reader_with_cache_prefix->reader.get());
676
44.6k
  ::rocksdb::SetupFileReaderForCompaction(access_hint,
677
44.6k
      rep_->data_reader_with_cache_prefix->reader.get());
678
44.6k
}
679
680
std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
681
70.2k
    const {
682
70.2k
  return rep_->table_properties;
683
70.2k
}
684
685
29
size_t BlockBasedTable::ApproximateMemoryUsage() const {
686
29
  size_t usage = 0;
687
29
  if (rep_->filter) {
688
0
    usage += rep_->filter->ApproximateMemoryUsage();
689
0
  }
690
29
  if (rep_->filter_index_reader) {
691
0
    usage += rep_->filter_index_reader->ApproximateMemoryUsage();
692
0
  }
693
29
  IndexReader* data_index_reader = rep_->data_index_reader.get(std::memory_order_relaxed);
694
29
  if (data_index_reader) {
695
29
    usage += data_index_reader->ApproximateMemoryUsage();
696
29
  }
697
29
  return usage;
698
29
}
699
700
// Load the meta-block from the file. On success, return the loaded meta block
701
// and its iterator.
702
Status BlockBasedTable::ReadMetaBlock(Rep* rep,
703
                                      std::unique_ptr<Block>* meta_block,
704
74.3k
                                      std::unique_ptr<InternalIterator>* iter) {
705
  // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
706
  // it is an empty block.
707
  // TODO: we never really verify check sum for meta index block
708
74.3k
  std::unique_ptr<Block> meta;
709
74.3k
  Status s = block_based_table::ReadBlockFromFile(
710
74.3k
      rep->base_reader_with_cache_prefix->reader.get(),
711
74.3k
      rep->footer,
712
74.3k
      ReadOptions::kDefault,
713
74.3k
      rep->footer.metaindex_handle(),
714
74.3k
      &meta,
715
74.3k
      rep->ioptions.env,
716
74.3k
      rep->mem_tracker);
717
718
74.3k
  if (!s.ok()) {
719
0
    RLOG(InfoLogLevel::ERROR_LEVEL, rep->ioptions.info_log,
720
0
        "Encountered error while reading data from properties"
721
0
        " block %s", s.ToString().c_str());
722
0
    return s;
723
0
  }
724
725
74.3k
  *meta_block = std::move(meta);
726
  // meta block uses bytewise comparator.
727
74.3k
  iter->reset(
728
74.3k
      meta_block->get()->NewIterator(BytewiseComparator(), kMetaIndexBlockKeyValueEncodingFormat));
729
74.3k
  return Status::OK();
730
74.3k
}
731
732
namespace {
733
734
18.3M
Tickers GetBlockCacheMissTicker(BlockType block_type) {
735
18.3M
  switch (block_type) {
736
14.4M
    case BlockType::kData:
737
14.4M
      return BLOCK_CACHE_DATA_MISS;
738
3.89M
    case BlockType::kIndex:
739
3.89M
      return BLOCK_CACHE_INDEX_MISS;
740
0
  }
741
0
  FATAL_INVALID_ENUM_VALUE(BlockType, block_type);
742
0
}
743
744
18.3M
Tickers GetBlockCacheHitTicker(BlockType block_type) {
745
18.3M
  switch (block_type) {
746
14.4M
    case BlockType::kData:
747
14.4M
      return BLOCK_CACHE_DATA_HIT;
748
3.89M
    case BlockType::kIndex:
749
3.89M
      return BLOCK_CACHE_INDEX_HIT;
750
0
  }
751
0
  FATAL_INVALID_ENUM_VALUE(BlockType, block_type);
752
0
}
753
754
} // namespace
755
756
Status BlockBasedTable::GetDataBlockFromCache(
757
    const Slice& block_cache_key, const Slice& compressed_block_cache_key,
758
    Cache* block_cache, Cache* block_cache_compressed, Statistics* statistics,
759
    const ReadOptions& read_options, BlockBasedTable::CachableEntry<Block>* block,
760
    uint32_t format_version, BlockType block_type,
761
18.3M
    const std::shared_ptr<yb::MemTracker>& mem_tracker) {
762
18.3M
  Status s;
763
18.3M
  Block* compressed_block = nullptr;
764
18.3M
  Cache::Handle* block_cache_compressed_handle = nullptr;
765
766
  // Lookup uncompressed cache first
767
18.3M
  if (block_cache != nullptr) {
768
18.3M
    block->cache_handle =
769
18.3M
        GetEntryFromCache(
770
18.3M
            block_cache, block_cache_key, GetBlockCacheMissTicker(block_type),
771
18.3M
            GetBlockCacheHitTicker(block_type), statistics, read_options.query_id);
772
18.3M
    if (block->cache_handle != nullptr) {
773
14.8M
      block->value =
774
14.8M
          static_cast<Block*>(block_cache->Value(block->cache_handle));
775
14.8M
      return s;
776
14.8M
    }
777
3.52M
  }
778
779
  // If not found, search from the compressed block cache.
780
3.52M
  assert(block->cache_handle == nullptr && block->value == nullptr);
781
782
3.52M
  if (block_cache_compressed == nullptr) {
783
3.51M
    return s;
784
3.51M
  }
785
786
8.50k
  assert(!compressed_block_cache_key.empty());
787
8.50k
  block_cache_compressed_handle =
788
8.50k
      block_cache_compressed->Lookup(compressed_block_cache_key, read_options.query_id);
789
  // if we found in the compressed cache, then uncompress and insert into
790
  // uncompressed cache
791
9.63k
  if (block_cache_compressed_handle == nullptr) {
792
9.63k
    RecordTick(statistics, BLOCK_CACHE_COMPRESSED_MISS);
793
9.63k
    return s;
794
9.63k
  }
795
796
  // found compressed block
797
18.4E
  RecordTick(statistics, BLOCK_CACHE_COMPRESSED_HIT);
798
18.4E
  compressed_block = static_cast<Block*>(
799
18.4E
      block_cache_compressed->Value(block_cache_compressed_handle));
800
18.4E
  assert(compressed_block->compression_type() != kNoCompression);
801
802
  // Retrieve the uncompressed contents into a new buffer
803
18.4E
  BlockContents contents;
804
18.4E
  s = UncompressBlockContents(compressed_block->data(), compressed_block->size(), &contents,
805
18.4E
                              format_version, mem_tracker);
806
807
  // Insert uncompressed block into block cache
808
18.4E
  if (s.ok()) {
809
148
    block->value = new Block(std::move(contents));  // uncompressed block
810
148
    assert(block->value->compression_type() == kNoCompression);
811
148
    if (block_cache != nullptr && block->value->cachable() &&
812
80
        read_options.fill_cache) {
813
80
      s = block_cache->Insert(block_cache_key, read_options.query_id, block->value,
814
80
                              block->value->usable_size(), &DeleteCachedEntry<Block>,
815
80
                              &block->cache_handle, statistics);
816
80
      if (!s.ok()) {
817
0
        delete block->value;
818
0
        block->value = nullptr;
819
0
      }
820
80
    }
821
148
  }
822
823
  // Release hold on compressed cache entry
824
18.4E
  block_cache_compressed->Release(block_cache_compressed_handle);
825
18.4E
  return s;
826
18.4E
}
827
828
Status BlockBasedTable::PutDataBlockToCache(
829
    const Slice& block_cache_key, const Slice& compressed_block_cache_key,
830
    Cache* block_cache, Cache* block_cache_compressed,
831
    const ReadOptions& read_options, Statistics* statistics,
832
    CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
833
2.13M
    const std::shared_ptr<yb::MemTracker>& mem_tracker) {
834
2.13M
  assert(raw_block->compression_type() == kNoCompression ||
835
2.13M
         block_cache_compressed != nullptr);
836
837
2.13M
  Status s;
838
  // Retrieve the uncompressed contents into a new buffer
839
2.13M
  BlockContents contents;
840
2.13M
  if (raw_block->compression_type() != kNoCompression) {
841
8.13k
    s = UncompressBlockContents(raw_block->data(), raw_block->size(), &contents,
842
8.13k
                                format_version, mem_tracker);
843
8.13k
  }
844
2.13M
  if (!s.ok()) {
845
0
    delete raw_block;
846
0
    return s;
847
0
  }
848
849
2.13M
  if (raw_block->compression_type() != kNoCompression) {
850
8.14k
    block->value = new Block(std::move(contents));  // uncompressed block
851
2.12M
  } else {
852
2.12M
    block->value = raw_block;
853
2.12M
    raw_block = nullptr;
854
2.12M
  }
855
856
  // Insert compressed block into compressed block cache.
857
  // Release the hold on the compressed cache entry immediately.
858
2.13M
  if (block_cache_compressed != nullptr && raw_block != nullptr &&
859
8.14k
      raw_block->cachable()) {
860
8.14k
    s = block_cache_compressed->Insert(compressed_block_cache_key, read_options.query_id, raw_block,
861
8.14k
                                       raw_block->usable_size(), &DeleteCachedEntry<Block>);
862
8.14k
    if (s.ok()) {
863
      // Avoid the following code to delete this cached block.
864
8.14k
      raw_block = nullptr;
865
8.14k
      RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD);
866
0
    } else {
867
0
      RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
868
0
    }
869
8.14k
  }
870
2.13M
  delete raw_block;
871
872
  // insert into uncompressed block cache
873
2.13M
  assert((block->value->compression_type() == kNoCompression));
874
2.13M
  if (block_cache != nullptr && block->value->cachable()) {
875
2.12M
    s = block_cache->Insert(block_cache_key, read_options.query_id, block->value,
876
2.12M
                            block->value->usable_size(),
877
2.12M
                            &DeleteCachedEntry<Block>, &block->cache_handle, statistics);
878
2.12M
    if (!s.ok()) {
879
0
      delete block->value;
880
0
      block->value = nullptr;
881
0
    }
882
2.12M
  }
883
884
2.13M
  return s;
885
2.13M
}
886
887
3.70k
Status BlockBasedTable::CreateFilterIndexReader(std::unique_ptr<IndexReader>* filter_index_reader) {
888
3.70k
  auto base_file_reader = rep_->base_reader_with_cache_prefix->reader.get();
889
3.70k
  auto env = rep_->ioptions.env;
890
3.70k
  auto footer = rep_->footer;
891
3.70k
  return BinarySearchIndexReader::Create(base_file_reader, footer, rep_->filter_handle, env,
892
3.70k
      SharedBytewiseComparator(), filter_index_reader, rep_->mem_tracker);
893
3.70k
}
894
895
FilterBlockReader* BlockBasedTable::ReadFilterBlock(const BlockHandle& filter_handle, Rep* rep,
896
6.46k
    size_t* filter_size) {
897
  // TODO: We might want to unify with ReadBlockFromFile() if we start
898
  // requiring checksum verification in Table::Open.
899
6.46k
  if (rep->filter_type == FilterType::kNoFilter) {
900
0
    return nullptr;
901
0
  }
902
6.46k
  BlockContents block;
903
6.46k
  if (!ReadBlockContents(
904
6.46k
           rep->base_reader_with_cache_prefix->reader.get(), rep->footer, ReadOptions::kDefault,
905
0
           filter_handle, &block, rep->ioptions.env, rep->mem_tracker, false).ok()) {
906
    // Error reading the block
907
0
    return nullptr;
908
0
  }
909
910
6.46k
  if (filter_size) {
911
1.56k
    *filter_size = block.data.size();
912
1.56k
  }
913
914
6.46k
  assert(rep->filter_policy);
915
916
6.46k
  switch (rep->filter_type) {
917
0
    case FilterType::kNoFilter:
918
      // Shouldn't happen, since we already checked for that above. In case of memory corruption
919
      // will be caught after switch statement.
920
0
      break;
921
1.09k
    case FilterType::kBlockBasedFilter:
922
1.09k
      return new BlockBasedFilterBlockReader(
923
1.05k
          rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
924
1.09k
          rep->table_options, rep->whole_key_filtering, std::move(block));
925
4.00k
    case FilterType::kFullFilter: {
926
4.00k
      auto filter_bits_reader = rep->filter_policy->GetFilterBitsReader(block.data);
927
4.00k
      assert(filter_bits_reader);
928
4.00k
      return new FullFilterBlockReader(
929
3.97k
          rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
930
4.00k
          rep->whole_key_filtering, std::move(block), filter_bits_reader);
931
0
    }
932
1.37k
    case FilterType::kFixedSizeFilter:
933
1.37k
      return new FixedSizeFilterBlockReader(
934
1.37k
          rep->prefix_filtering ? rep->ioptions.prefix_extractor : nullptr,
935
1.37k
          rep->table_options, rep->whole_key_filtering, std::move(block));
936
0
      break;
937
0
  }
938
0
  RLOG(InfoLogLevel::FATAL_LEVEL, rep->ioptions.info_log, "Corrupted filter_type: %d",
939
0
      rep->filter_type);
940
0
  return nullptr;
941
0
}
942
943
Status BlockBasedTable::GetFixedSizeFilterBlockHandle(const Slice& filter_key,
944
4.55M
    BlockHandle* filter_block_handle) const {
945
  // Determine block of fixed-size bloom filter using filter index.
946
4.55M
  BlockIter fiter;
947
4.55M
  rep_->filter_index_reader->NewIterator(&fiter,
948
      // Following parameters are ignored by BinarySearchIndexReader which we use as
949
      // filter_index_reader.
950
4.55M
      nullptr /* index_iterator_state */, true /* total_order_seek */);
951
4.55M
  fiter.Seek(filter_key);
952
4.55M
  if (fiter.Valid()) {
953
4.55M
    Slice filter_block_handle_encoded = fiter.value();
954
4.55M
    return filter_block_handle->DecodeFrom(&filter_block_handle_encoded);
955
4.20k
  } else {
956
    // We are beyond the index, that means key is absent in filter, we use null block handle
957
    // stub to indicate that.
958
4.20k
    filter_block_handle->set_offset(0);
959
4.20k
    filter_block_handle->set_size(0);
960
4.20k
    return Status::OK();
961
4.20k
  }
962
4.55M
}
963
964
7.97M
Slice BlockBasedTable::GetFilterKeyFromInternalKey(const Slice &internal_key) const {
965
7.97M
  return GetFilterKeyFromUserKey(ExtractUserKey(internal_key));
966
7.97M
}
967
968
12.3M
Slice BlockBasedTable::GetFilterKeyFromUserKey(const Slice &user_key) const {
969
12.3M
  return rep_->filter_key_transformer ?
970
7.57M
      rep_->filter_key_transformer->Transform(user_key) : user_key;
971
12.3M
}
972
973
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
974
    const QueryId query_id,
975
    bool no_io,
976
12.1M
    const Slice* filter_key) const {
977
12.1M
  const bool is_fixed_size_filter = rep_->filter_type == FilterType::kFixedSizeFilter;
978
979
  // Key is required for fixed size filter.
980
12.1M
  assert(!is_fixed_size_filter || filter_key != nullptr);
981
982
  // If cache_index_and_filter_blocks is false, filter (except fixed-size filter) should be
983
  // pre-populated.
984
  // We will return rep_->filter anyway. rep_->filter can be nullptr if filter
985
  // read fails at Open() time. We don't want to reload again since it will
986
  // most probably fail again.
987
  // Note: rep_->filter can be nullptr also if Open was called with
988
  // prefetch_index_and_filter == false. That means bloom filters are not be used if
989
  // both prefetch_index_and_filter and table_options.cache_index_and_filter_blocks are false.
990
12.1M
  if (!rep_->table_options.cache_index_and_filter_blocks && !is_fixed_size_filter) {
991
7.24M
    return {rep_->filter.get(), nullptr /* cache handle */};
992
7.24M
  }
993
994
4.86M
  PERF_TIMER_GUARD(read_filter_block_nanos);
995
996
4.86M
  Cache* block_cache = rep_->table_options.block_cache.get();
997
4.86M
  if (rep_->filter_policy == nullptr /* do not use filter */ ||
998
4.87M
      block_cache == nullptr /* no block cache at all */) {
999
    // If we get here, we have:
1000
    // table_options.cache_index_and_filter_blocks || is_fixed_size_filter
1001
    // table_options.block_cache == nullptr
1002
0
    return {nullptr /* filter */, nullptr /* cache handle */};
1003
0
  }
1004
1005
4.86M
  const BlockHandle* filter_block_handle;
1006
  // Determine filter block handle
1007
4.86M
  BlockHandle fixed_size_filter_block_handle;
1008
4.86M
  if (is_fixed_size_filter) {
1009
4.55M
    Status s = GetFixedSizeFilterBlockHandle(*filter_key, &fixed_size_filter_block_handle);
1010
4.55M
    if (s.ok()) {
1011
4.55M
      if (fixed_size_filter_block_handle.IsNull()) {
1012
        // Key is beyond filter index - return stub filter.
1013
10
        return rep_->not_matching_filter_entry;
1014
10
      }
1015
4.55M
      filter_block_handle = &fixed_size_filter_block_handle;
1016
363
    } else {
1017
      // If we failed to decode filter block handle from filter index we will just log error in
1018
      // production to continue operation in case of just filter corruption,
1019
      // but we should fail in debug and under tests to be able to catch possible bugs.
1020
363
      RLOG(InfoLogLevel::ERROR_LEVEL, rep_->ioptions.info_log,
1021
363
          "Failed to decode fixed-size filter block handle from filter index.");
1022
363
      FAIL_IF_NOT_PRODUCTION();
1023
363
      return {nullptr /* filter */, nullptr /* cache handle */};
1024
363
    }
1025
318k
  } else {
1026
318k
    filter_block_handle = &rep_->filter_handle;
1027
318k
  }
1028
1029
  // Fetching from the cache
1030
4.86M
  char cache_key_buffer[block_based_table::kCacheKeyBufferSize];
1031
4.86M
  auto filter_block_cache_key = GetCacheKey(rep_->base_reader_with_cache_prefix->cache_key_prefix,
1032
4.86M
      *filter_block_handle, cache_key_buffer);
1033
1034
4.86M
  Statistics* statistics = rep_->ioptions.statistics;
1035
4.86M
  auto cache_handle = GetEntryFromCache(block_cache, filter_block_cache_key,
1036
4.86M
      BLOCK_CACHE_FILTER_MISS, BLOCK_CACHE_FILTER_HIT, statistics, query_id);
1037
1038
4.86M
  FilterBlockReader* filter = nullptr;
1039
4.86M
  if (cache_handle != nullptr) {
1040
4.86M
    filter = static_cast<FilterBlockReader*>(block_cache->Value(cache_handle));
1041
18.4E
  } else if (no_io && rep_->filter_type != FilterType::kFixedSizeFilter) {
1042
    // Do not invoke any io.
1043
0
    return CachableEntry<FilterBlockReader>();
1044
18.4E
  } else {
1045
    // For fixed-size filter we don't prefetch all filter blocks and ignore no_io parameter always
1046
    // loading necessary filter block through block cache.
1047
18.4E
    size_t filter_size = 0;
1048
18.4E
    filter = ReadFilterBlock(*filter_block_handle, rep_, &filter_size);
1049
18.4E
    if (filter != nullptr) {
1050
1.56k
      assert(filter_size > 0);
1051
1.56k
      Status s = block_cache->Insert(filter_block_cache_key, query_id,
1052
1.56k
                                     filter, filter_size,
1053
1.56k
                                     &DeleteCachedEntry<FilterBlockReader>, &cache_handle,
1054
1.56k
                                     statistics);
1055
1.56k
      if (!s.ok()) {
1056
0
        delete filter;
1057
0
        return CachableEntry<FilterBlockReader>();
1058
0
      }
1059
4.86M
    }
1060
18.4E
  }
1061
1062
4.86M
  return { filter, cache_handle };
1063
4.86M
}
1064
1065
namespace {
1066
1067
1
InternalIterator* ReturnErrorIterator(const Status& status, BlockIter* input_iter) {
1068
1
  if (input_iter != nullptr) {
1069
0
    input_iter->SetStatus(status);
1070
0
    return input_iter;
1071
1
  } else {
1072
1
    return NewErrorInternalIterator(status);
1073
1
  }
1074
1
}
1075
1076
88
Status ReturnNoIOError() {
1077
88
  return STATUS(Incomplete, "no blocking io");
1078
88
}
1079
1080
} // namespace
1081
1082
yb::Result<BlockBasedTable::CachableEntry<IndexReader>> BlockBasedTable::GetIndexReader(
1083
11.7M
    const ReadOptions& read_options) {
1084
11.7M
  auto* index_reader = rep_->data_index_reader.get(std::memory_order_acquire);
1085
11.7M
  if (index_reader) {
1086
    // Index reader has already been pre-populated.
1087
7.72M
    return BlockBasedTable::CachableEntry<IndexReader>{index_reader, /* cache_handle =*/ nullptr};
1088
7.72M
  }
1089
4.00M
  PERF_TIMER_GUARD(read_index_block_nanos);
1090
1091
4.00M
  const bool no_io = read_options.read_tier == kBlockCacheTier;
1092
4.00M
  Cache* const block_cache = rep_->table_options.block_cache.get();
1093
1094
4.00M
  if (block_cache && (rep_->data_index_load_mode == DataIndexLoadMode::USE_CACHE ||
1095
4.00M
      rep_->table_options.cache_index_and_filter_blocks)) {
1096
3.93M
    char cache_key[block_based_table::kCacheKeyBufferSize];
1097
3.93M
    auto key = GetCacheKey(rep_->base_reader_with_cache_prefix->cache_key_prefix,
1098
3.93M
        rep_->footer.index_handle(), cache_key);
1099
3.93M
    Statistics* statistics = rep_->ioptions.statistics;
1100
3.93M
    auto cache_handle =
1101
3.93M
        GetEntryFromCache(block_cache, key, BLOCK_CACHE_INDEX_MISS,
1102
3.93M
            BLOCK_CACHE_INDEX_HIT, statistics, read_options.query_id);
1103
1104
3.93M
    if (cache_handle == nullptr && no_io) {
1105
0
      return ReturnNoIOError();
1106
0
    }
1107
1108
3.93M
    if (cache_handle != nullptr) {
1109
3.93M
      index_reader = static_cast<IndexReader*>(block_cache->Value(cache_handle));
1110
2.17k
    } else {
1111
      // Create index reader and put it in the cache.
1112
2.17k
      std::unique_ptr<IndexReader> index_reader_unique;
1113
2.17k
      RETURN_NOT_OK(CreateDataBlockIndexReader(&index_reader_unique));
1114
2.17k
      RETURN_NOT_OK(block_cache->Insert(
1115
2.17k
          key, read_options.query_id, index_reader_unique.get(), index_reader_unique->usable_size(),
1116
2.17k
          &DeleteCachedEntry<IndexReader>, &cache_handle, statistics));
1117
2.17k
      assert(cache_handle);
1118
2.17k
      index_reader = index_reader_unique.release();
1119
2.17k
    }
1120
1121
3.93M
    return BlockBasedTable::CachableEntry<IndexReader>{index_reader, cache_handle};
1122
68.9k
  } else {
1123
68.9k
    if (no_io) {
1124
0
      return ReturnNoIOError();
1125
0
    }
1126
    // Note that we've already performed first check at the beginning of method.
1127
68.9k
    std::lock_guard<std::mutex> lock(rep_->data_index_reader_mutex);
1128
68.9k
    index_reader = rep_->data_index_reader.get(std::memory_order_relaxed);
1129
68.9k
    if (!index_reader) {
1130
      // preloaded_meta_index_iter is not needed for kBinarySearch data index which DocDB uses,
1131
      // for kHashSearch data index it will do one more access to file to load it.
1132
      // TODO: if we need to optimize kHashSearch data index load, we can preload and store in
1133
      // rep_ meta index with iterator during Open.
1134
68.2k
      std::unique_ptr<IndexReader> index_reader_holder;
1135
68.2k
      RETURN_NOT_OK(CreateDataBlockIndexReader(
1136
68.2k
          &index_reader_holder, /* preloaded_meta_index_iter =*/ nullptr));
1137
68.2k
      index_reader = index_reader_holder.release();
1138
68.2k
      rep_->data_index_reader.reset(index_reader, std::memory_order_acq_rel);
1139
68.2k
    }
1140
68.9k
    return BlockBasedTable::CachableEntry<IndexReader>{index_reader, /* cache_handle =*/ nullptr};
1141
68.9k
  }
1142
4.00M
}
1143
1144
InternalIterator* BlockBasedTable::NewIndexIterator(
1145
11.7M
    const ReadOptions& read_options, BlockIter* input_iter) {
1146
11.7M
  const auto index_reader_result = GetIndexReader(read_options);
1147
11.7M
  if (!index_reader_result.ok()) {
1148
1
    return ReturnErrorIterator(index_reader_result.status(), input_iter);
1149
1
  }
1150
1151
11.7M
  auto* new_iter = index_reader_result->value->NewIterator(
1152
11.7M
      input_iter, rep_->data_index_iterator_state.get(), read_options.total_order_seek);
1153
1154
11.7M
  if (index_reader_result->cache_handle) {
1155
18.4E
    auto iter = new_iter ? new_iter : input_iter;
1156
3.93M
    iter->RegisterCleanup(
1157
3.93M
        &ReleaseCachedEntry, rep_->table_options.block_cache.get(),
1158
3.93M
        index_reader_result->cache_handle);
1159
3.93M
  }
1160
1161
11.7M
  return new_iter;
1162
11.7M
}
1163
1164
// Convert an index iterator value (i.e., an encoded BlockHandle)
1165
// into an iterator over the contents of the corresponding block.
1166
// If input_iter is null, new a iterator
1167
// If input_iter is not null, update this iter and return it
1168
InternalIterator* BlockBasedTable::NewDataBlockIterator(const ReadOptions& ro,
1169
18.3M
    const Slice& index_value, BlockType block_type, BlockIter* input_iter) {
1170
18.3M
  PERF_TIMER_GUARD(new_table_block_iter_nanos);
1171
1172
18.3M
  const bool no_io = (ro.read_tier == kBlockCacheTier);
1173
18.3M
  Cache* block_cache = rep_->table_options.block_cache.get();
1174
18.3M
  Cache* block_cache_compressed =
1175
18.3M
      rep_->table_options.block_cache_compressed.get();
1176
18.3M
  CachableEntry<Block> block;
1177
1178
18.3M
  BlockHandle handle;
1179
18.3M
  Slice input = index_value;
1180
  // We intentionally allow extra stuff in index_value so that we
1181
  // can add more features in the future.
1182
18.3M
  Status s = handle.DecodeFrom(&input);
1183
1184
18.3M
  if (!s.ok()) {
1185
0
    if (input_iter != nullptr) {
1186
0
      input_iter->SetStatus(s);
1187
0
      return input_iter;
1188
0
    } else {
1189
0
      return NewErrorInternalIterator(s);
1190
0
    }
1191
18.3M
  }
1192
1193
18.3M
  FileReaderWithCachePrefix* reader = GetBlockReader(block_type);
1194
1195
  // If either block cache is enabled, we'll try to read from it.
1196
18.3M
  if (block_cache != nullptr || block_cache_compressed != nullptr) {
1197
18.3M
    Statistics* statistics = rep_->ioptions.statistics;
1198
18.3M
    char cache_key[block_based_table::kCacheKeyBufferSize];
1199
18.3M
    char compressed_cache_key[block_based_table::kCacheKeyBufferSize];
1200
18.3M
    Slice key, /* key to the block cache */
1201
18.3M
        ckey /* key to the compressed block cache */;
1202
1203
    // create key for block cache
1204
18.3M
    if (block_cache != nullptr) {
1205
18.3M
      key = GetCacheKey(reader->cache_key_prefix, handle, cache_key);
1206
18.3M
    }
1207
1208
18.3M
    if (block_cache_compressed != nullptr) {
1209
79.4k
      ckey = GetCacheKey(reader->compressed_cache_key_prefix, handle, compressed_cache_key);
1210
79.4k
    }
1211
1212
18.3M
    s = GetDataBlockFromCache(
1213
18.3M
        key, ckey, block_cache, block_cache_compressed, statistics, ro, &block,
1214
18.3M
        rep_->table_options.format_version, block_type, rep_->mem_tracker);
1215
1216
18.3M
    if (block.value == nullptr && !no_io && ro.fill_cache) {
1217
2.13M
      std::unique_ptr<Block> raw_block;
1218
2.13M
      {
1219
2.13M
        StopWatch sw(rep_->ioptions.env, statistics, READ_BLOCK_GET_MICROS);
1220
2.13M
        s = block_based_table::ReadBlockFromFile(
1221
2.13M
            reader->reader.get(), rep_->footer, ro, handle, &raw_block, rep_->ioptions.env,
1222
2.13M
            rep_->mem_tracker, block_cache_compressed == nullptr);
1223
2.13M
      }
1224
1225
2.13M
      if (s.ok()) {
1226
2.13M
        s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed,
1227
2.13M
                                ro, statistics, &block, raw_block.release(),
1228
2.13M
                                rep_->table_options.format_version, rep_->mem_tracker);
1229
2.13M
      }
1230
2.13M
    }
1231
18.3M
  }
1232
1233
  // Didn't get any data from block caches.
1234
18.3M
  if (s.ok() && block.value == nullptr) {
1235
1.45M
    if (no_io) {
1236
      // Could not read from block_cache and can't do IO
1237
88
      if (input_iter != nullptr) {
1238
40
        input_iter->SetStatus(ReturnNoIOError());
1239
40
        return input_iter;
1240
48
      } else {
1241
48
        return NewErrorInternalIterator(ReturnNoIOError());
1242
48
      }
1243
1.45M
    }
1244
1.45M
    std::unique_ptr<Block> block_value;
1245
1.45M
    s = block_based_table::ReadBlockFromFile(
1246
1.45M
        reader->reader.get(), rep_->footer, ro, handle, &block_value, rep_->ioptions.env,
1247
1.45M
        rep_->mem_tracker);
1248
1.45M
    if (s.ok()) {
1249
1.45M
      block.value = block_value.release();
1250
1.45M
    }
1251
1.45M
  }
1252
1253
18.3M
  InternalIterator* iter;
1254
18.3M
  if (s.ok() && block.value != nullptr) {
1255
18.3M
    iter = block.value->NewIterator(
1256
18.3M
        rep_->comparator.get(), GetKeyValueEncodingFormat(block_type), input_iter);
1257
18.3M
    if (block.cache_handle != nullptr) {
1258
16.9M
      iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
1259
16.9M
          block.cache_handle);
1260
1.45M
    } else {
1261
1.45M
      iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
1262
1.45M
    }
1263
580
  } else {
1264
580
    if (input_iter != nullptr) {
1265
0
      input_iter->SetStatus(s);
1266
0
      iter = input_iter;
1267
580
    } else {
1268
580
      iter = NewErrorInternalIterator(s);
1269
580
    }
1270
580
  }
1271
18.3M
  return iter;
1272
18.3M
}
1273
1274
// This will be broken if the user specifies an unusual implementation
1275
// of Options.comparator, or if the user specifies an unusual
1276
// definition of prefixes in BlockBasedTableOptions.filter_policy.
1277
// In particular, we require the following three properties:
1278
//
1279
// 1) key.starts_with(prefix(key))
1280
// 2) Compare(prefix(key), key) <= 0.
1281
// 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0
1282
//
1283
// Otherwise, this method guarantees no I/O will be incurred.
1284
//
1285
// REQUIRES: this method shouldn't be called while the DB lock is held.
1286
324
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
1287
324
  if (!rep_->filter_policy) {
1288
272
    return true;
1289
272
  }
1290
1291
52
  assert(rep_->ioptions.prefix_extractor != nullptr);
1292
52
  auto user_key = ExtractUserKey(internal_key);
1293
52
  auto filter_key = GetFilterKeyFromUserKey(user_key);
1294
52
  if (filter_key.empty() ||
1295
52
      !rep_->ioptions.prefix_extractor->InDomain(filter_key) ||
1296
51
      !rep_->ioptions.prefix_extractor->InDomain(user_key)) {
1297
1
    return true;
1298
1
  }
1299
51
  auto user_key_prefix = rep_->ioptions.prefix_extractor->Transform(user_key);
1300
51
  auto filter_key_prefix = rep_->ioptions.prefix_extractor->Transform(filter_key);
1301
51
  InternalKey internal_key_prefix(user_key_prefix, kMaxSequenceNumber, kTypeValue);
1302
51
  auto internal_prefix = internal_key_prefix.Encode();
1303
1304
51
  bool may_match = true;
1305
51
  Status s;
1306
1307
  // To prevent any io operation in this method, we set `read_tier` to make
1308
  // sure we always read index or filter only when they have already been
1309
  // loaded to memory.
1310
51
  ReadOptions no_io_read_options;
1311
51
  no_io_read_options.read_tier = kBlockCacheTier;
1312
1313
  // First check non block-based filter.
1314
51
  auto filter_entry = GetFilter(no_io_read_options.query_id, true /* no io */, &filter_key);
1315
51
  FilterBlockReader* filter = filter_entry.value;
1316
51
  const bool is_block_based_filter = rep_->filter_type == FilterType::kBlockBasedFilter;
1317
51
  if (filter != nullptr && !is_block_based_filter) {
1318
25
    may_match = filter->PrefixMayMatch(filter_key_prefix);
1319
25
  }
1320
1321
  // If filter is block-based or checking filter was not successful we need to get data block
1322
  // offset. For block-based filter we need to know offset of data block to get and check
1323
  // corresponding filter block. For non block-based filter we just need offset to try to get data
1324
  // for the key.
1325
51
  if (may_match) {
1326
39
    unique_ptr<InternalIterator> iiter(NewIndexIterator(no_io_read_options));
1327
39
    iiter->Seek(internal_prefix);
1328
1329
39
    if (!iiter->Valid()) {
1330
      // we're past end of file
1331
      // if it's incomplete, it means that we avoided I/O
1332
      // and we're not really sure that we're past the end
1333
      // of the file
1334
0
      may_match = iiter->status().IsIncomplete();
1335
39
    } else if (ExtractUserKey(iiter->key()).starts_with(
1336
2
                ExtractUserKey(internal_prefix))) {
1337
      // we need to check for this subtle case because our only
1338
      // guarantee is that "the key is a string >= last key in that data
1339
      // block" according to the doc/table_format.txt spec.
1340
      //
1341
      // Suppose iiter.key() starts with the desired prefix; it is not
1342
      // necessarily the case that the corresponding data block will
1343
      // contain the prefix, since iiter.key() need not be in the
1344
      // block.  However, the next data block may contain the prefix, so
1345
      // we return true to play it safe.
1346
2
      may_match = true;
1347
37
    } else if (filter != nullptr && is_block_based_filter) {
1348
      // iiter.key() does NOT start with the desired prefix.  Because
1349
      // Seek() finds the first key that is >= the seek target, this
1350
      // means that iiter.key() > prefix.  Thus, any data blocks coming
1351
      // after the data block corresponding to iiter.key() cannot
1352
      // possibly contain the key.  Thus, the corresponding data block
1353
      // is the only on could potentially contain the prefix.
1354
25
      Slice handle_value = iiter->value();
1355
25
      BlockHandle handle;
1356
25
      s = handle.DecodeFrom(&handle_value);
1357
25
      assert(s.ok());
1358
25
      may_match = filter->PrefixMayMatch(filter_key_prefix, handle.offset());
1359
25
    }
1360
39
  }
1361
1362
51
  Statistics* statistics = rep_->ioptions.statistics;
1363
51
  RecordTick(statistics, BLOOM_FILTER_PREFIX_CHECKED);
1364
51
  if (!may_match) {
1365
31
    RecordTick(statistics, BLOOM_FILTER_PREFIX_USEFUL);
1366
31
  }
1367
1368
51
  filter_entry.Release(rep_->table_options.block_cache.get());
1369
51
  return may_match;
1370
51
}
1371
1372
InternalIterator* BlockBasedTable::NewIterator(const ReadOptions& read_options,
1373
                                               Arena* arena,
1374
3.77M
                                               bool skip_filters) {
1375
3.77M
  auto state = std::make_unique<BlockEntryIteratorState>(
1376
3.77M
      this, read_options, skip_filters, BlockType::kData);
1377
  // TODO: unify the semantics across NewIterator callsites, so that we can pass an arena across
1378
  // them, and decide the free / no free based on that. This callsite, for example, allows us to
1379
  // put the top level iterator on the arena and potentially even the State object, however, not
1380
  // the IndexIterator, as that does not expose arena allocation semantics...
1381
3.77M
  return NewTwoLevelIterator(
1382
3.77M
      state.release(), NewIndexIterator(read_options), arena, true /* need_free_iter_and_state */
1383
3.77M
  );
1384
3.77M
}
1385
1386
bool BlockBasedTable::NonBlockBasedFilterKeyMayMatch(FilterBlockReader* filter,
1387
11.1M
    const Slice& filter_key) const {
1388
11.1M
  assert(rep_->filter_type != FilterType::kBlockBasedFilter);
1389
11.1M
  if (filter == nullptr) {
1390
6.35M
    return true;
1391
6.35M
  }
1392
4.83M
  RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_CHECKED);
1393
4.83M
  if (!filter->KeyMayMatch(filter_key)) {
1394
2.95M
    return false;
1395
2.95M
  }
1396
1.88M
  if (rep_->ioptions.prefix_extractor &&
1397
37
      rep_->ioptions.prefix_extractor->InDomain(filter_key) &&
1398
36
      !filter->PrefixMayMatch(
1399
5
          rep_->ioptions.prefix_extractor->Transform(filter_key))) {
1400
5
    return false;
1401
5
  }
1402
1.88M
  return true;
1403
1.88M
}
1404
1405
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& internal_key,
1406
8.20M
                            GetContext* get_context, bool skip_filters) {
1407
8.20M
  Status s;
1408
8.20M
  CachableEntry<FilterBlockReader> filter_entry;
1409
8.20M
  Slice filter_key;
1410
8.20M
  if (!skip_filters) {
1411
7.97M
    filter_key = GetFilterKeyFromInternalKey(internal_key);
1412
7.97M
    if (!filter_key.empty()) {
1413
7.77M
      filter_entry =
1414
7.77M
          GetFilter(read_options.query_id, read_options.read_tier == kBlockCacheTier, &filter_key);
1415
200k
    } else {
1416
200k
      skip_filters = true;
1417
200k
    }
1418
7.97M
  }
1419
8.20M
  FilterBlockReader* filter = filter_entry.value;
1420
1421
8.20M
  const bool is_block_based_filter = rep_->filter_type == FilterType::kBlockBasedFilter;
1422
1423
  // First check non block-based filter.
1424
8.20M
  if (!is_block_based_filter && !NonBlockBasedFilterKeyMayMatch(filter, filter_key)) {
1425
264k
    RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
1426
7.94M
  } else {
1427
    // Either filter is block-based or key may match.
1428
7.94M
    IndexIteratorHolder iiter_holder(this, read_options);
1429
7.94M
    InternalIterator& iiter = *iiter_holder.iter();
1430
1431
7.94M
    RETURN_NOT_OK(iiter.status());
1432
1433
7.94M
    bool done = false;
1434
15.2M
    for (iiter.Seek(internal_key); iiter.Valid() && !done; iiter.Next()) {
1435
7.94M
      {
1436
7.94M
        Slice data_block_handle_encoded = iiter.value();
1437
1438
7.94M
        if (!skip_filters && is_block_based_filter) {
1439
1.36M
          RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_CHECKED);
1440
1.36M
          BlockHandle data_block_handle;
1441
1.36M
          const bool absent_from_filter =
1442
1.36M
              data_block_handle.DecodeFrom(&data_block_handle_encoded).ok()
1443
1.36M
              && !filter->KeyMayMatch(filter_key, data_block_handle.offset());
1444
1445
1.36M
          if (absent_from_filter) {
1446
            // Not found
1447
            // TODO: think about interaction with Merge. If a user key cannot
1448
            // cross one data block, we should be fine.
1449
669k
            RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_USEFUL);
1450
669k
            break;
1451
669k
          }
1452
7.27M
        }
1453
7.27M
      }
1454
1455
7.27M
      BlockIter biter;
1456
7.27M
      NewDataBlockIterator(read_options, iiter.value(), BlockType::kData, &biter);
1457
1458
7.27M
      if (read_options.read_tier == kBlockCacheTier &&
1459
43
          biter.status().IsIncomplete()) {
1460
        // couldn't get block from block_cache
1461
        // Update Saver.state to Found because we are only looking for whether
1462
        // we can guarantee the key is not there when "no_io" is set
1463
40
        get_context->MarkKeyMayExist();
1464
40
        break;
1465
40
      }
1466
7.27M
      if (!biter.status().ok()) {
1467
0
        s = biter.status();
1468
0
        break;
1469
0
      }
1470
1471
      // Call the *saver function on each entry/block until it returns false
1472
7.27M
      for (biter.Seek(internal_key); biter.Valid(); biter.Next()) {
1473
7.26M
        ParsedInternalKey parsed_key;
1474
7.26M
        if (!ParseInternalKey(biter.key(), &parsed_key)) {
1475
0
          s = STATUS(Corruption, Slice());
1476
0
        }
1477
1478
7.26M
        if (!get_context->SaveValue(parsed_key, biter.value())) {
1479
7.26M
          done = true;
1480
7.26M
          break;
1481
7.26M
        }
1482
7.26M
      }
1483
7.27M
      s = biter.status();
1484
7.27M
    }
1485
7.94M
    if (s.ok()) {
1486
7.94M
      s = iiter.status();
1487
7.94M
    }
1488
7.94M
  }
1489
1490
8.20M
  filter_entry.Release(rep_->table_options.block_cache.get());
1491
8.20M
  return s;
1492
8.20M
}
1493
1494
Status BlockBasedTable::Prefetch(const Slice* const begin,
1495
10
                                 const Slice* const end) {
1496
10
  auto& comparator = *rep_->comparator;
1497
  // pre-condition
1498
10
  if (begin && end && comparator.Compare(*begin, *end) > 0) {
1499
1
    return STATUS(InvalidArgument, *begin, *end);
1500
1
  }
1501
1502
9
  IndexIteratorHolder iiter_holder(this, ReadOptions::kDefault);
1503
9
  InternalIterator& iiter = *iiter_holder.iter();
1504
1505
9
  RETURN_NOT_OK(iiter.status());
1506
1507
  // indicates if we are on the last page that need to be pre-fetched
1508
9
  bool prefetching_boundary_page = false;
1509
1510
36
  for (begin ? iiter.Seek(*begin) : iiter.SeekToFirst(); iiter.Valid();
1511
31
       iiter.Next()) {
1512
31
    Slice block_handle = iiter.value();
1513
1514
31
    if (end && comparator.Compare(iiter.key(), *end) >= 0) {
1515
9
      if (prefetching_boundary_page) {
1516
4
        break;
1517
4
      }
1518
1519
      // The index entry represents the last key in the data block.
1520
      // We should load this page into memory as well, but no more
1521
5
      prefetching_boundary_page = true;
1522
5
    }
1523
1524
    // Load the block specified by the block_handle into the block cache
1525
27
    BlockIter biter;
1526
27
    NewDataBlockIterator(ReadOptions::kDefault, block_handle, BlockType::kData, &biter);
1527
1528
27
    if (!biter.status().ok()) {
1529
      // there was an unexpected error while pre-fetching
1530
0
      return biter.status();
1531
0
    }
1532
27
  }
1533
1534
9
  return Status::OK();
1535
9
}
1536
1537
bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
1538
77
                                      const Slice& key) {
1539
77
  std::unique_ptr<InternalIterator> iiter(NewIndexIterator(options));
1540
77
  iiter->Seek(key);
1541
77
  assert(iiter->Valid());
1542
77
  CachableEntry<Block> block;
1543
1544
77
  BlockHandle handle;
1545
77
  Slice input = iiter->value();
1546
77
  Status s = handle.DecodeFrom(&input);
1547
77
  assert(s.ok());
1548
77
  Cache* block_cache = rep_->table_options.block_cache.get();
1549
77
  assert(block_cache != nullptr);
1550
1551
77
  char cache_key_storage[block_based_table::kCacheKeyBufferSize];
1552
77
  Slice cache_key =
1553
77
      GetCacheKey(rep_->data_reader_with_cache_prefix->cache_key_prefix, handle, cache_key_storage);
1554
77
  Slice ckey;
1555
1556
77
  s = GetDataBlockFromCache(cache_key, ckey, block_cache, nullptr, nullptr, options, &block,
1557
77
      rep_->table_options.format_version, BlockType::kData, rep_->mem_tracker);
1558
77
  assert(s.ok());
1559
77
  bool in_cache = block.value != nullptr;
1560
77
  if (in_cache) {
1561
55
    ReleaseCachedEntry(block_cache, block.cache_handle);
1562
55
  }
1563
77
  return in_cache;
1564
77
}
1565
1566
// REQUIRES: The following fields of rep_ should have already been populated:
1567
//  1. file
1568
//  2. index_handle,
1569
//  3. options
1570
//  4. internal_comparator
1571
//  5. index_type
1572
Status BlockBasedTable::CreateDataBlockIndexReader(
1573
72.4k
    std::unique_ptr<IndexReader>* index_reader, InternalIterator* preloaded_meta_index_iter) {
1574
  // Some old version of block-based tables don't have index type present in
1575
  // table properties. If that's the case we can safely use the kBinarySearch.
1576
72.4k
  auto index_type_on_file = IndexType::kBinarySearch;
1577
72.4k
  if (rep_->table_properties) {
1578
72.4k
    auto& props = rep_->table_properties->user_collected_properties;
1579
72.4k
    auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
1580
72.4k
    if (pos != props.end()) {
1581
72.4k
      index_type_on_file = static_cast<IndexType>(
1582
72.4k
          DecodeFixed32(pos->second.c_str()));
1583
72.4k
    }
1584
72.4k
  }
1585
1586
72.4k
  auto file = rep_->base_reader_with_cache_prefix->reader.get();
1587
72.4k
  auto env = rep_->ioptions.env;
1588
72.4k
  const auto& comparator = rep_->comparator;
1589
72.4k
  const Footer& footer = rep_->footer;
1590
1591
72.4k
  if (index_type_on_file == IndexType::kHashSearch &&
1592
1.65k
      rep_->ioptions.prefix_extractor == nullptr) {
1593
1
    RLOG(InfoLogLevel::WARN_LEVEL, rep_->ioptions.info_log,
1594
1
        "IndexType::kHashSearch requires "
1595
1
        "options.prefix_extractor to be set."
1596
1
        " Fall back to binary search index.");
1597
1
    index_type_on_file = IndexType::kBinarySearch;
1598
1
  }
1599
1600
72.4k
  switch (index_type_on_file) {
1601
6
    case IndexType::kBinarySearch: {
1602
6
      return BinarySearchIndexReader::Create(
1603
6
          file, footer, footer.index_handle(), env, comparator, index_reader, rep_->mem_tracker);
1604
0
    }
1605
1.65k
    case IndexType::kHashSearch: {
1606
1.65k
      std::unique_ptr<Block> meta_guard;
1607
1.65k
      std::unique_ptr<InternalIterator> meta_iter_guard;
1608
1.65k
      auto meta_index_iter = preloaded_meta_index_iter;
1609
1.65k
      if (meta_index_iter == nullptr) {
1610
1.65k
        auto s = ReadMetaBlock(rep_, &meta_guard, &meta_iter_guard);
1611
1.65k
        if (!s.ok()) {
1612
          // we simply fall back to binary search in case there is any
1613
          // problem with prefix hash index loading.
1614
0
          RLOG(InfoLogLevel::WARN_LEVEL, rep_->ioptions.info_log,
1615
0
              "Unable to read the metaindex block."
1616
0
              " Fall back to binary search index.");
1617
0
          return BinarySearchIndexReader::Create(
1618
0
            file, footer, footer.index_handle(), env, comparator, index_reader, rep_->mem_tracker);
1619
0
        }
1620
1.65k
        meta_index_iter = meta_iter_guard.get();
1621
1.65k
      }
1622
1623
      // We need to wrap data with internal_prefix_transform to make sure it can
1624
      // handle prefix correctly.
1625
1.65k
      rep_->internal_prefix_transform.reset(
1626
1.65k
          new InternalKeySliceTransform(rep_->ioptions.prefix_extractor));
1627
1.65k
      return HashIndexReader::Create(
1628
1.65k
          rep_->internal_prefix_transform.get(), footer, file, env, comparator,
1629
1.65k
          footer.index_handle(), meta_index_iter, index_reader,
1630
1.65k
          rep_->hash_index_allow_collision, rep_->mem_tracker);
1631
1.65k
    }
1632
70.8k
    case IndexType::kMultiLevelBinarySearch: {
1633
70.8k
      auto& props = DCHECK_NOTNULL(rep_->table_properties.get())->user_collected_properties;
1634
70.8k
      auto pos = props.find(BlockBasedTablePropertyNames::kNumIndexLevels);
1635
70.8k
      if (pos == props.end()) {
1636
0
        return STATUS_FORMAT(
1637
0
            NotFound, "Missed table property $0 for multi-level binary-search index",
1638
0
            BlockBasedTablePropertyNames::kNumIndexLevels);
1639
0
      }
1640
70.8k
      int num_levels = DecodeFixed32(pos->second.c_str());
1641
70.8k
      auto result = MultiLevelIndexReader::Create(
1642
70.8k
          file, footer, num_levels, footer.index_handle(), env, comparator, rep_->mem_tracker);
1643
70.8k
      RETURN_NOT_OK(result);
1644
70.8k
      *index_reader = std::move(*result);
1645
70.8k
      return Status::OK();
1646
70.8k
    }
1647
0
    default: {
1648
0
      std::string error_message =
1649
0
          "Unrecognized index type: " + ToString(rep_->index_type);
1650
0
      return STATUS(InvalidArgument, error_message.c_str());
1651
70.8k
    }
1652
72.4k
  }
1653
72.4k
}
1654
1655
16.9k
uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
1656
16.9k
  unique_ptr<InternalIterator> index_iter(NewIndexIterator(ReadOptions::kDefault));
1657
1658
16.9k
  index_iter->Seek(key);
1659
16.9k
  uint64_t result;
1660
16.9k
  if (index_iter->Valid()) {
1661
16.9k
    BlockHandle handle;
1662
16.9k
    Slice input = index_iter->value();
1663
16.9k
    Status s = handle.DecodeFrom(&input);
1664
16.9k
    if (s.ok()) {
1665
16.9k
      result = handle.offset();
1666
0
    } else {
1667
      // Strange: we can't decode the block handle in the index block.
1668
      // We'll just return the offset of the metaindex block, which is
1669
      // close to the whole file size for this case.
1670
0
      result = rep_->footer.metaindex_handle().offset();
1671
0
    }
1672
16
  } else {
1673
    // key is past the last key in the file. If table_properties is not
1674
    // available, approximate the offset by returning the offset of the
1675
    // metaindex block (which is right near the end of the file).
1676
16
    result = 0;
1677
16
    if (rep_->table_properties) {
1678
16
      result = rep_->table_properties->data_size;
1679
16
    }
1680
    // table_properties is not present in the table.
1681
16
    if (result == 0) {
1682
0
      result = rep_->footer.metaindex_handle().offset();
1683
0
    }
1684
16
  }
1685
16.9k
  return result;
1686
16.9k
}
1687
1688
3
bool BlockBasedTable::TEST_filter_block_preloaded() const {
1689
3
  return rep_->filter != nullptr;
1690
3
}
1691
1692
3
bool BlockBasedTable::TEST_index_reader_loaded() const {
1693
3
  return rep_->data_index_reader.get() != nullptr;
1694
3
}
1695
1696
3
Status BlockBasedTable::DumpTable(WritableFile* out_file) {
1697
  // Output Footer
1698
3
  RETURN_NOT_OK(out_file->Append(
1699
3
      "Footer Details:\n"
1700
3
      "--------------------------------------\n"
1701
3
      "  "));
1702
3
  RETURN_NOT_OK(out_file->Append(rep_->footer.ToString().c_str()));
1703
3
  RETURN_NOT_OK(out_file->Append("\n"));
1704
1705
  // Output MetaIndex
1706
3
  RETURN_NOT_OK(out_file->Append(
1707
3
      "Metaindex Details:\n"
1708
3
      "--------------------------------------\n"));
1709
3
  std::unique_ptr<Block> meta;
1710
3
  std::unique_ptr<InternalIterator> meta_iter;
1711
3
  Status s = ReadMetaBlock(rep_, &meta, &meta_iter);
1712
3
  if (s.ok()) {
1713
8
    for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) {
1714
5
      s = meta_iter->status();
1715
5
      if (!s.ok()) {
1716
0
        return s;
1717
0
      }
1718
5
      if (meta_iter->key() == rocksdb::kPropertiesBlock) {
1719
3
        RETURN_NOT_OK(out_file->Append("  Properties block handle: "));
1720
3
        RETURN_NOT_OK(out_file->Append(meta_iter->value().ToString(true).c_str()));
1721
3
        RETURN_NOT_OK(out_file->Append("\n"));
1722
2
      } else if (strstr(meta_iter->key().ToString().c_str(),
1723
2
                        "filter.rocksdb.") != nullptr) {
1724
2
        RETURN_NOT_OK(out_file->Append("  Filter block handle: "));
1725
2
        RETURN_NOT_OK(out_file->Append(meta_iter->value().ToString(true).c_str()));
1726
2
        RETURN_NOT_OK(out_file->Append("\n"));
1727
2
      }
1728
5
    }
1729
3
    RETURN_NOT_OK(out_file->Append("\n"));
1730
0
  } else {
1731
0
    return s;
1732
0
  }
1733
1734
  // Output TableProperties
1735
3
  const rocksdb::TableProperties* table_properties;
1736
3
  table_properties = rep_->table_properties.get();
1737
1738
3
  if (table_properties != nullptr) {
1739
3
    RETURN_NOT_OK(out_file->Append(
1740
3
        "Table Properties:\n"
1741
3
        "--------------------------------------\n"
1742
3
        "  "));
1743
3
    RETURN_NOT_OK(out_file->Append(table_properties->ToString("\n  ", ": ").c_str()));
1744
3
    RETURN_NOT_OK(out_file->Append("\n"));
1745
3
  }
1746
1747
  // Output Filter blocks
1748
3
  if (!rep_->filter && !table_properties->filter_policy_name.empty()) {
1749
    // Support only BloomFilter as off now
1750
2
    rocksdb::BlockBasedTableOptions table_options;
1751
2
    table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(1));
1752
2
    if (table_properties->filter_policy_name.compare(
1753
2
            table_options.filter_policy->Name()) == 0) {
1754
2
      std::string filter_block_key = block_based_table::kFilterBlockPrefix;
1755
2
      filter_block_key.append(table_properties->filter_policy_name);
1756
2
      BlockHandle handle;
1757
2
      if (FindMetaBlock(meta_iter.get(), filter_block_key, &handle).ok()) {
1758
1
        BlockContents block;
1759
1
        if (ReadBlockContents(
1760
1
                rep_->base_reader_with_cache_prefix->reader.get(), rep_->footer,
1761
1
                ReadOptions::kDefault, handle, &block, rep_->ioptions.env, rep_->mem_tracker,
1762
1
                false).ok()) {
1763
1
          rep_->filter.reset(new BlockBasedFilterBlockReader(
1764
1
              rep_->ioptions.prefix_extractor, table_options,
1765
1
              table_options.whole_key_filtering, std::move(block)));
1766
1
        }
1767
1
      }
1768
2
    }
1769
2
  }
1770
3
  if (rep_->filter) {
1771
1
    RETURN_NOT_OK(out_file->Append(
1772
1
        "Filter Details:\n"
1773
1
        "--------------------------------------\n"
1774
1
        "  "));
1775
1
    RETURN_NOT_OK(out_file->Append(rep_->filter->ToString().c_str()));
1776
1
    RETURN_NOT_OK(out_file->Append("\n"));
1777
1
  }
1778
1779
  // Output Index block
1780
3
  s = DumpIndexBlock(out_file);
1781
3
  if (!s.ok()) {
1782
0
    return s;
1783
0
  }
1784
  // Output Data blocks
1785
3
  s = DumpDataBlocks(out_file);
1786
1787
3
  return s;
1788
3
}
1789
1790
3
Status BlockBasedTable::DumpIndexBlock(WritableFile* out_file) {
1791
3
  RETURN_NOT_OK(out_file->Append(
1792
3
      "Index Details:\n"
1793
3
      "--------------------------------------\n"));
1794
1795
3
  std::unique_ptr<InternalIterator> blockhandles_iter(
1796
3
      NewIndexIterator(ReadOptions::kDefault));
1797
3
  Status s = blockhandles_iter->status();
1798
3
  if (!s.ok()) {
1799
0
    RETURN_NOT_OK(out_file->Append("Can not read Index Block \n\n"));
1800
0
    return s;
1801
3
  }
1802
1803
3
  RETURN_NOT_OK(out_file->Append("  Block key hex dump: Data block handle\n"));
1804
3
  RETURN_NOT_OK(out_file->Append("  Block key ascii\n\n"));
1805
24
  for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
1806
21
       blockhandles_iter->Next()) {
1807
21
    s = blockhandles_iter->status();
1808
21
    if (!s.ok()) {
1809
0
      break;
1810
0
    }
1811
21
    Slice key = blockhandles_iter->key();
1812
21
    InternalKey ikey = InternalKey::DecodeFrom(key);
1813
1814
21
    RETURN_NOT_OK(out_file->Append("  HEX    "));
1815
21
    RETURN_NOT_OK(out_file->Append(ikey.user_key().ToString(true).c_str()));
1816
21
    RETURN_NOT_OK(out_file->Append(": "));
1817
21
    RETURN_NOT_OK(out_file->Append(blockhandles_iter->value().ToString(true).c_str()));
1818
21
    RETURN_NOT_OK(out_file->Append("\n"));
1819
1820
21
    std::string str_key = ikey.user_key().ToString();
1821
21
    std::string res_key("");
1822
21
    char cspace = ' ';
1823
132
    for (size_t i = 0; i < str_key.size(); i++) {
1824
111
      res_key.append(&str_key[i], 1);
1825
111
      res_key.append(1, cspace);
1826
111
    }
1827
21
    RETURN_NOT_OK(out_file->Append("  ASCII  "));
1828
21
    RETURN_NOT_OK(out_file->Append(res_key.c_str()));
1829
21
    RETURN_NOT_OK(out_file->Append("\n  ------\n"));
1830
21
  }
1831
3
  RETURN_NOT_OK(out_file->Append("\n"));
1832
3
  return Status::OK();
1833
3
}
1834
1835
3
Status BlockBasedTable::DumpDataBlocks(WritableFile* out_file) {
1836
3
  std::unique_ptr<InternalIterator> blockhandles_iter(
1837
3
      NewIndexIterator(ReadOptions::kDefault));
1838
3
  Status s = blockhandles_iter->status();
1839
3
  if (!s.ok()) {
1840
0
    RETURN_NOT_OK(out_file->Append("Can not read Index Block \n\n"));
1841
0
    return s;
1842
3
  }
1843
1844
3
  size_t block_id = 1;
1845
24
  for (blockhandles_iter->SeekToFirst(); blockhandles_iter->Valid();
1846
21
       block_id++, blockhandles_iter->Next()) {
1847
21
    s = blockhandles_iter->status();
1848
21
    if (!s.ok()) {
1849
0
      break;
1850
0
    }
1851
1852
21
    RETURN_NOT_OK(out_file->Append("Data Block # "));
1853
21
    RETURN_NOT_OK(out_file->Append(rocksdb::ToString(block_id)));
1854
21
    RETURN_NOT_OK(out_file->Append(" @ "));
1855
21
    RETURN_NOT_OK(out_file->Append(blockhandles_iter->value().ToString(true).c_str()));
1856
21
    RETURN_NOT_OK(out_file->Append("\n"));
1857
21
    RETURN_NOT_OK(out_file->Append("--------------------------------------\n"));
1858
1859
21
    std::unique_ptr<InternalIterator> datablock_iter;
1860
21
    datablock_iter.reset(
1861
21
        NewDataBlockIterator(
1862
21
            ReadOptions::kDefault, blockhandles_iter->value(), BlockType::kData));
1863
21
    s = datablock_iter->status();
1864
1865
21
    if (!s.ok()) {
1866
0
      RETURN_NOT_OK(out_file->Append("Error reading the block - Skipped \n\n"));
1867
0
      continue;
1868
21
    }
1869
1870
3.09k
    for (datablock_iter->SeekToFirst(); datablock_iter->Valid();
1871
3.07k
         datablock_iter->Next()) {
1872
3.07k
      s = datablock_iter->status();
1873
3.07k
      if (!s.ok()) {
1874
0
        RETURN_NOT_OK(out_file->Append("Error reading the block - Skipped \n"));
1875
0
        break;
1876
3.07k
      }
1877
3.07k
      Slice key = datablock_iter->key();
1878
3.07k
      Slice value = datablock_iter->value();
1879
3.07k
      InternalKey ikey = InternalKey::DecodeFrom(key);
1880
1881
3.07k
      RETURN_NOT_OK(out_file->Append("  HEX    "));
1882
3.07k
      RETURN_NOT_OK(out_file->Append(ikey.user_key().ToString(true).c_str()));
1883
3.07k
      RETURN_NOT_OK(out_file->Append(": "));
1884
3.07k
      RETURN_NOT_OK(out_file->Append(value.ToString(true).c_str()));
1885
3.07k
      RETURN_NOT_OK(out_file->Append("\n"));
1886
1887
3.07k
      std::string str_key = ikey.user_key().ToString();
1888
3.07k
      std::string str_value = value.ToString();
1889
3.07k
      std::string res_key(""), res_value("");
1890
3.07k
      char cspace = ' ';
1891
21.5k
      for (size_t i = 0; i < str_key.size(); i++) {
1892
18.4k
        res_key.append(&str_key[i], 1);
1893
18.4k
        res_key.append(1, cspace);
1894
18.4k
      }
1895
46.0k
      for (size_t i = 0; i < str_value.size(); i++) {
1896
43.0k
        res_value.append(&str_value[i], 1);
1897
43.0k
        res_value.append(1, cspace);
1898
43.0k
      }
1899
1900
3.07k
      RETURN_NOT_OK(out_file->Append("  ASCII  "));
1901
3.07k
      RETURN_NOT_OK(out_file->Append(res_key.c_str()));
1902
3.07k
      RETURN_NOT_OK(out_file->Append(": "));
1903
3.07k
      RETURN_NOT_OK(out_file->Append(res_value.c_str()));
1904
3.07k
      RETURN_NOT_OK(out_file->Append("\n  ------\n"));
1905
3.07k
    }
1906
21
    RETURN_NOT_OK(out_file->Append("\n"));
1907
21
  }
1908
3
  return Status::OK();
1909
3
}
1910
1911
0
const ImmutableCFOptions& BlockBasedTable::ioptions() {
1912
0
  return rep_->ioptions;
1913
0
}
1914
1915
46
yb::Result<std::string> BlockBasedTable::GetMiddleKey() {
1916
46
  auto index_reader = VERIFY_RESULT(GetIndexReader(ReadOptions::kDefault));
1917
1918
  // TODO: remove this trick after https://github.com/yugabyte/yugabyte-db/issues/4720 is resolved.
1919
46
  auto se = yb::ScopeExit([this, &index_reader] {
1920
46
    index_reader.Release(rep_->table_options.block_cache.get());
1921
46
  });
1922
1923
46
  const auto index_middle_key = VERIFY_RESULT(index_reader.value->GetMiddleKey());
1924
46
  std::unique_ptr<InternalIterator> iter(
1925
46
      NewIterator(ReadOptions::kDefault, nullptr, /* skip_filters =*/ true));
1926
46
  iter->Seek(index_middle_key);
1927
46
  if (!iter->Valid()) {
1928
    // There are no keys in SST that are >= index_middle_key. That means SST is empty or just have
1929
    // the single data block.
1930
    // For tablet splitting we don't need to handle such small files, but if needed for other cases
1931
    // we can update this function to return the middle key of the data block in case there is data
1932
    // in the SST.
1933
0
    return STATUS(Incomplete, "Empty or too small SST");
1934
0
  }
1935
46
  return iter->key().ToBuffer();
1936
46
}
1937
1938
}  // namespace rocksdb