YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/index_builder.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/rocksdb/table/index_builder.h"
15
16
#include "yb/rocksdb/comparator.h"
17
#include "yb/rocksdb/port/likely.h"
18
#include "yb/rocksdb/slice_transform.h"
19
#include "yb/rocksdb/table/block_based_table_factory.h"
20
#include "yb/rocksdb/util/coding.h"
21
22
#include "yb/util/logging.h"
23
24
namespace rocksdb {
25
26
Result<bool> IndexBuilder::FlushNextBlock(
27
1.45k
    IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
28
1.45k
  RETURN_NOT_OK(Finish(index_blocks));
29
1.45k
  return true;
30
1.45k
}
31
32
IndexBuilder* IndexBuilder::CreateIndexBuilder(
33
    IndexType type,
34
    const Comparator* comparator,
35
    const SliceTransform* prefix_extractor,
36
122k
    const BlockBasedTableOptions& table_opt) {
37
122k
  switch (type) {
38
61.4k
    case IndexType::kBinarySearch: {
39
61.4k
      return new ShortenedIndexBuilder(comparator,
40
61.4k
                                       table_opt.index_block_restart_interval);
41
0
    }
42
1.45k
    case IndexType::kHashSearch: {
43
1.45k
      return new HashIndexBuilder(comparator, prefix_extractor,
44
1.45k
                                  table_opt.index_block_restart_interval);
45
0
    }
46
60.0k
    case IndexType::kMultiLevelBinarySearch: {
47
60.0k
      return new MultiLevelIndexBuilder(comparator, table_opt);
48
0
    }
49
0
    default: {
50
0
      assert(!"Do not recognize the index type ");
51
0
      return nullptr;
52
0
    }
53
0
  }
54
  // impossible.
55
0
  assert(false);
56
0
  return nullptr;
57
0
}
58
59
void ShortenedIndexBuilder::AddIndexEntry(
60
    std::string* last_key_in_current_block,
61
    const Slice* first_key_in_next_block,
62
26.9k
    const BlockHandle& block_handle) {
63
26.9k
  std::string handle_encoding;
64
26.9k
  block_handle.AppendEncodedTo(&handle_encoding);
65
26.9k
  AddIndexEntry(
66
26.9k
      last_key_in_current_block, first_key_in_next_block, handle_encoding, ShortenKeys::kTrue);
67
26.9k
}
68
69
void ShortenedIndexBuilder::AddIndexEntry(
70
    std::string* last_key_in_current_block,
71
    const Slice* first_key_in_next_block,
72
    const std::string& block_handle_encoded,
73
2.55M
    const ShortenKeys shorten_keys) {
74
2.55M
  if (shorten_keys) {
75
2.53M
    if (UNLIKELY(first_key_in_next_block == nullptr)) {
76
68.8k
      comparator_->FindShortSuccessor(last_key_in_current_block);
77
2.47M
    } else {
78
2.47M
      comparator_->FindShortestSeparator(last_key_in_current_block, *first_key_in_next_block);
79
2.47M
    }
80
2.53M
  }
81
82
2.55M
  index_block_builder_.Add(*last_key_in_current_block, block_handle_encoded);
83
2.55M
}
84
85
80.2k
Status ShortenedIndexBuilder::Finish(IndexBlocks* index_blocks) {
86
80.2k
  index_blocks->index_block_contents = index_block_builder_.Finish();
87
80.2k
  return Status::OK();
88
80.2k
}
89
90
void HashIndexBuilder::AddIndexEntry(
91
    std::string* last_key_in_current_block,
92
    const Slice* first_key_in_next_block,
93
19.3k
    const BlockHandle& block_handle) {
94
19.3k
  ++current_restart_index_;
95
19.3k
  primary_index_builder_.AddIndexEntry(
96
19.3k
      last_key_in_current_block, first_key_in_next_block, block_handle);
97
19.3k
}
98
99
546k
void HashIndexBuilder::OnKeyAdded(const Slice& key) {
100
546k
  auto key_prefix = hash_key_extractor_->Transform(key);
101
546k
  bool is_first_entry = pending_block_num_ == 0;
102
103
  // Keys may share the prefix
104
546k
  if (is_first_entry || pending_entry_prefix_ != key_prefix) {
105
265k
    if (!is_first_entry) {
106
264k
      FlushPendingPrefix();
107
264k
    }
108
109
    // need a hard copy otherwise the underlying data changes all the time.
110
    // TODO(kailiu) ToString() is expensive. We may speed up can avoid data
111
    // copy.
112
265k
    pending_entry_prefix_ = key_prefix.ToString();
113
265k
    pending_block_num_ = 1;
114
265k
    pending_entry_index_ = static_cast<uint32_t>(current_restart_index_);
115
280k
  } else {
116
    // entry number increments when keys share the prefix reside in
117
    // different data blocks.
118
280k
    auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
119
280k
    assert(last_restart_index <= current_restart_index_);
120
280k
    if (last_restart_index != current_restart_index_) {
121
9.02k
      ++pending_block_num_;
122
9.02k
    }
123
280k
  }
124
546k
}
125
126
1.44k
Status HashIndexBuilder::Finish(IndexBlocks* index_blocks) {
127
1.44k
  FlushPendingPrefix();
128
1.44k
  RETURN_NOT_OK(primary_index_builder_.Finish(index_blocks));
129
1.44k
  index_blocks->meta_blocks.emplace(kHashIndexPrefixesBlock, prefix_block_);
130
1.44k
  index_blocks->meta_blocks.emplace(kHashIndexPrefixesMetadataBlock, prefix_meta_block_);
131
1.44k
  return Status::OK();
132
1.44k
}
133
134
265k
void HashIndexBuilder::FlushPendingPrefix() {
135
265k
  prefix_block_.append(pending_entry_prefix_.data(), pending_entry_prefix_.size());
136
265k
  PutVarint32(&prefix_meta_block_, static_cast<uint32_t>(pending_entry_prefix_.size()));
137
265k
  PutVarint32(&prefix_meta_block_, pending_entry_index_);
138
265k
  PutVarint32(&prefix_meta_block_, pending_block_num_);
139
265k
}
140
141
MultiLevelIndexBuilder::MultiLevelIndexBuilder(
142
    const Comparator* comparator, const BlockBasedTableOptions& table_opt)
143
    : IndexBuilder(comparator),
144
64.0k
      table_opt_(table_opt) {}
145
146
2.52M
void MultiLevelIndexBuilder::EnsureCurrentLevelIndexBuilderCreated() {
147
2.52M
  if (!current_level_index_block_builder_) {
148
75.6k
    DCHECK(!flush_policy_);
149
75.6k
    current_level_index_block_builder_.reset(
150
75.6k
        new ShortenedIndexBuilder(comparator_, table_opt_.index_block_restart_interval));
151
75.6k
    flush_policy_ = FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
152
75.6k
        table_opt_.index_block_size, table_opt_.block_size_deviation,
153
75.6k
        table_opt_.min_keys_per_index_block,
154
75.6k
        current_level_index_block_builder_->GetIndexBlockBuilder());
155
75.6k
  }
156
2.52M
}
157
158
void MultiLevelIndexBuilder::AddIndexEntry(
159
    std::string* last_key_in_current_block, const Slice* first_key_in_next_block,
160
2.52M
    const BlockHandle& block_handle, const ShortenKeys shorten_keys) {
161
21
  DCHECK(!current_level_block_.is_ready)
162
21
      << "Expected to first flush already complete index block";
163
2.52M
  EnsureCurrentLevelIndexBuilderCreated();
164
165
2.52M
  block_handle_encoding_.clear();
166
2.52M
  block_handle.AppendEncodedTo(&block_handle_encoding_);
167
168
2.52M
  current_level_index_block_builder_->AddIndexEntry(
169
2.52M
      last_key_in_current_block, first_key_in_next_block, block_handle_encoding_, shorten_keys);
170
171
2.52M
  if (flush_policy_->Update(*last_key_in_current_block, block_handle_encoding_)
172
2.51M
      || first_key_in_next_block == nullptr) {
173
75.5k
    current_level_block_.is_ready = true;
174
75.5k
    yb::CopyToBuffer(*last_key_in_current_block, &current_level_block_.last_key);
175
75.5k
    current_level_block_.has_next_block = first_key_in_next_block != nullptr;
176
75.5k
    if (current_level_block_.has_next_block) {
177
11.6k
      first_key_in_next_block->CopyToBuffer(&current_level_block_.next_block_first_key);
178
11.6k
    }
179
75.5k
  }
180
2.52M
}
181
182
3.98M
bool MultiLevelIndexBuilder::ShouldFlush() const {
183
3.98M
  return current_level_block_.is_ready ||
184
3.90M
         next_level_.block_to_add.IsReadyAndLast() ||
185
3.89M
         (next_level_index_builder_ && next_level_index_builder_->ShouldFlush());
186
3.98M
}
187
188
75.6k
Status MultiLevelIndexBuilder::FlushCurrentLevel(IndexBlocks* index_blocks) {
189
75.6k
  RETURN_NOT_OK(current_level_index_block_builder_->Finish(index_blocks));
190
75.6k
  finished_index_block_builder_ = std::move(current_level_index_block_builder_);
191
75.6k
  flush_policy_.reset();
192
75.6k
  current_level_size_ += index_blocks->index_block_contents.size() + kBlockTrailerSize;
193
75.6k
  return Status::OK();
194
75.6k
}
195
196
void MultiLevelIndexBuilder::AddToNextLevelIfReady(
197
75.5k
    IndexBlockInfo* block_info, const BlockHandle& block_handle) {
198
75.5k
  DCHECK_ONLY_NOTNULL(block_info);
199
75.5k
  if (block_info->is_ready) {
200
15.6k
    DCHECK_ONLY_NOTNULL(next_level_index_builder_.get());
201
    // We don't need to shorten keys in next level index builder, since we already did that when
202
    // added this entry at previous levels.
203
15.6k
    if (block_info->has_next_block) {
204
11.6k
      Slice key_slice(block_info->next_block_first_key);
205
11.6k
      next_level_index_builder_->AddIndexEntry(
206
11.6k
          &block_info->last_key, &key_slice, block_handle, ShortenKeys::kFalse);
207
4.06k
    } else {
208
4.06k
      next_level_index_builder_->AddIndexEntry(
209
4.06k
          &block_info->last_key, nullptr, block_handle, ShortenKeys::kFalse);
210
18.4E
      DCHECK(next_level_index_builder_->ShouldFlush())
211
18.4E
          << "Expected to request flushing of last block at next index level";
212
4.06k
    }
213
15.6k
    block_info->is_ready = false;
214
15.6k
  }
215
75.5k
}
216
217
Result<bool> MultiLevelIndexBuilder::FlushNextBlock(
218
139k
    IndexBlocks* index_blocks, const BlockHandle& last_index_block_handle) {
219
139k
  if (next_level_.just_flushed) {
220
    // We should only have current level index block to add for next level if we have just
221
    // flushed it, but not next level index block.
222
4.07k
    if (next_level_.block_to_add.is_ready) {
223
0
      return STATUS(IllegalState, "MultiLevelIndexBuilder just flushed next level index block, but"
224
0
          " current level index block is ready to add for next level.");
225
0
    }
226
4.07k
    next_level_.last_flushed_block_ = last_index_block_handle;
227
4.07k
    next_level_.just_flushed = false;
228
4.07k
  }
229
139k
  if (flushing_indexes_) {
230
    // If ready - add last flushed current level index block to next level index.
231
75.5k
    AddToNextLevelIfReady(&next_level_.block_to_add, last_index_block_handle);
232
75.5k
    if (next_level_index_builder_ && next_level_index_builder_->ShouldFlush()) {
233
      // First flush next level index block for all previously flushed index blocks at this
234
      // level and only after that we will flush pending current level index block on subsequent
235
      // FlushNextBlock() call at this level.
236
4.08k
      auto result = next_level_index_builder_->FlushNextBlock(
237
4.08k
          index_blocks, next_level_.last_flushed_block_);
238
4.08k
      RETURN_NOT_OK(result);
239
4.08k
      next_level_.just_flushed = true;
240
4.08k
      return result;
241
135k
    }
242
75.5k
  }
243
135k
  flushing_indexes_ = true;
244
135k
  if (current_level_block_.is_ready) {
245
75.5k
    RETURN_NOT_OK(FlushCurrentLevel(index_blocks));
246
75.5k
    if (!next_level_index_builder_ && current_level_block_.has_next_block) {
247
      // We only need next level index builder if we plan to have more than one index entry at next
248
      // level, i.e. if we have next block at current level. Otherwise current level will be the top
249
      // level with single index block.
250
4.07k
      next_level_index_builder_ = std::make_unique<MultiLevelIndexBuilder>(
251
4.07k
          comparator_, table_opt_);
252
4.07k
    }
253
75.5k
    if (next_level_index_builder_) {
254
      // Postpone adding index entry for just flushed block for next FlushNextBlock() call, since
255
      // we need to have block handle to add it to next level index.
256
15.6k
      next_level_.block_to_add = std::move(current_level_block_);
257
15.6k
    }
258
75.5k
    current_level_block_.is_ready = false;
259
75.5k
    return true;
260
59.9k
  } else if (!last_index_block_handle.IsSet()) {
261
    // It means this is the first call of FlushNextBlock() and no keys have been added to index,
262
    // which can only happen when we call FlushNextBlock() unconditionally during finishing of empty
263
    // SST file, so we need to produce an empty index.
264
54
    EnsureCurrentLevelIndexBuilderCreated();
265
54
    RETURN_NOT_OK(FlushCurrentLevel(index_blocks));
266
54
    return true;
267
59.9k
  } else {
268
59.9k
    return false;
269
59.9k
  }
270
135k
}
271
272
64.0k
size_t MultiLevelIndexBuilder::EstimatedSize() const {
273
  // We subtract kBlockTrailerSize at the top level, because it will be added by
274
  // BlockBasedTableBuilder.
275
64.0k
  return current_level_size_ +
276
59.9k
      (next_level_index_builder_ ? next_level_index_builder_->EstimatedSize() : -kBlockTrailerSize);
277
64.0k
}
278
279
119k
int MultiLevelIndexBuilder::NumLevels() const {
280
111k
  return 1 + (next_level_index_builder_ ? next_level_index_builder_->NumLevels() : 0);
281
119k
}
282
283
} // namespace rocksdb