YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/index_builder.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/rocksdb/table/index_builder.h"
15
16
#include "yb/rocksdb/comparator.h"
17
#include "yb/rocksdb/port/likely.h"
18
#include "yb/rocksdb/slice_transform.h"
19
#include "yb/rocksdb/table/block_based_table_factory.h"
20
#include "yb/rocksdb/util/coding.h"
21
22
#include "yb/util/logging.h"
23
24
namespace rocksdb {
25
26
Result<bool> IndexBuilder::FlushNextBlock(
27
1.45k
    IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
28
1.45k
  RETURN_NOT_OK(Finish(index_blocks));
29
1.45k
  return true;
30
1.45k
}
31
32
IndexBuilder* IndexBuilder::CreateIndexBuilder(
33
    IndexType type,
34
    const Comparator* comparator,
35
    const SliceTransform* prefix_extractor,
36
131k
    const BlockBasedTableOptions& table_opt) {
37
131k
  switch (type) {
38
65.6k
    case IndexType::kBinarySearch: {
39
65.6k
      return new ShortenedIndexBuilder(comparator,
40
65.6k
                                       table_opt.index_block_restart_interval);
41
0
    }
42
1.45k
    case IndexType::kHashSearch: {
43
1.45k
      return new HashIndexBuilder(comparator, prefix_extractor,
44
1.45k
                                  table_opt.index_block_restart_interval);
45
0
    }
46
64.1k
    case IndexType::kMultiLevelBinarySearch: {
47
64.1k
      return new MultiLevelIndexBuilder(comparator, table_opt);
48
0
    }
49
0
    default: {
50
0
      assert(!"Do not recognize the index type ");
51
0
      return nullptr;
52
0
    }
53
131k
  }
54
  // impossible.
55
0
  assert(false);
56
0
  return nullptr;
57
131k
}
58
59
void ShortenedIndexBuilder::AddIndexEntry(
60
    std::string* last_key_in_current_block,
61
    const Slice* first_key_in_next_block,
62
33.8k
    const BlockHandle& block_handle) {
63
33.8k
  std::string handle_encoding;
64
33.8k
  block_handle.AppendEncodedTo(&handle_encoding);
65
33.8k
  AddIndexEntry(
66
33.8k
      last_key_in_current_block, first_key_in_next_block, handle_encoding, ShortenKeys::kTrue);
67
33.8k
}
68
69
void ShortenedIndexBuilder::AddIndexEntry(
70
    std::string* last_key_in_current_block,
71
    const Slice* first_key_in_next_block,
72
    const std::string& block_handle_encoded,
73
2.89M
    const ShortenKeys shorten_keys) {
74
2.89M
  if (shorten_keys) {
75
2.87M
    if (UNLIKELY(first_key_in_next_block == nullptr)) {
76
76.9k
      comparator_->FindShortSuccessor(last_key_in_current_block);
77
2.79M
    } else {
78
2.79M
      comparator_->FindShortestSeparator(last_key_in_current_block, *first_key_in_next_block);
79
2.79M
    }
80
2.87M
  }
81
82
2.89M
  index_block_builder_.Add(*last_key_in_current_block, block_handle_encoded);
83
2.89M
}
84
85
90.5k
Status ShortenedIndexBuilder::Finish(IndexBlocks* index_blocks) {
86
90.5k
  index_blocks->index_block_contents = index_block_builder_.Finish();
87
90.5k
  return Status::OK();
88
90.5k
}
89
90
void HashIndexBuilder::AddIndexEntry(
91
    std::string* last_key_in_current_block,
92
    const Slice* first_key_in_next_block,
93
19.4k
    const BlockHandle& block_handle) {
94
19.4k
  ++current_restart_index_;
95
19.4k
  primary_index_builder_.AddIndexEntry(
96
19.4k
      last_key_in_current_block, first_key_in_next_block, block_handle);
97
19.4k
}
98
99
554k
void HashIndexBuilder::OnKeyAdded(const Slice& key) {
100
554k
  auto key_prefix = hash_key_extractor_->Transform(key);
101
554k
  bool is_first_entry = pending_block_num_ == 0;
102
103
  // Keys may share the prefix
104
554k
  if (is_first_entry || 
pending_entry_prefix_ != key_prefix553k
) {
105
268k
    if (!is_first_entry) {
106
266k
      FlushPendingPrefix();
107
266k
    }
108
109
    // need a hard copy otherwise the underlying data changes all the time.
110
    // TODO(kailiu) ToString() is expensive. We may speed up can avoid data
111
    // copy.
112
268k
    pending_entry_prefix_ = key_prefix.ToString();
113
268k
    pending_block_num_ = 1;
114
268k
    pending_entry_index_ = static_cast<uint32_t>(current_restart_index_);
115
286k
  } else {
116
    // entry number increments when keys share the prefix reside in
117
    // different data blocks.
118
286k
    auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
119
286k
    assert(last_restart_index <= current_restart_index_);
120
286k
    if (last_restart_index != current_restart_index_) {
121
9.06k
      ++pending_block_num_;
122
9.06k
    }
123
286k
  }
124
554k
}
125
126
1.45k
Status HashIndexBuilder::Finish(IndexBlocks* index_blocks) {
127
1.45k
  FlushPendingPrefix();
128
1.45k
  RETURN_NOT_OK(primary_index_builder_.Finish(index_blocks));
129
1.45k
  index_blocks->meta_blocks.emplace(kHashIndexPrefixesBlock, prefix_block_);
130
1.45k
  index_blocks->meta_blocks.emplace(kHashIndexPrefixesMetadataBlock, prefix_meta_block_);
131
1.45k
  return Status::OK();
132
1.45k
}
133
134
268k
void HashIndexBuilder::FlushPendingPrefix() {
135
268k
  prefix_block_.append(pending_entry_prefix_.data(), pending_entry_prefix_.size());
136
268k
  PutVarint32(&prefix_meta_block_, static_cast<uint32_t>(pending_entry_prefix_.size()));
137
268k
  PutVarint32(&prefix_meta_block_, pending_entry_index_);
138
268k
  PutVarint32(&prefix_meta_block_, pending_block_num_);
139
268k
}
140
141
MultiLevelIndexBuilder::MultiLevelIndexBuilder(
142
    const Comparator* comparator, const BlockBasedTableOptions& table_opt)
143
    : IndexBuilder(comparator),
144
68.4k
      table_opt_(table_opt) {}
145
146
2.86M
void MultiLevelIndexBuilder::EnsureCurrentLevelIndexBuilderCreated() {
147
2.86M
  if (!current_level_index_block_builder_) {
148
82.0k
    DCHECK(!flush_policy_);
149
82.0k
    current_level_index_block_builder_.reset(
150
82.0k
        new ShortenedIndexBuilder(comparator_, table_opt_.index_block_restart_interval));
151
82.0k
    flush_policy_ = FlushBlockBySizePolicyFactory::NewFlushBlockPolicy(
152
82.0k
        table_opt_.index_block_size, table_opt_.block_size_deviation,
153
82.0k
        table_opt_.min_keys_per_index_block,
154
82.0k
        current_level_index_block_builder_->GetIndexBlockBuilder());
155
82.0k
  }
156
2.86M
}
157
158
void MultiLevelIndexBuilder::AddIndexEntry(
159
    std::string* last_key_in_current_block, const Slice* first_key_in_next_block,
160
2.86M
    const BlockHandle& block_handle, const ShortenKeys shorten_keys) {
161
2.86M
  DCHECK(!current_level_block_.is_ready)
162
39
      << "Expected to first flush already complete index block";
163
2.86M
  EnsureCurrentLevelIndexBuilderCreated();
164
165
2.86M
  block_handle_encoding_.clear();
166
2.86M
  block_handle.AppendEncodedTo(&block_handle_encoding_);
167
168
2.86M
  current_level_index_block_builder_->AddIndexEntry(
169
2.86M
      last_key_in_current_block, first_key_in_next_block, block_handle_encoding_, shorten_keys);
170
171
2.86M
  if (flush_policy_->Update(*last_key_in_current_block, block_handle_encoding_)
172
2.86M
      || 
first_key_in_next_block == nullptr2.84M
) {
173
81.9k
    current_level_block_.is_ready = true;
174
81.9k
    yb::CopyToBuffer(*last_key_in_current_block, &current_level_block_.last_key);
175
81.9k
    current_level_block_.has_next_block = first_key_in_next_block != nullptr;
176
81.9k
    if (current_level_block_.has_next_block) {
177
13.5k
      first_key_in_next_block->CopyToBuffer(&current_level_block_.next_block_first_key);
178
13.5k
    }
179
81.9k
  }
180
2.86M
}
181
182
4.52M
bool MultiLevelIndexBuilder::ShouldFlush() const {
183
4.52M
  return current_level_block_.is_ready ||
184
4.52M
         
next_level_.block_to_add.IsReadyAndLast()4.43M
||
185
4.52M
         
(4.43M
next_level_index_builder_4.43M
&&
next_level_index_builder_->ShouldFlush()1.57M
);
186
4.52M
}
187
188
81.9k
Status MultiLevelIndexBuilder::FlushCurrentLevel(IndexBlocks* index_blocks) {
189
81.9k
  RETURN_NOT_OK(current_level_index_block_builder_->Finish(index_blocks));
190
81.9k
  finished_index_block_builder_ = std::move(current_level_index_block_builder_);
191
81.9k
  flush_policy_.reset();
192
81.9k
  current_level_size_ += index_blocks->index_block_contents.size() + kBlockTrailerSize;
193
81.9k
  return Status::OK();
194
81.9k
}
195
196
void MultiLevelIndexBuilder::AddToNextLevelIfReady(
197
81.9k
    IndexBlockInfo* block_info, const BlockHandle& block_handle) {
198
81.9k
  DCHECK_ONLY_NOTNULL(block_info);
199
81.9k
  if (block_info->is_ready) {
200
17.8k
    DCHECK_ONLY_NOTNULL(next_level_index_builder_.get());
201
    // We don't need to shorten keys in next level index builder, since we already did that when
202
    // added this entry at previous levels.
203
17.8k
    if (block_info->has_next_block) {
204
13.5k
      Slice key_slice(block_info->next_block_first_key);
205
13.5k
      next_level_index_builder_->AddIndexEntry(
206
13.5k
          &block_info->last_key, &key_slice, block_handle, ShortenKeys::kFalse);
207
13.5k
    } else {
208
4.31k
      next_level_index_builder_->AddIndexEntry(
209
4.31k
          &block_info->last_key, nullptr, block_handle, ShortenKeys::kFalse);
210
4.31k
      DCHECK(next_level_index_builder_->ShouldFlush())
211
0
          << "Expected to request flushing of last block at next index level";
212
4.31k
    }
213
17.8k
    block_info->is_ready = false;
214
17.8k
  }
215
81.9k
}
216
217
Result<bool> MultiLevelIndexBuilder::FlushNextBlock(
218
150k
    IndexBlocks* index_blocks, const BlockHandle& last_index_block_handle) {
219
150k
  if (next_level_.just_flushed) {
220
    // We should only have current level index block to add for next level if we have just
221
    // flushed it, but not next level index block.
222
4.32k
    if (next_level_.block_to_add.is_ready) {
223
0
      return STATUS(IllegalState, "MultiLevelIndexBuilder just flushed next level index block, but"
224
0
          " current level index block is ready to add for next level.");
225
0
    }
226
4.32k
    next_level_.last_flushed_block_ = last_index_block_handle;
227
4.32k
    next_level_.just_flushed = false;
228
4.32k
  }
229
150k
  if (flushing_indexes_) {
230
    // If ready - add last flushed current level index block to next level index.
231
81.9k
    AddToNextLevelIfReady(&next_level_.block_to_add, last_index_block_handle);
232
81.9k
    if (next_level_index_builder_ && 
next_level_index_builder_->ShouldFlush()22.1k
) {
233
      // First flush next level index block for all previously flushed index blocks at this
234
      // level and only after that we will flush pending current level index block on subsequent
235
      // FlushNextBlock() call at this level.
236
4.33k
      auto result = next_level_index_builder_->FlushNextBlock(
237
4.33k
          index_blocks, next_level_.last_flushed_block_);
238
4.33k
      RETURN_NOT_OK(result);
239
4.33k
      next_level_.just_flushed = true;
240
4.33k
      return result;
241
4.33k
    }
242
81.9k
  }
243
145k
  flushing_indexes_ = true;
244
145k
  if (current_level_block_.is_ready) {
245
81.9k
    RETURN_NOT_OK(FlushCurrentLevel(index_blocks));
246
81.9k
    if (!next_level_index_builder_ && 
current_level_block_.has_next_block68.3k
) {
247
      // We only need next level index builder if we plan to have more than one index entry at next
248
      // level, i.e. if we have next block at current level. Otherwise current level will be the top
249
      // level with single index block.
250
4.33k
      next_level_index_builder_ = std::make_unique<MultiLevelIndexBuilder>(
251
4.33k
          comparator_, table_opt_);
252
4.33k
    }
253
81.9k
    if (next_level_index_builder_) {
254
      // Postpone adding index entry for just flushed block for next FlushNextBlock() call, since
255
      // we need to have block handle to add it to next level index.
256
17.8k
      next_level_.block_to_add = std::move(current_level_block_);
257
17.8k
    }
258
81.9k
    current_level_block_.is_ready = false;
259
81.9k
    return true;
260
81.9k
  } else 
if (64.0k
!last_index_block_handle.IsSet()64.0k
) {
261
    // It means this is the first call of FlushNextBlock() and no keys have been added to index,
262
    // which can only happen when we call FlushNextBlock() unconditionally during finishing of empty
263
    // SST file, so we need to produce an empty index.
264
54
    EnsureCurrentLevelIndexBuilderCreated();
265
54
    RETURN_NOT_OK(FlushCurrentLevel(index_blocks));
266
54
    return true;
267
64.0k
  } else {
268
64.0k
    return false;
269
64.0k
  }
270
145k
}
271
272
68.3k
size_t MultiLevelIndexBuilder::EstimatedSize() const {
273
  // We subtract kBlockTrailerSize at the top level, because it will be added by
274
  // BlockBasedTableBuilder.
275
68.3k
  return current_level_size_ +
276
68.3k
      (next_level_index_builder_ ? 
next_level_index_builder_->EstimatedSize()4.31k
:
-kBlockTrailerSize64.0k
);
277
68.3k
}
278
279
127k
int MultiLevelIndexBuilder::NumLevels() const {
280
127k
  return 1 + (next_level_index_builder_ ? 
next_level_index_builder_->NumLevels()8.62k
:
0119k
);
281
127k
}
282
283
} // namespace rocksdb