YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/index_builder.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_ROCKSDB_TABLE_INDEX_BUILDER_H
15
#define YB_ROCKSDB_TABLE_INDEX_BUILDER_H
16
17
#include "yb/rocksdb/flush_block_policy.h"
18
#include "yb/rocksdb/table.h"
19
#include "yb/rocksdb/table/block_builder.h"
20
#include "yb/rocksdb/table/format.h"
21
22
#include "yb/util/strongly_typed_bool.h"
23
24
namespace rocksdb {
25
26
using yb::Result;
27
28
// The interface for building index.
29
// Instruction for adding a new concrete IndexBuilder:
30
//  1. Create a subclass instantiated from IndexBuilder.
31
//  2. Add a new entry associated with that subclass in IndexType.
32
//  3. Add a create function for the new subclass in CreateIndexBuilder.
33
// Note: we can devise more advanced design to simplify the process for adding
34
// new subclass, which will, on the other hand, increase the code complexity and
35
// catch unwanted attention from readers. Given that we won't add/change
36
// indexes frequently, it makes sense to just embrace a more straightforward
37
// design that just works.
38
class IndexBuilder {
39
 public:
40
  // Create a index builder based on its type.
41
  static IndexBuilder* CreateIndexBuilder(
42
      IndexType index_type,
43
      const Comparator* comparator,
44
      const SliceTransform* prefix_extractor,
45
      const BlockBasedTableOptions& table_opt);
46
47
  // Index builder will construct a set of blocks which contain:
48
  //  1. One primary index block.
49
  //  2. (Optional) a set of metablocks that contains the metadata of the
50
  //     primary index.
51
  struct IndexBlocks {
52
    Slice index_block_contents;
53
    std::unordered_map<std::string, Slice> meta_blocks;
54
  };
55
  explicit IndexBuilder(const Comparator* comparator)
56
218k
      : comparator_(comparator) {}
57
58
218k
  virtual ~IndexBuilder() {}
59
60
  // Add a new index entry to index block.
61
  // To allow further optimization, we provide `last_key_in_current_block` and
62
  // `first_key_in_next_block`, based on which the specific implementation can
63
  // determine the best index key to be used for the index block.
64
  // @last_key_in_current_block: this parameter maybe overridden with the value
65
  //                             "substitute key".
66
  // @first_key_in_next_block: it will be nullptr if the entry being added is
67
  //                           the last one in the table
68
  //
69
  // REQUIRES: Finish() has not yet been called.
70
  virtual void AddIndexEntry(std::string* last_key_in_current_block,
71
                             const Slice* first_key_in_next_block,
72
                             const BlockHandle& block_handle) = 0;
73
74
  // This method will be called whenever a key is added. The subclasses may
75
  // override OnKeyAdded() if they need to collect additional information.
76
179M
  virtual void OnKeyAdded(const Slice& key) {}
77
78
  // Inform the index builder that all entries has been written. Block builder
79
  // may therefore perform any operation required for block finalization.
80
  //
81
  // REQUIRES: Finish() has not yet been called.
82
  virtual CHECKED_STATUS Finish(IndexBlocks* index_blocks) = 0;
83
84
  // Whether it is time to flush the current index block. Overridden in MultiLevelIndexBuilder.
85
  // While true is returned the caller should keep calling FlushNextBlock(IndexBlocks*,
86
  // const BlockHandle&) with handle of the block written with the result of the last call to
87
  // FlushNextBlock and keep writing flushed blocks.
88
19.4k
  virtual bool ShouldFlush() const { return false; }
89
90
  // This method can be overridden to build the n-th level index in
91
  // MultiLevelIndexBuilder.
92
  //
93
  // Returns true when it actually flushed entries into index_blocks.
94
  // Returns false when everything was already flushed by previous call to FlushNextBlock.
95
  virtual Result<bool> FlushNextBlock(
96
      IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle);
97
98
  // Get the estimated size for index block.
99
  virtual size_t EstimatedSize() const = 0;
100
101
  // Number of levels for index.
102
  virtual int NumLevels() const = 0;
103
104
 protected:
105
  const Comparator* comparator_;
106
};
107
108
YB_STRONGLY_TYPED_BOOL(ShortenKeys);
109
110
// This index builder builds space-efficient index block.
111
//
112
// Optimizations:
113
//  1. Made block's `block_restart_interval` to be 1, which will avoid linear
114
//     search when doing index lookup (can be disabled by setting
115
//     index_block_restart_interval).
116
//  2. Shorten the key length for index block. Other than honestly using the
117
//     last key in the data block as the index key, we instead find a shortest
118
//     substitute key that serves the same function.
119
class ShortenedIndexBuilder : public IndexBuilder {
120
 public:
121
  explicit ShortenedIndexBuilder(const Comparator* comparator,
122
                                 int index_block_restart_interval)
123
      : IndexBuilder(comparator),
124
149k
        index_block_builder_(index_block_restart_interval, kIndexBlockKeyValueEncodingFormat) {}
125
126
  void AddIndexEntry(
127
      std::string* last_key_in_current_block,
128
      const Slice* first_key_in_next_block,
129
      const BlockHandle& block_handle) override;
130
131
  void AddIndexEntry(
132
      std::string* last_key_in_current_block,
133
      const Slice* first_key_in_next_block,
134
      const std::string& block_handle_encoded,
135
      ShortenKeys shorten_keys);
136
137
  CHECKED_STATUS Finish(IndexBlocks* index_blocks) override;
138
139
8.55k
  size_t EstimatedSize() const override {
140
8.55k
    return index_block_builder_.CurrentSizeEstimate();
141
8.55k
  }
142
143
8
  int NumLevels() const override { return 1; }
144
145
0
  void Reset() { index_block_builder_.Reset(); }
146
147
82.0k
  const BlockBuilder& GetIndexBlockBuilder() { return index_block_builder_; }
148
149
 private:
150
  BlockBuilder index_block_builder_;
151
};
152
153
// HashIndexBuilder contains a binary-searchable primary index and the
154
// metadata for secondary hash index construction.
155
// The metadata for hash index consists two parts:
156
//  - a metablock that compactly contains a sequence of prefixes. All prefixes
157
//    are stored consecutively without any metadata (like, prefix sizes) being
158
//    stored, which is kept in the other metablock.
159
//  - a metablock contains the metadata of the prefixes, including prefix size,
160
//    restart index and number of block it spans. The format looks like:
161
//
162
// +-----------------+---------------------------+---------------------+ <=prefix 1
163
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
164
// +-----------------+---------------------------+---------------------+ <=prefix 2
165
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
166
// +-----------------+---------------------------+---------------------+
167
// |                                                                   |
168
// | ....                                                              |
169
// |                                                                   |
170
// +-----------------+---------------------------+---------------------+ <=prefix n
171
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
172
// +-----------------+---------------------------+---------------------+
173
//
174
// The reason of separating these two metablocks is to enable the efficiently
175
// reuse the first metablock during hash index construction without unnecessary
176
// data copy or small heap allocations for prefixes.
177
class HashIndexBuilder : public IndexBuilder {
178
 public:
179
  explicit HashIndexBuilder(const Comparator* comparator,
180
                            const SliceTransform* hash_key_extractor,
181
                            int index_block_restart_interval)
182
      : IndexBuilder(comparator),
183
        primary_index_builder_(comparator, index_block_restart_interval),
184
1.45k
        hash_key_extractor_(hash_key_extractor) {}
185
186
  void AddIndexEntry(
187
      std::string* last_key_in_current_block,
188
      const Slice* first_key_in_next_block,
189
      const BlockHandle& block_handle) override;
190
191
  void OnKeyAdded(const Slice& key) override;
192
193
  CHECKED_STATUS Finish(IndexBlocks* index_blocks) override;
194
195
1.45k
  size_t EstimatedSize() const override {
196
1.45k
    return primary_index_builder_.EstimatedSize() + prefix_block_.size() +
197
1.45k
        prefix_meta_block_.size();
198
1.45k
  }
199
200
2.07k
  int NumLevels() const override { return 1; }
201
202
 private:
203
  void FlushPendingPrefix();
204
205
  ShortenedIndexBuilder primary_index_builder_;
206
  const SliceTransform* hash_key_extractor_;
207
208
  // stores a sequence of prefixes
209
  std::string prefix_block_;
210
  // stores the metadata of prefixes
211
  std::string prefix_meta_block_;
212
213
  // The following 3 variables keeps unflushed prefix and its metadata.
214
  // The details of block_num and entry_index can be found in
215
  // "block_hash_index.{h,cc}"
216
  uint32_t pending_block_num_ = 0;
217
  uint32_t pending_entry_index_ = 0;
218
  std::string pending_entry_prefix_;
219
220
  uint64_t current_restart_index_ = 0;
221
};
222
223
class MultiLevelIndexBuilder : public IndexBuilder {
224
 public:
225
  MultiLevelIndexBuilder(const Comparator* comparator, const BlockBasedTableOptions& table_opt);
226
227
  void AddIndexEntry(
228
      std::string* last_key_in_current_block,
229
      const Slice* first_key_in_next_block,
230
2.84M
      const BlockHandle& block_handle) override {
231
2.84M
    AddIndexEntry(
232
2.84M
        last_key_in_current_block, first_key_in_next_block, block_handle, ShortenKeys::kTrue);
233
2.84M
  }
234
235
  void AddIndexEntry(
236
      std::string* last_key_in_current_block,
237
      const Slice* first_key_in_next_block,
238
      const BlockHandle& block_handle,
239
      ShortenKeys shorten_keys);
240
241
0
  CHECKED_STATUS Finish(IndexBlocks* index_blocks) override {
242
0
    return STATUS(NotSupported, "Finishing as single block is not supported by multi-level index.");
243
0
  }
244
245
  bool ShouldFlush() const override;
246
247
  Result<bool> FlushNextBlock(
248
      IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) override;
249
250
  size_t EstimatedSize() const override;
251
252
  int NumLevels() const override;
253
254
 private:
255
  struct IndexBlockInfo;
256
257
  void EnsureCurrentLevelIndexBuilderCreated();
258
  CHECKED_STATUS FlushCurrentLevel(IndexBlocks* index_blocks);
259
  void AddToNextLevelIfReady(
260
      IndexBlockInfo* block_info, const BlockHandle& block_handle);
261
262
  const BlockBasedTableOptions& table_opt_;
263
264
  // Builder for an index block at the current level.
265
  std::unique_ptr<ShortenedIndexBuilder> current_level_index_block_builder_;
266
267
  // Already flushed index block builder, which can only be deleted after the corresponding block is
268
  // written, because it holds flushed data referenced by a slice returned from FlushNextBlock.
269
  std::unique_ptr<ShortenedIndexBuilder> finished_index_block_builder_;
270
271
  // Builder for index blocks at higher levels.
272
  std::unique_ptr<MultiLevelIndexBuilder> next_level_index_builder_;
273
  std::unique_ptr<FlushBlockPolicy> flush_policy_;
274
275
  // Whether we've started flushing index blocks, i.e. the FlushNextBlock method has been already
276
  // called.
277
  bool flushing_indexes_ = false;
278
279
  // Size of index blocks at the current level.
280
  size_t current_level_size_ = 0;
281
282
  struct IndexBlockInfo {
283
    // Whether the index block is ready for processing, other fields are only correct when
284
    // is_ready is true.
285
    bool is_ready = false;
286
    // Last key in the index block.
287
    std::string last_key;
288
    // Whether there are keys for the next block at the same level.
289
    bool has_next_block = false;
290
    // First key for the next index block at the same level (only correct if has_next_block is
291
    // true).
292
    std::string next_block_first_key;
293
294
4.43M
    bool IsReadyAndLast() const { return is_ready && 
!has_next_block1.57M
; }
295
  };
296
297
  // A complete current level index block once it is ready for flush.
298
  IndexBlockInfo current_level_block_;
299
  struct {
300
    // A complete current level index block once it is ready to add to next level index.
301
    IndexBlockInfo block_to_add;
302
    // Handle of the last index block flushed at a higher level.
303
    BlockHandle last_flushed_block_;
304
    // If an index block from a higher level was flushed during the previous call to
305
    // FlushNextBlock().
306
    bool just_flushed = false;
307
  } next_level_;
308
  // Buffer for block handle encoding.
309
  std::string block_handle_encoding_;
310
};
311
312
} // namespace rocksdb
313
314
#endif  // YB_ROCKSDB_TABLE_INDEX_BUILDER_H