/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/index_reader.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/rocksdb/table/index_reader.h" |
15 | | |
16 | | #include "yb/rocksdb/table/block_based_table_factory.h" |
17 | | #include "yb/rocksdb/table/block_based_table_internal.h" |
18 | | #include "yb/rocksdb/table/iterator_wrapper.h" |
19 | | #include "yb/rocksdb/table/meta_blocks.h" |
20 | | #include "yb/util/slice.h" |
21 | | |
22 | | namespace rocksdb { |
23 | | |
24 | | using namespace std::placeholders; |
25 | | |
26 | | Status BinarySearchIndexReader::Create( |
27 | | RandomAccessFileReader* file, const Footer& footer, |
28 | | const BlockHandle& index_handle, Env* env, |
29 | | const ComparatorPtr& comparator, |
30 | | std::unique_ptr<IndexReader>* index_reader, |
31 | 3.71k | const std::shared_ptr<yb::MemTracker>& mem_tracker) { |
32 | 3.71k | std::unique_ptr<Block> index_block; |
33 | 3.71k | auto s = block_based_table::ReadBlockFromFile( |
34 | 3.71k | file, footer, ReadOptions::kDefault, index_handle, &index_block, env, mem_tracker); |
35 | | |
36 | 3.71k | if (s.ok()) { |
37 | 3.71k | index_reader->reset(new BinarySearchIndexReader(comparator, std::move(index_block))); |
38 | 3.71k | } |
39 | | |
40 | 3.71k | return s; |
41 | 3.71k | } |
42 | 0 | Result<Slice> BinarySearchIndexReader::GetMiddleKey() { |
43 | 0 | return index_block_->GetMiddleKey(kIndexBlockKeyValueEncodingFormat); |
44 | 0 | } |
45 | | |
46 | | Status HashIndexReader::Create(const SliceTransform* hash_key_extractor, |
47 | | const Footer& footer, RandomAccessFileReader* file, |
48 | | Env* env, const ComparatorPtr& comparator, |
49 | | const BlockHandle& index_handle, |
50 | | InternalIterator* meta_index_iter, |
51 | | std::unique_ptr<IndexReader>* index_reader, |
52 | | bool hash_index_allow_collision, |
53 | 1.65k | const std::shared_ptr<yb::MemTracker>& mem_tracker) { |
54 | 1.65k | std::unique_ptr<Block> index_block; |
55 | 1.65k | auto s = block_based_table::ReadBlockFromFile(file, footer, ReadOptions::kDefault, index_handle, |
56 | 1.65k | &index_block, env, mem_tracker); |
57 | | |
58 | 1.65k | if (!s.ok()) { |
59 | 0 | return s; |
60 | 0 | } |
61 | | |
62 | | // Note, failure to create prefix hash index does not need to be a hard error. We can still fall |
63 | | // back to the original binary search index. |
64 | | // So, Create will succeed regardless, from this point on. |
65 | 1.65k | HashIndexReader* new_index_reader; |
66 | 1.65k | index_reader->reset(new_index_reader = new HashIndexReader(comparator, std::move(index_block))); |
67 | | |
68 | | // Get prefixes block |
69 | 1.65k | BlockHandle prefixes_handle; |
70 | 1.65k | s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesBlock, |
71 | 1.65k | &prefixes_handle); |
72 | 1.65k | if (!s.ok()) { |
73 | 0 | LOG(ERROR) << "Failed to find hash index prefixes block: " << s; |
74 | 0 | return Status::OK(); |
75 | 0 | } |
76 | | |
77 | | // Get index metadata block |
78 | 1.65k | BlockHandle prefixes_meta_handle; |
79 | 1.65k | s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesMetadataBlock, |
80 | 1.65k | &prefixes_meta_handle); |
81 | 1.65k | if (!s.ok()) { |
82 | 0 | LOG(ERROR) << "Failed to find hash index prefixes metadata block: " << s; |
83 | 0 | return Status::OK(); |
84 | 0 | } |
85 | | |
86 | | // Read contents for the blocks |
87 | 1.65k | BlockContents prefixes_contents; |
88 | 1.65k | s = ReadBlockContents(file, footer, ReadOptions::kDefault, prefixes_handle, |
89 | 1.65k | &prefixes_contents, env, mem_tracker, true /* do decompression */); |
90 | 1.65k | if (!s.ok()) { |
91 | 0 | return s; |
92 | 0 | } |
93 | 1.65k | BlockContents prefixes_meta_contents; |
94 | 1.65k | s = ReadBlockContents(file, footer, ReadOptions::kDefault, prefixes_meta_handle, |
95 | 1.65k | &prefixes_meta_contents, env, mem_tracker, |
96 | 1.65k | true /* do decompression */); |
97 | 1.65k | if (!s.ok()) { |
98 | 0 | LOG(ERROR) << "Failed to read hash index prefixes metadata block: " << s; |
99 | 0 | return Status::OK(); |
100 | 0 | } |
101 | | |
102 | 1.65k | if (!hash_index_allow_collision) { |
103 | | // TODO: deprecate once hash_index_allow_collision proves to be stable. |
104 | 0 | BlockHashIndex* hash_index = nullptr; |
105 | 0 | s = CreateBlockHashIndex(hash_key_extractor, |
106 | 0 | prefixes_contents.data, |
107 | 0 | prefixes_meta_contents.data, |
108 | 0 | &hash_index); |
109 | 0 | if (s.ok()) { |
110 | 0 | new_index_reader->index_block_->SetBlockHashIndex(hash_index); |
111 | 0 | new_index_reader->OwnPrefixesContents(std::move(prefixes_contents)); |
112 | 0 | } else { |
113 | 0 | LOG(ERROR) << "Failed to create block hash index: " << s; |
114 | 0 | } |
115 | 1.65k | } else { |
116 | 1.65k | BlockPrefixIndex* prefix_index = nullptr; |
117 | 1.65k | s = BlockPrefixIndex::Create(hash_key_extractor, |
118 | 1.65k | prefixes_contents.data, |
119 | 1.65k | prefixes_meta_contents.data, |
120 | 1.65k | &prefix_index); |
121 | 1.65k | if (s.ok()) { |
122 | 1.65k | new_index_reader->index_block_->SetBlockPrefixIndex(prefix_index); |
123 | 18.4E | } else { |
124 | 18.4E | LOG(ERROR) << "Failed to create block prefix index: " << s; |
125 | 18.4E | } |
126 | 1.65k | } |
127 | | |
128 | 1.65k | return Status::OK(); |
129 | 1.65k | } |
130 | | |
131 | 0 | Result<Slice> HashIndexReader::GetMiddleKey() { |
132 | 0 | return index_block_->GetMiddleKey(kIndexBlockKeyValueEncodingFormat); |
133 | 0 | } |
134 | | |
135 | | class MultiLevelIterator : public InternalIterator { |
136 | | public: |
137 | | static constexpr auto kIterChainInitialCapacity = 4; |
138 | | |
139 | | MultiLevelIterator( |
140 | | TwoLevelIteratorState* state, InternalIterator* top_level_iter, int num_levels, |
141 | | bool need_free_top_level_iter) |
142 | | : state_(state), iter_(num_levels), index_block_handle_(num_levels - 1), |
143 | | bottom_level_iter_(iter_.data() + (num_levels - 1)), |
144 | 11.6M | need_free_top_level_iter_(need_free_top_level_iter) { |
145 | 11.6M | iter_[0].Set(top_level_iter); |
146 | 11.6M | } |
147 | | |
148 | 11.6M | ~MultiLevelIterator() { |
149 | 7.84M | IteratorWrapper* iter = iter_.data() + (need_free_top_level_iter_ ? 0 : 1); |
150 | 19.1M | while (iter <= bottom_level_iter_) { |
151 | 7.48M | iter->DeleteIter(false /* arena_mode */); |
152 | 7.48M | ++iter; |
153 | 7.48M | } |
154 | 11.6M | } |
155 | | |
156 | 21.5M | void Seek(const Slice& target) override { |
157 | 21.5M | if (state_->check_prefix_may_match && !state_->PrefixMayMatch(target)) { |
158 | 0 | bottommost_positioned_iter_ = &iter_[0]; |
159 | 0 | return; |
160 | 0 | } |
161 | | |
162 | 21.5M | DoSeek(std::bind(&IteratorWrapper::Seek, std::placeholders::_1, target)); |
163 | 21.5M | } |
164 | | |
165 | 249k | void SeekToFirst() override { |
166 | 249k | DoSeek(std::bind(&IteratorWrapper::SeekToFirst, std::placeholders::_1)); |
167 | 249k | } |
168 | | |
169 | 158k | void SeekToLast() override { |
170 | 158k | DoSeek(std::bind(&IteratorWrapper::SeekToLast, std::placeholders::_1)); |
171 | 158k | } |
172 | | |
173 | 9.69M | void Next() override { |
174 | 9.69M | DoMove( |
175 | 9.69M | std::bind(&IteratorWrapper::Next, std::placeholders::_1), |
176 | 9.69M | std::bind(&IteratorWrapper::SeekToFirst, std::placeholders::_1) |
177 | 9.69M | ); |
178 | 9.69M | } |
179 | | |
180 | 113k | void Prev() override { |
181 | 113k | DoMove( |
182 | 113k | std::bind(&IteratorWrapper::Prev, std::placeholders::_1), |
183 | 113k | std::bind(&IteratorWrapper::SeekToLast, std::placeholders::_1) |
184 | 113k | ); |
185 | 113k | } |
186 | | |
187 | 93.3M | bool Valid() const override { |
188 | 93.3M | return bottommost_positioned_iter_ == bottom_level_iter_ && bottom_level_iter_->Valid(); |
189 | 93.3M | } |
190 | | |
191 | 16.5M | Slice key() const override { |
192 | 16.5M | DCHECK(Valid()); |
193 | 16.5M | return bottom_level_iter_->key(); |
194 | 16.5M | } |
195 | | |
196 | 31.5M | Slice value() const override { |
197 | 31.5M | DCHECK(Valid()); |
198 | 31.5M | return bottom_level_iter_->value(); |
199 | 31.5M | } |
200 | | |
201 | 19.0M | Status status() const override { |
202 | 19.0M | if (bottommost_positioned_iter_) { |
203 | 11.1M | const IteratorWrapper* iter = iter_.data(); |
204 | 25.4M | while (iter <= bottommost_positioned_iter_ && iter->iter()) { |
205 | 14.3M | if (!iter->status().ok()) { |
206 | 0 | return iter->status(); |
207 | 0 | } |
208 | 14.3M | ++iter; |
209 | 14.3M | } |
210 | 11.1M | } |
211 | 19.0M | return status_; |
212 | 19.0M | } |
213 | | |
214 | 3.89M | void SetSubIterator(IteratorWrapper* iter_wrapper, InternalIterator* iter) { |
215 | 3.89M | if (iter_wrapper->iter() != nullptr) { |
216 | 189k | SaveError(iter_wrapper->status()); |
217 | 189k | } |
218 | 3.89M | iter_wrapper->Set(iter); |
219 | 3.89M | } |
220 | | |
221 | | private: |
222 | 189k | void SaveError(const Status& s) { |
223 | 189k | if (status_.ok() && !s.ok()) status_ = s; |
224 | 189k | } |
225 | | |
226 | | template <typename F> |
227 | 21.9M | void DoSeek(F seek_function) { |
228 | 21.9M | IteratorWrapper* iter = iter_.data(); |
229 | 21.9M | seek_function(iter); |
230 | 21.9M | bottommost_positioned_iter_ = iter; |
231 | 29.8M | while (iter < bottom_level_iter_ && iter->Valid()) { |
232 | 7.91M | InitSubIterator(iter); |
233 | 7.91M | ++iter; |
234 | 7.91M | seek_function(iter); |
235 | 7.91M | } |
236 | 21.9M | bottommost_positioned_iter_ = iter; |
237 | 21.9M | } _ZN7rocksdb18MultiLevelIterator6DoSeekINSt3__16__bindIMNS_15IteratorWrapperEFvvEJRKNS2_12placeholders4__phILi1EEEEEEEEvT_ Line | Count | Source | 227 | 408k | void DoSeek(F seek_function) { | 228 | 408k | IteratorWrapper* iter = iter_.data(); | 229 | 408k | seek_function(iter); | 230 | 408k | bottommost_positioned_iter_ = iter; | 231 | 419k | while (iter < bottom_level_iter_ && iter->Valid()) { | 232 | 10.9k | InitSubIterator(iter); | 233 | 10.9k | ++iter; | 234 | 10.9k | seek_function(iter); | 235 | 10.9k | } | 236 | 408k | bottommost_positioned_iter_ = iter; | 237 | 408k | } |
_ZN7rocksdb18MultiLevelIterator6DoSeekINSt3__16__bindIMNS_15IteratorWrapperEFvRKN2yb5SliceEEJRKNS2_12placeholders4__phILi1EEES8_EEEEEvT_ Line | Count | Source | 227 | 21.5M | void DoSeek(F seek_function) { | 228 | 21.5M | IteratorWrapper* iter = iter_.data(); | 229 | 21.5M | seek_function(iter); | 230 | 21.5M | bottommost_positioned_iter_ = iter; | 231 | 29.4M | while (iter < bottom_level_iter_ && iter->Valid()) { | 232 | 7.90M | InitSubIterator(iter); | 233 | 7.90M | ++iter; | 234 | 7.90M | seek_function(iter); | 235 | 7.90M | } | 236 | 21.5M | bottommost_positioned_iter_ = iter; | 237 | 21.5M | } |
|
238 | | |
239 | | template <typename F1, typename F2> |
240 | 9.80M | void DoMove(F1 move_function, F2 lower_levels_init_function) { |
241 | 9.80M | DCHECK(Valid()); |
242 | | // First try to move iterator starting with bottom level. |
243 | 9.80M | IteratorWrapper* iter = bottom_level_iter_; |
244 | 9.80M | move_function(iter); |
245 | 9.85M | while (!iter->Valid() && iter > iter_.data()) { |
246 | 43.6k | --iter; |
247 | 43.6k | move_function(iter); |
248 | 43.6k | } |
249 | 9.80M | if (!iter->Valid()) { |
250 | 459k | bottommost_positioned_iter_ = iter; |
251 | 459k | return; |
252 | 459k | } |
253 | | // Once we've moved iterator at some level, we need to reset iterators at levels below. |
254 | 9.37M | while (iter < bottom_level_iter_) { |
255 | 23.6k | InitSubIterator(iter); |
256 | 23.6k | ++iter; |
257 | 23.6k | lower_levels_init_function(iter); |
258 | 23.6k | } |
259 | 9.35M | bottommost_positioned_iter_ = bottom_level_iter_; |
260 | 9.35M | } |
261 | | |
262 | 7.93M | void InitSubIterator(IteratorWrapper* parent_iter) { |
263 | 7.93M | DCHECK(parent_iter->Valid()); |
264 | 7.93M | IteratorWrapper* sub_iter = parent_iter + 1; |
265 | 7.93M | std::string* child_index_block_handle = |
266 | 7.93M | index_block_handle_.data() + (parent_iter - iter_.data()); |
267 | 7.93M | Slice handle = parent_iter->value(); |
268 | 7.93M | if (sub_iter->iter() && !sub_iter->status().IsIncomplete() |
269 | 4.22M | && handle.compare(*child_index_block_handle) == 0) { |
270 | | // wrapper is already set to iterator for this handle, no need to change. |
271 | 3.89M | } else { |
272 | 3.89M | InternalIterator* iter = state_->NewSecondaryIterator(handle); |
273 | 3.89M | handle.CopyToBuffer(child_index_block_handle); |
274 | 3.89M | SetSubIterator(sub_iter, iter); |
275 | 3.89M | } |
276 | 7.93M | } |
277 | | |
278 | | TwoLevelIteratorState* const state_; |
279 | | boost::container::small_vector<IteratorWrapper, kIterChainInitialCapacity> iter_; |
280 | | // If iter_[level] holds non-nullptr, then "index_block_handle_[level-1]" holds the |
281 | | // handle passed to state_->NewSecondaryIterator to create iter_[level]. |
282 | | boost::container::small_vector<std::string, kIterChainInitialCapacity - 1> index_block_handle_; |
283 | | IteratorWrapper* const bottom_level_iter_; |
284 | | bool need_free_top_level_iter_; |
285 | | Status status_ = Status::OK(); |
286 | | IteratorWrapper* bottommost_positioned_iter_ = nullptr; |
287 | | }; |
288 | | |
289 | | Result<std::unique_ptr<MultiLevelIndexReader>> MultiLevelIndexReader::Create( |
290 | | RandomAccessFileReader* file, const Footer& footer, const int num_levels, |
291 | | const BlockHandle& top_level_index_handle, Env* env, const ComparatorPtr& comparator, |
292 | 70.8k | const std::shared_ptr<yb::MemTracker>& mem_tracker) { |
293 | 70.8k | std::unique_ptr<Block> index_block; |
294 | 70.8k | RETURN_NOT_OK(block_based_table::ReadBlockFromFile( |
295 | 70.8k | file, footer, ReadOptions::kDefault, top_level_index_handle, &index_block, env, |
296 | 70.8k | mem_tracker)); |
297 | | |
298 | 70.8k | return std::make_unique<MultiLevelIndexReader>(comparator, num_levels, std::move(index_block)); |
299 | 70.8k | } |
300 | | |
301 | | InternalIterator* MultiLevelIndexReader::NewIterator( |
302 | 11.6M | BlockIter* iter, TwoLevelIteratorState* index_iterator_state, bool) { |
303 | 11.6M | InternalIterator* top_level_iter = top_level_index_block_->NewIndexIterator( |
304 | 11.6M | comparator_.get(), iter, true /* total_order_seek */); |
305 | 11.6M | return new MultiLevelIterator( |
306 | 11.6M | index_iterator_state, top_level_iter, num_levels_, top_level_iter != iter); |
307 | 11.6M | } |
308 | | |
309 | 46 | Result<Slice> MultiLevelIndexReader::GetMiddleKey() { |
310 | 46 | return top_level_index_block_->GetMiddleKey(kIndexBlockKeyValueEncodingFormat); |
311 | 46 | } |
312 | | |
313 | | } // namespace rocksdb |