/Users/deen/code/yugabyte-db/src/yb/docdb/doc_rowwise_iterator.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/docdb/doc_rowwise_iterator.h" |
15 | | #include <iterator> |
16 | | |
17 | | #include <cstdint> |
18 | | #include <ostream> |
19 | | #include <string> |
20 | | #include <vector> |
21 | | |
22 | | #include "yb/common/common.pb.h" |
23 | | #include "yb/common/doc_hybrid_time.h" |
24 | | #include "yb/common/hybrid_time.h" |
25 | | #include "yb/common/ql_expr.h" |
26 | | #include "yb/common/ql_scanspec.h" |
27 | | #include "yb/common/ql_value.h" |
28 | | #include "yb/common/read_hybrid_time.h" |
29 | | #include "yb/common/transaction.h" |
30 | | |
31 | | #include "yb/docdb/docdb_fwd.h" |
32 | | #include "yb/docdb/doc_key.h" |
33 | | #include "yb/docdb/doc_path.h" |
34 | | #include "yb/docdb/doc_ql_scanspec.h" |
35 | | #include "yb/docdb/doc_reader.h" |
36 | | #include "yb/docdb/doc_scanspec_util.h" |
37 | | #include "yb/docdb/docdb_rocksdb_util.h" |
38 | | #include "yb/docdb/docdb_types.h" |
39 | | #include "yb/docdb/expiration.h" |
40 | | #include "yb/docdb/intent_aware_iterator.h" |
41 | | #include "yb/docdb/primitive_value.h" |
42 | | #include "yb/docdb/subdocument.h" |
43 | | #include "yb/docdb/value.h" |
44 | | #include "yb/docdb/value_type.h" |
45 | | |
46 | | #include "yb/gutil/strings/substitute.h" |
47 | | #include "yb/util/flags.h" |
48 | | #include "yb/rocksdb/db/compaction.h" |
49 | | #include "yb/rocksutil/yb_rocksdb.h" |
50 | | |
51 | | #include "yb/rocksdb/db.h" |
52 | | |
53 | | #include "yb/util/flag_tags.h" |
54 | | #include "yb/util/result.h" |
55 | | #include "yb/util/status.h" |
56 | | #include "yb/util/status_format.h" |
57 | | #include "yb/util/status_log.h" |
58 | | #include "yb/util/strongly_typed_bool.h" |
59 | | |
60 | | DEFINE_bool(disable_hybrid_scan, false, |
61 | | "If true, hybrid scan will be disabled"); |
62 | | TAG_FLAG(disable_hybrid_scan, runtime); |
63 | | |
64 | | using std::string; |
65 | | |
66 | | namespace yb { |
67 | | namespace docdb { |
68 | | |
69 | | class ScanChoices { |
70 | | public: |
71 | 75.4k | explicit ScanChoices(bool is_forward_scan) : is_forward_scan_(is_forward_scan) {} |
72 | 75.4k | virtual ~ScanChoices() {} |
73 | | |
74 | 26.0M | bool CurrentTargetMatchesKey(const Slice& curr) { |
75 | 18.4E | VLOG(3) << __PRETTY_FUNCTION__ << " checking if acceptable ? " |
76 | 18.4E | << (curr == current_scan_target_ ? "YEP"0 : "NOPE") |
77 | 18.4E | << ": " << DocKey::DebugSliceToString(curr) |
78 | 18.4E | << " vs " << DocKey::DebugSliceToString(current_scan_target_.AsSlice()); |
79 | 26.0M | return curr == current_scan_target_; |
80 | 26.0M | } |
81 | | |
82 | | // Returns false if there are still target keys we need to scan, and true if we are done. |
83 | 38.3M | virtual bool FinishedWithScanChoices() const { return finished_; } |
84 | | |
85 | | // Go to the next scan target if any. |
86 | | virtual CHECKED_STATUS DoneWithCurrentTarget() = 0; |
87 | | |
88 | | // Go (directly) to the new target (or the one after if new_target does not |
89 | | // exist in the desired list/range). If the new_target is larger than all scan target options it |
90 | | // means we are done. |
91 | | virtual CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) = 0; |
92 | | |
93 | | // If the given doc_key isn't already at the desired target, seek appropriately to go to the |
94 | | // current target. |
95 | | virtual CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) = 0; |
96 | | |
97 | | protected: |
98 | | const bool is_forward_scan_; |
99 | | KeyBytes current_scan_target_; |
100 | | bool finished_ = false; |
101 | | }; |
102 | | |
103 | | class DiscreteScanChoices : public ScanChoices { |
104 | | public: |
105 | | DiscreteScanChoices(const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, |
106 | | const KeyBytes& upper_doc_key) |
107 | 0 | : ScanChoices(doc_spec.is_forward_scan()) { |
108 | 0 | range_cols_scan_options_ = doc_spec.range_options(); |
109 | 0 | current_scan_target_idxs_.resize(range_cols_scan_options_->size()); |
110 | 0 | for (size_t i = 0; i < range_cols_scan_options_->size(); i++) { |
111 | 0 | current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin(); |
112 | 0 | } |
113 | | |
114 | | // Initialize target doc key. |
115 | 0 | if (is_forward_scan_) { |
116 | 0 | current_scan_target_ = lower_doc_key; |
117 | 0 | if (CHECK_RESULT(ClearRangeComponents(¤t_scan_target_))) { |
118 | 0 | CHECK_OK(SkipTargetsUpTo(lower_doc_key)); |
119 | 0 | } |
120 | 0 | } else { |
121 | 0 | current_scan_target_ = upper_doc_key; |
122 | 0 | if (CHECK_RESULT(ClearRangeComponents(¤t_scan_target_))) { |
123 | 0 | CHECK_OK(SkipTargetsUpTo(upper_doc_key)); |
124 | 0 | } |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | | DiscreteScanChoices(const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key, |
129 | | const KeyBytes& upper_doc_key) |
130 | 0 | : ScanChoices(doc_spec.is_forward_scan()) { |
131 | 0 | range_cols_scan_options_ = doc_spec.range_options(); |
132 | 0 | current_scan_target_idxs_.resize(range_cols_scan_options_->size()); |
133 | 0 | for (size_t i = 0; i < range_cols_scan_options_->size(); i++) { |
134 | 0 | current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin(); |
135 | 0 | } |
136 | | |
137 | | // Initialize target doc key. |
138 | 0 | if (is_forward_scan_) { |
139 | 0 | current_scan_target_ = lower_doc_key; |
140 | 0 | if (CHECK_RESULT(ClearRangeComponents(¤t_scan_target_))) { |
141 | 0 | CHECK_OK(SkipTargetsUpTo(lower_doc_key)); |
142 | 0 | } |
143 | 0 | } else { |
144 | 0 | current_scan_target_ = upper_doc_key; |
145 | 0 | if (CHECK_RESULT(ClearRangeComponents(¤t_scan_target_))) { |
146 | 0 | CHECK_OK(SkipTargetsUpTo(upper_doc_key)); |
147 | 0 | } |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | | CHECKED_STATUS DoneWithCurrentTarget() override; |
152 | | CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override; |
153 | | CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override; |
154 | | |
155 | | protected: |
156 | | // Utility function for (multi)key scans. Updates the target scan key by incrementing the option |
157 | | // index for one column. Will handle overflow by setting current column index to 0 and |
158 | | // incrementing the previous column instead. If it overflows at first column it means we are done, |
159 | | // so it clears the scan target idxs array. |
160 | | CHECKED_STATUS IncrementScanTargetAtColumn(size_t start_col); |
161 | | |
162 | | // Utility function for (multi)key scans to initialize the range portion of the current scan |
163 | | // target, scan target with the first option. |
164 | | // Only needed for scans that include the static row, otherwise Init will take care of this. |
165 | | Result<bool> InitScanTargetRangeGroupIfNeeded(); |
166 | | |
167 | | private: |
168 | | // For (multi)key scans (e.g. selects with 'IN' condition on the range columns) we hold the |
169 | | // options for each range column as we iteratively seek to each target key. |
170 | | // e.g. for a query "h = 1 and r1 in (2,3) and r2 in (4,5) and r3 = 6": |
171 | | // range_cols_scan_options_ [[2, 3], [4, 5], [6]] -- value options for each column. |
172 | | // current_scan_target_idxs_ goes from [0, 0, 0] up to [1, 1, 0] -- except when including the |
173 | | // static row when it starts from [0, 0, -1] instead. |
174 | | // current_scan_target_ goes from [1][2,4,6] up to [1][3,5,6] -- is the doc key containing, |
175 | | // for each range column, the value (option) referenced by the |
176 | | // corresponding index (updated along with current_scan_target_idxs_). |
177 | | std::shared_ptr<std::vector<std::vector<PrimitiveValue>>> range_cols_scan_options_; |
178 | | mutable std::vector<std::vector<PrimitiveValue>::const_iterator> current_scan_target_idxs_; |
179 | | }; |
180 | | |
181 | 0 | Status DiscreteScanChoices::IncrementScanTargetAtColumn(size_t start_col) { |
182 | 0 | DCHECK_LE(start_col, current_scan_target_idxs_.size()); |
183 | | |
184 | | // Increment start col, move backwards in case of overflow. |
185 | 0 | ssize_t col_idx = start_col; |
186 | 0 | for (; col_idx >= 0; col_idx--) { |
187 | 0 | const auto& choices = (*range_cols_scan_options_)[col_idx]; |
188 | 0 | auto& it = current_scan_target_idxs_[col_idx]; |
189 | |
|
190 | 0 | if (++it != choices.end()) { |
191 | 0 | break; |
192 | 0 | } |
193 | 0 | it = choices.begin(); |
194 | 0 | } |
195 | |
|
196 | 0 | if (col_idx < 0) { |
197 | | // If we got here we finished all the options and are done. |
198 | 0 | finished_ = true; |
199 | 0 | return Status::OK(); |
200 | 0 | } |
201 | | |
202 | 0 | DocKeyDecoder decoder(current_scan_target_); |
203 | 0 | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
204 | 0 | for (int i = 0; i != col_idx; ++i) { |
205 | 0 | RETURN_NOT_OK(decoder.DecodePrimitiveValue()); |
206 | 0 | } |
207 | | |
208 | 0 | current_scan_target_.Truncate( |
209 | 0 | decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata()); |
210 | |
|
211 | 0 | for (size_t i = col_idx; i <= start_col; ++i) { |
212 | 0 | current_scan_target_idxs_[i]->AppendToKey(¤t_scan_target_); |
213 | 0 | } |
214 | |
|
215 | 0 | return Status::OK(); |
216 | 0 | } |
217 | | |
218 | 0 | Result<bool> DiscreteScanChoices::InitScanTargetRangeGroupIfNeeded() { |
219 | 0 | DocKeyDecoder decoder(current_scan_target_.AsSlice()); |
220 | 0 | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
221 | | |
222 | | // Initialize the range key values if needed (i.e. we scanned the static row until now). |
223 | 0 | if (!VERIFY_RESULT(decoder.HasPrimitiveValue())) { |
224 | 0 | current_scan_target_.mutable_data()->pop_back(); |
225 | 0 | for (size_t col_idx = 0; col_idx < range_cols_scan_options_->size(); col_idx++) { |
226 | 0 | current_scan_target_idxs_[col_idx]->AppendToKey(¤t_scan_target_); |
227 | 0 | } |
228 | 0 | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
229 | 0 | return true; |
230 | 0 | } |
231 | 0 | return false; |
232 | 0 | } |
233 | | |
234 | 0 | Status DiscreteScanChoices::DoneWithCurrentTarget() { |
235 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target"; |
236 | 0 | DCHECK(!FinishedWithScanChoices()); |
237 | | |
238 | | // Initialize the first target/option if not done already, otherwise go to the next one. |
239 | 0 | if (!VERIFY_RESULT(InitScanTargetRangeGroupIfNeeded())) { |
240 | 0 | RETURN_NOT_OK(IncrementScanTargetAtColumn(range_cols_scan_options_->size() - 1)); |
241 | 0 | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
242 | 0 | } |
243 | 0 | return Status::OK(); |
244 | 0 | } |
245 | | |
246 | 0 | Status DiscreteScanChoices::SkipTargetsUpTo(const Slice& new_target) { |
247 | 0 | VLOG(2) << __PRETTY_FUNCTION__ |
248 | 0 | << " Updating current target to be >= " |
249 | 0 | << DocKey::DebugSliceToString(new_target); |
250 | 0 | DCHECK(!FinishedWithScanChoices()); |
251 | 0 | RETURN_NOT_OK(InitScanTargetRangeGroupIfNeeded()); |
252 | 0 | DocKeyDecoder decoder(new_target); |
253 | 0 | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
254 | 0 | current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data())); |
255 | |
|
256 | 0 | size_t col_idx = 0; |
257 | 0 | PrimitiveValue target_value; |
258 | 0 | while (col_idx < range_cols_scan_options_->size()) { |
259 | 0 | RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value)); |
260 | 0 | const auto& choices = (*range_cols_scan_options_)[col_idx]; |
261 | 0 | auto& it = current_scan_target_idxs_[col_idx]; |
262 | | |
263 | | // Fast-path in case the existing value for this column already matches the new target. |
264 | 0 | if (target_value == *it) { |
265 | 0 | col_idx++; |
266 | 0 | target_value.AppendToKey(¤t_scan_target_); |
267 | 0 | continue; |
268 | 0 | } |
269 | | |
270 | | // Search for the option that matches new target value (for the current column). |
271 | 0 | if (is_forward_scan_) { |
272 | 0 | it = std::lower_bound(choices.begin(), choices.end(), target_value); |
273 | 0 | } else { |
274 | 0 | it = std::lower_bound(choices.begin(), choices.end(), target_value, std::greater<>()); |
275 | 0 | } |
276 | | |
277 | | // If we overflowed, the new target value for this column is larger than all our options, so |
278 | | // we go back and increment the previous column instead. |
279 | 0 | if (it == choices.end()) { |
280 | 0 | RETURN_NOT_OK(IncrementScanTargetAtColumn(col_idx - 1)); |
281 | 0 | break; |
282 | 0 | } |
283 | | |
284 | | // Else, update the current target value for this column. |
285 | 0 | it->AppendToKey(¤t_scan_target_); |
286 | | |
287 | | // If we did not find an exact match we are already beyond the new target so we can stop. |
288 | 0 | if (target_value != *it) { |
289 | 0 | col_idx++; |
290 | 0 | break; |
291 | 0 | } |
292 | | |
293 | 0 | col_idx++; |
294 | 0 | } |
295 | | |
296 | | // If there are any columns left (i.e. we stopped early), it means we did not find an exact |
297 | | // match and we reached beyond the new target key. So we need to include all options for the |
298 | | // leftover columns (i.e. set all following indexes to 0). |
299 | 0 | for (size_t i = col_idx; i < current_scan_target_idxs_.size(); i++) { |
300 | 0 | current_scan_target_idxs_[i] = (*range_cols_scan_options_)[i].begin(); |
301 | 0 | current_scan_target_idxs_[i]->AppendToKey(¤t_scan_target_); |
302 | 0 | } |
303 | |
|
304 | 0 | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
305 | |
|
306 | 0 | VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " |
307 | 0 | << DocKey::DebugSliceToString(current_scan_target_); |
308 | |
|
309 | 0 | return Status::OK(); |
310 | 0 | } |
311 | | |
312 | 0 | Status DiscreteScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) { |
313 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target"; |
314 | | // Seek to the current target doc key if needed. |
315 | 0 | if (!FinishedWithScanChoices()) { |
316 | 0 | if (is_forward_scan_) { |
317 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " Seeking to " << current_scan_target_; |
318 | 0 | db_iter->Seek(current_scan_target_); |
319 | 0 | } else { |
320 | 0 | auto tmp = current_scan_target_; |
321 | 0 | tmp.AppendValueType(ValueType::kHighest); |
322 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp; |
323 | 0 | db_iter->PrevDocKey(tmp); |
324 | 0 | } |
325 | 0 | } |
326 | 0 | return Status::OK(); |
327 | 0 | } |
328 | | |
329 | | // This class combines the notions of option filters (col1 IN (1,2,3)) and |
330 | | // singular range bound filters (col1 < 4 AND col1 >= 1) into a single notion of |
331 | | // lists of ranges. So a filter for a column given in the |
332 | | // Doc(QL/PGSQL)ScanSpec is converted into a range bound filter. |
333 | | // In the end, each HybridScanChoices |
334 | | // instance should have a sorted list of disjoint ranges to filter each column. |
335 | | // Right now this supports a conjunction of range bound and discrete filters. |
336 | | // Disjunctions are also supported but are UNTESTED. |
337 | | // TODO: Test disjunctions when YSQL and YQL support pushing those down |
338 | | |
339 | | class HybridScanChoices : public ScanChoices { |
340 | | public: |
341 | | |
342 | | // Constructs a list of ranges for each column from the given scanspec. |
343 | | // A filter of the form col1 IN (1,4,5) is converted to a filter |
344 | | // in the form col1 IN ([1, 1], [4, 4], [5, 5]). |
345 | | HybridScanChoices(const Schema& schema, |
346 | | const KeyBytes &lower_doc_key, |
347 | | const KeyBytes &upper_doc_key, |
348 | | bool is_forward_scan, |
349 | | const std::vector<ColumnId> &range_options_indexes, |
350 | | const |
351 | | std::shared_ptr<std::vector<std::vector<PrimitiveValue>>>& |
352 | | range_options, |
353 | | const std::vector<ColumnId> range_bounds_indexes, |
354 | | const QLScanRange *range_bounds) |
355 | | : ScanChoices(is_forward_scan), |
356 | | lower_doc_key_(lower_doc_key), |
357 | 75.4k | upper_doc_key_(upper_doc_key) { |
358 | 75.4k | auto range_cols_scan_options = range_options; |
359 | 75.4k | size_t idx = 0; |
360 | 75.4k | range_cols_scan_options_lower_.reserve(schema.num_range_key_columns()); |
361 | 75.4k | range_cols_scan_options_upper_.reserve(schema.num_range_key_columns()); |
362 | | |
363 | 75.4k | size_t num_hash_cols = schema.num_hash_key_columns(); |
364 | | |
365 | 75.4k | for (idx = schema.num_hash_key_columns(); |
366 | 204k | idx < schema.num_key_columns(); idx++128k ) { |
367 | 128k | const ColumnId col_idx = schema.column_id(idx); |
368 | 128k | range_cols_scan_options_lower_.push_back({}); |
369 | 128k | range_cols_scan_options_upper_.push_back({}); |
370 | | |
371 | | // If this is a range bound filter, we create a singular |
372 | | // list of the given range bound |
373 | 128k | if ((std::find(range_bounds_indexes.begin(), |
374 | 128k | range_bounds_indexes.end(), col_idx) |
375 | 128k | != range_bounds_indexes.end()) |
376 | 128k | && (std::find(range_options_indexes.begin(), |
377 | 128k | range_options_indexes.end(), col_idx) |
378 | 128k | == range_options_indexes.end())) { |
379 | 127k | const auto col_sort_type = schema.column(idx).sorting_type(); |
380 | 127k | const QLScanRange::QLRange range = range_bounds->RangeFor(col_idx); |
381 | 127k | const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, |
382 | 127k | true /* lower_bound */); |
383 | 127k | const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, |
384 | 127k | false /* upper_bound */); |
385 | | |
386 | 127k | range_cols_scan_options_lower_[idx - num_hash_cols].push_back(lower); |
387 | 127k | range_cols_scan_options_upper_[idx - num_hash_cols].push_back(upper); |
388 | 127k | } else { |
389 | | |
390 | | // If this is an option filter, we turn each option into a |
391 | | // range bound to produce a list of singular range bounds |
392 | 1.31k | if(std::find(range_options_indexes.begin(), |
393 | 1.31k | range_options_indexes.end(), col_idx) |
394 | 1.31k | != range_options_indexes.end()) { |
395 | 1.31k | auto &options = (*range_cols_scan_options)[idx - num_hash_cols]; |
396 | | |
397 | 1.31k | if (options.empty()) { |
398 | | // If there is nothing specified in the IN list like in |
399 | | // SELECT * FROM ... WHERE c1 IN (); |
400 | | // then nothing should pass the filter. |
401 | | // To enforce this, we create a range bound (kHighest, kLowest) |
402 | | // |
403 | | // As of D15647 we do not send empty options. |
404 | | // This is kept for backward compatibility during rolling upgrades. |
405 | 0 | range_cols_scan_options_lower_[idx |
406 | 0 | - num_hash_cols].push_back(PrimitiveValue(ValueType::kHighest)); |
407 | 0 | range_cols_scan_options_upper_[idx |
408 | 0 | - num_hash_cols].push_back(PrimitiveValue(ValueType::kLowest)); |
409 | 0 | } |
410 | | |
411 | 3.50k | for (auto val : options) { |
412 | 3.50k | const auto lower = val; |
413 | 3.50k | const auto upper = val; |
414 | 3.50k | range_cols_scan_options_lower_[idx |
415 | 3.50k | - num_hash_cols].push_back(lower); |
416 | 3.50k | range_cols_scan_options_upper_[idx |
417 | 3.50k | - num_hash_cols].push_back(upper); |
418 | 3.50k | } |
419 | | |
420 | 18.4E | } else { |
421 | | // If no filter is specified, we just impose an artificial range |
422 | | // filter [kLowest, kHighest] |
423 | 18.4E | range_cols_scan_options_lower_[idx - num_hash_cols] |
424 | 18.4E | .push_back(PrimitiveValue(ValueType::kLowest)); |
425 | 18.4E | range_cols_scan_options_upper_[idx - num_hash_cols] |
426 | 18.4E | .push_back(PrimitiveValue(ValueType::kHighest)); |
427 | 18.4E | } |
428 | 1.31k | } |
429 | 128k | } |
430 | | |
431 | 75.4k | current_scan_target_idxs_.resize(range_cols_scan_options_lower_.size()); |
432 | | |
433 | 75.4k | if (is_forward_scan_) { |
434 | 67.3k | current_scan_target_ = lower_doc_key; |
435 | 67.3k | } else { |
436 | 8.11k | current_scan_target_ = upper_doc_key; |
437 | 8.11k | } |
438 | | |
439 | 75.4k | } |
440 | | |
441 | | HybridScanChoices(const Schema& schema, |
442 | | const DocPgsqlScanSpec& doc_spec, |
443 | | const KeyBytes &lower_doc_key, |
444 | | const KeyBytes &upper_doc_key) |
445 | | : HybridScanChoices(schema, lower_doc_key, upper_doc_key, |
446 | | doc_spec.is_forward_scan(), doc_spec.range_options_indexes(), |
447 | | doc_spec.range_options(), doc_spec.range_bounds_indexes(), |
448 | 13.2k | doc_spec.range_bounds()) { |
449 | 13.2k | } |
450 | | |
451 | | HybridScanChoices(const Schema& schema, |
452 | | const DocQLScanSpec& doc_spec, |
453 | | const KeyBytes &lower_doc_key, |
454 | | const KeyBytes &upper_doc_key) |
455 | | : HybridScanChoices(schema, lower_doc_key, upper_doc_key, |
456 | | doc_spec.is_forward_scan(), doc_spec.range_options_indexes(), |
457 | | doc_spec.range_options(), doc_spec.range_bounds_indexes(), |
458 | 62.1k | doc_spec.range_bounds()) { |
459 | 62.1k | } |
460 | | |
461 | | CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override; |
462 | | CHECKED_STATUS DoneWithCurrentTarget() override; |
463 | | CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override; |
464 | | |
465 | | protected: |
466 | | // Utility function for (multi)key scans. Updates the target scan key by |
467 | | // incrementing the option |
468 | | // index for one column. Will handle overflow by setting current column |
469 | | // index to 0 and incrementing the previous column instead. If it overflows |
470 | | // at first column it means we are done, so it clears the scan target idxs |
471 | | // array. |
472 | | CHECKED_STATUS IncrementScanTargetAtColumn(int start_col); |
473 | | |
474 | | private: |
475 | | KeyBytes prev_scan_target_; |
476 | | |
477 | | // The following encodes the list of ranges we are iterating over |
478 | | std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_lower_; |
479 | | std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_upper_; |
480 | | |
481 | | std::vector<ColumnId> range_options_indexes_; |
482 | | mutable std::vector<size_t> current_scan_target_idxs_; |
483 | | |
484 | | bool is_options_done_ = false; |
485 | | |
486 | | const KeyBytes lower_doc_key_; |
487 | | const KeyBytes upper_doc_key_; |
488 | | }; |
489 | | |
490 | | // Sets current_scan_target_ to the first tuple in the filter space |
491 | | // that is >= new_target. |
492 | 12.2M | Status HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { |
493 | 12.2M | VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= " |
494 | 2 | << DocKey::DebugSliceToString(new_target); |
495 | 12.2M | DCHECK(!FinishedWithScanChoices()); |
496 | 12.2M | is_options_done_ = false; |
497 | | |
498 | | /* |
499 | | Let's say we have a row key with (A B) as the hash part and C, D as the range part: |
500 | | ((A B) C D) E F |
501 | | |
502 | | Let's say our current constraints : |
503 | | l_c_k <= C <= u_c_k |
504 | | 4 6 |
505 | | |
506 | | l_d_j <= D <= u_d_j |
507 | | 3 5 |
508 | | |
509 | | a b 0 d -> a b l_c d |
510 | | |
511 | | a b 5 d -> a b 5 d |
512 | | [ Will subsequently seek out of document on reading the subdoc] |
513 | | |
514 | | a b 7 d -> a b l_c_(k+1) 0 |
515 | | [ If there is another range bound filter that's higher than the |
516 | | current one, effectively, moving this column to the next |
517 | | range in the filter list.] |
518 | | -> a b Inf |
519 | | [ This will seek to <b_next> and on the next invocation update: |
520 | | a <b_next> ? ? -> a <b_next> l_c_0 0 ] |
521 | | |
522 | | a b c 6 -> a b c l_d_(j+1) |
523 | | [ If there is another range bound filter that's higher than the |
524 | | d, effectively, moving column D to the next |
525 | | range in the filter list.] |
526 | | -> a b c Inf |
527 | | [ If c_next is between l_c_k and u_c_k. This will seek to <a b |
528 | | <c_next>> and on the next invocation update: |
529 | | a b <c_next> ? -> a b <c_next> l_d_0 ] |
530 | | -> a b l_c_(k+1) l_d_0 |
531 | | [ If c_next is above u_c_k. We do this because we know |
532 | | exactly what the next tuple in our filter space should be.] |
533 | | */ |
534 | 12.2M | DocKeyDecoder decoder(new_target); |
535 | 12.2M | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
536 | 12.2M | current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data())); |
537 | | |
538 | 12.2M | size_t col_idx = 0; |
539 | 12.2M | PrimitiveValue target_value; |
540 | 35.3M | for (col_idx = 0; col_idx < current_scan_target_idxs_.size(); col_idx++23.0M ) { |
541 | 33.9M | RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value)); |
542 | 33.9M | const auto& lower_choices = range_cols_scan_options_lower_[col_idx]; |
543 | 33.9M | const auto& upper_choices = range_cols_scan_options_upper_[col_idx]; |
544 | 33.9M | auto current_ind = current_scan_target_idxs_[col_idx]; |
545 | 33.9M | DCHECK(current_ind < lower_choices.size()); |
546 | 33.9M | const auto& lower = lower_choices[current_ind]; |
547 | 33.9M | const auto& upper = upper_choices[current_ind]; |
548 | | |
549 | | // If it's in range then good, continue after appending the target value |
550 | | // column. |
551 | | |
552 | 33.9M | if (target_value >= lower && target_value <= upper28.6M ) { |
553 | 23.0M | target_value.AppendToKey(¤t_scan_target_); |
554 | 23.0M | continue; |
555 | 23.0M | } |
556 | | |
557 | | // If target_value is not in the current range then we must find a range |
558 | | // that works for it. |
559 | | // If we are above all ranges then increment the index of the previous |
560 | | // column. |
561 | | // Else, target_value is below at least one range: find the lowest lower |
562 | | // bound above target_value and use that, this relies on the assumption |
563 | | // that all our filter ranges are disjoint. |
564 | | |
565 | 10.8M | auto it = lower_choices.begin(); |
566 | 10.8M | size_t ind = 0; |
567 | | |
568 | | // Find an upper (lower) bound closest to target_value |
569 | 10.8M | if (is_forward_scan_) { |
570 | 10.8M | it = std::lower_bound(upper_choices.begin(), |
571 | 10.8M | upper_choices.end(), target_value); |
572 | 10.8M | ind = it - upper_choices.begin(); |
573 | 10.8M | } else { |
574 | 408 | it = std::lower_bound(lower_choices.begin(), lower_choices.end(), |
575 | 408 | target_value, std::greater<>()); |
576 | 408 | ind = it - lower_choices.begin(); |
577 | 408 | } |
578 | | |
579 | 10.8M | if (ind == lower_choices.size()) { |
580 | | // target value is higher than all range options and |
581 | | // we need to increment. |
582 | 5.57M | RETURN_NOT_OK(IncrementScanTargetAtColumn(static_cast<int>(col_idx) - 1)); |
583 | 5.57M | col_idx = current_scan_target_idxs_.size(); |
584 | 5.57M | break; |
585 | 5.57M | } |
586 | | |
587 | 5.28M | current_scan_target_idxs_[col_idx] = ind; |
588 | | |
589 | | // If we are within a range then target_value itself should work. |
590 | 5.28M | if (lower_choices[ind] <= target_value |
591 | 5.28M | && upper_choices[ind] >= target_value721 ) { |
592 | 652 | target_value.AppendToKey(¤t_scan_target_); |
593 | 652 | continue; |
594 | 652 | } |
595 | | |
596 | | // Otherwise we must set it to the next lower bound. |
597 | | // This only works as we are assuming all given ranges are |
598 | | // disjoint. |
599 | | |
600 | 5.28M | DCHECK((is_forward_scan_ && lower_choices[ind] > target_value) |
601 | 5.28M | || (!is_forward_scan_ && upper_choices[ind] |
602 | 5.28M | < target_value)); |
603 | | |
604 | 5.28M | if (is_forward_scan_) { |
605 | 5.28M | lower_choices[ind].AppendToKey(¤t_scan_target_); |
606 | 5.28M | } else { |
607 | 355 | upper_choices[ind].AppendToKey(¤t_scan_target_); |
608 | 355 | } |
609 | 5.28M | col_idx++; |
610 | 5.28M | break; |
611 | 5.28M | } |
612 | | |
613 | | // Reset the remaining range columns to lower bounds for forward scans |
614 | | // or upper bounds for backward scans. |
615 | 12.7M | for (size_t i = col_idx; 12.2M i < range_cols_scan_options_lower_.size(); i++514k ) { |
616 | 514k | current_scan_target_idxs_[i] = 0; |
617 | 514k | if (is_forward_scan_) { |
618 | 514k | range_cols_scan_options_lower_[i][0] |
619 | 514k | .AppendToKey(¤t_scan_target_); |
620 | 514k | } else { |
621 | 6 | range_cols_scan_options_upper_[i][0] |
622 | 6 | .AppendToKey(¤t_scan_target_); |
623 | 6 | } |
624 | 514k | } |
625 | | |
626 | 12.2M | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
627 | 18.4E | VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " |
628 | 18.4E | << DocKey::DebugSliceToString(current_scan_target_); |
629 | 12.2M | return Status::OK(); |
630 | 12.2M | } |
631 | | |
632 | | // Update the value at start column by setting it up for incrementing to the |
633 | | // next allowed value in the filter space |
634 | | // --------------------------------------------------------------------------- |
635 | | // There are two important cases to consider here. |
636 | | // Let's say the value of current_scan_target_ at start_col, c, |
637 | | // is currently V and the current bounds for that column |
638 | | // is l_c_k <= V <= u_c_k. In the usual case where V != u_c_k |
639 | | // (or V != l_c_k for backwards scans) such that V_next is still in the given |
640 | | // restriction, we set column c + 1 to kHighest (kLowest), such that the next |
641 | | // invocation of GetNext() produces V_next at column similar to what is done |
642 | | // in SkipTargetsUpTo. In this case, doing a SkipTargetsUpTo on the resulting |
643 | | // current_scan_target_ should yield the next allowed value in the filter space |
644 | | // In the case where V = u_c_k (V = l_c_k), or in other words V is at the |
645 | | // EXTREMAL boundary of the current range, we know exactly what the next value |
646 | | // of column C will be. So we move column c to the next |
647 | | // range k+1 and set that column to the new value l_c_(k+1) (u_c_(k+1)) |
648 | | // while setting all columns, b > c to l_b_0 (u_b_0) |
649 | | // In the case of overflow on a column c (we want to increment the |
650 | | // restriction range of c to the next range bound for that column but there |
651 | | // are no restriction ranges remaining), we set the |
652 | | // current column to the 0th range and move on to increment c - 1 |
653 | | // Note that in almost all cases the resulting current_scan_target_ is strictly |
654 | | // greater (lesser in the case of backwards scans) than the original |
655 | | // current_scan_target_. This is necessary to allow the iterator seek out |
656 | | // of the current scan target. The exception to this rule is below. |
657 | | // --------------------------------------------------------------------------- |
658 | | // This function leaves the scan target as is if the next tuple in the current |
659 | | // scan direction is also the next tuple in the filter space and start_col |
660 | | // is given as the last column |
661 | 7.01M | Status HybridScanChoices::IncrementScanTargetAtColumn(int start_col) { |
662 | | |
663 | 18.4E | VLOG(2) << __PRETTY_FUNCTION__ |
664 | 18.4E | << " Incrementing at " << start_col; |
665 | | |
666 | | // Increment start col, move backwards in case of overflow. |
667 | 7.01M | int col_idx = start_col; |
668 | | // lower and upper here are taken relative to the scan order |
669 | 7.01M | auto &lower_extremal_vector = is_forward_scan_ |
670 | 7.01M | ? range_cols_scan_options_lower_6.55M |
671 | 7.01M | : range_cols_scan_options_upper_451k ; |
672 | 7.01M | auto &upper_extremal_vector = is_forward_scan_ |
673 | 7.01M | ? range_cols_scan_options_upper_6.55M |
674 | 7.01M | : range_cols_scan_options_lower_451k ; |
675 | 7.01M | DocKeyDecoder t_decoder(current_scan_target_); |
676 | 7.01M | RETURN_NOT_OK(t_decoder.DecodeToRangeGroup()); |
677 | | |
678 | | // refer to the documentation of this function to see what extremal |
679 | | // means here |
680 | 7.01M | std::vector<bool> is_extremal; |
681 | 7.01M | PrimitiveValue target_value; |
682 | 19.8M | for (int i = 0; i <= col_idx; ++i12.8M ) { |
683 | 12.8M | RETURN_NOT_OK(t_decoder.DecodePrimitiveValue(&target_value)); |
684 | 12.8M | is_extremal.push_back(target_value == |
685 | 12.8M | upper_extremal_vector[i][current_scan_target_idxs_[i]]); |
686 | 12.8M | } |
687 | | |
688 | | // this variable tells us whether we start by appending |
689 | | // kHighest/kLowest at col_idx after the following for loop |
690 | 7.01M | bool start_with_infinity = true; |
691 | | |
692 | 7.05M | for (; col_idx >= 0; col_idx--41.5k ) { |
693 | 6.83M | const auto& choices = lower_extremal_vector[col_idx]; |
694 | 6.83M | auto it = current_scan_target_idxs_[col_idx]; |
695 | | |
696 | 6.83M | if (!is_extremal[col_idx]) { |
697 | 6.78M | col_idx++; |
698 | 6.78M | start_with_infinity = true; |
699 | 6.78M | break; |
700 | 6.78M | } |
701 | | |
702 | 46.3k | if (++it < choices.size()) { |
703 | | // and if this value is at the extremal bound |
704 | 4.84k | if (is_extremal[col_idx]) { |
705 | 4.84k | current_scan_target_idxs_[col_idx]++; |
706 | 4.84k | start_with_infinity = false; |
707 | 4.84k | } |
708 | 4.84k | break; |
709 | 4.84k | } |
710 | | |
711 | 41.5k | current_scan_target_idxs_[col_idx] = 0; |
712 | 41.5k | } |
713 | | |
714 | 7.01M | DocKeyDecoder decoder(current_scan_target_); |
715 | 7.01M | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
716 | 19.7M | for (int i = 0; 7.01M i < col_idx; ++i12.7M ) { |
717 | 12.7M | RETURN_NOT_OK(decoder.DecodePrimitiveValue()); |
718 | 12.7M | } |
719 | | |
720 | 7.01M | if (col_idx < 0) { |
721 | | // If we got here we finished all the options and are done. |
722 | 218k | col_idx++; |
723 | 218k | start_with_infinity = true; |
724 | 218k | is_options_done_ = true; |
725 | 218k | } |
726 | | |
727 | 7.01M | current_scan_target_.Truncate( |
728 | 7.01M | decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata()); |
729 | | |
730 | | |
731 | 7.01M | if (start_with_infinity && |
732 | 7.01M | (col_idx < static_cast<int64>(current_scan_target_idxs_.size()))7.00M ) { |
733 | 5.61M | if (is_forward_scan_) { |
734 | 5.61M | PrimitiveValue(ValueType::kHighest).AppendToKey(¤t_scan_target_); |
735 | 5.61M | } else { |
736 | 6.17k | PrimitiveValue(ValueType::kLowest).AppendToKey(¤t_scan_target_); |
737 | 6.17k | } |
738 | 5.61M | col_idx++; |
739 | 5.61M | } |
740 | | |
741 | 7.01M | if (start_with_infinity) { |
742 | | // there's no point in appending anything after infinity |
743 | 7.00M | return Status::OK(); |
744 | 7.00M | } |
745 | | |
746 | 9.84k | for (int i = col_idx; 4.70k i <= start_col; ++i5.14k ) { |
747 | 5.14k | lower_extremal_vector[i][current_scan_target_idxs_[i]] |
748 | 5.14k | .AppendToKey(¤t_scan_target_); |
749 | 5.14k | } |
750 | | |
751 | 4.71k | for (size_t i = start_col + 1; i < current_scan_target_idxs_.size(); ++i10 ) { |
752 | 10 | current_scan_target_idxs_[i] = 0; |
753 | 10 | lower_extremal_vector[i][current_scan_target_idxs_[i]] |
754 | 10 | .AppendToKey(¤t_scan_target_); |
755 | 10 | } |
756 | | |
757 | 4.70k | return Status::OK(); |
758 | 7.01M | } |
759 | | |
760 | | // Method called when the scan target is done being used |
761 | 1.43M | Status HybridScanChoices::DoneWithCurrentTarget() { |
762 | | // prev_scan_target_ is necessary for backwards scans |
763 | 1.43M | prev_scan_target_ = current_scan_target_; |
764 | 1.43M | RETURN_NOT_OK(IncrementScanTargetAtColumn( |
765 | 1.43M | static_cast<int>(current_scan_target_idxs_.size()) - 1)); |
766 | 1.43M | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
767 | | |
768 | | // if we we incremented the last index then |
769 | | // if this is a forward scan it doesn't matter what we do |
770 | | // if this is a backwards scan then dont clear current_scan_target and we |
771 | | // stay live |
772 | 18.4E | VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " |
773 | 18.4E | << DocKey::DebugSliceToString(current_scan_target_); |
774 | | |
775 | 18.4E | VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target"; |
776 | 1.43M | DCHECK(!FinishedWithScanChoices()); |
777 | | |
778 | 1.43M | if (is_options_done_) { |
779 | | // It could be possible that we finished all our options but are not |
780 | | // done because we haven't hit the bound key yet. This would usually be |
781 | | // the case if we are moving onto the next hash key where we will |
782 | | // restart our range options. |
783 | 208k | const KeyBytes &bound_key = is_forward_scan_ ? |
784 | 202k | upper_doc_key_ : lower_doc_key_6.10k ; |
785 | 208k | finished_ = bound_key.empty() ? false192k |
786 | 208k | : is_forward_scan_ |
787 | 15.9k | == (current_scan_target_.CompareTo(bound_key) >= 0); |
788 | 18.4E | VLOG(4) << "finished_ = " << finished_; |
789 | 208k | } |
790 | | |
791 | | |
792 | 18.4E | VLOG(4) << "current_scan_target_ is " |
793 | 18.4E | << DocKey::DebugSliceToString(current_scan_target_) |
794 | 18.4E | << " and prev_scan_target_ is " |
795 | 18.4E | << DocKey::DebugSliceToString(prev_scan_target_); |
796 | | |
797 | | // The below condition is either indicative of the special case |
798 | | // where IncrementScanTargetAtColumn didn't change the target due |
799 | | // to the case specified in the last section of the |
800 | | // documentation for IncrementScanTargetAtColumn or we have exhausted |
801 | | // all available range keys for the given hash key (indicated |
802 | | // by is_options_done_) |
803 | | // We clear the scan target in these cases to indicate that the |
804 | | // current_scan_target_ has been used and is invalid |
805 | | // In all other cases, IncrementScanTargetAtColumn has updated |
806 | | // current_scan_target_ to the new value that we want to seek to. |
807 | | // Hence, we shouldn't clear it in those cases |
808 | 1.43M | if ((prev_scan_target_ == current_scan_target_) || is_options_done_43.6k ) { |
809 | 1.40M | current_scan_target_.Clear(); |
810 | 1.40M | } |
811 | | |
812 | 1.43M | return Status::OK(); |
813 | 1.43M | } |
814 | | |
815 | | // Seeks the given iterator to the current target as specified by |
816 | | // current_scan_target_ and prev_scan_target_ (relevant in backwards |
817 | | // scans) |
818 | 12.2M | Status HybridScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) { |
819 | 18.4E | VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target"; |
820 | | |
821 | 12.2M | if (!FinishedWithScanChoices()) { |
822 | | // if current_scan_target_ is valid we use it to determine |
823 | | // what to seek to |
824 | 12.2M | if (!current_scan_target_.empty()) { |
825 | 10.8M | VLOG(3) << __PRETTY_FUNCTION__ |
826 | 0 | << " current_scan_target_ is non-empty. " |
827 | 0 | << DocKey::DebugSliceToString(current_scan_target_); |
828 | 10.8M | if (is_forward_scan_) { |
829 | 10.8M | VLOG(3) << __PRETTY_FUNCTION__ |
830 | 0 | << " Seeking to " |
831 | 0 | << DocKey::DebugSliceToString(current_scan_target_); |
832 | 10.8M | db_iter->Seek(current_scan_target_); |
833 | 10.8M | } else { |
834 | | // seek to the highest key <= current_scan_target_ |
835 | | // seeking to the highest key < current_scan_target_ + kHighest |
836 | | // is equivalent to seeking to the highest key <= |
837 | | // current_scan_target_ |
838 | 150 | auto tmp = current_scan_target_; |
839 | 150 | PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp); |
840 | 150 | VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp0 ; |
841 | 150 | db_iter->PrevDocKey(tmp); |
842 | 150 | } |
843 | 10.8M | } else { |
844 | 1.38M | if (!is_forward_scan_ && !prev_scan_target_.empty()444k ) { |
845 | 444k | db_iter->PrevDocKey(prev_scan_target_); |
846 | 444k | } |
847 | 1.38M | } |
848 | 12.2M | } |
849 | | |
850 | 12.2M | return Status::OK(); |
851 | 12.2M | } |
852 | | |
853 | | class RangeBasedScanChoices : public ScanChoices { |
854 | | public: |
855 | | RangeBasedScanChoices(const Schema& schema, const DocQLScanSpec& doc_spec) |
856 | 0 | : ScanChoices(doc_spec.is_forward_scan()) { |
857 | 0 | DCHECK(doc_spec.range_bounds()); |
858 | 0 | lower_.reserve(schema.num_range_key_columns()); |
859 | 0 | upper_.reserve(schema.num_range_key_columns()); |
860 | 0 | size_t idx = 0; |
861 | 0 | for (idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) { |
862 | 0 | const ColumnId col_idx = schema.column_id(idx); |
863 | 0 | const auto col_sort_type = schema.column(idx).sorting_type(); |
864 | 0 | const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx); |
865 | 0 | const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */); |
866 | 0 | const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */); |
867 | 0 | lower_.emplace_back(lower); |
868 | 0 | upper_.emplace_back(upper); |
869 | 0 | } |
870 | 0 | } |
871 | | |
872 | | RangeBasedScanChoices(const Schema& schema, const DocPgsqlScanSpec& doc_spec) |
873 | 0 | : ScanChoices(doc_spec.is_forward_scan()) { |
874 | 0 | DCHECK(doc_spec.range_bounds()); |
875 | 0 | lower_.reserve(schema.num_range_key_columns()); |
876 | 0 | upper_.reserve(schema.num_range_key_columns()); |
877 | 0 | for (auto idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) { |
878 | 0 | const ColumnId col_idx = schema.column_id(idx); |
879 | 0 | const auto col_sort_type = schema.column(idx).sorting_type(); |
880 | 0 | const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx); |
881 | 0 | const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */); |
882 | 0 | const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */); |
883 | 0 | lower_.emplace_back(lower); |
884 | 0 | upper_.emplace_back(upper); |
885 | 0 | } |
886 | 0 | } |
887 | | |
888 | | CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override; |
889 | | CHECKED_STATUS DoneWithCurrentTarget() override; |
890 | | CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override; |
891 | | |
892 | | private: |
893 | | std::vector<PrimitiveValue> lower_, upper_; |
894 | | KeyBytes prev_scan_target_; |
895 | | }; |
896 | | |
897 | 0 | Status RangeBasedScanChoices::SkipTargetsUpTo(const Slice& new_target) { |
898 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= " |
899 | 0 | << DocKey::DebugSliceToString(new_target); |
900 | 0 | DCHECK(!FinishedWithScanChoices()); |
901 | | |
902 | | /* |
903 | | Let's say we have a row key with (A B) as the hash part and C, D as the range part: |
904 | | ((A B) C D) E F |
905 | | |
906 | | Let's say we have a range constraint : |
907 | | l_c < C < u_c |
908 | | 4 6 |
909 | | |
910 | | a b 0 d -> a b l_c d |
911 | | |
912 | | a b 5 d -> a b 5 d |
913 | | [ Will subsequently seek out of document on reading the subdoc] |
914 | | |
915 | | a b 7 d -> a <b> MAX |
916 | | [ This will seek to <b_next> and on the next invocation update: |
917 | | a <b_next> ? ? -> a <b_next> l_c d ] |
918 | | */ |
919 | 0 | DocKeyDecoder decoder(new_target); |
920 | 0 | RETURN_NOT_OK(decoder.DecodeToRangeGroup()); |
921 | 0 | current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data())); |
922 | |
|
923 | 0 | size_t col_idx = 0; |
924 | 0 | PrimitiveValue target_value; |
925 | 0 | bool last_was_infinity = false; |
926 | 0 | for (col_idx = 0; VERIFY_RESULT(decoder.HasPrimitiveValue()); col_idx++) { |
927 | 0 | RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value)); |
928 | 0 | VLOG(3) << "col_idx " << col_idx << " is " << target_value << " in [" |
929 | 0 | << yb::ToString(lower_[col_idx]) << " , " << yb::ToString(upper_[col_idx]) << " ] ?"; |
930 | |
|
931 | 0 | const auto& lower = lower_[col_idx]; |
932 | 0 | if (target_value < lower) { |
933 | 0 | const auto tgt = (is_forward_scan_ ? lower : PrimitiveValue(ValueType::kLowest)); |
934 | 0 | tgt.AppendToKey(¤t_scan_target_); |
935 | 0 | last_was_infinity = tgt.IsInfinity(); |
936 | 0 | VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt; |
937 | 0 | break; |
938 | 0 | } |
939 | 0 | const auto& upper = upper_[col_idx]; |
940 | 0 | if (target_value > upper) { |
941 | 0 | const auto tgt = (!is_forward_scan_ ? upper : PrimitiveValue(ValueType::kHighest)); |
942 | 0 | VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt; |
943 | 0 | tgt.AppendToKey(¤t_scan_target_); |
944 | 0 | last_was_infinity = tgt.IsInfinity(); |
945 | 0 | break; |
946 | 0 | } |
947 | 0 | target_value.AppendToKey(¤t_scan_target_); |
948 | 0 | last_was_infinity = target_value.IsInfinity(); |
949 | 0 | } |
950 | | |
951 | | // Reset the remaining range columns to kHighest/lower for forward scans |
952 | | // or kLowest/upper for backward scans. |
953 | 0 | while (++col_idx < lower_.size()) { |
954 | 0 | if (last_was_infinity) { |
955 | | // No point having more components after +/- Inf. |
956 | 0 | break; |
957 | 0 | } |
958 | 0 | if (is_forward_scan_) { |
959 | 0 | VLOG(3) << " Updating col_idx " << col_idx << " to " << lower_[col_idx]; |
960 | 0 | lower_[col_idx].AppendToKey(¤t_scan_target_); |
961 | 0 | last_was_infinity = lower_[col_idx].IsInfinity(); |
962 | 0 | } else { |
963 | 0 | VLOG(3) << " Updating col_idx " << col_idx << " to " << upper_[col_idx]; |
964 | 0 | upper_[col_idx].AppendToKey(¤t_scan_target_); |
965 | 0 | last_was_infinity = upper_[col_idx].IsInfinity(); |
966 | 0 | } |
967 | 0 | } |
968 | 0 | current_scan_target_.AppendValueType(ValueType::kGroupEnd); |
969 | 0 | VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " |
970 | 0 | << DocKey::DebugSliceToString(current_scan_target_); |
971 | |
|
972 | 0 | return Status::OK(); |
973 | 0 | } |
974 | | |
975 | 0 | Status RangeBasedScanChoices::DoneWithCurrentTarget() { |
976 | 0 | prev_scan_target_ = current_scan_target_; |
977 | 0 | current_scan_target_.Clear(); |
978 | 0 | return Status::OK(); |
979 | 0 | } |
980 | | |
981 | 0 | Status RangeBasedScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) { |
982 | 0 | VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target"; |
983 | |
|
984 | 0 | if (!FinishedWithScanChoices()) { |
985 | 0 | if (!current_scan_target_.empty()) { |
986 | 0 | VLOG(3) << __PRETTY_FUNCTION__ |
987 | 0 | << " current_scan_target_ is non-empty. " |
988 | 0 | << current_scan_target_; |
989 | 0 | if (is_forward_scan_) { |
990 | 0 | VLOG(3) << __PRETTY_FUNCTION__ |
991 | 0 | << " Seeking to " |
992 | 0 | << DocKey::DebugSliceToString(current_scan_target_); |
993 | 0 | db_iter->Seek(current_scan_target_); |
994 | 0 | } else { |
995 | 0 | auto tmp = current_scan_target_; |
996 | 0 | PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp); |
997 | 0 | VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp; // Never seen. |
998 | 0 | db_iter->PrevDocKey(tmp); |
999 | 0 | } |
1000 | 0 | } else { |
1001 | 0 | if (!is_forward_scan_ && !prev_scan_target_.empty()) { |
1002 | 0 | db_iter->PrevDocKey(prev_scan_target_); |
1003 | 0 | } |
1004 | 0 | } |
1005 | 0 | } |
1006 | |
|
1007 | 0 | return Status::OK(); |
1008 | 0 | } |
1009 | | |
1010 | | DocRowwiseIterator::DocRowwiseIterator( |
1011 | | const Schema &projection, |
1012 | | const Schema &schema, |
1013 | | const TransactionOperationContext& txn_op_context, |
1014 | | const DocDB& doc_db, |
1015 | | CoarseTimePoint deadline, |
1016 | | const ReadHybridTime& read_time, |
1017 | | RWOperationCounter* pending_op_counter) |
1018 | | : projection_(projection), |
1019 | | schema_(schema), |
1020 | | txn_op_context_(txn_op_context), |
1021 | | deadline_(deadline), |
1022 | | read_time_(read_time), |
1023 | | doc_db_(doc_db), |
1024 | | has_bound_key_(false), |
1025 | | pending_op_(pending_op_counter), |
1026 | 19.3M | done_(false) { |
1027 | 19.3M | projection_subkeys_.reserve(projection.num_columns() + 1); |
1028 | 19.3M | projection_subkeys_.push_back(PrimitiveValue::kLivenessColumn); |
1029 | 70.4M | for (size_t i = projection_.num_key_columns(); i < projection.num_columns(); i++51.0M ) { |
1030 | 51.0M | projection_subkeys_.emplace_back(projection.column_id(i)); |
1031 | 51.0M | } |
1032 | 19.3M | std::sort(projection_subkeys_.begin(), projection_subkeys_.end()); |
1033 | 19.3M | } |
1034 | | |
1035 | 19.4M | DocRowwiseIterator::~DocRowwiseIterator() { |
1036 | 19.4M | } |
1037 | | |
1038 | 420k | Status DocRowwiseIterator::Init(TableType table_type, const Slice& sub_doc_key) { |
1039 | 420k | db_iter_ = CreateIntentAwareIterator( |
1040 | 420k | doc_db_, |
1041 | 420k | BloomFilterMode::DONT_USE_BLOOM_FILTER, |
1042 | 420k | boost::none /* user_key_for_filter */, |
1043 | 420k | rocksdb::kDefaultQueryId, |
1044 | 420k | txn_op_context_, |
1045 | 420k | deadline_, |
1046 | 420k | read_time_); |
1047 | 420k | if (!sub_doc_key.empty()) { |
1048 | 0 | row_key_ = sub_doc_key; |
1049 | 420k | } else { |
1050 | 420k | DocKeyEncoder(&iter_key_).Schema(schema_); |
1051 | 420k | row_key_ = iter_key_; |
1052 | 420k | } |
1053 | 420k | row_hash_key_ = row_key_; |
1054 | 420k | VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << row_key_408 ; |
1055 | 420k | db_iter_->Seek(row_key_); |
1056 | 420k | row_ready_ = false; |
1057 | 420k | has_bound_key_ = false; |
1058 | 420k | if (table_type == TableType::PGSQL_TABLE_TYPE) { |
1059 | 12 | ignore_ttl_ = true; |
1060 | 12 | } |
1061 | | |
1062 | 420k | return Status::OK(); |
1063 | 420k | } |
1064 | | |
1065 | | Result<bool> DocRowwiseIterator::InitScanChoices( |
1066 | 7.30M | const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) { |
1067 | | |
1068 | 7.30M | if (!FLAGS_disable_hybrid_scan) { |
1069 | 7.30M | if (doc_spec.range_options()7.28M || doc_spec.range_bounds()) { |
1070 | 62.1k | scan_choices_.reset(new HybridScanChoices(schema_, doc_spec, |
1071 | 62.1k | lower_doc_key, upper_doc_key)); |
1072 | 62.1k | } |
1073 | | |
1074 | 7.28M | return false; |
1075 | 7.28M | } |
1076 | | |
1077 | 10.6k | if (doc_spec.range_options()) { |
1078 | 0 | scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key)); |
1079 | | // Let's not seek to the lower doc key or upper doc key. We know exactly what we want. |
1080 | 0 | RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow()); |
1081 | 0 | return true; |
1082 | 0 | } |
1083 | | |
1084 | 10.6k | if (doc_spec.range_bounds()) { |
1085 | 0 | scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec)); |
1086 | 0 | } |
1087 | | |
1088 | 10.6k | return false; |
1089 | 10.6k | } |
1090 | | |
1091 | | Result<bool> DocRowwiseIterator::InitScanChoices( |
1092 | | const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key, |
1093 | 11.6M | const KeyBytes& upper_doc_key) { |
1094 | | |
1095 | 11.6M | if (!FLAGS_disable_hybrid_scan) { |
1096 | 11.6M | if (doc_spec.range_options() || doc_spec.range_bounds()11.6M ) { |
1097 | 13.2k | scan_choices_.reset(new HybridScanChoices(schema_, doc_spec, |
1098 | 13.2k | lower_doc_key, upper_doc_key)); |
1099 | 13.2k | } |
1100 | | |
1101 | 11.6M | return false; |
1102 | 11.6M | } |
1103 | | |
1104 | 1.06k | if (doc_spec.range_options()) { |
1105 | 0 | scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key)); |
1106 | | // Let's not seek to the lower doc key or upper doc key. We know exactly what we want. |
1107 | 0 | RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow()); |
1108 | 0 | return true; |
1109 | 0 | } |
1110 | | |
1111 | 1.06k | if (doc_spec.range_bounds()) { |
1112 | 0 | scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec)); |
1113 | 0 | } |
1114 | | |
1115 | 1.06k | return false; |
1116 | 1.06k | } |
1117 | | |
1118 | | template <class T> |
1119 | 18.9M | Status DocRowwiseIterator::DoInit(const T& doc_spec) { |
1120 | 18.9M | is_forward_scan_ = doc_spec.is_forward_scan(); |
1121 | | |
1122 | 18.9M | VLOG(4) << "Initializing iterator direction: " << (37.6k is_forward_scan_37.6k ? "FORWARD"0 : "BACKWARD"37.6k ); |
1123 | | |
1124 | 18.9M | auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound()); |
1125 | 18.9M | auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound()); |
1126 | 18.4E | VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice()) |
1127 | 18.4E | << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice()); |
1128 | | |
1129 | | // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans. |
1130 | 18.9M | const bool is_fixed_point_get = |
1131 | 18.9M | !lower_doc_key.empty() && |
1132 | 18.9M | VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key)); |
1133 | 18.9M | const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER18.1M |
1134 | 18.9M | : BloomFilterMode::DONT_USE_BLOOM_FILTER808k ; |
1135 | | |
1136 | 18.9M | db_iter_ = CreateIntentAwareIterator( |
1137 | 18.9M | doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_, |
1138 | 18.9M | deadline_, read_time_, doc_spec.CreateFileFilter()); |
1139 | | |
1140 | 18.9M | row_ready_ = false; |
1141 | | |
1142 | 19.0M | if (is_forward_scan_18.9M ) { |
1143 | 19.0M | has_bound_key_ = !upper_doc_key.empty(); |
1144 | 19.0M | if (has_bound_key_) { |
1145 | 18.9M | bound_key_ = std::move(upper_doc_key); |
1146 | 18.9M | db_iter_->SetUpperbound(bound_key_); |
1147 | 18.9M | } |
1148 | 18.4E | } else { |
1149 | 18.4E | has_bound_key_ = !lower_doc_key.empty(); |
1150 | 18.4E | if (has_bound_key_) { |
1151 | 8.22k | bound_key_ = std::move(lower_doc_key); |
1152 | 8.22k | } |
1153 | 18.4E | } |
1154 | | |
1155 | 18.9M | if (!VERIFY_RESULT(InitScanChoices(doc_spec, |
1156 | 18.9M | !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key, |
1157 | 18.9M | is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) { |
1158 | 18.9M | if (is_forward_scan_) { |
1159 | 18.4E | VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key); |
1160 | 18.9M | db_iter_->Seek(lower_doc_key); |
1161 | 18.9M | } else { |
1162 | | // TODO consider adding an operator bool to DocKey to use instead of empty() here. |
1163 | 8.21k | if (!upper_doc_key.empty()8.17k ) { |
1164 | 8.21k | db_iter_->PrevDocKey(upper_doc_key); |
1165 | 18.4E | } else { |
1166 | 18.4E | db_iter_->SeekToLastDocKey(); |
1167 | 18.4E | } |
1168 | 8.17k | } |
1169 | 18.9M | } |
1170 | | |
1171 | 0 | return Status::OK(); |
1172 | 18.9M | } yb::Status yb::docdb::DocRowwiseIterator::DoInit<yb::docdb::DocQLScanSpec>(yb::docdb::DocQLScanSpec const&) Line | Count | Source | 1119 | 7.31M | Status DocRowwiseIterator::DoInit(const T& doc_spec) { | 1120 | 7.31M | is_forward_scan_ = doc_spec.is_forward_scan(); | 1121 | | | 1122 | 7.31M | VLOG(4) << "Initializing iterator direction: " << (35.0k is_forward_scan_35.0k ? "FORWARD"0 : "BACKWARD"35.0k ); | 1123 | | | 1124 | 7.31M | auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound()); | 1125 | 7.31M | auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound()); | 1126 | 18.4E | VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice()) | 1127 | 18.4E | << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice()); | 1128 | | | 1129 | | // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans. | 1130 | 7.31M | const bool is_fixed_point_get = | 1131 | 7.31M | !lower_doc_key.empty() && | 1132 | 7.31M | VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key)); | 1133 | 7.31M | const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER7.24M | 1134 | 7.31M | : BloomFilterMode::DONT_USE_BLOOM_FILTER71.6k ; | 1135 | | | 1136 | 7.31M | db_iter_ = CreateIntentAwareIterator( | 1137 | 7.31M | doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_, | 1138 | 7.31M | deadline_, read_time_, doc_spec.CreateFileFilter()); | 1139 | | | 1140 | 7.31M | row_ready_ = false; | 1141 | | | 1142 | 7.31M | if (is_forward_scan_7.31M ) { | 1143 | 7.31M | has_bound_key_ = !upper_doc_key.empty(); | 1144 | 7.31M | if (has_bound_key_) { | 1145 | 7.21M | bound_key_ = std::move(upper_doc_key); | 1146 | 7.21M | db_iter_->SetUpperbound(bound_key_); | 1147 | 7.21M | } | 1148 | 18.4E | } else { | 1149 | 18.4E | has_bound_key_ = !lower_doc_key.empty(); | 1150 | 18.4E | if (has_bound_key_) { | 1151 | 8.12k | bound_key_ = std::move(lower_doc_key); | 1152 | 8.12k | } | 1153 | 18.4E | } | 1154 | | | 1155 | 7.31M | if (!VERIFY_RESULT(InitScanChoices(doc_spec, | 1156 | 7.31M | !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key, | 1157 | 7.31M | is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) { | 1158 | 7.28M | if (is_forward_scan_) { | 1159 | 18.4E | VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key); | 1160 | 7.28M | db_iter_->Seek(lower_doc_key); | 1161 | 7.28M | } else { | 1162 | | // TODO consider adding an operator bool to DocKey to use instead of empty() here. | 1163 | 8.21k | if (!upper_doc_key.empty()) { | 1164 | 8.12k | db_iter_->PrevDocKey(upper_doc_key); | 1165 | 8.12k | } else { | 1166 | 95 | db_iter_->SeekToLastDocKey(); | 1167 | 95 | } | 1168 | 8.21k | } | 1169 | 7.28M | } | 1170 | | | 1171 | 0 | return Status::OK(); | 1172 | 7.31M | } |
yb::Status yb::docdb::DocRowwiseIterator::DoInit<yb::docdb::DocPgsqlScanSpec>(yb::docdb::DocPgsqlScanSpec const&) Line | Count | Source | 1119 | 11.6M | Status DocRowwiseIterator::DoInit(const T& doc_spec) { | 1120 | 11.6M | is_forward_scan_ = doc_spec.is_forward_scan(); | 1121 | | | 1122 | 11.6M | VLOG(4) << "Initializing iterator direction: " << (2.67k is_forward_scan_2.67k ? "FORWARD"0 : "BACKWARD"2.67k ); | 1123 | | | 1124 | 11.6M | auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound()); | 1125 | 11.6M | auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound()); | 1126 | 18.4E | VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice()) | 1127 | 18.4E | << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice()); | 1128 | | | 1129 | | // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans. | 1130 | 11.6M | const bool is_fixed_point_get = | 1131 | 11.6M | !lower_doc_key.empty() && | 1132 | 11.6M | VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key)); | 1133 | 11.6M | const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER10.9M | 1134 | 11.6M | : BloomFilterMode::DONT_USE_BLOOM_FILTER736k ; | 1135 | | | 1136 | 11.6M | db_iter_ = CreateIntentAwareIterator( | 1137 | 11.6M | doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_, | 1138 | 11.6M | deadline_, read_time_, doc_spec.CreateFileFilter()); | 1139 | | | 1140 | 11.6M | row_ready_ = false; | 1141 | | | 1142 | 11.6M | if (is_forward_scan_11.6M ) { | 1143 | 11.6M | has_bound_key_ = !upper_doc_key.empty(); | 1144 | 11.6M | if (has_bound_key_) { | 1145 | 11.6M | bound_key_ = std::move(upper_doc_key); | 1146 | 11.6M | db_iter_->SetUpperbound(bound_key_); | 1147 | 11.6M | } | 1148 | 18.4E | } else { | 1149 | 18.4E | has_bound_key_ = !lower_doc_key.empty(); | 1150 | 18.4E | if (has_bound_key_) { | 1151 | 96 | bound_key_ = std::move(lower_doc_key); | 1152 | 96 | } | 1153 | 18.4E | } | 1154 | | | 1155 | 11.6M | if (!VERIFY_RESULT(InitScanChoices(doc_spec, | 1156 | 11.6M | !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key, | 1157 | 11.6M | is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) { | 1158 | 11.6M | if (is_forward_scan_11.6M ) { | 1159 | 18.4E | VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key); | 1160 | 11.6M | db_iter_->Seek(lower_doc_key); | 1161 | 18.4E | } else { | 1162 | | // TODO consider adding an operator bool to DocKey to use instead of empty() here. | 1163 | 18.4E | if (!upper_doc_key.empty()) { | 1164 | 96 | db_iter_->PrevDocKey(upper_doc_key); | 1165 | 18.4E | } else { | 1166 | 18.4E | db_iter_->SeekToLastDocKey(); | 1167 | 18.4E | } | 1168 | 18.4E | } | 1169 | 11.6M | } | 1170 | | | 1171 | 0 | return Status::OK(); | 1172 | 11.6M | } |
|
1173 | | |
1174 | 7.33M | Status DocRowwiseIterator::Init(const QLScanSpec& spec) { |
1175 | 7.33M | return DoInit(dynamic_cast<const DocQLScanSpec&>(spec)); |
1176 | 7.33M | } |
1177 | | |
1178 | 11.6M | Status DocRowwiseIterator::Init(const PgsqlScanSpec& spec) { |
1179 | 11.6M | ignore_ttl_ = true; |
1180 | 11.6M | return DoInit(dynamic_cast<const DocPgsqlScanSpec&>(spec)); |
1181 | 11.6M | } |
1182 | | |
1183 | 82.9M | Status DocRowwiseIterator::AdvanceIteratorToNextDesiredRow() const { |
1184 | 82.9M | if (scan_choices_) { |
1185 | 1.43M | if (!IsNextStaticColumn() |
1186 | 1.43M | && !scan_choices_->CurrentTargetMatchesKey(row_key_)1.43M ) { |
1187 | 1.43M | return scan_choices_->SeekToCurrentTarget(db_iter_.get()); |
1188 | 1.43M | } |
1189 | 81.4M | } else { |
1190 | 81.4M | if (!is_forward_scan_) { |
1191 | 941 | VLOG(4) << __PRETTY_FUNCTION__ << " setting as PrevDocKey"0 ; |
1192 | 941 | db_iter_->PrevDocKey(row_key_); |
1193 | 941 | } |
1194 | 81.4M | } |
1195 | | |
1196 | 81.4M | return Status::OK(); |
1197 | 82.9M | } |
1198 | | |
1199 | 93.6M | Result<bool> DocRowwiseIterator::HasNext() const { |
1200 | 93.6M | VLOG(4) << __PRETTY_FUNCTION__6.66k ; |
1201 | | |
1202 | | // Repeated HasNext calls (without Skip/NextRow in between) should be idempotent: |
1203 | | // 1. If a previous call failed we returned the same status. |
1204 | | // 2. If a row is already available (row_ready_), return true directly. |
1205 | | // 3. If we finished all target rows for the scan (done_), return false directly. |
1206 | 93.6M | RETURN_NOT_OK(has_next_status_); |
1207 | 93.6M | if (row_ready_) { |
1208 | | // If row is ready, then HasNext returns true. |
1209 | 201 | return true; |
1210 | 201 | } |
1211 | 93.6M | if (done_) { |
1212 | 1.49k | return false; |
1213 | 1.49k | } |
1214 | | |
1215 | 93.6M | bool doc_found = false; |
1216 | 187M | while (!doc_found) { |
1217 | 105M | if (!db_iter_->valid() || (93.7M scan_choices_93.7M && scan_choices_->FinishedWithScanChoices()12.3M )) { |
1218 | 11.5M | done_ = true; |
1219 | 11.5M | return false; |
1220 | 11.5M | } |
1221 | | |
1222 | 93.8M | const auto key_data = db_iter_->FetchKey(); |
1223 | 93.8M | if (!key_data.ok()) { |
1224 | 0 | has_next_status_ = key_data.status(); |
1225 | 0 | return has_next_status_; |
1226 | 0 | } |
1227 | | |
1228 | 93.8M | VLOG(4) << "*fetched_key is " << SubDocKey::DebugSliceToString(key_data->key)67.6k ; |
1229 | 93.8M | if (debug_dump_) { |
1230 | 0 | LOG(INFO) << __func__ << ", fetched key: " << SubDocKey::DebugSliceToString(key_data->key) |
1231 | 0 | << ", " << key_data->key.ToDebugHexString(); |
1232 | 0 | } |
1233 | | |
1234 | | // The iterator is positioned by the previous GetSubDocument call (which places the iterator |
1235 | | // outside the previous doc_key). Ensure the iterator is pushed forward/backward indeed. We |
1236 | | // check it here instead of after GetSubDocument() below because we want to avoid the extra |
1237 | | // expensive FetchKey() call just to fetch and validate the key. |
1238 | 93.8M | if (!iter_key_.data().empty() && |
1239 | 93.8M | (79.7M is_forward_scan_79.7M ? iter_key_.CompareTo(key_data->key) >= 079.2M |
1240 | 79.7M | : iter_key_.CompareTo(key_data->key) <= 0445k )) { |
1241 | | // TODO -- could turn this check off in TPCC? |
1242 | 0 | has_next_status_ = STATUS_SUBSTITUTE(Corruption, "Infinite loop detected at $0", |
1243 | 0 | FormatSliceAsStr(key_data->key)); |
1244 | 0 | return has_next_status_; |
1245 | 0 | } |
1246 | 93.8M | iter_key_.Reset(key_data->key); |
1247 | 93.8M | VLOG(4) << " Current iter_key_ is " << iter_key_44.9k ; |
1248 | | |
1249 | 93.8M | const auto dockey_sizes = DocKey::EncodedHashPartAndDocKeySizes(iter_key_); |
1250 | 93.8M | if (!dockey_sizes.ok()) { |
1251 | 0 | has_next_status_ = dockey_sizes.status(); |
1252 | 0 | return has_next_status_; |
1253 | 0 | } |
1254 | 93.8M | row_hash_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->first); |
1255 | 93.8M | row_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->second); |
1256 | | |
1257 | 93.8M | if (!DocKeyBelongsTo(row_key_, schema_) || // e.g in cotable, row may point outside table bounds |
1258 | 93.8M | (93.7M has_bound_key_93.7M && is_forward_scan_ == (row_key_.compare(bound_key_) >= 0)84.4M )) { |
1259 | 20.3k | done_ = true; |
1260 | 20.3k | return false; |
1261 | 20.3k | } |
1262 | | |
1263 | | // Prepare the DocKey to get the SubDocument. Trim the DocKey to contain just the primary key. |
1264 | 93.8M | Slice sub_doc_key = row_key_; |
1265 | 93.8M | VLOG(4) << " sub_doc_key part of iter_key_ is " << DocKey::DebugSliceToString(sub_doc_key)62.5k ; |
1266 | | |
1267 | 93.8M | bool is_static_column = IsNextStaticColumn(); |
1268 | 93.8M | if (scan_choices_ && !is_static_column12.2M ) { |
1269 | 12.2M | if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) { |
1270 | | // We must have seeked past the target key we are looking for (no result) so we can safely |
1271 | | // skip all scan targets between the current target and row key (excluding row_key_ itself). |
1272 | | // Update the target key and iterator and call HasNext again to try the next target. |
1273 | 12.2M | RETURN_NOT_OK(scan_choices_->SkipTargetsUpTo(row_key_)); |
1274 | | |
1275 | | // We updated scan target above, if it goes past the row_key_ we will seek again, and |
1276 | | // process the found key in the next loop. |
1277 | 12.2M | if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) { |
1278 | 10.8M | RETURN_NOT_OK(scan_choices_->SeekToCurrentTarget(db_iter_.get())); |
1279 | 10.8M | continue; |
1280 | 10.8M | } |
1281 | 12.2M | } |
1282 | | // We found a match for the target key or a static column, so we move on to getting the |
1283 | | // SubDocument. |
1284 | 12.2M | } |
1285 | 82.9M | if (doc_reader_ == nullptr) { |
1286 | 13.5M | doc_reader_ = std::make_unique<DocDBTableReader>(db_iter_.get(), deadline_); |
1287 | 13.5M | RETURN_NOT_OK(doc_reader_->UpdateTableTombstoneTime(sub_doc_key)); |
1288 | 13.5M | if (!ignore_ttl_) { |
1289 | 7.55M | doc_reader_->SetTableTtl(schema_); |
1290 | 7.55M | } |
1291 | 13.5M | } |
1292 | | |
1293 | 82.9M | row_ = SubDocument(); |
1294 | 82.9M | auto doc_found_res = doc_reader_->Get(sub_doc_key, &projection_subkeys_, &row_); |
1295 | 82.9M | if (!doc_found_res.ok()) { |
1296 | 0 | has_next_status_ = doc_found_res.status(); |
1297 | 0 | return has_next_status_; |
1298 | 82.9M | } else { |
1299 | 82.9M | doc_found = *doc_found_res; |
1300 | 82.9M | } |
1301 | 82.9M | if (scan_choices_ && !is_static_column1.43M ) { |
1302 | 1.43M | has_next_status_ = scan_choices_->DoneWithCurrentTarget(); |
1303 | 1.43M | RETURN_NOT_OK(has_next_status_); |
1304 | 1.43M | } |
1305 | 82.9M | has_next_status_ = AdvanceIteratorToNextDesiredRow(); |
1306 | 82.9M | RETURN_NOT_OK(has_next_status_); |
1307 | 82.9M | } |
1308 | 82.1M | row_ready_ = true; |
1309 | 82.1M | return true; |
1310 | 93.6M | } |
1311 | | |
1312 | 0 | string DocRowwiseIterator::ToString() const { |
1313 | 0 | return "DocRowwiseIterator"; |
1314 | 0 | } |
1315 | | |
1316 | | namespace { |
1317 | | |
1318 | | // Set primary key column values (hashed or range columns) in a QL row value map. |
1319 | | CHECKED_STATUS SetQLPrimaryKeyColumnValues(const Schema& schema, |
1320 | | const size_t begin_index, |
1321 | | const size_t column_count, |
1322 | | const char* column_type, |
1323 | | DocKeyDecoder* decoder, |
1324 | 95.0M | QLTableRow* table_row) { |
1325 | 95.0M | if (begin_index + column_count > schema.num_columns()) { |
1326 | 0 | return STATUS_SUBSTITUTE( |
1327 | 0 | Corruption, |
1328 | 0 | "$0 primary key columns between positions $1 and $2 go beyond table columns $3", |
1329 | 0 | column_type, begin_index, begin_index + column_count - 1, schema.num_columns()); |
1330 | 0 | } |
1331 | 95.0M | PrimitiveValue primitive_value; |
1332 | 241M | for (size_t i = 0, j = begin_index; i < column_count; i++, j++146M ) { |
1333 | 146M | const auto ql_type = schema.column(j).type(); |
1334 | 146M | QLTableColumn& column = table_row->AllocColumn(schema.column_id(j)); |
1335 | 146M | RETURN_NOT_OK(decoder->DecodePrimitiveValue(&primitive_value)); |
1336 | 146M | PrimitiveValue::ToQLValuePB(primitive_value, ql_type, &column.value); |
1337 | 146M | } |
1338 | 95.0M | return decoder->ConsumeGroupEnd(); |
1339 | 95.0M | } |
1340 | | |
1341 | | } // namespace |
1342 | | |
1343 | 161k | void DocRowwiseIterator::SkipRow() { |
1344 | 161k | row_ready_ = false; |
1345 | 161k | } |
1346 | | |
1347 | 16.9M | HybridTime DocRowwiseIterator::RestartReadHt() { |
1348 | 16.9M | auto max_seen_ht = db_iter_->max_seen_ht(); |
1349 | 16.9M | if (max_seen_ht.is_valid() && max_seen_ht > db_iter_->read_time().read16.9M ) { |
1350 | 1.87k | VLOG(4) << "Restart read: " << max_seen_ht << ", original: " << db_iter_->read_time()0 ; |
1351 | 1.87k | return max_seen_ht; |
1352 | 1.87k | } |
1353 | 16.9M | return HybridTime::kInvalid; |
1354 | 16.9M | } |
1355 | | |
1356 | 104M | bool DocRowwiseIterator::IsNextStaticColumn() const { |
1357 | 104M | return schema_.has_statics() && row_hash_key_.end() + 1 == row_key_.end()1.80k ; |
1358 | 104M | } |
1359 | | |
1360 | 78.0M | Status DocRowwiseIterator::DoNextRow(const Schema& projection, QLTableRow* table_row) { |
1361 | 78.0M | VLOG(4) << __PRETTY_FUNCTION__5.86k ; |
1362 | | |
1363 | 78.0M | if (PREDICT_FALSE(done_)) { |
1364 | 0 | return STATUS(NotFound, "end of iter"); |
1365 | 0 | } |
1366 | | |
1367 | | // Ensure row is ready to be read. HasNext() must be called before reading the first row, or |
1368 | | // again after the previous row has been read or skipped. |
1369 | 78.0M | if (!row_ready_) { |
1370 | 0 | return STATUS(InternalError, "next row has not be prepared for reading"); |
1371 | 0 | } |
1372 | | |
1373 | 78.0M | DocKeyDecoder decoder(row_key_); |
1374 | 78.0M | RETURN_NOT_OK(decoder.DecodeCotableId()); |
1375 | 78.0M | RETURN_NOT_OK(decoder.DecodeColocationId()); |
1376 | 78.0M | bool has_hash_components = VERIFY_RESULT(decoder.DecodeHashCode()); |
1377 | | |
1378 | | // Populate the key column values from the doc key. The key column values in doc key were |
1379 | | // written in the same order as in the table schema (see DocKeyFromQLKey). If the range columns |
1380 | | // are present, read them also. |
1381 | 78.0M | if (has_hash_components) { |
1382 | 31.7M | RETURN_NOT_OK(SetQLPrimaryKeyColumnValues( |
1383 | 31.7M | schema_, 0, schema_.num_hash_key_columns(), |
1384 | 31.7M | "hash", &decoder, table_row)); |
1385 | 31.7M | } |
1386 | 78.0M | if (!decoder.GroupEnded()) { |
1387 | 63.3M | RETURN_NOT_OK(SetQLPrimaryKeyColumnValues( |
1388 | 63.3M | schema_, schema_.num_hash_key_columns(), schema_.num_range_key_columns(), |
1389 | 63.3M | "range", &decoder, table_row)); |
1390 | 63.3M | } |
1391 | | |
1392 | 679M | for (size_t i = projection.num_key_columns(); 78.0M i < projection.num_columns(); i++601M ) { |
1393 | 601M | const auto& column_id = projection.column_id(i); |
1394 | 601M | const auto ql_type = projection.column(i).type(); |
1395 | 601M | const SubDocument* column_value = row_.GetChild(PrimitiveValue(column_id)); |
1396 | 601M | if (column_value != nullptr) { |
1397 | 599M | QLTableColumn& column = table_row->AllocColumn(column_id); |
1398 | 599M | SubDocument::ToQLValuePB(*column_value, ql_type, &column.value); |
1399 | 599M | column.ttl_seconds = column_value->GetTtl(); |
1400 | 599M | if (column_value->IsWriteTimeSet()) { |
1401 | 532M | column.write_time = column_value->GetWriteTime(); |
1402 | 532M | } |
1403 | 599M | } |
1404 | 601M | } |
1405 | | |
1406 | 78.0M | row_ready_ = false; |
1407 | 78.0M | return Status::OK(); |
1408 | 78.0M | } |
1409 | | |
1410 | 1.26k | bool DocRowwiseIterator::LivenessColumnExists() const { |
1411 | 1.26k | const SubDocument* subdoc = row_.GetChild(PrimitiveValue::kLivenessColumn); |
1412 | 1.26k | return subdoc != nullptr && subdoc->value_type() != ValueType::kInvalid; |
1413 | 1.26k | } |
1414 | | |
1415 | 7.21M | CHECKED_STATUS DocRowwiseIterator::GetNextReadSubDocKey(SubDocKey* sub_doc_key) const { |
1416 | 7.21M | if (db_iter_ == nullptr) { |
1417 | 0 | return STATUS(Corruption, "Iterator not initialized."); |
1418 | 0 | } |
1419 | | |
1420 | | // There are no more rows to fetch, so no next SubDocKey to read. |
1421 | 7.21M | if (!VERIFY_RESULT(HasNext())) { |
1422 | 18.4E | DVLOG(3) << "No Next SubDocKey"; |
1423 | 3.36M | return Status::OK(); |
1424 | 3.36M | } |
1425 | | |
1426 | 3.84M | DocKey doc_key; |
1427 | 3.84M | RETURN_NOT_OK(doc_key.FullyDecodeFrom(row_key_)); |
1428 | 3.84M | *sub_doc_key = SubDocKey(doc_key, read_time_.read); |
1429 | 18.4E | DVLOG(3) << "Next SubDocKey: " << sub_doc_key->ToString(); |
1430 | 3.84M | return Status::OK(); |
1431 | 3.84M | } |
1432 | | |
1433 | 42.4M | Result<Slice> DocRowwiseIterator::GetTupleId() const { |
1434 | | // Return tuple id without cotable id / colocation id if any. |
1435 | 42.4M | Slice tuple_id = row_key_; |
1436 | 42.4M | if (tuple_id.starts_with(ValueTypeAsChar::kTableId)) { |
1437 | 36.2M | tuple_id.remove_prefix(1 + kUuidSize); |
1438 | 36.2M | } else if (6.22M tuple_id.starts_with(ValueTypeAsChar::kColocationId)6.22M ) { |
1439 | 152 | tuple_id.remove_prefix(1 + sizeof(ColocationId)); |
1440 | 152 | } |
1441 | 42.4M | return tuple_id; |
1442 | 42.4M | } |
1443 | | |
1444 | 1.13M | Result<bool> DocRowwiseIterator::SeekTuple(const Slice& tuple_id) { |
1445 | | // If cotable id / colocation id is present in the table schema, then |
1446 | | // we need to prepend it in the tuple key to seek. |
1447 | 1.13M | if (schema_.has_cotable_id() || schema_.has_colocation_id()136 ) { |
1448 | 1.13M | uint32_t size = schema_.has_colocation_id() ? sizeof(ColocationId)39 : kUuidSize1.13M ; |
1449 | 1.13M | if (!tuple_key_) { |
1450 | 259k | tuple_key_.emplace(); |
1451 | 259k | tuple_key_->Reserve(1 + size + tuple_id.size()); |
1452 | | |
1453 | 259k | if (schema_.has_cotable_id()) { |
1454 | 259k | std::string bytes; |
1455 | 259k | schema_.cotable_id().EncodeToComparable(&bytes); |
1456 | 259k | tuple_key_->AppendValueType(ValueType::kTableId); |
1457 | 259k | tuple_key_->AppendRawBytes(bytes); |
1458 | 259k | } else { |
1459 | 6 | tuple_key_->AppendValueType(ValueType::kColocationId); |
1460 | 6 | tuple_key_->AppendUInt32(schema_.colocation_id()); |
1461 | 6 | } |
1462 | 877k | } else { |
1463 | 877k | tuple_key_->Truncate(1 + size); |
1464 | 877k | } |
1465 | 1.13M | tuple_key_->AppendRawBytes(tuple_id); |
1466 | 1.13M | db_iter_->Seek(*tuple_key_); |
1467 | 1.13M | } else { |
1468 | 119 | db_iter_->Seek(tuple_id); |
1469 | 119 | } |
1470 | | |
1471 | 1.13M | iter_key_.Clear(); |
1472 | 1.13M | row_ready_ = false; |
1473 | | |
1474 | 2.27M | return VERIFY_RESULT1.13M (HasNext()) && VERIFY_RESULT1.13M (GetTupleId()) == tuple_id; |
1475 | 1.13M | } |
1476 | | |
1477 | | } // namespace docdb |
1478 | | } // namespace yb |