YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_rowwise_iterator.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_rowwise_iterator.h"
15
#include <iterator>
16
17
#include <cstdint>
18
#include <ostream>
19
#include <string>
20
#include <vector>
21
22
#include "yb/common/common.pb.h"
23
#include "yb/common/doc_hybrid_time.h"
24
#include "yb/common/hybrid_time.h"
25
#include "yb/common/ql_expr.h"
26
#include "yb/common/ql_scanspec.h"
27
#include "yb/common/ql_value.h"
28
#include "yb/common/read_hybrid_time.h"
29
#include "yb/common/transaction.h"
30
31
#include "yb/docdb/docdb_fwd.h"
32
#include "yb/docdb/doc_key.h"
33
#include "yb/docdb/doc_path.h"
34
#include "yb/docdb/doc_ql_scanspec.h"
35
#include "yb/docdb/doc_reader.h"
36
#include "yb/docdb/doc_scanspec_util.h"
37
#include "yb/docdb/docdb_rocksdb_util.h"
38
#include "yb/docdb/docdb_types.h"
39
#include "yb/docdb/expiration.h"
40
#include "yb/docdb/intent_aware_iterator.h"
41
#include "yb/docdb/primitive_value.h"
42
#include "yb/docdb/subdocument.h"
43
#include "yb/docdb/value.h"
44
#include "yb/docdb/value_type.h"
45
46
#include "yb/gutil/strings/substitute.h"
47
#include "yb/util/flags.h"
48
#include "yb/rocksdb/db/compaction.h"
49
#include "yb/rocksutil/yb_rocksdb.h"
50
51
#include "yb/rocksdb/db.h"
52
53
#include "yb/util/flag_tags.h"
54
#include "yb/util/result.h"
55
#include "yb/util/status.h"
56
#include "yb/util/status_format.h"
57
#include "yb/util/status_log.h"
58
#include "yb/util/strongly_typed_bool.h"
59
60
DEFINE_bool(disable_hybrid_scan, false,
61
            "If true, hybrid scan will be disabled");
62
TAG_FLAG(disable_hybrid_scan, runtime);
63
64
using std::string;
65
66
namespace yb {
67
namespace docdb {
68
69
class ScanChoices {
70
 public:
71
75.4k
  explicit ScanChoices(bool is_forward_scan) : is_forward_scan_(is_forward_scan) {}
72
75.4k
  virtual ~ScanChoices() {}
73
74
26.0M
  bool CurrentTargetMatchesKey(const Slice& curr) {
75
18.4E
    VLOG(3) << __PRETTY_FUNCTION__ << " checking if acceptable ? "
76
18.4E
            << (curr == current_scan_target_ ? 
"YEP"0
: "NOPE")
77
18.4E
            << ": " << DocKey::DebugSliceToString(curr)
78
18.4E
            << " vs " << DocKey::DebugSliceToString(current_scan_target_.AsSlice());
79
26.0M
    return curr == current_scan_target_;
80
26.0M
  }
81
82
  // Returns false if there are still target keys we need to scan, and true if we are done.
83
38.3M
  virtual bool FinishedWithScanChoices() const { return finished_; }
84
85
  // Go to the next scan target if any.
86
  virtual CHECKED_STATUS DoneWithCurrentTarget() = 0;
87
88
  // Go (directly) to the new target (or the one after if new_target does not
89
  // exist in the desired list/range). If the new_target is larger than all scan target options it
90
  // means we are done.
91
  virtual CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) = 0;
92
93
  // If the given doc_key isn't already at the desired target, seek appropriately to go to the
94
  // current target.
95
  virtual CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) = 0;
96
97
 protected:
98
  const bool is_forward_scan_;
99
  KeyBytes current_scan_target_;
100
  bool finished_ = false;
101
};
102
103
class DiscreteScanChoices : public ScanChoices {
104
 public:
105
  DiscreteScanChoices(const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key,
106
                      const KeyBytes& upper_doc_key)
107
0
      : ScanChoices(doc_spec.is_forward_scan()) {
108
0
    range_cols_scan_options_ = doc_spec.range_options();
109
0
    current_scan_target_idxs_.resize(range_cols_scan_options_->size());
110
0
    for (size_t i = 0; i < range_cols_scan_options_->size(); i++) {
111
0
      current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin();
112
0
    }
113
114
    // Initialize target doc key.
115
0
    if (is_forward_scan_) {
116
0
      current_scan_target_ = lower_doc_key;
117
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
118
0
        CHECK_OK(SkipTargetsUpTo(lower_doc_key));
119
0
      }
120
0
    } else {
121
0
      current_scan_target_ = upper_doc_key;
122
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
123
0
        CHECK_OK(SkipTargetsUpTo(upper_doc_key));
124
0
      }
125
0
    }
126
0
  }
127
128
  DiscreteScanChoices(const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key,
129
                      const KeyBytes& upper_doc_key)
130
0
      : ScanChoices(doc_spec.is_forward_scan()) {
131
0
    range_cols_scan_options_ = doc_spec.range_options();
132
0
    current_scan_target_idxs_.resize(range_cols_scan_options_->size());
133
0
    for (size_t i = 0; i < range_cols_scan_options_->size(); i++) {
134
0
      current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin();
135
0
    }
136
137
    // Initialize target doc key.
138
0
    if (is_forward_scan_) {
139
0
      current_scan_target_ = lower_doc_key;
140
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
141
0
        CHECK_OK(SkipTargetsUpTo(lower_doc_key));
142
0
      }
143
0
    } else {
144
0
      current_scan_target_ = upper_doc_key;
145
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
146
0
        CHECK_OK(SkipTargetsUpTo(upper_doc_key));
147
0
      }
148
0
    }
149
0
  }
150
151
  CHECKED_STATUS DoneWithCurrentTarget() override;
152
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
153
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
154
155
 protected:
156
  // Utility function for (multi)key scans. Updates the target scan key by incrementing the option
157
  // index for one column. Will handle overflow by setting current column index to 0 and
158
  // incrementing the previous column instead. If it overflows at first column it means we are done,
159
  // so it clears the scan target idxs array.
160
  CHECKED_STATUS IncrementScanTargetAtColumn(size_t start_col);
161
162
  // Utility function for (multi)key scans to initialize the range portion of the current scan
163
  // target, scan target with the first option.
164
  // Only needed for scans that include the static row, otherwise Init will take care of this.
165
  Result<bool> InitScanTargetRangeGroupIfNeeded();
166
167
 private:
168
  // For (multi)key scans (e.g. selects with 'IN' condition on the range columns) we hold the
169
  // options for each range column as we iteratively seek to each target key.
170
  // e.g. for a query "h = 1 and r1 in (2,3) and r2 in (4,5) and r3 = 6":
171
  //  range_cols_scan_options_   [[2, 3], [4, 5], [6]] -- value options for each column.
172
  //  current_scan_target_idxs_  goes from [0, 0, 0] up to [1, 1, 0] -- except when including the
173
  //                             static row when it starts from [0, 0, -1] instead.
174
  //  current_scan_target_       goes from [1][2,4,6] up to [1][3,5,6] -- is the doc key containing,
175
  //                             for each range column, the value (option) referenced by the
176
  //                             corresponding index (updated along with current_scan_target_idxs_).
177
  std::shared_ptr<std::vector<std::vector<PrimitiveValue>>> range_cols_scan_options_;
178
  mutable std::vector<std::vector<PrimitiveValue>::const_iterator> current_scan_target_idxs_;
179
};
180
181
0
Status DiscreteScanChoices::IncrementScanTargetAtColumn(size_t start_col) {
182
0
  DCHECK_LE(start_col, current_scan_target_idxs_.size());
183
184
  // Increment start col, move backwards in case of overflow.
185
0
  ssize_t col_idx = start_col;
186
0
  for (; col_idx >= 0; col_idx--) {
187
0
    const auto& choices = (*range_cols_scan_options_)[col_idx];
188
0
    auto& it = current_scan_target_idxs_[col_idx];
189
190
0
    if (++it != choices.end()) {
191
0
      break;
192
0
    }
193
0
    it = choices.begin();
194
0
  }
195
196
0
  if (col_idx < 0) {
197
    // If we got here we finished all the options and are done.
198
0
    finished_ = true;
199
0
    return Status::OK();
200
0
  }
201
202
0
  DocKeyDecoder decoder(current_scan_target_);
203
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
204
0
  for (int i = 0; i != col_idx; ++i) {
205
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue());
206
0
  }
207
208
0
  current_scan_target_.Truncate(
209
0
      decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata());
210
211
0
  for (size_t i = col_idx; i <= start_col; ++i) {
212
0
    current_scan_target_idxs_[i]->AppendToKey(&current_scan_target_);
213
0
  }
214
215
0
  return Status::OK();
216
0
}
217
218
0
Result<bool> DiscreteScanChoices::InitScanTargetRangeGroupIfNeeded() {
219
0
  DocKeyDecoder decoder(current_scan_target_.AsSlice());
220
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
221
222
  // Initialize the range key values if needed (i.e. we scanned the static row until now).
223
0
  if (!VERIFY_RESULT(decoder.HasPrimitiveValue())) {
224
0
    current_scan_target_.mutable_data()->pop_back();
225
0
    for (size_t col_idx = 0; col_idx < range_cols_scan_options_->size(); col_idx++) {
226
0
      current_scan_target_idxs_[col_idx]->AppendToKey(&current_scan_target_);
227
0
    }
228
0
    current_scan_target_.AppendValueType(ValueType::kGroupEnd);
229
0
    return true;
230
0
  }
231
0
  return false;
232
0
}
233
234
0
Status DiscreteScanChoices::DoneWithCurrentTarget() {
235
0
  VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target";
236
0
  DCHECK(!FinishedWithScanChoices());
237
238
  // Initialize the first target/option if not done already, otherwise go to the next one.
239
0
  if (!VERIFY_RESULT(InitScanTargetRangeGroupIfNeeded())) {
240
0
    RETURN_NOT_OK(IncrementScanTargetAtColumn(range_cols_scan_options_->size() - 1));
241
0
    current_scan_target_.AppendValueType(ValueType::kGroupEnd);
242
0
  }
243
0
  return Status::OK();
244
0
}
245
246
0
Status DiscreteScanChoices::SkipTargetsUpTo(const Slice& new_target) {
247
0
  VLOG(2) << __PRETTY_FUNCTION__
248
0
            << " Updating current target to be >= "
249
0
            << DocKey::DebugSliceToString(new_target);
250
0
  DCHECK(!FinishedWithScanChoices());
251
0
  RETURN_NOT_OK(InitScanTargetRangeGroupIfNeeded());
252
0
  DocKeyDecoder decoder(new_target);
253
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
254
0
  current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data()));
255
256
0
  size_t col_idx = 0;
257
0
  PrimitiveValue target_value;
258
0
  while (col_idx < range_cols_scan_options_->size()) {
259
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
260
0
    const auto& choices = (*range_cols_scan_options_)[col_idx];
261
0
    auto& it = current_scan_target_idxs_[col_idx];
262
263
    // Fast-path in case the existing value for this column already matches the new target.
264
0
    if (target_value == *it) {
265
0
      col_idx++;
266
0
      target_value.AppendToKey(&current_scan_target_);
267
0
      continue;
268
0
    }
269
270
    // Search for the option that matches new target value (for the current column).
271
0
    if (is_forward_scan_) {
272
0
      it = std::lower_bound(choices.begin(), choices.end(), target_value);
273
0
    } else {
274
0
      it = std::lower_bound(choices.begin(), choices.end(), target_value, std::greater<>());
275
0
    }
276
277
    // If we overflowed, the new target value for this column is larger than all our options, so
278
    // we go back and increment the previous column instead.
279
0
    if (it == choices.end()) {
280
0
      RETURN_NOT_OK(IncrementScanTargetAtColumn(col_idx - 1));
281
0
      break;
282
0
    }
283
284
    // Else, update the current target value for this column.
285
0
    it->AppendToKey(&current_scan_target_);
286
287
    // If we did not find an exact match we are already beyond the new target so we can stop.
288
0
    if (target_value != *it) {
289
0
      col_idx++;
290
0
      break;
291
0
    }
292
293
0
    col_idx++;
294
0
  }
295
296
  // If there are any columns left (i.e. we stopped early), it means we did not find an exact
297
  // match and we reached beyond the new target key. So we need to include all options for the
298
  // leftover columns (i.e. set all following indexes to 0).
299
0
  for (size_t i = col_idx; i < current_scan_target_idxs_.size(); i++) {
300
0
    current_scan_target_idxs_[i] = (*range_cols_scan_options_)[i].begin();
301
0
    current_scan_target_idxs_[i]->AppendToKey(&current_scan_target_);
302
0
  }
303
304
0
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
305
306
0
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
307
0
          << DocKey::DebugSliceToString(current_scan_target_);
308
309
0
  return Status::OK();
310
0
}
311
312
0
Status DiscreteScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
313
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
314
  // Seek to the current target doc key if needed.
315
0
  if (!FinishedWithScanChoices()) {
316
0
    if (is_forward_scan_) {
317
0
      VLOG(2) << __PRETTY_FUNCTION__ << " Seeking to " << current_scan_target_;
318
0
      db_iter->Seek(current_scan_target_);
319
0
    } else {
320
0
      auto tmp = current_scan_target_;
321
0
      tmp.AppendValueType(ValueType::kHighest);
322
0
      VLOG(2) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp;
323
0
      db_iter->PrevDocKey(tmp);
324
0
    }
325
0
  }
326
0
  return Status::OK();
327
0
}
328
329
// This class combines the notions of option filters (col1 IN (1,2,3)) and
330
// singular range bound filters (col1 < 4 AND col1 >= 1) into a single notion of
331
// lists of ranges. So a filter for a column given in the
332
// Doc(QL/PGSQL)ScanSpec is converted into a range bound filter.
333
// In the end, each HybridScanChoices
334
// instance should have a sorted list of disjoint ranges to filter each column.
335
// Right now this supports a conjunction of range bound and discrete filters.
336
// Disjunctions are also supported but are UNTESTED.
337
// TODO: Test disjunctions when YSQL and YQL support pushing those down
338
339
class HybridScanChoices : public ScanChoices {
340
 public:
341
342
  // Constructs a list of ranges for each column from the given scanspec.
343
  // A filter of the form col1 IN (1,4,5) is converted to a filter
344
  // in the form col1 IN ([1, 1], [4, 4], [5, 5]).
345
  HybridScanChoices(const Schema& schema,
346
                    const KeyBytes &lower_doc_key,
347
                    const KeyBytes &upper_doc_key,
348
                    bool is_forward_scan,
349
                    const std::vector<ColumnId> &range_options_indexes,
350
                    const
351
                    std::shared_ptr<std::vector<std::vector<PrimitiveValue>>>&
352
                        range_options,
353
                    const std::vector<ColumnId> range_bounds_indexes,
354
                    const QLScanRange *range_bounds)
355
                    : ScanChoices(is_forward_scan),
356
                        lower_doc_key_(lower_doc_key),
357
75.4k
                        upper_doc_key_(upper_doc_key) {
358
75.4k
    auto range_cols_scan_options = range_options;
359
75.4k
    size_t idx = 0;
360
75.4k
    range_cols_scan_options_lower_.reserve(schema.num_range_key_columns());
361
75.4k
    range_cols_scan_options_upper_.reserve(schema.num_range_key_columns());
362
363
75.4k
    size_t num_hash_cols = schema.num_hash_key_columns();
364
365
75.4k
    for (idx = schema.num_hash_key_columns();
366
204k
            idx < schema.num_key_columns(); 
idx++128k
) {
367
128k
      const ColumnId col_idx = schema.column_id(idx);
368
128k
      range_cols_scan_options_lower_.push_back({});
369
128k
      range_cols_scan_options_upper_.push_back({});
370
371
      // If this is a range bound filter, we create a singular
372
      // list of the given range bound
373
128k
      if ((std::find(range_bounds_indexes.begin(),
374
128k
                        range_bounds_indexes.end(), col_idx)
375
128k
                    != range_bounds_indexes.end())
376
128k
            && (std::find(range_options_indexes.begin(),
377
128k
                            range_options_indexes.end(), col_idx)
378
128k
                        == range_options_indexes.end())) {
379
127k
        const auto col_sort_type = schema.column(idx).sorting_type();
380
127k
        const QLScanRange::QLRange range = range_bounds->RangeFor(col_idx);
381
127k
        const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type,
382
127k
                                                    true /* lower_bound */);
383
127k
        const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type,
384
127k
                                                    false /* upper_bound */);
385
386
127k
        range_cols_scan_options_lower_[idx - num_hash_cols].push_back(lower);
387
127k
        range_cols_scan_options_upper_[idx - num_hash_cols].push_back(upper);
388
127k
      } else {
389
390
        // If this is an option filter, we turn each option into a
391
        // range bound to produce a list of singular range bounds
392
1.31k
        if(std::find(range_options_indexes.begin(),
393
1.31k
                        range_options_indexes.end(), col_idx)
394
1.31k
                    != range_options_indexes.end()) {
395
1.31k
          auto &options = (*range_cols_scan_options)[idx - num_hash_cols];
396
397
1.31k
          if (options.empty()) {
398
            // If there is nothing specified in the IN list like in
399
            // SELECT * FROM ... WHERE c1 IN ();
400
            // then nothing should pass the filter.
401
            // To enforce this, we create a range bound (kHighest, kLowest)
402
            //
403
            // As of D15647 we do not send empty options.
404
            // This is kept for backward compatibility during rolling upgrades.
405
0
            range_cols_scan_options_lower_[idx
406
0
              - num_hash_cols].push_back(PrimitiveValue(ValueType::kHighest));
407
0
            range_cols_scan_options_upper_[idx
408
0
              - num_hash_cols].push_back(PrimitiveValue(ValueType::kLowest));
409
0
          }
410
411
3.50k
          for (auto val : options) {
412
3.50k
            const auto lower = val;
413
3.50k
            const auto upper = val;
414
3.50k
            range_cols_scan_options_lower_[idx
415
3.50k
              - num_hash_cols].push_back(lower);
416
3.50k
            range_cols_scan_options_upper_[idx
417
3.50k
              - num_hash_cols].push_back(upper);
418
3.50k
          }
419
420
18.4E
        } else {
421
            // If no filter is specified, we just impose an artificial range
422
            // filter [kLowest, kHighest]
423
18.4E
            range_cols_scan_options_lower_[idx - num_hash_cols]
424
18.4E
                                .push_back(PrimitiveValue(ValueType::kLowest));
425
18.4E
            range_cols_scan_options_upper_[idx - num_hash_cols]
426
18.4E
                                .push_back(PrimitiveValue(ValueType::kHighest));
427
18.4E
        }
428
1.31k
      }
429
128k
    }
430
431
75.4k
    current_scan_target_idxs_.resize(range_cols_scan_options_lower_.size());
432
433
75.4k
    if (is_forward_scan_) {
434
67.3k
      current_scan_target_ = lower_doc_key;
435
67.3k
    } else {
436
8.11k
      current_scan_target_ = upper_doc_key;
437
8.11k
    }
438
439
75.4k
  }
440
441
  HybridScanChoices(const Schema& schema,
442
                    const DocPgsqlScanSpec& doc_spec,
443
                    const KeyBytes &lower_doc_key,
444
                    const KeyBytes &upper_doc_key)
445
      : HybridScanChoices(schema, lower_doc_key, upper_doc_key,
446
      doc_spec.is_forward_scan(), doc_spec.range_options_indexes(),
447
      doc_spec.range_options(), doc_spec.range_bounds_indexes(),
448
13.2k
      doc_spec.range_bounds()) {
449
13.2k
  }
450
451
  HybridScanChoices(const Schema& schema,
452
                    const DocQLScanSpec& doc_spec,
453
                    const KeyBytes &lower_doc_key,
454
                    const KeyBytes &upper_doc_key)
455
      : HybridScanChoices(schema, lower_doc_key, upper_doc_key,
456
      doc_spec.is_forward_scan(), doc_spec.range_options_indexes(),
457
      doc_spec.range_options(), doc_spec.range_bounds_indexes(),
458
62.1k
      doc_spec.range_bounds()) {
459
62.1k
  }
460
461
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
462
  CHECKED_STATUS DoneWithCurrentTarget() override;
463
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
464
465
 protected:
466
  // Utility function for (multi)key scans. Updates the target scan key by
467
  // incrementing the option
468
  // index for one column. Will handle overflow by setting current column
469
  // index to 0 and incrementing the previous column instead. If it overflows
470
  // at first column it means we are done, so it clears the scan target idxs
471
  // array.
472
  CHECKED_STATUS IncrementScanTargetAtColumn(int start_col);
473
474
 private:
475
  KeyBytes prev_scan_target_;
476
477
  // The following encodes the list of ranges we are iterating over
478
  std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_lower_;
479
  std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_upper_;
480
481
  std::vector<ColumnId> range_options_indexes_;
482
  mutable std::vector<size_t> current_scan_target_idxs_;
483
484
  bool is_options_done_ = false;
485
486
  const KeyBytes lower_doc_key_;
487
  const KeyBytes upper_doc_key_;
488
};
489
490
// Sets current_scan_target_ to the first tuple in the filter space
491
// that is >= new_target.
492
12.2M
Status HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) {
493
12.2M
  VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= "
494
2
          << DocKey::DebugSliceToString(new_target);
495
12.2M
  DCHECK(!FinishedWithScanChoices());
496
12.2M
  is_options_done_ = false;
497
498
  /*
499
   Let's say we have a row key with (A B) as the hash part and C, D as the range part:
500
   ((A B) C D) E F
501
502
   Let's say our current constraints :
503
    l_c_k <= C <= u_c_k
504
     4            6
505
506
    l_d_j <= D <= u_d_j
507
      3           5
508
509
    a b  0 d  -> a  b l_c  d
510
511
    a b  5 d  -> a  b  5   d
512
                  [ Will subsequently seek out of document on reading the subdoc]
513
514
    a b  7 d  -> a b l_c_(k+1) 0
515
                [ If there is another range bound filter that's higher than the
516
                  current one, effectively, moving this column to the next
517
                  range in the filter list.]
518
              -> a b Inf
519
                [ This will seek to <b_next> and on the next invocation update:
520
                   a <b_next> ? ? -> a <b_next> l_c_0 0 ]
521
522
    a b  c 6  -> a b c l_d_(j+1)
523
                [ If there is another range bound filter that's higher than the
524
                  d, effectively, moving column D to the next
525
                  range in the filter list.]
526
              -> a b c Inf
527
                [ If c_next is between l_c_k and u_c_k. This will seek to <a b
528
                   <c_next>> and on the next invocation update:
529
                   a b <c_next> ? -> a b <c_next> l_d_0 ]
530
              -> a b l_c_(k+1) l_d_0
531
                 [ If c_next is above u_c_k. We do this because we know
532
                   exactly what the next tuple in our filter space should be.]
533
  */
534
12.2M
  DocKeyDecoder decoder(new_target);
535
12.2M
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
536
12.2M
  current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data()));
537
538
12.2M
  size_t col_idx = 0;
539
12.2M
  PrimitiveValue target_value;
540
35.3M
  for (col_idx = 0; col_idx < current_scan_target_idxs_.size(); 
col_idx++23.0M
) {
541
33.9M
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
542
33.9M
    const auto& lower_choices = range_cols_scan_options_lower_[col_idx];
543
33.9M
    const auto& upper_choices = range_cols_scan_options_upper_[col_idx];
544
33.9M
    auto current_ind = current_scan_target_idxs_[col_idx];
545
33.9M
    DCHECK(current_ind < lower_choices.size());
546
33.9M
    const auto& lower = lower_choices[current_ind];
547
33.9M
    const auto& upper = upper_choices[current_ind];
548
549
    // If it's in range then good, continue after appending the target value
550
    // column.
551
552
33.9M
    if (target_value >= lower && 
target_value <= upper28.6M
) {
553
23.0M
      target_value.AppendToKey(&current_scan_target_);
554
23.0M
      continue;
555
23.0M
    }
556
557
    // If target_value is not in the current range then we must find a range
558
    // that works for it.
559
    // If we are above all ranges then increment the index of the previous
560
    // column.
561
    // Else, target_value is below at least one range: find the lowest lower
562
    // bound above target_value and use that, this relies on the assumption
563
    // that all our filter ranges are disjoint.
564
565
10.8M
    auto it = lower_choices.begin();
566
10.8M
    size_t ind = 0;
567
568
    // Find an upper (lower) bound closest to target_value
569
10.8M
    if (is_forward_scan_) {
570
10.8M
      it = std::lower_bound(upper_choices.begin(),
571
10.8M
                                upper_choices.end(), target_value);
572
10.8M
      ind = it - upper_choices.begin();
573
10.8M
    } else {
574
408
      it = std::lower_bound(lower_choices.begin(), lower_choices.end(),
575
408
              target_value, std::greater<>());
576
408
      ind = it - lower_choices.begin();
577
408
    }
578
579
10.8M
    if (ind == lower_choices.size()) {
580
      // target value is higher than all range options and
581
      // we need to increment.
582
5.57M
      RETURN_NOT_OK(IncrementScanTargetAtColumn(static_cast<int>(col_idx) - 1));
583
5.57M
      col_idx = current_scan_target_idxs_.size();
584
5.57M
      break;
585
5.57M
    }
586
587
5.28M
    current_scan_target_idxs_[col_idx] = ind;
588
589
    // If we are within a range then target_value itself should work.
590
5.28M
    if (lower_choices[ind] <= target_value
591
5.28M
        && 
upper_choices[ind] >= target_value721
) {
592
652
      target_value.AppendToKey(&current_scan_target_);
593
652
      continue;
594
652
    }
595
596
    // Otherwise we must set it to the next lower bound.
597
    // This only works as we are assuming all given ranges are
598
    // disjoint.
599
600
5.28M
    DCHECK((is_forward_scan_ && lower_choices[ind] > target_value)
601
5.28M
              || (!is_forward_scan_ && upper_choices[ind]
602
5.28M
              < target_value));
603
604
5.28M
    if (is_forward_scan_) {
605
5.28M
      lower_choices[ind].AppendToKey(&current_scan_target_);
606
5.28M
    } else {
607
355
      upper_choices[ind].AppendToKey(&current_scan_target_);
608
355
    }
609
5.28M
    col_idx++;
610
5.28M
    break;
611
5.28M
  }
612
613
  // Reset the remaining range columns to lower bounds for forward scans
614
  // or upper bounds for backward scans.
615
12.7M
  
for (size_t i = col_idx; 12.2M
i < range_cols_scan_options_lower_.size();
i++514k
) {
616
514k
    current_scan_target_idxs_[i] = 0;
617
514k
    if (is_forward_scan_) {
618
514k
      range_cols_scan_options_lower_[i][0]
619
514k
                    .AppendToKey(&current_scan_target_);
620
514k
    } else {
621
6
      range_cols_scan_options_upper_[i][0]
622
6
                    .AppendToKey(&current_scan_target_);
623
6
    }
624
514k
  }
625
626
12.2M
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
627
18.4E
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
628
18.4E
          << DocKey::DebugSliceToString(current_scan_target_);
629
12.2M
  return Status::OK();
630
12.2M
}
631
632
// Update the value at start column by setting it up for incrementing to the
633
// next allowed value in the filter space
634
// ---------------------------------------------------------------------------
635
// There are two important cases to consider here.
636
// Let's say the value of current_scan_target_ at start_col, c,
637
// is currently V and the current bounds for that column
638
// is l_c_k <= V <= u_c_k. In the usual case where V != u_c_k
639
// (or V != l_c_k for backwards scans) such that V_next is still in the given
640
// restriction, we set column c + 1 to kHighest (kLowest), such that the next
641
// invocation of GetNext() produces V_next at column similar to what is done
642
// in SkipTargetsUpTo. In this case, doing a SkipTargetsUpTo on the resulting
643
// current_scan_target_ should yield the next allowed value in the filter space
644
// In the case where V = u_c_k (V = l_c_k), or in other words V is at the
645
// EXTREMAL boundary of the current range, we know exactly what the next value
646
// of column C will be. So we move column c to the next
647
// range k+1 and set that column to the new value l_c_(k+1) (u_c_(k+1))
648
// while setting all columns, b > c to l_b_0 (u_b_0)
649
// In the case of overflow on a column c (we want to increment the
650
// restriction range of c to the next range bound for that column but there
651
// are no restriction ranges remaining), we set the
652
// current column to the 0th range and move on to increment c - 1
653
// Note that in almost all cases the resulting current_scan_target_ is strictly
654
// greater (lesser in the case of backwards scans) than the original
655
// current_scan_target_. This is necessary to allow the iterator seek out
656
// of the current scan target. The exception to this rule is below.
657
// ---------------------------------------------------------------------------
658
// This function leaves the scan target as is if the next tuple in the current
659
// scan direction is also the next tuple in the filter space and start_col
660
// is given as the last column
661
7.01M
Status HybridScanChoices::IncrementScanTargetAtColumn(int start_col) {
662
663
18.4E
  VLOG(2) << __PRETTY_FUNCTION__
664
18.4E
          << " Incrementing at " << start_col;
665
666
  // Increment start col, move backwards in case of overflow.
667
7.01M
  int col_idx = start_col;
668
  // lower and upper here are taken relative to the scan order
669
7.01M
  auto &lower_extremal_vector = is_forward_scan_
670
7.01M
                          ? 
range_cols_scan_options_lower_6.55M
671
7.01M
                            : 
range_cols_scan_options_upper_451k
;
672
7.01M
  auto &upper_extremal_vector = is_forward_scan_
673
7.01M
                                ? 
range_cols_scan_options_upper_6.55M
674
7.01M
                                  : 
range_cols_scan_options_lower_451k
;
675
7.01M
  DocKeyDecoder t_decoder(current_scan_target_);
676
7.01M
  RETURN_NOT_OK(t_decoder.DecodeToRangeGroup());
677
678
  // refer to the documentation of this function to see what extremal
679
  // means here
680
7.01M
  std::vector<bool> is_extremal;
681
7.01M
  PrimitiveValue target_value;
682
19.8M
  for (int i = 0; i <= col_idx; 
++i12.8M
) {
683
12.8M
    RETURN_NOT_OK(t_decoder.DecodePrimitiveValue(&target_value));
684
12.8M
    is_extremal.push_back(target_value ==
685
12.8M
      upper_extremal_vector[i][current_scan_target_idxs_[i]]);
686
12.8M
  }
687
688
  // this variable tells us whether we start by appending
689
  // kHighest/kLowest at col_idx after the following for loop
690
7.01M
  bool start_with_infinity = true;
691
692
7.05M
  for (; col_idx >= 0; 
col_idx--41.5k
) {
693
6.83M
    const auto& choices = lower_extremal_vector[col_idx];
694
6.83M
    auto it = current_scan_target_idxs_[col_idx];
695
696
6.83M
    if (!is_extremal[col_idx]) {
697
6.78M
      col_idx++;
698
6.78M
      start_with_infinity = true;
699
6.78M
      break;
700
6.78M
    }
701
702
46.3k
    if (++it < choices.size()) {
703
      // and if this value is at the extremal bound
704
4.84k
      if (is_extremal[col_idx]) {
705
4.84k
        current_scan_target_idxs_[col_idx]++;
706
4.84k
        start_with_infinity = false;
707
4.84k
      }
708
4.84k
      break;
709
4.84k
    }
710
711
41.5k
    current_scan_target_idxs_[col_idx] = 0;
712
41.5k
  }
713
714
7.01M
  DocKeyDecoder decoder(current_scan_target_);
715
7.01M
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
716
19.7M
  
for (int i = 0; 7.01M
i < col_idx;
++i12.7M
) {
717
12.7M
    RETURN_NOT_OK(decoder.DecodePrimitiveValue());
718
12.7M
  }
719
720
7.01M
  if (col_idx < 0) {
721
    // If we got here we finished all the options and are done.
722
218k
    col_idx++;
723
218k
    start_with_infinity = true;
724
218k
    is_options_done_ = true;
725
218k
  }
726
727
7.01M
  current_scan_target_.Truncate(
728
7.01M
      decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata());
729
730
731
7.01M
  if (start_with_infinity &&
732
7.01M
        
(col_idx < static_cast<int64>(current_scan_target_idxs_.size()))7.00M
) {
733
5.61M
    if (is_forward_scan_) {
734
5.61M
      PrimitiveValue(ValueType::kHighest).AppendToKey(&current_scan_target_);
735
5.61M
    } else {
736
6.17k
      PrimitiveValue(ValueType::kLowest).AppendToKey(&current_scan_target_);
737
6.17k
    }
738
5.61M
    col_idx++;
739
5.61M
  }
740
741
7.01M
  if (start_with_infinity) {
742
    // there's no point in appending anything after infinity
743
7.00M
    return Status::OK();
744
7.00M
  }
745
746
9.84k
  
for (int i = col_idx; 4.70k
i <= start_col;
++i5.14k
) {
747
5.14k
      lower_extremal_vector[i][current_scan_target_idxs_[i]]
748
5.14k
                                      .AppendToKey(&current_scan_target_);
749
5.14k
  }
750
751
4.71k
  for (size_t i = start_col + 1; i < current_scan_target_idxs_.size(); 
++i10
) {
752
10
    current_scan_target_idxs_[i] = 0;
753
10
    lower_extremal_vector[i][current_scan_target_idxs_[i]]
754
10
                                    .AppendToKey(&current_scan_target_);
755
10
  }
756
757
4.70k
  return Status::OK();
758
7.01M
}
759
760
// Method called when the scan target is done being used
761
1.43M
Status HybridScanChoices::DoneWithCurrentTarget() {
762
  // prev_scan_target_ is necessary for backwards scans
763
1.43M
  prev_scan_target_ = current_scan_target_;
764
1.43M
  RETURN_NOT_OK(IncrementScanTargetAtColumn(
765
1.43M
                                  static_cast<int>(current_scan_target_idxs_.size()) - 1));
766
1.43M
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
767
768
  // if we we incremented the last index then
769
  // if this is a forward scan it doesn't matter what we do
770
  // if this is a backwards scan then dont clear current_scan_target and we
771
  // stay live
772
18.4E
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
773
18.4E
          << DocKey::DebugSliceToString(current_scan_target_);
774
775
18.4E
  VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target";
776
1.43M
  DCHECK(!FinishedWithScanChoices());
777
778
1.43M
  if (is_options_done_) {
779
      // It could be possible that we finished all our options but are not
780
      // done because we haven't hit the bound key yet. This would usually be
781
      // the case if we are moving onto the next hash key where we will
782
      // restart our range options.
783
208k
      const KeyBytes &bound_key = is_forward_scan_ ?
784
202k
                                    upper_doc_key_ : 
lower_doc_key_6.10k
;
785
208k
      finished_ = bound_key.empty() ? 
false192k
786
208k
                    : is_forward_scan_
787
15.9k
                        == (current_scan_target_.CompareTo(bound_key) >= 0);
788
18.4E
      VLOG(4) << "finished_ = " << finished_;
789
208k
  }
790
791
792
18.4E
  VLOG(4) << "current_scan_target_ is "
793
18.4E
          << DocKey::DebugSliceToString(current_scan_target_)
794
18.4E
          << " and prev_scan_target_ is "
795
18.4E
          << DocKey::DebugSliceToString(prev_scan_target_);
796
797
  // The below condition is either indicative of the special case
798
  // where IncrementScanTargetAtColumn didn't change the target due
799
  // to the case specified in the last section of the
800
  // documentation for IncrementScanTargetAtColumn or we have exhausted
801
  // all available range keys for the given hash key (indicated
802
  // by is_options_done_)
803
  // We clear the scan target in these cases to indicate that the
804
  // current_scan_target_ has been used and is invalid
805
  // In all other cases, IncrementScanTargetAtColumn has updated
806
  // current_scan_target_ to the new value that we want to seek to.
807
  // Hence, we shouldn't clear it in those cases
808
1.43M
  if ((prev_scan_target_ == current_scan_target_) || 
is_options_done_43.6k
) {
809
1.40M
      current_scan_target_.Clear();
810
1.40M
  }
811
812
1.43M
  return Status::OK();
813
1.43M
}
814
815
// Seeks the given iterator to the current target as specified by
816
// current_scan_target_ and prev_scan_target_ (relevant in backwards
817
// scans)
818
12.2M
Status HybridScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
819
18.4E
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
820
821
12.2M
  if (!FinishedWithScanChoices()) {
822
    // if current_scan_target_ is valid we use it to determine
823
    // what to seek to
824
12.2M
    if (!current_scan_target_.empty()) {
825
10.8M
      VLOG(3) << __PRETTY_FUNCTION__
826
0
              << " current_scan_target_ is non-empty. "
827
0
              << DocKey::DebugSliceToString(current_scan_target_);
828
10.8M
      if (is_forward_scan_) {
829
10.8M
        VLOG(3) << __PRETTY_FUNCTION__
830
0
                << " Seeking to "
831
0
                << DocKey::DebugSliceToString(current_scan_target_);
832
10.8M
        db_iter->Seek(current_scan_target_);
833
10.8M
      } else {
834
        // seek to the highest key <= current_scan_target_
835
        // seeking to the highest key < current_scan_target_ + kHighest
836
        // is equivalent to seeking to the highest key <=
837
        // current_scan_target_
838
150
        auto tmp = current_scan_target_;
839
150
        PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp);
840
150
        VLOG
(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp0
;
841
150
        db_iter->PrevDocKey(tmp);
842
150
      }
843
10.8M
    } else {
844
1.38M
      if (!is_forward_scan_ && 
!prev_scan_target_.empty()444k
) {
845
444k
        db_iter->PrevDocKey(prev_scan_target_);
846
444k
      }
847
1.38M
    }
848
12.2M
  }
849
850
12.2M
  return Status::OK();
851
12.2M
}
852
853
class RangeBasedScanChoices : public ScanChoices {
854
 public:
855
  RangeBasedScanChoices(const Schema& schema, const DocQLScanSpec& doc_spec)
856
0
      : ScanChoices(doc_spec.is_forward_scan()) {
857
0
    DCHECK(doc_spec.range_bounds());
858
0
    lower_.reserve(schema.num_range_key_columns());
859
0
    upper_.reserve(schema.num_range_key_columns());
860
0
    size_t idx = 0;
861
0
    for (idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) {
862
0
      const ColumnId col_idx = schema.column_id(idx);
863
0
      const auto col_sort_type = schema.column(idx).sorting_type();
864
0
      const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx);
865
0
      const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */);
866
0
      const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */);
867
0
      lower_.emplace_back(lower);
868
0
      upper_.emplace_back(upper);
869
0
    }
870
0
  }
871
872
  RangeBasedScanChoices(const Schema& schema, const DocPgsqlScanSpec& doc_spec)
873
0
      : ScanChoices(doc_spec.is_forward_scan()) {
874
0
    DCHECK(doc_spec.range_bounds());
875
0
    lower_.reserve(schema.num_range_key_columns());
876
0
    upper_.reserve(schema.num_range_key_columns());
877
0
    for (auto idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) {
878
0
      const ColumnId col_idx = schema.column_id(idx);
879
0
      const auto col_sort_type = schema.column(idx).sorting_type();
880
0
      const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx);
881
0
      const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */);
882
0
      const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */);
883
0
      lower_.emplace_back(lower);
884
0
      upper_.emplace_back(upper);
885
0
    }
886
0
  }
887
888
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
889
  CHECKED_STATUS DoneWithCurrentTarget() override;
890
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
891
892
 private:
893
  std::vector<PrimitiveValue> lower_, upper_;
894
  KeyBytes prev_scan_target_;
895
};
896
897
0
Status RangeBasedScanChoices::SkipTargetsUpTo(const Slice& new_target) {
898
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= "
899
0
          << DocKey::DebugSliceToString(new_target);
900
0
  DCHECK(!FinishedWithScanChoices());
901
902
  /*
903
   Let's say we have a row key with (A B) as the hash part and C, D as the range part:
904
   ((A B) C D) E F
905
906
   Let's say we have a range constraint :
907
    l_c < C < u_c
908
     4        6
909
910
    a b  0 d  -> a  b l_c  d
911
912
    a b  5 d  -> a  b  5   d
913
                  [ Will subsequently seek out of document on reading the subdoc]
914
915
    a b  7 d  -> a <b> MAX
916
                [ This will seek to <b_next> and on the next invocation update:
917
                   a <b_next> ? ? -> a <b_next> l_c d ]
918
  */
919
0
  DocKeyDecoder decoder(new_target);
920
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
921
0
  current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data()));
922
923
0
  size_t col_idx = 0;
924
0
  PrimitiveValue target_value;
925
0
  bool last_was_infinity = false;
926
0
  for (col_idx = 0; VERIFY_RESULT(decoder.HasPrimitiveValue()); col_idx++) {
927
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
928
0
    VLOG(3) << "col_idx " << col_idx << " is " << target_value << " in ["
929
0
            << yb::ToString(lower_[col_idx]) << " , " << yb::ToString(upper_[col_idx]) << " ] ?";
930
931
0
    const auto& lower = lower_[col_idx];
932
0
    if (target_value < lower) {
933
0
      const auto tgt = (is_forward_scan_ ? lower : PrimitiveValue(ValueType::kLowest));
934
0
      tgt.AppendToKey(&current_scan_target_);
935
0
      last_was_infinity = tgt.IsInfinity();
936
0
      VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt;
937
0
      break;
938
0
    }
939
0
    const auto& upper = upper_[col_idx];
940
0
    if (target_value > upper) {
941
0
      const auto tgt = (!is_forward_scan_ ? upper : PrimitiveValue(ValueType::kHighest));
942
0
      VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt;
943
0
      tgt.AppendToKey(&current_scan_target_);
944
0
      last_was_infinity = tgt.IsInfinity();
945
0
      break;
946
0
    }
947
0
    target_value.AppendToKey(&current_scan_target_);
948
0
    last_was_infinity = target_value.IsInfinity();
949
0
  }
950
951
  // Reset the remaining range columns to kHighest/lower for forward scans
952
  // or kLowest/upper for backward scans.
953
0
  while (++col_idx < lower_.size()) {
954
0
    if (last_was_infinity) {
955
      // No point having more components after +/- Inf.
956
0
      break;
957
0
    }
958
0
    if (is_forward_scan_) {
959
0
      VLOG(3) << " Updating col_idx " << col_idx << " to " << lower_[col_idx];
960
0
      lower_[col_idx].AppendToKey(&current_scan_target_);
961
0
      last_was_infinity = lower_[col_idx].IsInfinity();
962
0
    } else {
963
0
      VLOG(3) << " Updating col_idx " << col_idx << " to " << upper_[col_idx];
964
0
      upper_[col_idx].AppendToKey(&current_scan_target_);
965
0
      last_was_infinity = upper_[col_idx].IsInfinity();
966
0
    }
967
0
  }
968
0
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
969
0
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
970
0
          << DocKey::DebugSliceToString(current_scan_target_);
971
972
0
  return Status::OK();
973
0
}
974
975
0
Status RangeBasedScanChoices::DoneWithCurrentTarget() {
976
0
  prev_scan_target_ = current_scan_target_;
977
0
  current_scan_target_.Clear();
978
0
  return Status::OK();
979
0
}
980
981
0
Status RangeBasedScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
982
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
983
984
0
  if (!FinishedWithScanChoices()) {
985
0
    if (!current_scan_target_.empty()) {
986
0
      VLOG(3) << __PRETTY_FUNCTION__
987
0
              << " current_scan_target_ is non-empty. "
988
0
              << current_scan_target_;
989
0
      if (is_forward_scan_) {
990
0
        VLOG(3) << __PRETTY_FUNCTION__
991
0
                << " Seeking to "
992
0
                << DocKey::DebugSliceToString(current_scan_target_);
993
0
        db_iter->Seek(current_scan_target_);
994
0
      } else {
995
0
        auto tmp = current_scan_target_;
996
0
        PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp);
997
0
        VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp;  // Never seen.
998
0
        db_iter->PrevDocKey(tmp);
999
0
      }
1000
0
    } else {
1001
0
      if (!is_forward_scan_ && !prev_scan_target_.empty()) {
1002
0
        db_iter->PrevDocKey(prev_scan_target_);
1003
0
      }
1004
0
    }
1005
0
  }
1006
1007
0
  return Status::OK();
1008
0
}
1009
1010
DocRowwiseIterator::DocRowwiseIterator(
1011
    const Schema &projection,
1012
    const Schema &schema,
1013
    const TransactionOperationContext& txn_op_context,
1014
    const DocDB& doc_db,
1015
    CoarseTimePoint deadline,
1016
    const ReadHybridTime& read_time,
1017
    RWOperationCounter* pending_op_counter)
1018
    : projection_(projection),
1019
      schema_(schema),
1020
      txn_op_context_(txn_op_context),
1021
      deadline_(deadline),
1022
      read_time_(read_time),
1023
      doc_db_(doc_db),
1024
      has_bound_key_(false),
1025
      pending_op_(pending_op_counter),
1026
19.3M
      done_(false) {
1027
19.3M
  projection_subkeys_.reserve(projection.num_columns() + 1);
1028
19.3M
  projection_subkeys_.push_back(PrimitiveValue::kLivenessColumn);
1029
70.4M
  for (size_t i = projection_.num_key_columns(); i < projection.num_columns(); 
i++51.0M
) {
1030
51.0M
    projection_subkeys_.emplace_back(projection.column_id(i));
1031
51.0M
  }
1032
19.3M
  std::sort(projection_subkeys_.begin(), projection_subkeys_.end());
1033
19.3M
}
1034
1035
19.4M
DocRowwiseIterator::~DocRowwiseIterator() {
1036
19.4M
}
1037
1038
420k
Status DocRowwiseIterator::Init(TableType table_type, const Slice& sub_doc_key) {
1039
420k
  db_iter_ = CreateIntentAwareIterator(
1040
420k
      doc_db_,
1041
420k
      BloomFilterMode::DONT_USE_BLOOM_FILTER,
1042
420k
      boost::none /* user_key_for_filter */,
1043
420k
      rocksdb::kDefaultQueryId,
1044
420k
      txn_op_context_,
1045
420k
      deadline_,
1046
420k
      read_time_);
1047
420k
  if (!sub_doc_key.empty()) {
1048
0
    row_key_ = sub_doc_key;
1049
420k
  } else {
1050
420k
    DocKeyEncoder(&iter_key_).Schema(schema_);
1051
420k
    row_key_ = iter_key_;
1052
420k
  }
1053
420k
  row_hash_key_ = row_key_;
1054
420k
  VLOG
(3) << __PRETTY_FUNCTION__ << " Seeking to " << row_key_408
;
1055
420k
  db_iter_->Seek(row_key_);
1056
420k
  row_ready_ = false;
1057
420k
  has_bound_key_ = false;
1058
420k
  if (table_type == TableType::PGSQL_TABLE_TYPE) {
1059
12
    ignore_ttl_ = true;
1060
12
  }
1061
1062
420k
  return Status::OK();
1063
420k
}
1064
1065
Result<bool> DocRowwiseIterator::InitScanChoices(
1066
7.30M
    const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) {
1067
1068
7.30M
  if (!FLAGS_disable_hybrid_scan) {
1069
7.30M
    if (
doc_spec.range_options()7.28M
|| doc_spec.range_bounds()) {
1070
62.1k
        scan_choices_.reset(new HybridScanChoices(schema_, doc_spec,
1071
62.1k
                                    lower_doc_key, upper_doc_key));
1072
62.1k
    }
1073
1074
7.28M
    return false;
1075
7.28M
  }
1076
1077
10.6k
  if (doc_spec.range_options()) {
1078
0
    scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key));
1079
    // Let's not seek to the lower doc key or upper doc key. We know exactly what we want.
1080
0
    RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow());
1081
0
    return true;
1082
0
  }
1083
1084
10.6k
  if (doc_spec.range_bounds()) {
1085
0
    scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec));
1086
0
  }
1087
1088
10.6k
  return false;
1089
10.6k
}
1090
1091
Result<bool> DocRowwiseIterator::InitScanChoices(
1092
    const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key,
1093
11.6M
    const KeyBytes& upper_doc_key) {
1094
1095
11.6M
  if (!FLAGS_disable_hybrid_scan) {
1096
11.6M
    if (doc_spec.range_options() || 
doc_spec.range_bounds()11.6M
) {
1097
13.2k
        scan_choices_.reset(new HybridScanChoices(schema_, doc_spec,
1098
13.2k
                                    lower_doc_key, upper_doc_key));
1099
13.2k
    }
1100
1101
11.6M
    return false;
1102
11.6M
  }
1103
1104
1.06k
  if (doc_spec.range_options()) {
1105
0
    scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key));
1106
    // Let's not seek to the lower doc key or upper doc key. We know exactly what we want.
1107
0
    RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow());
1108
0
    return true;
1109
0
  }
1110
1111
1.06k
  if (doc_spec.range_bounds()) {
1112
0
    scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec));
1113
0
  }
1114
1115
1.06k
  return false;
1116
1.06k
}
1117
1118
template <class T>
1119
18.9M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1120
18.9M
  is_forward_scan_ = doc_spec.is_forward_scan();
1121
1122
18.9M
  VLOG
(4) << "Initializing iterator direction: " << (37.6k
is_forward_scan_37.6k
?
"FORWARD"0
:
"BACKWARD"37.6k
);
1123
1124
18.9M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1125
18.9M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1126
18.4E
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1127
18.4E
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1128
1129
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1130
18.9M
  const bool is_fixed_point_get =
1131
18.9M
      !lower_doc_key.empty() &&
1132
18.9M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1133
18.9M
  const auto mode = is_fixed_point_get ? 
BloomFilterMode::USE_BLOOM_FILTER18.1M
1134
18.9M
                                       : 
BloomFilterMode::DONT_USE_BLOOM_FILTER808k
;
1135
1136
18.9M
  db_iter_ = CreateIntentAwareIterator(
1137
18.9M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1138
18.9M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1139
1140
18.9M
  row_ready_ = false;
1141
1142
19.0M
  if (
is_forward_scan_18.9M
) {
1143
19.0M
    has_bound_key_ = !upper_doc_key.empty();
1144
19.0M
    if (has_bound_key_) {
1145
18.9M
      bound_key_ = std::move(upper_doc_key);
1146
18.9M
      db_iter_->SetUpperbound(bound_key_);
1147
18.9M
    }
1148
18.4E
  } else {
1149
18.4E
    has_bound_key_ = !lower_doc_key.empty();
1150
18.4E
    if (has_bound_key_) {
1151
8.22k
      bound_key_ = std::move(lower_doc_key);
1152
8.22k
    }
1153
18.4E
  }
1154
1155
18.9M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1156
18.9M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1157
18.9M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1158
18.9M
    if (is_forward_scan_) {
1159
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1160
18.9M
      db_iter_->Seek(lower_doc_key);
1161
18.9M
    } else {
1162
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1163
8.21k
      if (
!upper_doc_key.empty()8.17k
) {
1164
8.21k
        db_iter_->PrevDocKey(upper_doc_key);
1165
18.4E
      } else {
1166
18.4E
        db_iter_->SeekToLastDocKey();
1167
18.4E
      }
1168
8.17k
    }
1169
18.9M
  }
1170
1171
0
  return Status::OK();
1172
18.9M
}
yb::Status yb::docdb::DocRowwiseIterator::DoInit<yb::docdb::DocQLScanSpec>(yb::docdb::DocQLScanSpec const&)
Line
Count
Source
1119
7.31M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1120
7.31M
  is_forward_scan_ = doc_spec.is_forward_scan();
1121
1122
7.31M
  VLOG
(4) << "Initializing iterator direction: " << (35.0k
is_forward_scan_35.0k
?
"FORWARD"0
:
"BACKWARD"35.0k
);
1123
1124
7.31M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1125
7.31M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1126
18.4E
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1127
18.4E
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1128
1129
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1130
7.31M
  const bool is_fixed_point_get =
1131
7.31M
      !lower_doc_key.empty() &&
1132
7.31M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1133
7.31M
  const auto mode = is_fixed_point_get ? 
BloomFilterMode::USE_BLOOM_FILTER7.24M
1134
7.31M
                                       : 
BloomFilterMode::DONT_USE_BLOOM_FILTER71.6k
;
1135
1136
7.31M
  db_iter_ = CreateIntentAwareIterator(
1137
7.31M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1138
7.31M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1139
1140
7.31M
  row_ready_ = false;
1141
1142
7.31M
  if (
is_forward_scan_7.31M
) {
1143
7.31M
    has_bound_key_ = !upper_doc_key.empty();
1144
7.31M
    if (has_bound_key_) {
1145
7.21M
      bound_key_ = std::move(upper_doc_key);
1146
7.21M
      db_iter_->SetUpperbound(bound_key_);
1147
7.21M
    }
1148
18.4E
  } else {
1149
18.4E
    has_bound_key_ = !lower_doc_key.empty();
1150
18.4E
    if (has_bound_key_) {
1151
8.12k
      bound_key_ = std::move(lower_doc_key);
1152
8.12k
    }
1153
18.4E
  }
1154
1155
7.31M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1156
7.31M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1157
7.31M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1158
7.28M
    if (is_forward_scan_) {
1159
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1160
7.28M
      db_iter_->Seek(lower_doc_key);
1161
7.28M
    } else {
1162
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1163
8.21k
      if (!upper_doc_key.empty()) {
1164
8.12k
        db_iter_->PrevDocKey(upper_doc_key);
1165
8.12k
      } else {
1166
95
        db_iter_->SeekToLastDocKey();
1167
95
      }
1168
8.21k
    }
1169
7.28M
  }
1170
1171
0
  return Status::OK();
1172
7.31M
}
yb::Status yb::docdb::DocRowwiseIterator::DoInit<yb::docdb::DocPgsqlScanSpec>(yb::docdb::DocPgsqlScanSpec const&)
Line
Count
Source
1119
11.6M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1120
11.6M
  is_forward_scan_ = doc_spec.is_forward_scan();
1121
1122
11.6M
  VLOG
(4) << "Initializing iterator direction: " << (2.67k
is_forward_scan_2.67k
?
"FORWARD"0
:
"BACKWARD"2.67k
);
1123
1124
11.6M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1125
11.6M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1126
18.4E
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1127
18.4E
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1128
1129
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1130
11.6M
  const bool is_fixed_point_get =
1131
11.6M
      !lower_doc_key.empty() &&
1132
11.6M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1133
11.6M
  const auto mode = is_fixed_point_get ? 
BloomFilterMode::USE_BLOOM_FILTER10.9M
1134
11.6M
                                       : 
BloomFilterMode::DONT_USE_BLOOM_FILTER736k
;
1135
1136
11.6M
  db_iter_ = CreateIntentAwareIterator(
1137
11.6M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1138
11.6M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1139
1140
11.6M
  row_ready_ = false;
1141
1142
11.6M
  if (
is_forward_scan_11.6M
) {
1143
11.6M
    has_bound_key_ = !upper_doc_key.empty();
1144
11.6M
    if (has_bound_key_) {
1145
11.6M
      bound_key_ = std::move(upper_doc_key);
1146
11.6M
      db_iter_->SetUpperbound(bound_key_);
1147
11.6M
    }
1148
18.4E
  } else {
1149
18.4E
    has_bound_key_ = !lower_doc_key.empty();
1150
18.4E
    if (has_bound_key_) {
1151
96
      bound_key_ = std::move(lower_doc_key);
1152
96
    }
1153
18.4E
  }
1154
1155
11.6M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1156
11.6M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1157
11.6M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1158
11.6M
    if (
is_forward_scan_11.6M
) {
1159
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1160
11.6M
      db_iter_->Seek(lower_doc_key);
1161
18.4E
    } else {
1162
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1163
18.4E
      if (!upper_doc_key.empty()) {
1164
96
        db_iter_->PrevDocKey(upper_doc_key);
1165
18.4E
      } else {
1166
18.4E
        db_iter_->SeekToLastDocKey();
1167
18.4E
      }
1168
18.4E
    }
1169
11.6M
  }
1170
1171
0
  return Status::OK();
1172
11.6M
}
1173
1174
7.33M
Status DocRowwiseIterator::Init(const QLScanSpec& spec) {
1175
7.33M
  return DoInit(dynamic_cast<const DocQLScanSpec&>(spec));
1176
7.33M
}
1177
1178
11.6M
Status DocRowwiseIterator::Init(const PgsqlScanSpec& spec) {
1179
11.6M
  ignore_ttl_ = true;
1180
11.6M
  return DoInit(dynamic_cast<const DocPgsqlScanSpec&>(spec));
1181
11.6M
}
1182
1183
82.9M
Status DocRowwiseIterator::AdvanceIteratorToNextDesiredRow() const {
1184
82.9M
  if (scan_choices_) {
1185
1.43M
    if (!IsNextStaticColumn()
1186
1.43M
        && 
!scan_choices_->CurrentTargetMatchesKey(row_key_)1.43M
) {
1187
1.43M
      return scan_choices_->SeekToCurrentTarget(db_iter_.get());
1188
1.43M
    }
1189
81.4M
  } else {
1190
81.4M
    if (!is_forward_scan_) {
1191
941
      VLOG
(4) << __PRETTY_FUNCTION__ << " setting as PrevDocKey"0
;
1192
941
      db_iter_->PrevDocKey(row_key_);
1193
941
    }
1194
81.4M
  }
1195
1196
81.4M
  return Status::OK();
1197
82.9M
}
1198
1199
93.6M
Result<bool> DocRowwiseIterator::HasNext() const {
1200
93.6M
  VLOG
(4) << __PRETTY_FUNCTION__6.66k
;
1201
1202
  // Repeated HasNext calls (without Skip/NextRow in between) should be idempotent:
1203
  // 1. If a previous call failed we returned the same status.
1204
  // 2. If a row is already available (row_ready_), return true directly.
1205
  // 3. If we finished all target rows for the scan (done_), return false directly.
1206
93.6M
  RETURN_NOT_OK(has_next_status_);
1207
93.6M
  if (row_ready_) {
1208
    // If row is ready, then HasNext returns true.
1209
201
    return true;
1210
201
  }
1211
93.6M
  if (done_) {
1212
1.49k
    return false;
1213
1.49k
  }
1214
1215
93.6M
  bool doc_found = false;
1216
187M
  while (!doc_found) {
1217
105M
    if (!db_iter_->valid() || 
(93.7M
scan_choices_93.7M
&&
scan_choices_->FinishedWithScanChoices()12.3M
)) {
1218
11.5M
      done_ = true;
1219
11.5M
      return false;
1220
11.5M
    }
1221
1222
93.8M
    const auto key_data = db_iter_->FetchKey();
1223
93.8M
    if (!key_data.ok()) {
1224
0
      has_next_status_ = key_data.status();
1225
0
      return has_next_status_;
1226
0
    }
1227
1228
93.8M
    VLOG
(4) << "*fetched_key is " << SubDocKey::DebugSliceToString(key_data->key)67.6k
;
1229
93.8M
    if (debug_dump_) {
1230
0
      LOG(INFO) << __func__ << ", fetched key: " << SubDocKey::DebugSliceToString(key_data->key)
1231
0
                << ", " << key_data->key.ToDebugHexString();
1232
0
    }
1233
1234
    // The iterator is positioned by the previous GetSubDocument call (which places the iterator
1235
    // outside the previous doc_key). Ensure the iterator is pushed forward/backward indeed. We
1236
    // check it here instead of after GetSubDocument() below because we want to avoid the extra
1237
    // expensive FetchKey() call just to fetch and validate the key.
1238
93.8M
    if (!iter_key_.data().empty() &&
1239
93.8M
        
(79.7M
is_forward_scan_79.7M
?
iter_key_.CompareTo(key_data->key) >= 079.2M
1240
79.7M
                          : 
iter_key_.CompareTo(key_data->key) <= 0445k
)) {
1241
      // TODO -- could turn this check off in TPCC?
1242
0
      has_next_status_ = STATUS_SUBSTITUTE(Corruption, "Infinite loop detected at $0",
1243
0
                                           FormatSliceAsStr(key_data->key));
1244
0
      return has_next_status_;
1245
0
    }
1246
93.8M
    iter_key_.Reset(key_data->key);
1247
93.8M
    VLOG
(4) << " Current iter_key_ is " << iter_key_44.9k
;
1248
1249
93.8M
    const auto dockey_sizes = DocKey::EncodedHashPartAndDocKeySizes(iter_key_);
1250
93.8M
    if (!dockey_sizes.ok()) {
1251
0
      has_next_status_ = dockey_sizes.status();
1252
0
      return has_next_status_;
1253
0
    }
1254
93.8M
    row_hash_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->first);
1255
93.8M
    row_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->second);
1256
1257
93.8M
    if (!DocKeyBelongsTo(row_key_, schema_) || // e.g in cotable, row may point outside table bounds
1258
93.8M
        
(93.7M
has_bound_key_93.7M
&&
is_forward_scan_ == (row_key_.compare(bound_key_) >= 0)84.4M
)) {
1259
20.3k
      done_ = true;
1260
20.3k
      return false;
1261
20.3k
    }
1262
1263
    // Prepare the DocKey to get the SubDocument. Trim the DocKey to contain just the primary key.
1264
93.8M
    Slice sub_doc_key = row_key_;
1265
93.8M
    VLOG
(4) << " sub_doc_key part of iter_key_ is " << DocKey::DebugSliceToString(sub_doc_key)62.5k
;
1266
1267
93.8M
    bool is_static_column = IsNextStaticColumn();
1268
93.8M
    if (scan_choices_ && 
!is_static_column12.2M
) {
1269
12.2M
      if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) {
1270
        // We must have seeked past the target key we are looking for (no result) so we can safely
1271
        // skip all scan targets between the current target and row key (excluding row_key_ itself).
1272
        // Update the target key and iterator and call HasNext again to try the next target.
1273
12.2M
        RETURN_NOT_OK(scan_choices_->SkipTargetsUpTo(row_key_));
1274
1275
        // We updated scan target above, if it goes past the row_key_ we will seek again, and
1276
        // process the found key in the next loop.
1277
12.2M
        if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) {
1278
10.8M
          RETURN_NOT_OK(scan_choices_->SeekToCurrentTarget(db_iter_.get()));
1279
10.8M
          continue;
1280
10.8M
        }
1281
12.2M
      }
1282
      // We found a match for the target key or a static column, so we move on to getting the
1283
      // SubDocument.
1284
12.2M
    }
1285
82.9M
    if (doc_reader_ == nullptr) {
1286
13.5M
      doc_reader_ = std::make_unique<DocDBTableReader>(db_iter_.get(), deadline_);
1287
13.5M
      RETURN_NOT_OK(doc_reader_->UpdateTableTombstoneTime(sub_doc_key));
1288
13.5M
      if (!ignore_ttl_) {
1289
7.55M
        doc_reader_->SetTableTtl(schema_);
1290
7.55M
      }
1291
13.5M
    }
1292
1293
82.9M
    row_ = SubDocument();
1294
82.9M
    auto doc_found_res = doc_reader_->Get(sub_doc_key, &projection_subkeys_, &row_);
1295
82.9M
    if (!doc_found_res.ok()) {
1296
0
      has_next_status_ = doc_found_res.status();
1297
0
      return has_next_status_;
1298
82.9M
    } else {
1299
82.9M
      doc_found = *doc_found_res;
1300
82.9M
    }
1301
82.9M
    if (scan_choices_ && 
!is_static_column1.43M
) {
1302
1.43M
      has_next_status_ = scan_choices_->DoneWithCurrentTarget();
1303
1.43M
      RETURN_NOT_OK(has_next_status_);
1304
1.43M
    }
1305
82.9M
    has_next_status_ = AdvanceIteratorToNextDesiredRow();
1306
82.9M
    RETURN_NOT_OK(has_next_status_);
1307
82.9M
  }
1308
82.1M
  row_ready_ = true;
1309
82.1M
  return true;
1310
93.6M
}
1311
1312
0
string DocRowwiseIterator::ToString() const {
1313
0
  return "DocRowwiseIterator";
1314
0
}
1315
1316
namespace {
1317
1318
// Set primary key column values (hashed or range columns) in a QL row value map.
1319
CHECKED_STATUS SetQLPrimaryKeyColumnValues(const Schema& schema,
1320
                                           const size_t begin_index,
1321
                                           const size_t column_count,
1322
                                           const char* column_type,
1323
                                           DocKeyDecoder* decoder,
1324
95.0M
                                           QLTableRow* table_row) {
1325
95.0M
  if (begin_index + column_count > schema.num_columns()) {
1326
0
    return STATUS_SUBSTITUTE(
1327
0
        Corruption,
1328
0
        "$0 primary key columns between positions $1 and $2 go beyond table columns $3",
1329
0
        column_type, begin_index, begin_index + column_count - 1, schema.num_columns());
1330
0
  }
1331
95.0M
  PrimitiveValue primitive_value;
1332
241M
  for (size_t i = 0, j = begin_index; i < column_count; 
i++, j++146M
) {
1333
146M
    const auto ql_type = schema.column(j).type();
1334
146M
    QLTableColumn& column = table_row->AllocColumn(schema.column_id(j));
1335
146M
    RETURN_NOT_OK(decoder->DecodePrimitiveValue(&primitive_value));
1336
146M
    PrimitiveValue::ToQLValuePB(primitive_value, ql_type, &column.value);
1337
146M
  }
1338
95.0M
  return decoder->ConsumeGroupEnd();
1339
95.0M
}
1340
1341
} // namespace
1342
1343
161k
void DocRowwiseIterator::SkipRow() {
1344
161k
  row_ready_ = false;
1345
161k
}
1346
1347
16.9M
HybridTime DocRowwiseIterator::RestartReadHt() {
1348
16.9M
  auto max_seen_ht = db_iter_->max_seen_ht();
1349
16.9M
  if (max_seen_ht.is_valid() && 
max_seen_ht > db_iter_->read_time().read16.9M
) {
1350
1.87k
    VLOG
(4) << "Restart read: " << max_seen_ht << ", original: " << db_iter_->read_time()0
;
1351
1.87k
    return max_seen_ht;
1352
1.87k
  }
1353
16.9M
  return HybridTime::kInvalid;
1354
16.9M
}
1355
1356
104M
bool DocRowwiseIterator::IsNextStaticColumn() const {
1357
104M
  return schema_.has_statics() && 
row_hash_key_.end() + 1 == row_key_.end()1.80k
;
1358
104M
}
1359
1360
78.0M
Status DocRowwiseIterator::DoNextRow(const Schema& projection, QLTableRow* table_row) {
1361
78.0M
  VLOG
(4) << __PRETTY_FUNCTION__5.86k
;
1362
1363
78.0M
  if (PREDICT_FALSE(done_)) {
1364
0
    return STATUS(NotFound, "end of iter");
1365
0
  }
1366
1367
  // Ensure row is ready to be read. HasNext() must be called before reading the first row, or
1368
  // again after the previous row has been read or skipped.
1369
78.0M
  if (!row_ready_) {
1370
0
    return STATUS(InternalError, "next row has not be prepared for reading");
1371
0
  }
1372
1373
78.0M
  DocKeyDecoder decoder(row_key_);
1374
78.0M
  RETURN_NOT_OK(decoder.DecodeCotableId());
1375
78.0M
  RETURN_NOT_OK(decoder.DecodeColocationId());
1376
78.0M
  bool has_hash_components = VERIFY_RESULT(decoder.DecodeHashCode());
1377
1378
  // Populate the key column values from the doc key. The key column values in doc key were
1379
  // written in the same order as in the table schema (see DocKeyFromQLKey). If the range columns
1380
  // are present, read them also.
1381
78.0M
  if (has_hash_components) {
1382
31.7M
    RETURN_NOT_OK(SetQLPrimaryKeyColumnValues(
1383
31.7M
        schema_, 0, schema_.num_hash_key_columns(),
1384
31.7M
        "hash", &decoder, table_row));
1385
31.7M
  }
1386
78.0M
  if (!decoder.GroupEnded()) {
1387
63.3M
    RETURN_NOT_OK(SetQLPrimaryKeyColumnValues(
1388
63.3M
        schema_, schema_.num_hash_key_columns(), schema_.num_range_key_columns(),
1389
63.3M
        "range", &decoder, table_row));
1390
63.3M
  }
1391
1392
679M
  
for (size_t i = projection.num_key_columns(); 78.0M
i < projection.num_columns();
i++601M
) {
1393
601M
    const auto& column_id = projection.column_id(i);
1394
601M
    const auto ql_type = projection.column(i).type();
1395
601M
    const SubDocument* column_value = row_.GetChild(PrimitiveValue(column_id));
1396
601M
    if (column_value != nullptr) {
1397
599M
      QLTableColumn& column = table_row->AllocColumn(column_id);
1398
599M
      SubDocument::ToQLValuePB(*column_value, ql_type, &column.value);
1399
599M
      column.ttl_seconds = column_value->GetTtl();
1400
599M
      if (column_value->IsWriteTimeSet()) {
1401
532M
        column.write_time = column_value->GetWriteTime();
1402
532M
      }
1403
599M
    }
1404
601M
  }
1405
1406
78.0M
  row_ready_ = false;
1407
78.0M
  return Status::OK();
1408
78.0M
}
1409
1410
1.26k
bool DocRowwiseIterator::LivenessColumnExists() const {
1411
1.26k
  const SubDocument* subdoc = row_.GetChild(PrimitiveValue::kLivenessColumn);
1412
1.26k
  return subdoc != nullptr && subdoc->value_type() != ValueType::kInvalid;
1413
1.26k
}
1414
1415
7.21M
CHECKED_STATUS DocRowwiseIterator::GetNextReadSubDocKey(SubDocKey* sub_doc_key) const {
1416
7.21M
  if (db_iter_ == nullptr) {
1417
0
    return STATUS(Corruption, "Iterator not initialized.");
1418
0
  }
1419
1420
  // There are no more rows to fetch, so no next SubDocKey to read.
1421
7.21M
  if (!VERIFY_RESULT(HasNext())) {
1422
18.4E
    DVLOG(3) << "No Next SubDocKey";
1423
3.36M
    return Status::OK();
1424
3.36M
  }
1425
1426
3.84M
  DocKey doc_key;
1427
3.84M
  RETURN_NOT_OK(doc_key.FullyDecodeFrom(row_key_));
1428
3.84M
  *sub_doc_key = SubDocKey(doc_key, read_time_.read);
1429
18.4E
  DVLOG(3) << "Next SubDocKey: " << sub_doc_key->ToString();
1430
3.84M
  return Status::OK();
1431
3.84M
}
1432
1433
42.4M
Result<Slice> DocRowwiseIterator::GetTupleId() const {
1434
  // Return tuple id without cotable id / colocation id if any.
1435
42.4M
  Slice tuple_id = row_key_;
1436
42.4M
  if (tuple_id.starts_with(ValueTypeAsChar::kTableId)) {
1437
36.2M
    tuple_id.remove_prefix(1 + kUuidSize);
1438
36.2M
  } else 
if (6.22M
tuple_id.starts_with(ValueTypeAsChar::kColocationId)6.22M
) {
1439
152
    tuple_id.remove_prefix(1 + sizeof(ColocationId));
1440
152
  }
1441
42.4M
  return tuple_id;
1442
42.4M
}
1443
1444
1.13M
Result<bool> DocRowwiseIterator::SeekTuple(const Slice& tuple_id) {
1445
  // If cotable id / colocation id is present in the table schema, then
1446
  // we need to prepend it in the tuple key to seek.
1447
1.13M
  if (schema_.has_cotable_id() || 
schema_.has_colocation_id()136
) {
1448
1.13M
    uint32_t size = schema_.has_colocation_id() ? 
sizeof(ColocationId)39
:
kUuidSize1.13M
;
1449
1.13M
    if (!tuple_key_) {
1450
259k
      tuple_key_.emplace();
1451
259k
      tuple_key_->Reserve(1 + size + tuple_id.size());
1452
1453
259k
      if (schema_.has_cotable_id()) {
1454
259k
        std::string bytes;
1455
259k
        schema_.cotable_id().EncodeToComparable(&bytes);
1456
259k
        tuple_key_->AppendValueType(ValueType::kTableId);
1457
259k
        tuple_key_->AppendRawBytes(bytes);
1458
259k
      } else {
1459
6
        tuple_key_->AppendValueType(ValueType::kColocationId);
1460
6
        tuple_key_->AppendUInt32(schema_.colocation_id());
1461
6
      }
1462
877k
    } else {
1463
877k
      tuple_key_->Truncate(1 + size);
1464
877k
    }
1465
1.13M
    tuple_key_->AppendRawBytes(tuple_id);
1466
1.13M
    db_iter_->Seek(*tuple_key_);
1467
1.13M
  } else {
1468
119
    db_iter_->Seek(tuple_id);
1469
119
  }
1470
1471
1.13M
  iter_key_.Clear();
1472
1.13M
  row_ready_ = false;
1473
1474
2.27M
  return 
VERIFY_RESULT1.13M
(HasNext()) &&
VERIFY_RESULT1.13M
(GetTupleId()) == tuple_id;
1475
1.13M
}
1476
1477
}  // namespace docdb
1478
}  // namespace yb