YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_rowwise_iterator.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_rowwise_iterator.h"
15
#include <iterator>
16
17
#include <cstdint>
18
#include <ostream>
19
#include <string>
20
#include <vector>
21
22
#include "yb/common/common.pb.h"
23
#include "yb/common/doc_hybrid_time.h"
24
#include "yb/common/hybrid_time.h"
25
#include "yb/common/ql_expr.h"
26
#include "yb/common/ql_scanspec.h"
27
#include "yb/common/ql_value.h"
28
#include "yb/common/read_hybrid_time.h"
29
#include "yb/common/transaction.h"
30
31
#include "yb/docdb/docdb_fwd.h"
32
#include "yb/docdb/doc_key.h"
33
#include "yb/docdb/doc_path.h"
34
#include "yb/docdb/doc_ql_scanspec.h"
35
#include "yb/docdb/doc_reader.h"
36
#include "yb/docdb/doc_scanspec_util.h"
37
#include "yb/docdb/docdb_rocksdb_util.h"
38
#include "yb/docdb/docdb_types.h"
39
#include "yb/docdb/expiration.h"
40
#include "yb/docdb/intent_aware_iterator.h"
41
#include "yb/docdb/primitive_value.h"
42
#include "yb/docdb/subdocument.h"
43
#include "yb/docdb/value.h"
44
#include "yb/docdb/value_type.h"
45
46
#include "yb/gutil/strings/substitute.h"
47
#include "yb/util/flags.h"
48
#include "yb/rocksdb/db/compaction.h"
49
#include "yb/rocksutil/yb_rocksdb.h"
50
51
#include "yb/rocksdb/db.h"
52
53
#include "yb/util/flag_tags.h"
54
#include "yb/util/result.h"
55
#include "yb/util/status.h"
56
#include "yb/util/status_format.h"
57
#include "yb/util/status_log.h"
58
#include "yb/util/strongly_typed_bool.h"
59
60
DEFINE_bool(disable_hybrid_scan, false,
61
            "If true, hybrid scan will be disabled");
62
TAG_FLAG(disable_hybrid_scan, runtime);
63
64
using std::string;
65
66
namespace yb {
67
namespace docdb {
68
69
class ScanChoices {
70
 public:
71
43.5k
  explicit ScanChoices(bool is_forward_scan) : is_forward_scan_(is_forward_scan) {}
72
43.5k
  virtual ~ScanChoices() {}
73
74
22.8M
  bool CurrentTargetMatchesKey(const Slice& curr) {
75
18.4E
    VLOG(3) << __PRETTY_FUNCTION__ << " checking if acceptable ? "
76
18.4E
            << (curr == current_scan_target_ ? "YEP" : "NOPE")
77
18.4E
            << ": " << DocKey::DebugSliceToString(curr)
78
18.4E
            << " vs " << DocKey::DebugSliceToString(current_scan_target_.AsSlice());
79
22.8M
    return curr == current_scan_target_;
80
22.8M
  }
81
82
  // Returns false if there are still target keys we need to scan, and true if we are done.
83
33.8M
  virtual bool FinishedWithScanChoices() const { return finished_; }
84
85
  // Go to the next scan target if any.
86
  virtual CHECKED_STATUS DoneWithCurrentTarget() = 0;
87
88
  // Go (directly) to the new target (or the one after if new_target does not
89
  // exist in the desired list/range). If the new_target is larger than all scan target options it
90
  // means we are done.
91
  virtual CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) = 0;
92
93
  // If the given doc_key isn't already at the desired target, seek appropriately to go to the
94
  // current target.
95
  virtual CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) = 0;
96
97
 protected:
98
  const bool is_forward_scan_;
99
  KeyBytes current_scan_target_;
100
  bool finished_ = false;
101
};
102
103
class DiscreteScanChoices : public ScanChoices {
104
 public:
105
  DiscreteScanChoices(const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key,
106
                      const KeyBytes& upper_doc_key)
107
0
      : ScanChoices(doc_spec.is_forward_scan()) {
108
0
    range_cols_scan_options_ = doc_spec.range_options();
109
0
    current_scan_target_idxs_.resize(range_cols_scan_options_->size());
110
0
    for (size_t i = 0; i < range_cols_scan_options_->size(); i++) {
111
0
      current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin();
112
0
    }
113
114
    // Initialize target doc key.
115
0
    if (is_forward_scan_) {
116
0
      current_scan_target_ = lower_doc_key;
117
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
118
0
        CHECK_OK(SkipTargetsUpTo(lower_doc_key));
119
0
      }
120
0
    } else {
121
0
      current_scan_target_ = upper_doc_key;
122
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
123
0
        CHECK_OK(SkipTargetsUpTo(upper_doc_key));
124
0
      }
125
0
    }
126
0
  }
127
128
  DiscreteScanChoices(const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key,
129
                      const KeyBytes& upper_doc_key)
130
0
      : ScanChoices(doc_spec.is_forward_scan()) {
131
0
    range_cols_scan_options_ = doc_spec.range_options();
132
0
    current_scan_target_idxs_.resize(range_cols_scan_options_->size());
133
0
    for (size_t i = 0; i < range_cols_scan_options_->size(); i++) {
134
0
      current_scan_target_idxs_[i] = range_cols_scan_options_->at(i).begin();
135
0
    }
136
137
    // Initialize target doc key.
138
0
    if (is_forward_scan_) {
139
0
      current_scan_target_ = lower_doc_key;
140
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
141
0
        CHECK_OK(SkipTargetsUpTo(lower_doc_key));
142
0
      }
143
0
    } else {
144
0
      current_scan_target_ = upper_doc_key;
145
0
      if (CHECK_RESULT(ClearRangeComponents(&current_scan_target_))) {
146
0
        CHECK_OK(SkipTargetsUpTo(upper_doc_key));
147
0
      }
148
0
    }
149
0
  }
150
151
  CHECKED_STATUS DoneWithCurrentTarget() override;
152
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
153
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
154
155
 protected:
156
  // Utility function for (multi)key scans. Updates the target scan key by incrementing the option
157
  // index for one column. Will handle overflow by setting current column index to 0 and
158
  // incrementing the previous column instead. If it overflows at first column it means we are done,
159
  // so it clears the scan target idxs array.
160
  CHECKED_STATUS IncrementScanTargetAtColumn(size_t start_col);
161
162
  // Utility function for (multi)key scans to initialize the range portion of the current scan
163
  // target, scan target with the first option.
164
  // Only needed for scans that include the static row, otherwise Init will take care of this.
165
  Result<bool> InitScanTargetRangeGroupIfNeeded();
166
167
 private:
168
  // For (multi)key scans (e.g. selects with 'IN' condition on the range columns) we hold the
169
  // options for each range column as we iteratively seek to each target key.
170
  // e.g. for a query "h = 1 and r1 in (2,3) and r2 in (4,5) and r3 = 6":
171
  //  range_cols_scan_options_   [[2, 3], [4, 5], [6]] -- value options for each column.
172
  //  current_scan_target_idxs_  goes from [0, 0, 0] up to [1, 1, 0] -- except when including the
173
  //                             static row when it starts from [0, 0, -1] instead.
174
  //  current_scan_target_       goes from [1][2,4,6] up to [1][3,5,6] -- is the doc key containing,
175
  //                             for each range column, the value (option) referenced by the
176
  //                             corresponding index (updated along with current_scan_target_idxs_).
177
  std::shared_ptr<std::vector<std::vector<PrimitiveValue>>> range_cols_scan_options_;
178
  mutable std::vector<std::vector<PrimitiveValue>::const_iterator> current_scan_target_idxs_;
179
};
180
181
0
Status DiscreteScanChoices::IncrementScanTargetAtColumn(size_t start_col) {
182
0
  DCHECK_LE(start_col, current_scan_target_idxs_.size());
183
184
  // Increment start col, move backwards in case of overflow.
185
0
  ssize_t col_idx = start_col;
186
0
  for (; col_idx >= 0; col_idx--) {
187
0
    const auto& choices = (*range_cols_scan_options_)[col_idx];
188
0
    auto& it = current_scan_target_idxs_[col_idx];
189
190
0
    if (++it != choices.end()) {
191
0
      break;
192
0
    }
193
0
    it = choices.begin();
194
0
  }
195
196
0
  if (col_idx < 0) {
197
    // If we got here we finished all the options and are done.
198
0
    finished_ = true;
199
0
    return Status::OK();
200
0
  }
201
202
0
  DocKeyDecoder decoder(current_scan_target_);
203
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
204
0
  for (int i = 0; i != col_idx; ++i) {
205
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue());
206
0
  }
207
208
0
  current_scan_target_.Truncate(
209
0
      decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata());
210
211
0
  for (size_t i = col_idx; i <= start_col; ++i) {
212
0
    current_scan_target_idxs_[i]->AppendToKey(&current_scan_target_);
213
0
  }
214
215
0
  return Status::OK();
216
0
}
217
218
0
Result<bool> DiscreteScanChoices::InitScanTargetRangeGroupIfNeeded() {
219
0
  DocKeyDecoder decoder(current_scan_target_.AsSlice());
220
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
221
222
  // Initialize the range key values if needed (i.e. we scanned the static row until now).
223
0
  if (!VERIFY_RESULT(decoder.HasPrimitiveValue())) {
224
0
    current_scan_target_.mutable_data()->pop_back();
225
0
    for (size_t col_idx = 0; col_idx < range_cols_scan_options_->size(); col_idx++) {
226
0
      current_scan_target_idxs_[col_idx]->AppendToKey(&current_scan_target_);
227
0
    }
228
0
    current_scan_target_.AppendValueType(ValueType::kGroupEnd);
229
0
    return true;
230
0
  }
231
0
  return false;
232
0
}
233
234
0
Status DiscreteScanChoices::DoneWithCurrentTarget() {
235
0
  VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target";
236
0
  DCHECK(!FinishedWithScanChoices());
237
238
  // Initialize the first target/option if not done already, otherwise go to the next one.
239
0
  if (!VERIFY_RESULT(InitScanTargetRangeGroupIfNeeded())) {
240
0
    RETURN_NOT_OK(IncrementScanTargetAtColumn(range_cols_scan_options_->size() - 1));
241
0
    current_scan_target_.AppendValueType(ValueType::kGroupEnd);
242
0
  }
243
0
  return Status::OK();
244
0
}
245
246
0
Status DiscreteScanChoices::SkipTargetsUpTo(const Slice& new_target) {
247
0
  VLOG(2) << __PRETTY_FUNCTION__
248
0
            << " Updating current target to be >= "
249
0
            << DocKey::DebugSliceToString(new_target);
250
0
  DCHECK(!FinishedWithScanChoices());
251
0
  RETURN_NOT_OK(InitScanTargetRangeGroupIfNeeded());
252
0
  DocKeyDecoder decoder(new_target);
253
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
254
0
  current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data()));
255
256
0
  size_t col_idx = 0;
257
0
  PrimitiveValue target_value;
258
0
  while (col_idx < range_cols_scan_options_->size()) {
259
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
260
0
    const auto& choices = (*range_cols_scan_options_)[col_idx];
261
0
    auto& it = current_scan_target_idxs_[col_idx];
262
263
    // Fast-path in case the existing value for this column already matches the new target.
264
0
    if (target_value == *it) {
265
0
      col_idx++;
266
0
      target_value.AppendToKey(&current_scan_target_);
267
0
      continue;
268
0
    }
269
270
    // Search for the option that matches new target value (for the current column).
271
0
    if (is_forward_scan_) {
272
0
      it = std::lower_bound(choices.begin(), choices.end(), target_value);
273
0
    } else {
274
0
      it = std::lower_bound(choices.begin(), choices.end(), target_value, std::greater<>());
275
0
    }
276
277
    // If we overflowed, the new target value for this column is larger than all our options, so
278
    // we go back and increment the previous column instead.
279
0
    if (it == choices.end()) {
280
0
      RETURN_NOT_OK(IncrementScanTargetAtColumn(col_idx - 1));
281
0
      break;
282
0
    }
283
284
    // Else, update the current target value for this column.
285
0
    it->AppendToKey(&current_scan_target_);
286
287
    // If we did not find an exact match we are already beyond the new target so we can stop.
288
0
    if (target_value != *it) {
289
0
      col_idx++;
290
0
      break;
291
0
    }
292
293
0
    col_idx++;
294
0
  }
295
296
  // If there are any columns left (i.e. we stopped early), it means we did not find an exact
297
  // match and we reached beyond the new target key. So we need to include all options for the
298
  // leftover columns (i.e. set all following indexes to 0).
299
0
  for (size_t i = col_idx; i < current_scan_target_idxs_.size(); i++) {
300
0
    current_scan_target_idxs_[i] = (*range_cols_scan_options_)[i].begin();
301
0
    current_scan_target_idxs_[i]->AppendToKey(&current_scan_target_);
302
0
  }
303
304
0
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
305
306
0
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
307
0
          << DocKey::DebugSliceToString(current_scan_target_);
308
309
0
  return Status::OK();
310
0
}
311
312
0
Status DiscreteScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
313
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
314
  // Seek to the current target doc key if needed.
315
0
  if (!FinishedWithScanChoices()) {
316
0
    if (is_forward_scan_) {
317
0
      VLOG(2) << __PRETTY_FUNCTION__ << " Seeking to " << current_scan_target_;
318
0
      db_iter->Seek(current_scan_target_);
319
0
    } else {
320
0
      auto tmp = current_scan_target_;
321
0
      tmp.AppendValueType(ValueType::kHighest);
322
0
      VLOG(2) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp;
323
0
      db_iter->PrevDocKey(tmp);
324
0
    }
325
0
  }
326
0
  return Status::OK();
327
0
}
328
329
// This class combines the notions of option filters (col1 IN (1,2,3)) and
330
// singular range bound filters (col1 < 4 AND col1 >= 1) into a single notion of
331
// lists of ranges. So a filter for a column given in the
332
// Doc(QL/PGSQL)ScanSpec is converted into a range bound filter.
333
// In the end, each HybridScanChoices
334
// instance should have a sorted list of disjoint ranges to filter each column.
335
// Right now this supports a conjunction of range bound and discrete filters.
336
// Disjunctions are also supported but are UNTESTED.
337
// TODO: Test disjunctions when YSQL and YQL support pushing those down
338
339
class HybridScanChoices : public ScanChoices {
340
 public:
341
342
  // Constructs a list of ranges for each column from the given scanspec.
343
  // A filter of the form col1 IN (1,4,5) is converted to a filter
344
  // in the form col1 IN ([1, 1], [4, 4], [5, 5]).
345
  HybridScanChoices(const Schema& schema,
346
                    const KeyBytes &lower_doc_key,
347
                    const KeyBytes &upper_doc_key,
348
                    bool is_forward_scan,
349
                    const std::vector<ColumnId> &range_options_indexes,
350
                    const
351
                    std::shared_ptr<std::vector<std::vector<PrimitiveValue>>>&
352
                        range_options,
353
                    const std::vector<ColumnId> range_bounds_indexes,
354
                    const QLScanRange *range_bounds)
355
                    : ScanChoices(is_forward_scan),
356
                        lower_doc_key_(lower_doc_key),
357
43.4k
                        upper_doc_key_(upper_doc_key) {
358
43.4k
    auto range_cols_scan_options = range_options;
359
43.4k
    size_t idx = 0;
360
43.4k
    range_cols_scan_options_lower_.reserve(schema.num_range_key_columns());
361
43.4k
    range_cols_scan_options_upper_.reserve(schema.num_range_key_columns());
362
363
43.4k
    size_t num_hash_cols = schema.num_hash_key_columns();
364
365
43.4k
    for (idx = schema.num_hash_key_columns();
366
120k
            idx < schema.num_key_columns(); idx++) {
367
76.9k
      const ColumnId col_idx = schema.column_id(idx);
368
76.9k
      range_cols_scan_options_lower_.push_back({});
369
76.9k
      range_cols_scan_options_upper_.push_back({});
370
371
      // If this is a range bound filter, we create a singular
372
      // list of the given range bound
373
76.9k
      if ((std::find(range_bounds_indexes.begin(),
374
76.9k
                        range_bounds_indexes.end(), col_idx)
375
76.9k
                    != range_bounds_indexes.end())
376
76.9k
            && (std::find(range_options_indexes.begin(),
377
76.9k
                            range_options_indexes.end(), col_idx)
378
75.7k
                        == range_options_indexes.end())) {
379
75.7k
        const auto col_sort_type = schema.column(idx).sorting_type();
380
75.7k
        const QLScanRange::QLRange range = range_bounds->RangeFor(col_idx);
381
75.7k
        const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type,
382
75.7k
                                                    true /* lower_bound */);
383
75.7k
        const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type,
384
75.7k
                                                    false /* upper_bound */);
385
386
75.7k
        range_cols_scan_options_lower_[idx - num_hash_cols].push_back(lower);
387
75.7k
        range_cols_scan_options_upper_[idx - num_hash_cols].push_back(upper);
388
1.21k
      } else {
389
390
        // If this is an option filter, we turn each option into a
391
        // range bound to produce a list of singular range bounds
392
1.21k
        if(std::find(range_options_indexes.begin(),
393
1.21k
                        range_options_indexes.end(), col_idx)
394
1.22k
                    != range_options_indexes.end()) {
395
1.22k
          auto &options = (*range_cols_scan_options)[idx - num_hash_cols];
396
397
1.22k
          if (options.empty()) {
398
            // If there is nothing specified in the IN list like in
399
            // SELECT * FROM ... WHERE c1 IN ();
400
            // then nothing should pass the filter.
401
            // To enforce this, we create a range bound (kHighest, kLowest)
402
0
            range_cols_scan_options_lower_[idx
403
0
              - num_hash_cols].push_back(PrimitiveValue(ValueType::kHighest));
404
0
            range_cols_scan_options_upper_[idx
405
0
              - num_hash_cols].push_back(PrimitiveValue(ValueType::kLowest));
406
0
          }
407
408
3.26k
          for (auto val : options) {
409
3.26k
            const auto lower = val;
410
3.26k
            const auto upper = val;
411
3.26k
            range_cols_scan_options_lower_[idx
412
3.26k
              - num_hash_cols].push_back(lower);
413
3.26k
            range_cols_scan_options_upper_[idx
414
3.26k
              - num_hash_cols].push_back(upper);
415
3.26k
          }
416
417
18.4E
        } else {
418
            // If no filter is specified, we just impose an artificial range
419
            // filter [kLowest, kHighest]
420
18.4E
            range_cols_scan_options_lower_[idx - num_hash_cols]
421
18.4E
                                .push_back(PrimitiveValue(ValueType::kLowest));
422
18.4E
            range_cols_scan_options_upper_[idx - num_hash_cols]
423
18.4E
                                .push_back(PrimitiveValue(ValueType::kHighest));
424
18.4E
        }
425
1.21k
      }
426
76.9k
    }
427
428
43.4k
    current_scan_target_idxs_.resize(range_cols_scan_options_lower_.size());
429
430
43.4k
    if (is_forward_scan_) {
431
39.0k
      current_scan_target_ = lower_doc_key;
432
4.49k
    } else {
433
4.49k
      current_scan_target_ = upper_doc_key;
434
4.49k
    }
435
436
43.4k
  }
437
438
  HybridScanChoices(const Schema& schema,
439
                    const DocPgsqlScanSpec& doc_spec,
440
                    const KeyBytes &lower_doc_key,
441
                    const KeyBytes &upper_doc_key)
442
      : HybridScanChoices(schema, lower_doc_key, upper_doc_key,
443
      doc_spec.is_forward_scan(), doc_spec.range_options_indexes(),
444
      doc_spec.range_options(), doc_spec.range_bounds_indexes(),
445
4.72k
      doc_spec.range_bounds()) {
446
4.72k
  }
447
448
  HybridScanChoices(const Schema& schema,
449
                    const DocQLScanSpec& doc_spec,
450
                    const KeyBytes &lower_doc_key,
451
                    const KeyBytes &upper_doc_key)
452
      : HybridScanChoices(schema, lower_doc_key, upper_doc_key,
453
      doc_spec.is_forward_scan(), doc_spec.range_options_indexes(),
454
      doc_spec.range_options(), doc_spec.range_bounds_indexes(),
455
38.7k
      doc_spec.range_bounds()) {
456
38.7k
  }
457
458
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
459
  CHECKED_STATUS DoneWithCurrentTarget() override;
460
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
461
462
 protected:
463
  // Utility function for (multi)key scans. Updates the target scan key by
464
  // incrementing the option
465
  // index for one column. Will handle overflow by setting current column
466
  // index to 0 and incrementing the previous column instead. If it overflows
467
  // at first column it means we are done, so it clears the scan target idxs
468
  // array.
469
  CHECKED_STATUS IncrementScanTargetAtColumn(int start_col);
470
471
 private:
472
  KeyBytes prev_scan_target_;
473
474
  // The following encodes the list of ranges we are iterating over
475
  std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_lower_;
476
  std::vector<std::vector<PrimitiveValue>> range_cols_scan_options_upper_;
477
478
  std::vector<ColumnId> range_options_indexes_;
479
  mutable std::vector<size_t> current_scan_target_idxs_;
480
481
  bool is_options_done_ = false;
482
483
  const KeyBytes lower_doc_key_;
484
  const KeyBytes upper_doc_key_;
485
};
486
487
// Sets current_scan_target_ to the first tuple in the filter space
488
// that is >= new_target.
489
10.9M
Status HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) {
490
18.4E
  VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= "
491
18.4E
          << DocKey::DebugSliceToString(new_target);
492
10.9M
  DCHECK(!FinishedWithScanChoices());
493
10.9M
  is_options_done_ = false;
494
495
  /*
496
   Let's say we have a row key with (A B) as the hash part and C, D as the range part:
497
   ((A B) C D) E F
498
499
   Let's say our current constraints :
500
    l_c_k <= C <= u_c_k
501
     4            6
502
503
    l_d_j <= D <= u_d_j
504
      3           5
505
506
    a b  0 d  -> a  b l_c  d
507
508
    a b  5 d  -> a  b  5   d
509
                  [ Will subsequently seek out of document on reading the subdoc]
510
511
    a b  7 d  -> a b l_c_(k+1) 0
512
                [ If there is another range bound filter that's higher than the
513
                  current one, effectively, moving this column to the next
514
                  range in the filter list.]
515
              -> a b Inf
516
                [ This will seek to <b_next> and on the next invocation update:
517
                   a <b_next> ? ? -> a <b_next> l_c_0 0 ]
518
519
    a b  c 6  -> a b c l_d_(j+1)
520
                [ If there is another range bound filter that's higher than the
521
                  d, effectively, moving column D to the next
522
                  range in the filter list.]
523
              -> a b c Inf
524
                [ If c_next is between l_c_k and u_c_k. This will seek to <a b
525
                   <c_next>> and on the next invocation update:
526
                   a b <c_next> ? -> a b <c_next> l_d_0 ]
527
              -> a b l_c_(k+1) l_d_0
528
                 [ If c_next is above u_c_k. We do this because we know
529
                   exactly what the next tuple in our filter space should be.]
530
  */
531
10.9M
  DocKeyDecoder decoder(new_target);
532
10.9M
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
533
10.9M
  current_scan_target_.Reset(Slice(new_target.data(),
534
10.9M
                                decoder.left_input().data()));
535
536
10.9M
  size_t col_idx = 0;
537
10.9M
  PrimitiveValue target_value;
538
32.0M
  for (col_idx = 0; col_idx < current_scan_target_idxs_.size(); col_idx++) {
539
31.0M
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
540
31.0M
    const auto& lower_choices = (range_cols_scan_options_lower_)[col_idx];
541
31.0M
    const auto& upper_choices = (range_cols_scan_options_upper_)[col_idx];
542
31.0M
    auto current_ind = current_scan_target_idxs_[col_idx];
543
31.0M
    DCHECK(current_ind < lower_choices.size());
544
31.0M
    const auto& lower = lower_choices[current_ind];
545
31.0M
    const auto& upper = upper_choices[current_ind];
546
547
    // If it's in range then good, continue after appending the target value
548
    // column.
549
550
31.0M
    if (target_value >= lower && target_value <= upper) {
551
21.0M
      target_value.AppendToKey(&current_scan_target_);
552
21.0M
      continue;
553
21.0M
    }
554
555
    // If target_value is not in the current range then we must find a range
556
    // that works for it.
557
    // If we are above all ranges then increment the index of the previous
558
    // column.
559
    // Else, target_value is below at least one range: find the lowest lower
560
    // bound above target_value and use that, this relies on the assumption
561
    // that all our filter ranges are disjoint.
562
563
10.0M
    auto it = lower_choices.begin();
564
10.0M
    size_t ind = 0;
565
566
    // Find an upper (lower) bound closest to target_value
567
10.0M
    if (is_forward_scan_) {
568
10.0M
      it = std::lower_bound(upper_choices.begin(),
569
10.0M
                                upper_choices.end(), target_value);
570
10.0M
      ind = it - upper_choices.begin();
571
201
    } else {
572
201
      it = std::lower_bound(lower_choices.begin(), lower_choices.end(),
573
201
              target_value, std::greater<>());
574
201
      ind = it - lower_choices.begin();
575
201
    }
576
577
10.0M
    if (ind == lower_choices.size()) {
578
      // target value is higher than all range options and
579
      // we need to increment.
580
5.00M
      RETURN_NOT_OK(IncrementScanTargetAtColumn(static_cast<int>(col_idx) - 1));
581
5.00M
      col_idx = current_scan_target_idxs_.size();
582
5.00M
      break;
583
5.02M
    }
584
585
5.02M
    current_scan_target_idxs_[col_idx] = ind;
586
587
    // If we are within a range then target_value itself should work.
588
5.02M
    if (lower_choices[ind] <= target_value
589
680
        && upper_choices[ind] >= target_value) {
590
627
      target_value.AppendToKey(&current_scan_target_);
591
627
      continue;
592
627
    }
593
594
    // Otherwise we must set it to the next lower bound.
595
    // This only works as we are assuming all given ranges are
596
    // disjoint.
597
598
5.02M
    DCHECK((is_forward_scan_ && lower_choices[ind] > target_value)
599
5.02M
              || (!is_forward_scan_ && upper_choices[ind]
600
5.02M
              < target_value));
601
602
5.02M
    if (is_forward_scan_) {
603
5.02M
      lower_choices[ind].AppendToKey(&current_scan_target_);
604
146
    } else {
605
146
      upper_choices[ind].AppendToKey(&current_scan_target_);
606
146
    }
607
5.02M
    col_idx++;
608
5.02M
    break;
609
5.02M
  }
610
611
  // Reset the remaining range columns to lower bounds for forward scans
612
  // or upper bounds for backward scans.
613
10.9M
  for (size_t i = col_idx; i < range_cols_scan_options_lower_.size(); i++) {
614
26.6k
    current_scan_target_idxs_[i] = 0;
615
26.6k
    if (is_forward_scan_) {
616
26.6k
      range_cols_scan_options_lower_[i][0]
617
26.6k
                    .AppendToKey(&current_scan_target_);
618
4
    } else {
619
4
      range_cols_scan_options_upper_[i][0]
620
4
                    .AppendToKey(&current_scan_target_);
621
4
    }
622
26.6k
  }
623
624
10.9M
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
625
18.4E
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
626
18.4E
          << DocKey::DebugSliceToString(current_scan_target_);
627
10.9M
  return Status::OK();
628
10.9M
}
629
630
// Update the value at start column by setting it up for incrementing to the
631
// next allowed value in the filter space
632
// ---------------------------------------------------------------------------
633
// There are two important cases to consider here.
634
// Let's say the value of current_scan_target_ at start_col, c,
635
// is currently V and the current bounds for that column
636
// is l_c_k <= V <= u_c_k. In the usual case where V != u_c_k
637
// (or V != l_c_k for backwards scans) such that V_next is still in the given
638
// restriction, we set column c + 1 to kHighest (kLowest), such that the next
639
// invocation of GetNext() produces V_next at column similar to what is done
640
// in SkipTargetsUpTo. In this case, doing a SkipTargetsUpTo on the resulting
641
// current_scan_target_ should yield the next allowed value in the filter space
642
// In the case where V = u_c_k (V = l_c_k), or in other words V is at the
643
// EXTREMAL boundary of the current range, we know exactly what the next value
644
// of column C will be. So we move column c to the next
645
// range k+1 and set that column to the new value l_c_(k+1) (u_c_(k+1))
646
// while setting all columns, b > c to l_b_0 (u_b_0)
647
// In the case of overflow on a column c (we want to increment the
648
// restriction range of c to the next range bound for that column but there
649
// are no restriction ranges remaining), we set the
650
// current column to the 0th range and move on to increment c - 1
651
// Note that in almost all cases the resulting current_scan_target_ is strictly
652
// greater (lesser in the case of backwards scans) than the original
653
// current_scan_target_. This is necessary to allow the iterator seek out
654
// of the current scan target. The exception to this rule is below.
655
// ---------------------------------------------------------------------------
656
// This function leaves the scan target as is if the next tuple in the current
657
// scan direction is also the next tuple in the filter space and start_col
658
// is given as the last column
659
5.93M
Status HybridScanChoices::IncrementScanTargetAtColumn(int start_col) {
660
661
18.4E
  VLOG(2) << __PRETTY_FUNCTION__
662
18.4E
          << " Incrementing at " << start_col;
663
664
  // Increment start col, move backwards in case of overflow.
665
5.93M
  int col_idx = start_col;
666
  // lower and upper here are taken relative to the scan order
667
5.93M
  auto &lower_extremal_vector = is_forward_scan_
668
5.52M
                          ? range_cols_scan_options_lower_
669
412k
                            : range_cols_scan_options_upper_;
670
5.93M
  auto &upper_extremal_vector = is_forward_scan_
671
5.52M
                                ? range_cols_scan_options_upper_
672
412k
                                  : range_cols_scan_options_lower_;
673
5.93M
  DocKeyDecoder t_decoder(current_scan_target_);
674
5.93M
  RETURN_NOT_OK(t_decoder.DecodeToRangeGroup());
675
676
  // refer to the documentation of this function to see what extremal
677
  // means here
678
5.93M
  std::vector<bool> is_extremal;
679
5.93M
  PrimitiveValue target_value;
680
16.9M
  for (int i = 0; i <= col_idx; ++i) {
681
11.0M
    RETURN_NOT_OK(t_decoder.DecodePrimitiveValue(&target_value));
682
11.0M
    is_extremal.push_back(target_value ==
683
11.0M
      upper_extremal_vector[i][current_scan_target_idxs_[i]]);
684
11.0M
  }
685
686
  // this variable tells us whether we start by appending
687
  // kHighest/kLowest at col_idx after the following for loop
688
5.93M
  bool start_with_infinity = true;
689
690
5.94M
  for (; col_idx >= 0; col_idx--) {
691
5.72M
    const auto& choices = lower_extremal_vector[col_idx];
692
5.72M
    auto it = current_scan_target_idxs_[col_idx];
693
694
5.72M
    if (!is_extremal[col_idx]) {
695
5.70M
      col_idx++;
696
5.70M
      start_with_infinity = true;
697
5.70M
      break;
698
5.70M
    }
699
700
19.6k
    if (++it < choices.size()) {
701
      // and if this value is at the extremal bound
702
4.38k
      if (is_extremal[col_idx]) {
703
4.38k
        current_scan_target_idxs_[col_idx]++;
704
4.38k
        start_with_infinity = false;
705
4.38k
      }
706
4.38k
      break;
707
4.38k
    }
708
709
15.2k
    current_scan_target_idxs_[col_idx] = 0;
710
15.2k
  }
711
712
5.93M
  DocKeyDecoder decoder(current_scan_target_);
713
5.93M
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
714
16.9M
  for (int i = 0; i < col_idx; ++i) {
715
11.0M
    RETURN_NOT_OK(decoder.DecodePrimitiveValue());
716
11.0M
  }
717
718
5.93M
  if (col_idx < 0) {
719
    // If we got here we finished all the options and are done.
720
222k
    col_idx++;
721
222k
    start_with_infinity = true;
722
222k
    is_options_done_ = true;
723
222k
  }
724
725
5.93M
  current_scan_target_.Truncate(
726
5.93M
      decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata());
727
728
729
5.93M
  if (start_with_infinity &&
730
5.92M
        (col_idx < static_cast<int64>(current_scan_target_idxs_.size()))) {
731
5.01M
    if (is_forward_scan_) {
732
5.01M
      PrimitiveValue(ValueType::kHighest).AppendToKey(&current_scan_target_);
733
4.24k
    } else {
734
4.24k
      PrimitiveValue(ValueType::kLowest).AppendToKey(&current_scan_target_);
735
4.24k
    }
736
5.01M
    col_idx++;
737
5.01M
  }
738
739
5.93M
  if (start_with_infinity) {
740
    // there's no point in appending anything after infinity
741
5.92M
    return Status::OK();
742
5.92M
  }
743
744
8.84k
  for (int i = col_idx; i <= start_col; ++i) {
745
4.60k
      lower_extremal_vector[i][current_scan_target_idxs_[i]]
746
4.60k
                                      .AppendToKey(&current_scan_target_);
747
4.60k
  }
748
749
4.24k
  for (size_t i = start_col + 1; i < current_scan_target_idxs_.size(); ++i) {
750
8
    current_scan_target_idxs_[i] = 0;
751
8
    lower_extremal_vector[i][current_scan_target_idxs_[i]]
752
8
                                    .AppendToKey(&current_scan_target_);
753
8
  }
754
755
4.23k
  return Status::OK();
756
4.23k
}
757
758
// Method called when the scan target is done being used
759
926k
Status HybridScanChoices::DoneWithCurrentTarget() {
760
  // prev_scan_target_ is necessary for backwards scans
761
926k
  prev_scan_target_ = current_scan_target_;
762
926k
  RETURN_NOT_OK(IncrementScanTargetAtColumn(
763
926k
                                  static_cast<int>(current_scan_target_idxs_.size()) - 1));
764
926k
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
765
766
  // if we we incremented the last index then
767
  // if this is a forward scan it doesn't matter what we do
768
  // if this is a backwards scan then dont clear current_scan_target and we
769
  // stay live
770
18.4E
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
771
18.4E
          << DocKey::DebugSliceToString(current_scan_target_);
772
773
18.4E
  VLOG(2) << __PRETTY_FUNCTION__ << " moving on to next target";
774
926k
  DCHECK(!FinishedWithScanChoices());
775
776
926k
  if (is_options_done_) {
777
      // It could be possible that we finished all our options but are not
778
      // done because we haven't hit the bound key yet. This would usually be
779
      // the case if we are moving onto the next hash key where we will
780
      // restart our range options.
781
217k
      const KeyBytes &bound_key = is_forward_scan_ ?
782
213k
                                    upper_doc_key_ : lower_doc_key_;
783
208k
      finished_ = bound_key.empty() ? false
784
8.58k
                    : is_forward_scan_
785
8.58k
                        == (current_scan_target_.CompareTo(bound_key) >= 0);
786
18.4E
      VLOG(4) << "finished_ = " << finished_;
787
217k
  }
788
789
790
18.4E
  VLOG(4) << "current_scan_target_ is "
791
18.4E
          << DocKey::DebugSliceToString(current_scan_target_)
792
18.4E
          << " and prev_scan_target_ is "
793
18.4E
          << DocKey::DebugSliceToString(prev_scan_target_);
794
795
  // The below condition is either indicative of the special case
796
  // where IncrementScanTargetAtColumn didn't change the target due
797
  // to the case specified in the last section of the
798
  // documentation for IncrementScanTargetAtColumn or we have exhausted
799
  // all available range keys for the given hash key (indicated
800
  // by is_options_done_)
801
  // We clear the scan target in these cases to indicate that the
802
  // current_scan_target_ has been used and is invalid
803
  // In all other cases, IncrementScanTargetAtColumn has updated
804
  // current_scan_target_ to the new value that we want to seek to.
805
  // Hence, we shouldn't clear it in those cases
806
926k
  if ((prev_scan_target_ == current_scan_target_) || is_options_done_) {
807
918k
      current_scan_target_.Clear();
808
918k
  }
809
810
926k
  return Status::OK();
811
926k
}
812
813
// Seeks the given iterator to the current target as specified by
814
// current_scan_target_ and prev_scan_target_ (relevant in backwards
815
// scans)
816
10.9M
Status HybridScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
817
18.4E
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
818
819
10.9M
  if (!FinishedWithScanChoices()) {
820
    // if current_scan_target_ is valid we use it to determine
821
    // what to seek to
822
10.9M
    if (!current_scan_target_.empty()) {
823
0
      VLOG(3) << __PRETTY_FUNCTION__
824
0
              << " current_scan_target_ is non-empty. "
825
0
              << DocKey::DebugSliceToString(current_scan_target_);
826
10.0M
      if (is_forward_scan_) {
827
0
        VLOG(3) << __PRETTY_FUNCTION__
828
0
                << " Seeking to "
829
0
                << DocKey::DebugSliceToString(current_scan_target_);
830
10.0M
        db_iter->Seek(current_scan_target_);
831
122
      } else {
832
        // seek to the highest key <= current_scan_target_
833
        // seeking to the highest key < current_scan_target_ + kHighest
834
        // is equivalent to seeking to the highest key <=
835
        // current_scan_target_
836
122
        auto tmp = current_scan_target_;
837
122
        PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp);
838
0
        VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp;
839
122
        db_iter->PrevDocKey(tmp);
840
122
      }
841
909k
    } else {
842
909k
      if (!is_forward_scan_ && !prev_scan_target_.empty()) {
843
407k
        db_iter->PrevDocKey(prev_scan_target_);
844
407k
      }
845
909k
    }
846
10.9M
  }
847
848
10.9M
  return Status::OK();
849
10.9M
}
850
851
class RangeBasedScanChoices : public ScanChoices {
852
 public:
853
  RangeBasedScanChoices(const Schema& schema, const DocQLScanSpec& doc_spec)
854
0
      : ScanChoices(doc_spec.is_forward_scan()) {
855
0
    DCHECK(doc_spec.range_bounds());
856
0
    lower_.reserve(schema.num_range_key_columns());
857
0
    upper_.reserve(schema.num_range_key_columns());
858
0
    size_t idx = 0;
859
0
    for (idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) {
860
0
      const ColumnId col_idx = schema.column_id(idx);
861
0
      const auto col_sort_type = schema.column(idx).sorting_type();
862
0
      const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx);
863
0
      const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */);
864
0
      const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */);
865
0
      lower_.emplace_back(lower);
866
0
      upper_.emplace_back(upper);
867
0
    }
868
0
  }
869
870
  RangeBasedScanChoices(const Schema& schema, const DocPgsqlScanSpec& doc_spec)
871
0
      : ScanChoices(doc_spec.is_forward_scan()) {
872
0
    DCHECK(doc_spec.range_bounds());
873
0
    lower_.reserve(schema.num_range_key_columns());
874
0
    upper_.reserve(schema.num_range_key_columns());
875
0
    for (auto idx = schema.num_hash_key_columns(); idx < schema.num_key_columns(); idx++) {
876
0
      const ColumnId col_idx = schema.column_id(idx);
877
0
      const auto col_sort_type = schema.column(idx).sorting_type();
878
0
      const QLScanRange::QLRange range = doc_spec.range_bounds()->RangeFor(col_idx);
879
0
      const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */);
880
0
      const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */);
881
0
      lower_.emplace_back(lower);
882
0
      upper_.emplace_back(upper);
883
0
    }
884
0
  }
885
886
  CHECKED_STATUS SkipTargetsUpTo(const Slice& new_target) override;
887
  CHECKED_STATUS DoneWithCurrentTarget() override;
888
  CHECKED_STATUS SeekToCurrentTarget(IntentAwareIterator* db_iter) override;
889
890
 private:
891
  std::vector<PrimitiveValue> lower_, upper_;
892
  KeyBytes prev_scan_target_;
893
};
894
895
0
Status RangeBasedScanChoices::SkipTargetsUpTo(const Slice& new_target) {
896
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Updating current target to be >= "
897
0
          << DocKey::DebugSliceToString(new_target);
898
0
  DCHECK(!FinishedWithScanChoices());
899
900
  /*
901
   Let's say we have a row key with (A B) as the hash part and C, D as the range part:
902
   ((A B) C D) E F
903
904
   Let's say we have a range constraint :
905
    l_c < C < u_c
906
     4        6
907
908
    a b  0 d  -> a  b l_c  d
909
910
    a b  5 d  -> a  b  5   d
911
                  [ Will subsequently seek out of document on reading the subdoc]
912
913
    a b  7 d  -> a <b> MAX
914
                [ This will seek to <b_next> and on the next invocation update:
915
                   a <b_next> ? ? -> a <b_next> l_c d ]
916
  */
917
0
  DocKeyDecoder decoder(new_target);
918
0
  RETURN_NOT_OK(decoder.DecodeToRangeGroup());
919
0
  current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data()));
920
921
0
  size_t col_idx = 0;
922
0
  PrimitiveValue target_value;
923
0
  bool last_was_infinity = false;
924
0
  for (col_idx = 0; VERIFY_RESULT(decoder.HasPrimitiveValue()); col_idx++) {
925
0
    RETURN_NOT_OK(decoder.DecodePrimitiveValue(&target_value));
926
0
    VLOG(3) << "col_idx " << col_idx << " is " << target_value << " in ["
927
0
            << yb::ToString(lower_[col_idx]) << " , " << yb::ToString(upper_[col_idx]) << " ] ?";
928
929
0
    const auto& lower = lower_[col_idx];
930
0
    if (target_value < lower) {
931
0
      const auto tgt = (is_forward_scan_ ? lower : PrimitiveValue(ValueType::kLowest));
932
0
      tgt.AppendToKey(&current_scan_target_);
933
0
      last_was_infinity = tgt.IsInfinity();
934
0
      VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt;
935
0
      break;
936
0
    }
937
0
    const auto& upper = upper_[col_idx];
938
0
    if (target_value > upper) {
939
0
      const auto tgt = (!is_forward_scan_ ? upper : PrimitiveValue(ValueType::kHighest));
940
0
      VLOG(3) << " Updating idx " << col_idx << " from " << target_value << " to " << tgt;
941
0
      tgt.AppendToKey(&current_scan_target_);
942
0
      last_was_infinity = tgt.IsInfinity();
943
0
      break;
944
0
    }
945
0
    target_value.AppendToKey(&current_scan_target_);
946
0
    last_was_infinity = target_value.IsInfinity();
947
0
  }
948
949
  // Reset the remaining range columns to kHighest/lower for forward scans
950
  // or kLowest/upper for backward scans.
951
0
  while (++col_idx < lower_.size()) {
952
0
    if (last_was_infinity) {
953
      // No point having more components after +/- Inf.
954
0
      break;
955
0
    }
956
0
    if (is_forward_scan_) {
957
0
      VLOG(3) << " Updating col_idx " << col_idx << " to " << lower_[col_idx];
958
0
      lower_[col_idx].AppendToKey(&current_scan_target_);
959
0
      last_was_infinity = lower_[col_idx].IsInfinity();
960
0
    } else {
961
0
      VLOG(3) << " Updating col_idx " << col_idx << " to " << upper_[col_idx];
962
0
      upper_[col_idx].AppendToKey(&current_scan_target_);
963
0
      last_was_infinity = upper_[col_idx].IsInfinity();
964
0
    }
965
0
  }
966
0
  current_scan_target_.AppendValueType(ValueType::kGroupEnd);
967
0
  VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is "
968
0
          << DocKey::DebugSliceToString(current_scan_target_);
969
970
0
  return Status::OK();
971
0
}
972
973
0
Status RangeBasedScanChoices::DoneWithCurrentTarget() {
974
0
  prev_scan_target_ = current_scan_target_;
975
0
  current_scan_target_.Clear();
976
0
  return Status::OK();
977
0
}
978
979
0
Status RangeBasedScanChoices::SeekToCurrentTarget(IntentAwareIterator* db_iter) {
980
0
  VLOG(2) << __PRETTY_FUNCTION__ << " Advancing iterator towards target";
981
982
0
  if (!FinishedWithScanChoices()) {
983
0
    if (!current_scan_target_.empty()) {
984
0
      VLOG(3) << __PRETTY_FUNCTION__
985
0
              << " current_scan_target_ is non-empty. "
986
0
              << current_scan_target_;
987
0
      if (is_forward_scan_) {
988
0
        VLOG(3) << __PRETTY_FUNCTION__
989
0
                << " Seeking to "
990
0
                << DocKey::DebugSliceToString(current_scan_target_);
991
0
        db_iter->Seek(current_scan_target_);
992
0
      } else {
993
0
        auto tmp = current_scan_target_;
994
0
        PrimitiveValue(ValueType::kHighest).AppendToKey(&tmp);
995
0
        VLOG(3) << __PRETTY_FUNCTION__ << " Going to PrevDocKey " << tmp;  // Never seen.
996
0
        db_iter->PrevDocKey(tmp);
997
0
      }
998
0
    } else {
999
0
      if (!is_forward_scan_ && !prev_scan_target_.empty()) {
1000
0
        db_iter->PrevDocKey(prev_scan_target_);
1001
0
      }
1002
0
    }
1003
0
  }
1004
1005
0
  return Status::OK();
1006
0
}
1007
1008
DocRowwiseIterator::DocRowwiseIterator(
1009
    const Schema &projection,
1010
    const Schema &schema,
1011
    const TransactionOperationContext& txn_op_context,
1012
    const DocDB& doc_db,
1013
    CoarseTimePoint deadline,
1014
    const ReadHybridTime& read_time,
1015
    RWOperationCounter* pending_op_counter)
1016
    : projection_(projection),
1017
      schema_(schema),
1018
      txn_op_context_(txn_op_context),
1019
      deadline_(deadline),
1020
      read_time_(read_time),
1021
      doc_db_(doc_db),
1022
      has_bound_key_(false),
1023
      pending_op_(pending_op_counter),
1024
8.13M
      done_(false) {
1025
8.13M
  projection_subkeys_.reserve(projection.num_columns() + 1);
1026
8.13M
  projection_subkeys_.push_back(PrimitiveValue::kLivenessColumn);
1027
30.3M
  for (size_t i = projection_.num_key_columns(); i < projection.num_columns(); i++) {
1028
22.2M
    projection_subkeys_.emplace_back(projection.column_id(i));
1029
22.2M
  }
1030
8.13M
  std::sort(projection_subkeys_.begin(), projection_subkeys_.end());
1031
8.13M
}
1032
1033
8.16M
DocRowwiseIterator::~DocRowwiseIterator() {
1034
8.16M
}
1035
1036
123k
Status DocRowwiseIterator::Init(TableType table_type, const Slice& sub_doc_key) {
1037
123k
  db_iter_ = CreateIntentAwareIterator(
1038
123k
      doc_db_,
1039
123k
      BloomFilterMode::DONT_USE_BLOOM_FILTER,
1040
123k
      boost::none /* user_key_for_filter */,
1041
123k
      rocksdb::kDefaultQueryId,
1042
123k
      txn_op_context_,
1043
123k
      deadline_,
1044
123k
      read_time_);
1045
123k
  if (!sub_doc_key.empty()) {
1046
0
    row_key_ = sub_doc_key;
1047
123k
  } else {
1048
123k
    DocKeyEncoder(&iter_key_).Schema(schema_);
1049
123k
    row_key_ = iter_key_;
1050
123k
  }
1051
123k
  row_hash_key_ = row_key_;
1052
687
  VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << row_key_;
1053
123k
  db_iter_->Seek(row_key_);
1054
123k
  row_ready_ = false;
1055
123k
  has_bound_key_ = false;
1056
123k
  if (table_type == TableType::PGSQL_TABLE_TYPE) {
1057
6
    ignore_ttl_ = true;
1058
6
  }
1059
1060
123k
  return Status::OK();
1061
123k
}
1062
1063
Result<bool> DocRowwiseIterator::InitScanChoices(
1064
3.70M
    const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) {
1065
1066
3.70M
  if (!FLAGS_disable_hybrid_scan) {
1067
3.70M
    if (doc_spec.range_options() || doc_spec.range_bounds()) {
1068
38.7k
        scan_choices_.reset(new HybridScanChoices(schema_, doc_spec,
1069
38.7k
                                    lower_doc_key, upper_doc_key));
1070
38.7k
    }
1071
1072
3.70M
    return false;
1073
3.70M
  }
1074
1075
18.4E
  if (doc_spec.range_options()) {
1076
0
    scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key));
1077
    // Let's not seek to the lower doc key or upper doc key. We know exactly what we want.
1078
0
    RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow());
1079
0
    return true;
1080
18.4E
  }
1081
1082
18.4E
  if (doc_spec.range_bounds()) {
1083
0
    scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec));
1084
0
  }
1085
1086
18.4E
  return false;
1087
18.4E
}
1088
1089
Result<bool> DocRowwiseIterator::InitScanChoices(
1090
    const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key,
1091
4.32M
    const KeyBytes& upper_doc_key) {
1092
1093
4.32M
  if (!FLAGS_disable_hybrid_scan) {
1094
4.32M
    if (doc_spec.range_options() || doc_spec.range_bounds()) {
1095
4.72k
        scan_choices_.reset(new HybridScanChoices(schema_, doc_spec,
1096
4.72k
                                    lower_doc_key, upper_doc_key));
1097
4.72k
    }
1098
1099
4.32M
    return false;
1100
4.32M
  }
1101
1102
1.11k
  if (doc_spec.range_options()) {
1103
0
    scan_choices_.reset(new DiscreteScanChoices(doc_spec, lower_doc_key, upper_doc_key));
1104
    // Let's not seek to the lower doc key or upper doc key. We know exactly what we want.
1105
0
    RETURN_NOT_OK(AdvanceIteratorToNextDesiredRow());
1106
0
    return true;
1107
1.11k
  }
1108
1109
1.11k
  if (doc_spec.range_bounds()) {
1110
0
    scan_choices_.reset(new RangeBasedScanChoices(schema_, doc_spec));
1111
0
  }
1112
1113
1.11k
  return false;
1114
1.11k
}
1115
1116
template <class T>
1117
8.04M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1118
8.04M
  is_forward_scan_ = doc_spec.is_forward_scan();
1119
1120
15.7k
  VLOG(4) << "Initializing iterator direction: " << (is_forward_scan_ ? "FORWARD" : "BACKWARD");
1121
1122
8.04M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1123
8.04M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1124
18.4E
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1125
18.4E
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1126
1127
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1128
8.04M
  const bool is_fixed_point_get =
1129
8.04M
      !lower_doc_key.empty() &&
1130
8.04M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1131
7.73M
  const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER
1132
311k
                                       : BloomFilterMode::DONT_USE_BLOOM_FILTER;
1133
1134
8.04M
  db_iter_ = CreateIntentAwareIterator(
1135
8.04M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1136
8.04M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1137
1138
8.04M
  row_ready_ = false;
1139
1140
8.04M
  if (is_forward_scan_) {
1141
8.04M
    has_bound_key_ = !upper_doc_key.empty();
1142
8.04M
    if (has_bound_key_) {
1143
7.94M
      bound_key_ = std::move(upper_doc_key);
1144
7.94M
      db_iter_->SetUpperbound(bound_key_);
1145
7.94M
    }
1146
3.71k
  } else {
1147
3.71k
    has_bound_key_ = !lower_doc_key.empty();
1148
4.53k
    if (has_bound_key_) {
1149
4.53k
      bound_key_ = std::move(lower_doc_key);
1150
4.53k
    }
1151
3.71k
  }
1152
1153
8.04M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1154
8.04M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1155
8.03M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1156
8.03M
    if (is_forward_scan_) {
1157
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1158
8.03M
      db_iter_->Seek(lower_doc_key);
1159
5.44k
    } else {
1160
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1161
5.44k
      if (!upper_doc_key.empty()) {
1162
4.53k
        db_iter_->PrevDocKey(upper_doc_key);
1163
907
      } else {
1164
907
        db_iter_->SeekToLastDocKey();
1165
907
      }
1166
5.44k
    }
1167
8.03M
  }
1168
1169
8.04M
  return Status::OK();
1170
8.04M
}
_ZN2yb5docdb18DocRowwiseIterator6DoInitINS0_13DocQLScanSpecEEENS_6StatusERKT_
Line
Count
Source
1117
3.71M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1118
3.71M
  is_forward_scan_ = doc_spec.is_forward_scan();
1119
1120
15.8k
  VLOG(4) << "Initializing iterator direction: " << (is_forward_scan_ ? "FORWARD" : "BACKWARD");
1121
1122
3.71M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1123
3.71M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1124
18.4E
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1125
18.4E
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1126
1127
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1128
3.71M
  const bool is_fixed_point_get =
1129
3.71M
      !lower_doc_key.empty() &&
1130
3.71M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1131
3.65M
  const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER
1132
66.0k
                                       : BloomFilterMode::DONT_USE_BLOOM_FILTER;
1133
1134
3.71M
  db_iter_ = CreateIntentAwareIterator(
1135
3.71M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1136
3.71M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1137
1138
3.71M
  row_ready_ = false;
1139
1140
3.71M
  if (is_forward_scan_) {
1141
3.71M
    has_bound_key_ = !upper_doc_key.empty();
1142
3.71M
    if (has_bound_key_) {
1143
3.62M
      bound_key_ = std::move(upper_doc_key);
1144
3.62M
      db_iter_->SetUpperbound(bound_key_);
1145
3.62M
    }
1146
3.26k
  } else {
1147
3.26k
    has_bound_key_ = !lower_doc_key.empty();
1148
4.53k
    if (has_bound_key_) {
1149
4.53k
      bound_key_ = std::move(lower_doc_key);
1150
4.53k
    }
1151
3.26k
  }
1152
1153
3.71M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1154
3.71M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1155
3.70M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1156
3.70M
    if (is_forward_scan_) {
1157
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1158
3.70M
      db_iter_->Seek(lower_doc_key);
1159
5.75k
    } else {
1160
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1161
5.75k
      if (!upper_doc_key.empty()) {
1162
4.53k
        db_iter_->PrevDocKey(upper_doc_key);
1163
1.22k
      } else {
1164
1.22k
        db_iter_->SeekToLastDocKey();
1165
1.22k
      }
1166
5.75k
    }
1167
3.70M
  }
1168
1169
3.71M
  return Status::OK();
1170
3.71M
}
_ZN2yb5docdb18DocRowwiseIterator6DoInitINS0_16DocPgsqlScanSpecEEENS_6StatusERKT_
Line
Count
Source
1117
4.33M
Status DocRowwiseIterator::DoInit(const T& doc_spec) {
1118
4.33M
  is_forward_scan_ = doc_spec.is_forward_scan();
1119
1120
18.4E
  VLOG(4) << "Initializing iterator direction: " << (is_forward_scan_ ? "FORWARD" : "BACKWARD");
1121
1122
4.33M
  auto lower_doc_key = VERIFY_RESULT(doc_spec.LowerBound());
1123
4.33M
  auto upper_doc_key = VERIFY_RESULT(doc_spec.UpperBound());
1124
393
  VLOG(4) << "DocKey Bounds " << DocKey::DebugSliceToString(lower_doc_key.AsSlice())
1125
393
          << ", " << DocKey::DebugSliceToString(upper_doc_key.AsSlice());
1126
1127
  // TODO(bogdan): decide if this is a good enough heuristic for using blooms for scans.
1128
4.33M
  const bool is_fixed_point_get =
1129
4.33M
      !lower_doc_key.empty() &&
1130
4.33M
      VERIFY_RESULT(HashedOrFirstRangeComponentsEqual(lower_doc_key, upper_doc_key));
1131
4.08M
  const auto mode = is_fixed_point_get ? BloomFilterMode::USE_BLOOM_FILTER
1132
245k
                                       : BloomFilterMode::DONT_USE_BLOOM_FILTER;
1133
1134
4.33M
  db_iter_ = CreateIntentAwareIterator(
1135
4.33M
      doc_db_, mode, lower_doc_key.AsSlice(), doc_spec.QueryId(), txn_op_context_,
1136
4.33M
      deadline_, read_time_, doc_spec.CreateFileFilter());
1137
1138
4.33M
  row_ready_ = false;
1139
1140
4.33M
  if (is_forward_scan_) {
1141
4.32M
    has_bound_key_ = !upper_doc_key.empty();
1142
4.32M
    if (has_bound_key_) {
1143
4.32M
      bound_key_ = std::move(upper_doc_key);
1144
4.32M
      db_iter_->SetUpperbound(bound_key_);
1145
4.32M
    }
1146
449
  } else {
1147
449
    has_bound_key_ = !lower_doc_key.empty();
1148
449
    if (has_bound_key_) {
1149
1
      bound_key_ = std::move(lower_doc_key);
1150
1
    }
1151
449
  }
1152
1153
4.33M
  if (!VERIFY_RESULT(InitScanChoices(doc_spec,
1154
4.33M
        !is_forward_scan_ && has_bound_key_ ? bound_key_ : lower_doc_key,
1155
4.32M
        is_forward_scan_ && has_bound_key_ ? bound_key_ : upper_doc_key))) {
1156
4.32M
    if (is_forward_scan_) {
1157
18.4E
      VLOG(3) << __PRETTY_FUNCTION__ << " Seeking to " << DocKey::DebugSliceToString(lower_doc_key);
1158
4.32M
      db_iter_->Seek(lower_doc_key);
1159
18.4E
    } else {
1160
      // TODO consider adding an operator bool to DocKey to use instead of empty() here.
1161
18.4E
      if (!upper_doc_key.empty()) {
1162
1
        db_iter_->PrevDocKey(upper_doc_key);
1163
18.4E
      } else {
1164
18.4E
        db_iter_->SeekToLastDocKey();
1165
18.4E
      }
1166
18.4E
    }
1167
4.32M
  }
1168
1169
4.33M
  return Status::OK();
1170
4.33M
}
1171
1172
3.72M
Status DocRowwiseIterator::Init(const QLScanSpec& spec) {
1173
3.72M
  return DoInit(dynamic_cast<const DocQLScanSpec&>(spec));
1174
3.72M
}
1175
1176
4.32M
Status DocRowwiseIterator::Init(const PgsqlScanSpec& spec) {
1177
4.32M
  ignore_ttl_ = true;
1178
4.32M
  return DoInit(dynamic_cast<const DocPgsqlScanSpec&>(spec));
1179
4.32M
}
1180
1181
34.1M
Status DocRowwiseIterator::AdvanceIteratorToNextDesiredRow() const {
1182
34.1M
  if (scan_choices_) {
1183
927k
    if (!IsNextStaticColumn()
1184
927k
        && !scan_choices_->CurrentTargetMatchesKey(row_key_)) {
1185
926k
      return scan_choices_->SeekToCurrentTarget(db_iter_.get());
1186
926k
    }
1187
33.2M
  } else {
1188
33.2M
    if (!is_forward_scan_) {
1189
0
      VLOG(4) << __PRETTY_FUNCTION__ << " setting as PrevDocKey";
1190
537
      db_iter_->PrevDocKey(row_key_);
1191
537
    }
1192
33.2M
  }
1193
1194
33.2M
  return Status::OK();
1195
34.1M
}
1196
1197
38.8M
Result<bool> DocRowwiseIterator::HasNext() const {
1198
2.61k
  VLOG(4) << __PRETTY_FUNCTION__;
1199
1200
  // Repeated HasNext calls (without Skip/NextRow in between) should be idempotent:
1201
  // 1. If a previous call failed we returned the same status.
1202
  // 2. If a row is already available (row_ready_), return true directly.
1203
  // 3. If we finished all target rows for the scan (done_), return false directly.
1204
38.8M
  RETURN_NOT_OK(has_next_status_);
1205
38.8M
  if (row_ready_) {
1206
    // If row is ready, then HasNext returns true.
1207
196
    return true;
1208
196
  }
1209
38.8M
  if (done_) {
1210
1.49k
    return false;
1211
1.49k
  }
1212
1213
38.8M
  bool doc_found = false;
1214
83.0M
  while (!doc_found) {
1215
49.1M
    if (!db_iter_->valid() || (scan_choices_ && scan_choices_->FinishedWithScanChoices())) {
1216
4.94M
      done_ = true;
1217
4.94M
      return false;
1218
4.94M
    }
1219
1220
44.2M
    const auto key_data = db_iter_->FetchKey();
1221
44.2M
    if (!key_data.ok()) {
1222
0
      has_next_status_ = key_data.status();
1223
0
      return has_next_status_;
1224
0
    }
1225
1226
15.7k
    VLOG(4) << "*fetched_key is " << SubDocKey::DebugSliceToString(key_data->key);
1227
44.2M
    if (debug_dump_) {
1228
0
      LOG(INFO) << __func__ << ", fetched key: " << SubDocKey::DebugSliceToString(key_data->key)
1229
0
                << ", " << key_data->key.ToDebugHexString();
1230
0
    }
1231
1232
    // The iterator is positioned by the previous GetSubDocument call (which places the iterator
1233
    // outside the previous doc_key). Ensure the iterator is pushed forward/backward indeed. We
1234
    // check it here instead of after GetSubDocument() below because we want to avoid the extra
1235
    // expensive FetchKey() call just to fetch and validate the key.
1236
44.2M
    if (!iter_key_.data().empty() &&
1237
38.0M
        (is_forward_scan_ ? iter_key_.CompareTo(key_data->key) >= 0
1238
392k
                          : iter_key_.CompareTo(key_data->key) <= 0)) {
1239
      // TODO -- could turn this check off in TPCC?
1240
0
      has_next_status_ = STATUS_SUBSTITUTE(Corruption, "Infinite loop detected at $0",
1241
0
                                           FormatSliceAsStr(key_data->key));
1242
0
      return has_next_status_;
1243
0
    }
1244
44.2M
    iter_key_.Reset(key_data->key);
1245
35.2k
    VLOG(4) << " Current iter_key_ is " << iter_key_;
1246
1247
44.2M
    const auto dockey_sizes = DocKey::EncodedHashPartAndDocKeySizes(iter_key_);
1248
44.2M
    if (!dockey_sizes.ok()) {
1249
0
      has_next_status_ = dockey_sizes.status();
1250
0
      return has_next_status_;
1251
0
    }
1252
44.2M
    row_hash_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->first);
1253
44.2M
    row_key_ = iter_key_.AsSlice().Prefix(dockey_sizes->second);
1254
1255
44.2M
    if (!DocKeyBelongsTo(row_key_, schema_) || // e.g in cotable, row may point outside table bounds
1256
44.2M
        (has_bound_key_ && is_forward_scan_ == (row_key_.compare(bound_key_) >= 0))) {
1257
3.92k
      done_ = true;
1258
3.92k
      return false;
1259
3.92k
    }
1260
1261
    // Prepare the DocKey to get the SubDocument. Trim the DocKey to contain just the primary key.
1262
44.2M
    Slice sub_doc_key = row_key_;
1263
18.4E
    VLOG(4) << " sub_doc_key part of iter_key_ is " << DocKey::DebugSliceToString(sub_doc_key);
1264
1265
44.2M
    bool is_static_column = IsNextStaticColumn();
1266
44.2M
    if (scan_choices_ && !is_static_column) {
1267
10.9M
      if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) {
1268
        // We must have seeked past the target key we are looking for (no result) so we can safely
1269
        // skip all scan targets between the current target and row key (excluding row_key_ itself).
1270
        // Update the target key and iterator and call HasNext again to try the next target.
1271
10.9M
        RETURN_NOT_OK(scan_choices_->SkipTargetsUpTo(row_key_));
1272
1273
        // We updated scan target above, if it goes past the row_key_ we will seek again, and
1274
        // process the found key in the next loop.
1275
10.9M
        if (!scan_choices_->CurrentTargetMatchesKey(row_key_)) {
1276
10.0M
          RETURN_NOT_OK(scan_choices_->SeekToCurrentTarget(db_iter_.get()));
1277
10.0M
          continue;
1278
34.2M
        }
1279
10.9M
      }
1280
      // We found a match for the target key or a static column, so we move on to getting the
1281
      // SubDocument.
1282
10.9M
    }
1283
34.2M
    if (doc_reader_ == nullptr) {
1284
6.01M
      doc_reader_ = std::make_unique<DocDBTableReader>(db_iter_.get(), deadline_);
1285
6.01M
      RETURN_NOT_OK(doc_reader_->UpdateTableTombstoneTime(sub_doc_key));
1286
6.01M
      if (!ignore_ttl_) {
1287
3.75M
        doc_reader_->SetTableTtl(schema_);
1288
3.75M
      }
1289
6.01M
    }
1290
1291
34.2M
    row_ = SubDocument();
1292
34.2M
    auto doc_found_res = doc_reader_->Get(sub_doc_key, &projection_subkeys_, &row_);
1293
34.2M
    if (!doc_found_res.ok()) {
1294
0
      has_next_status_ = doc_found_res.status();
1295
0
      return has_next_status_;
1296
34.2M
    } else {
1297
34.2M
      doc_found = *doc_found_res;
1298
34.2M
    }
1299
34.2M
    if (scan_choices_ && !is_static_column) {
1300
926k
      has_next_status_ = scan_choices_->DoneWithCurrentTarget();
1301
926k
      RETURN_NOT_OK(has_next_status_);
1302
926k
    }
1303
34.2M
    has_next_status_ = AdvanceIteratorToNextDesiredRow();
1304
34.2M
    RETURN_NOT_OK(has_next_status_);
1305
34.2M
  }
1306
33.8M
  row_ready_ = true;
1307
33.8M
  return true;
1308
38.8M
}
1309
1310
0
string DocRowwiseIterator::ToString() const {
1311
0
  return "DocRowwiseIterator";
1312
0
}
1313
1314
namespace {
1315
1316
// Set primary key column values (hashed or range columns) in a QL row value map.
1317
CHECKED_STATUS SetQLPrimaryKeyColumnValues(const Schema& schema,
1318
                                           const size_t begin_index,
1319
                                           const size_t column_count,
1320
                                           const char* column_type,
1321
                                           DocKeyDecoder* decoder,
1322
42.4M
                                           QLTableRow* table_row) {
1323
42.4M
  if (begin_index + column_count > schema.num_columns()) {
1324
0
    return STATUS_SUBSTITUTE(
1325
0
        Corruption,
1326
0
        "$0 primary key columns between positions $1 and $2 go beyond table columns $3",
1327
0
        column_type, begin_index, begin_index + column_count - 1, schema.num_columns());
1328
0
  }
1329
42.4M
  PrimitiveValue primitive_value;
1330
105M
  for (size_t i = 0, j = begin_index; i < column_count; i++, j++) {
1331
62.5M
    const auto ql_type = schema.column(j).type();
1332
62.5M
    QLTableColumn& column = table_row->AllocColumn(schema.column_id(j));
1333
62.5M
    RETURN_NOT_OK(decoder->DecodePrimitiveValue(&primitive_value));
1334
62.5M
    PrimitiveValue::ToQLValuePB(primitive_value, ql_type, &column.value);
1335
62.5M
  }
1336
42.4M
  return decoder->ConsumeGroupEnd();
1337
42.4M
}
1338
1339
} // namespace
1340
1341
86.5k
void DocRowwiseIterator::SkipRow() {
1342
86.5k
  row_ready_ = false;
1343
86.5k
}
1344
1345
7.08M
HybridTime DocRowwiseIterator::RestartReadHt() {
1346
7.08M
  auto max_seen_ht = db_iter_->max_seen_ht();
1347
7.08M
  if (max_seen_ht.is_valid() && max_seen_ht > db_iter_->read_time().read) {
1348
0
    VLOG(4) << "Restart read: " << max_seen_ht << ", original: " << db_iter_->read_time();
1349
42
    return max_seen_ht;
1350
42
  }
1351
7.08M
  return HybridTime::kInvalid;
1352
7.08M
}
1353
1354
50.4M
bool DocRowwiseIterator::IsNextStaticColumn() const {
1355
50.4M
  return schema_.has_statics() && row_hash_key_.end() + 1 == row_key_.end();
1356
50.4M
}
1357
1358
31.8M
Status DocRowwiseIterator::DoNextRow(const Schema& projection, QLTableRow* table_row) {
1359
1.17k
  VLOG(4) << __PRETTY_FUNCTION__;
1360
1361
31.8M
  if (PREDICT_FALSE(done_)) {
1362
0
    return STATUS(NotFound, "end of iter");
1363
0
  }
1364
1365
  // Ensure row is ready to be read. HasNext() must be called before reading the first row, or
1366
  // again after the previous row has been read or skipped.
1367
31.8M
  if (!row_ready_) {
1368
0
    return STATUS(InternalError, "next row has not be prepared for reading");
1369
0
  }
1370
1371
31.8M
  DocKeyDecoder decoder(row_key_);
1372
31.8M
  RETURN_NOT_OK(decoder.DecodeCotableId());
1373
31.8M
  RETURN_NOT_OK(decoder.DecodePgtableId());
1374
31.8M
  bool has_hash_components = VERIFY_RESULT(decoder.DecodeHashCode());
1375
1376
  // Populate the key column values from the doc key. The key column values in doc key were
1377
  // written in the same order as in the table schema (see DocKeyFromQLKey). If the range columns
1378
  // are present, read them also.
1379
31.8M
  if (has_hash_components) {
1380
16.7M
    RETURN_NOT_OK(SetQLPrimaryKeyColumnValues(
1381
16.7M
        schema_, 0, schema_.num_hash_key_columns(),
1382
16.7M
        "hash", &decoder, table_row));
1383
16.7M
  }
1384
31.8M
  if (!decoder.GroupEnded()) {
1385
25.7M
    RETURN_NOT_OK(SetQLPrimaryKeyColumnValues(
1386
25.7M
        schema_, schema_.num_hash_key_columns(), schema_.num_range_key_columns(),
1387
25.7M
        "range", &decoder, table_row));
1388
25.7M
  }
1389
1390
235M
  for (size_t i = projection.num_key_columns(); i < projection.num_columns(); i++) {
1391
203M
    const auto& column_id = projection.column_id(i);
1392
203M
    const auto ql_type = projection.column(i).type();
1393
203M
    const SubDocument* column_value = row_.GetChild(PrimitiveValue(column_id));
1394
203M
    if (column_value != nullptr) {
1395
203M
      QLTableColumn& column = table_row->AllocColumn(column_id);
1396
203M
      SubDocument::ToQLValuePB(*column_value, ql_type, &column.value);
1397
203M
      column.ttl_seconds = column_value->GetTtl();
1398
203M
      if (column_value->IsWriteTimeSet()) {
1399
181M
        column.write_time = column_value->GetWriteTime();
1400
181M
      }
1401
203M
    }
1402
203M
  }
1403
1404
31.8M
  row_ready_ = false;
1405
31.8M
  return Status::OK();
1406
31.8M
}
1407
1408
1.26k
bool DocRowwiseIterator::LivenessColumnExists() const {
1409
1.26k
  const SubDocument* subdoc = row_.GetChild(PrimitiveValue::kLivenessColumn);
1410
1.26k
  return subdoc != nullptr && subdoc->value_type() != ValueType::kInvalid;
1411
1.26k
}
1412
1413
3.63M
CHECKED_STATUS DocRowwiseIterator::GetNextReadSubDocKey(SubDocKey* sub_doc_key) const {
1414
3.63M
  if (db_iter_ == nullptr) {
1415
0
    return STATUS(Corruption, "Iterator not initialized.");
1416
0
  }
1417
1418
  // There are no more rows to fetch, so no next SubDocKey to read.
1419
3.63M
  if (!VERIFY_RESULT(HasNext())) {
1420
10
    DVLOG(3) << "No Next SubDocKey";
1421
1.75M
    return Status::OK();
1422
1.75M
  }
1423
1424
1.88M
  DocKey doc_key;
1425
1.88M
  RETURN_NOT_OK(doc_key.FullyDecodeFrom(row_key_));
1426
1.88M
  *sub_doc_key = SubDocKey(doc_key, read_time_.read);
1427
18.4E
  DVLOG(3) << "Next SubDocKey: " << sub_doc_key->ToString();
1428
1.88M
  return Status::OK();
1429
1.88M
}
1430
1431
15.6M
Result<Slice> DocRowwiseIterator::GetTupleId() const {
1432
  // Return tuple id without cotable id / pgtable id if any.
1433
15.6M
  Slice tuple_id = row_key_;
1434
15.6M
  if (tuple_id.starts_with(ValueTypeAsChar::kTableId)) {
1435
12.3M
    tuple_id.remove_prefix(1 + kUuidSize);
1436
3.30M
  } else if (tuple_id.starts_with(ValueTypeAsChar::kPgTableOid)) {
1437
34
    tuple_id.remove_prefix(1 + sizeof(PgTableOid));
1438
34
  }
1439
15.6M
  return tuple_id;
1440
15.6M
}
1441
1442
291k
Result<bool> DocRowwiseIterator::SeekTuple(const Slice& tuple_id) {
1443
  // If cotable id / pgtable id is present in the table schema, then
1444
  // we need to prepend it in the tuple key to seek.
1445
291k
  if (schema_.has_cotable_id() || schema_.has_pgtable_id()) {
1446
291k
    uint32_t size = schema_.has_pgtable_id() ? sizeof(PgTableOid) : kUuidSize;
1447
291k
    if (!tuple_key_) {
1448
86.1k
      tuple_key_.emplace();
1449
86.1k
      tuple_key_->Reserve(1 + size + tuple_id.size());
1450
1451
86.1k
      if (schema_.has_cotable_id()) {
1452
86.1k
        std::string bytes;
1453
86.1k
        schema_.cotable_id().EncodeToComparable(&bytes);
1454
86.1k
        tuple_key_->AppendValueType(ValueType::kTableId);
1455
86.1k
        tuple_key_->AppendRawBytes(bytes);
1456
13
      } else {
1457
13
        tuple_key_->AppendValueType(ValueType::kPgTableOid);
1458
13
        tuple_key_->AppendUInt32(schema_.pgtable_id());
1459
13
      }
1460
205k
    } else {
1461
205k
      tuple_key_->Truncate(1 + size);
1462
205k
    }
1463
291k
    tuple_key_->AppendRawBytes(tuple_id);
1464
291k
    db_iter_->Seek(*tuple_key_);
1465
124
  } else {
1466
124
    db_iter_->Seek(tuple_id);
1467
124
  }
1468
1469
291k
  iter_key_.Clear();
1470
291k
  row_ready_ = false;
1471
1472
582k
  return VERIFY_RESULT(HasNext()) && VERIFY_RESULT(GetTupleId()) == tuple_id;
1473
291k
}
1474
1475
}  // namespace docdb
1476
}  // namespace yb