YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_ql_scanspec.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_ql_scanspec.h"
15
16
#include "yb/common/common.pb.h"
17
#include "yb/common/ql_value.h"
18
#include "yb/common/schema.h"
19
20
#include "yb/docdb/doc_expr.h"
21
#include "yb/docdb/doc_key.h"
22
#include "yb/docdb/doc_scanspec_util.h"
23
#include "yb/docdb/value_type.h"
24
25
#include "yb/rocksdb/db/compaction.h"
26
27
#include "yb/util/result.h"
28
#include "yb/util/status_format.h"
29
30
DECLARE_bool(disable_hybrid_scan);
31
32
using std::vector;
33
34
namespace yb {
35
namespace docdb {
36
37
DocQLScanSpec::DocQLScanSpec(const Schema& schema,
38
                             const DocKey& doc_key,
39
                             const rocksdb::QueryId query_id,
40
                             const bool is_forward_scan)
41
    : QLScanSpec(nullptr, nullptr, is_forward_scan, std::make_shared<DocExprExecutor>()),
42
      range_bounds_(nullptr),
43
      schema_(schema),
44
      hashed_components_(nullptr),
45
      include_static_columns_(false),
46
      doc_key_(doc_key.Encode()),
47
30.2k
      query_id_(query_id) {
48
30.2k
}
49
50
DocQLScanSpec::DocQLScanSpec(
51
    const Schema& schema,
52
    const boost::optional<int32_t> hash_code,
53
    const boost::optional<int32_t> max_hash_code,
54
    std::reference_wrapper<const std::vector<PrimitiveValue>> hashed_components,
55
    const QLConditionPB* condition,
56
    const QLConditionPB* if_condition,
57
    const rocksdb::QueryId query_id,
58
    const bool is_forward_scan,
59
    const bool include_static_columns,
60
    const DocKey& start_doc_key)
61
    : QLScanSpec(condition, if_condition, is_forward_scan, std::make_shared<DocExprExecutor>()),
62
      range_bounds_(condition ? new QLScanRange(schema, *condition) : nullptr),
63
      schema_(schema),
64
      hash_code_(hash_code),
65
      max_hash_code_(max_hash_code),
66
      hashed_components_(&hashed_components.get()),
67
      include_static_columns_(include_static_columns),
68
      start_doc_key_(start_doc_key.empty() ? KeyBytes() : start_doc_key.Encode()),
69
      lower_doc_key_(bound_key(true)),
70
      upper_doc_key_(bound_key(false)),
71
3.68M
      query_id_(query_id) {
72
73
3.68M
    if (range_bounds_) {
74
38.7k
        range_bounds_indexes_ = range_bounds_->GetColIds();
75
38.7k
    }
76
77
78
  // If the hash key is fixed and we have range columns with IN condition, try to construct the
79
  // exact list of range options to scan for.
80
3.68M
  if (!hashed_components_->empty() && schema_.num_range_key_columns() > 0 &&
81
3.61M
      range_bounds_ && range_bounds_->has_in_range_options()) {
82
539
    DCHECK(condition);
83
539
    range_options_ =
84
539
        std::make_shared<std::vector<std::vector<PrimitiveValue>>>(schema_.num_range_key_columns());
85
539
    InitRangeOptions(*condition);
86
87
539
    if (FLAGS_disable_hybrid_scan) {
88
        // Range options are only valid if all range columns
89
        // are set (i.e. have one or more options).
90
0
        for (size_t i = 0; i < schema_.num_range_key_columns(); i++) {
91
0
            if ((*range_options_)[i].empty()) {
92
0
                range_options_ = nullptr;
93
0
                break;
94
0
            }
95
0
        }
96
0
    }
97
539
  }
98
3.68M
}
99
100
1.12k
void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) {
101
1.12k
  size_t num_hash_cols = schema_.num_hash_key_columns();
102
1.12k
  switch (condition.op()) {
103
535
    case QLOperator::QL_OP_AND:
104
598
      for (const auto& operand : condition.operands()) {
105
598
        DCHECK(operand.has_condition());
106
598
        InitRangeOptions(operand.condition());
107
598
      }
108
535
      break;
109
110
1
    case QLOperator::QL_OP_EQUAL:
111
581
    case QLOperator::QL_OP_IN: {
112
581
      DCHECK_EQ(condition.operands_size(), 2);
113
      // Skip any condition where LHS is not a column (e.g. subscript columns: 'map[k] = v')
114
581
      if (condition.operands(0).expr_case() != QLExpressionPB::kColumnId) {
115
0
        return;
116
0
      }
117
118
      // Skip any RHS expressions that are not evaluated yet.
119
581
      if (condition.operands(1).expr_case() != QLExpressionPB::kValue) {
120
0
        return;
121
0
      }
122
123
581
      ColumnId col_id = ColumnId(condition.operands(0).column_id());
124
581
      int col_idx = schema_.find_column_by_id(col_id);
125
126
      // Skip any non-range columns.
127
581
      if (!schema_.is_range_column(col_idx)) {
128
0
        return;
129
0
      }
130
131
581
      SortingType sortingType = schema_.column(col_idx).sorting_type();
132
581
      range_options_indexes_.emplace_back(condition.operands(0).column_id());
133
134
581
      if (condition.op() == QL_OP_EQUAL) {
135
1
        auto pv = PrimitiveValue::FromQLValuePB(condition.operands(1).value(), sortingType);
136
1
        (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv));
137
580
      } else { // QL_OP_IN
138
580
        DCHECK_EQ(condition.op(), QL_OP_IN);
139
580
        DCHECK(condition.operands(1).value().has_list_value());
140
580
        const auto &options = condition.operands(1).value().list_value();
141
580
        int opt_size = options.elems_size();
142
580
        (*range_options_)[col_idx - num_hash_cols].reserve(opt_size);
143
144
        // IN arguments should have been de-duplicated and ordered ascendingly by the executor.
145
580
        bool is_reverse_order = is_forward_scan_ ^ (sortingType == SortingType::kAscending);
146
2.19k
        for (int i = 0; i < opt_size; i++) {
147
1.36k
          int elem_idx = is_reverse_order ? opt_size - i - 1 : i;
148
1.61k
          const auto &elem = options.elems(elem_idx);
149
1.61k
          auto pv = PrimitiveValue::FromQLValuePB(elem, sortingType);
150
1.61k
          (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv));
151
1.61k
        }
152
580
      }
153
154
581
      break;
155
581
    }
156
157
15
    default:
158
      // We don't support any other operators at this level.
159
15
      break;
160
1.12k
  }
161
1.12k
}
162
163
7.35M
KeyBytes DocQLScanSpec::bound_key(const bool lower_bound) const {
164
7.35M
  KeyBytes result;
165
7.35M
  auto encoder = DocKeyEncoder(&result).CotableId(Uuid::Nil());
166
167
  // If no hashed_component use hash lower/upper bounds if set.
168
7.35M
  if (hashed_components_->empty()) {
169
    // use lower bound hash code if set in request (for scans using token)
170
138k
    if (lower_bound && hash_code_) {
171
34.0k
      encoder.HashAndRange(*hash_code_, {PrimitiveValue(ValueType::kLowest)}, {});
172
34.0k
    }
173
    // use upper bound hash code if set in request (for scans using token)
174
138k
    if (!lower_bound && max_hash_code_) {
175
28
      encoder.HashAndRange(*max_hash_code_, {PrimitiveValue(ValueType::kHighest)}, {});
176
28
    }
177
138k
    return result;
178
138k
  }
179
180
  // If hash_components are non-empty then hash_code and max_hash_code must both be set and equal.
181
7.21M
  DCHECK(hash_code_);
182
7.21M
  DCHECK(max_hash_code_);
183
7.21M
  DCHECK_EQ(*hash_code_, *max_hash_code_);
184
7.21M
  auto hash_code = static_cast<DocKeyHash>(*hash_code_);
185
7.21M
  encoder.HashAndRange(hash_code, *hashed_components_, range_components(lower_bound));
186
7.21M
  return result;
187
7.21M
}
188
189
14.6M
std::vector<PrimitiveValue> DocQLScanSpec::range_components(const bool lower_bound) const {
190
14.6M
  return GetRangeKeyScanSpec(
191
14.6M
      schema_, nullptr /* prefixed_range_components */,
192
14.6M
      range_bounds_.get(), lower_bound, include_static_columns_);
193
14.6M
}
194
195
namespace {
196
197
template <class Predicate>
198
380
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
380
  if (bound_key.empty()) {
200
24
    return true;
201
24
  }
202
356
  return predicate(bound_key, key);
203
356
}
doc_ql_scanspec.cc:_ZN2yb5docdb12_GLOBAL__N_117KeySatisfiesBoundINSt3__110less_equalIvEEEEbRKNS0_8KeyBytesES8_RKT_
Line
Count
Source
198
190
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
190
  if (bound_key.empty()) {
200
0
    return true;
201
0
  }
202
190
  return predicate(bound_key, key);
203
190
}
doc_ql_scanspec.cc:_ZN2yb5docdb12_GLOBAL__N_117KeySatisfiesBoundINSt3__113greater_equalIvEEEEbRKNS0_8KeyBytesES8_RKT_
Line
Count
Source
198
190
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
190
  if (bound_key.empty()) {
200
24
    return true;
201
24
  }
202
166
  return predicate(bound_key, key);
203
166
}
204
205
190
bool KeyWithinRange(const KeyBytes& key, const KeyBytes& lower_key, const KeyBytes& upper_key) {
206
  // Verify that the key is within the lower/upper bound, which is either:
207
  // 1. the bound is empty,
208
  // 2. the key is <= or >= the fully-specified bound.
209
190
  return KeySatisfiesBound(key, lower_key, std::less_equal<>()) &&
210
190
         KeySatisfiesBound(key, upper_key, std::greater_equal<>());
211
190
}
212
213
} // namespace
214
215
7.41M
Result<KeyBytes> DocQLScanSpec::Bound(const bool lower_bound) const {
216
  // If a full doc key is specified, that is the exactly doc to scan. Otherwise, compute the
217
  // lower/upper bound doc keys to scan from the range.
218
7.41M
  if (!doc_key_.empty()) {
219
60.4k
    if (lower_bound) {
220
30.2k
      return doc_key_;
221
30.2k
    }
222
30.2k
    KeyBytes result = doc_key_;
223
    // We add +inf as an extra component to make sure this is greater than all keys in range.
224
    // For lower bound, this is true already, because dockey + suffix is > dockey
225
30.2k
    result.AppendValueTypeBeforeGroupEnd(ValueType::kHighest);
226
30.2k
    return std::move(result);
227
30.2k
  }
228
229
  // Otherwise, if we do not have a paging state (start_doc_key) just use the lower/upper bounds.
230
7.35M
  if (start_doc_key_.empty()) {
231
7.33M
    if (lower_bound) {
232
      // For lower-bound key, if static columns should be included in the scan, the lower-bound key
233
      // should be the hash key with no range components in order to include the static columns.
234
3.66M
      if (!include_static_columns_) {
235
3.66M
        return lower_doc_key_;
236
3.66M
      }
237
238
18.4E
      KeyBytes result = lower_doc_key_;
239
240
      // For lower-bound key, if static columns should be included in the scan, the lower-bound key
241
      // should be the hash key with no range components in order to include the static columns.
242
18.4E
      RETURN_NOT_OK(ClearRangeComponents(&result, AllowSpecial::kTrue));
243
244
18.4E
      return result;
245
3.67M
    } else {
246
3.67M
      return upper_doc_key_;
247
3.67M
    }
248
23.2k
  }
249
250
  // If we have a start_doc_key, we need to use it as a starting point (lower bound for forward
251
  // scan, upper bound for reverse scan).
252
23.2k
  if (range_bounds_ != nullptr &&
253
190
        !KeyWithinRange(start_doc_key_, lower_doc_key_, upper_doc_key_)) {
254
0
      return STATUS_FORMAT(
255
0
          Corruption, "Invalid start_doc_key: $0. Range: $1, $2",
256
0
          start_doc_key_, lower_doc_key_, upper_doc_key_);
257
0
  }
258
259
  // Paging state + forward scan.
260
23.2k
  if (is_forward_scan_) {
261
1.10k
    return lower_bound ? start_doc_key_ : upper_doc_key_;
262
2.20k
  }
263
264
  // Paging state + reverse scan.
265
  // For reverse scans static columns should be read by a separate iterator.
266
21.0k
  DCHECK(!include_static_columns_);
267
21.0k
  if (lower_bound) {
268
72
    return lower_doc_key_;
269
72
  }
270
271
  // If using start_doc_key_ as upper bound append +inf as extra component to ensure it includes
272
  // the target start_doc_key itself (dockey + suffix < dockey + kHighest).
273
  // For lower bound, this is true already, because dockey + suffix is > dockey.
274
20.9k
  KeyBytes result = start_doc_key_;
275
20.9k
  result.AppendValueTypeBeforeGroupEnd(ValueType::kHighest);
276
20.9k
  return result;
277
20.9k
}
278
279
rocksdb::UserBoundaryTag TagForRangeComponent(size_t index);
280
281
namespace {
282
283
std::vector<KeyBytes> EncodePrimitiveValues(const std::vector<PrimitiveValue>& source,
284
7.44M
    size_t min_size) {
285
7.44M
  size_t size = source.size();
286
7.44M
  std::vector<KeyBytes> result(std::max(min_size, size));
287
11.3M
  for (size_t i = 0; i != size; ++i) {
288
3.87M
    if (source[i].value_type() != ValueType::kTombstone) {
289
3.86M
      source[i].AppendToKey(&result[i]);
290
3.86M
    }
291
3.87M
  }
292
7.44M
  return result;
293
7.44M
}
294
295
5.59M
Slice ValueOrEmpty(const Slice* slice) { return slice ? *slice : Slice(); }
296
297
// Checks that lhs >= rhs, empty values means positive and negative infinity appropriately.
298
5.59M
bool GreaterOrEquals(const Slice& lhs, const Slice& rhs) {
299
5.59M
  if (lhs.empty() || rhs.empty()) {
300
2.78M
    return true;
301
2.78M
  }
302
2.81M
  return lhs.compare(rhs) >= 0;
303
2.81M
}
304
305
class RangeBasedFileFilter : public rocksdb::ReadFileFilter {
306
 public:
307
  RangeBasedFileFilter(const std::vector<PrimitiveValue>& lower_bounds,
308
      const std::vector<PrimitiveValue>& upper_bounds)
309
      : lower_bounds_(EncodePrimitiveValues(lower_bounds, upper_bounds.size())),
310
3.73M
      upper_bounds_(EncodePrimitiveValues(upper_bounds, lower_bounds.size())) {
311
3.73M
  }
312
313
2.74M
  bool Filter(const rocksdb::FdWithBoundaries& file) const override {
314
5.54M
    for (size_t i = 0; i != lower_bounds_.size(); ++i) {
315
2.80M
      auto lower_bound = lower_bounds_[i].AsSlice();
316
2.80M
      auto upper_bound = upper_bounds_[i].AsSlice();
317
2.80M
      rocksdb::UserBoundaryTag tag = TagForRangeComponent(i);
318
2.80M
      auto smallest = ValueOrEmpty(file.smallest.user_value_with_tag(tag));
319
2.80M
      auto largest = ValueOrEmpty(file.largest.user_value_with_tag(tag));
320
2.80M
      if (!GreaterOrEquals(upper_bound, smallest) || !GreaterOrEquals(largest, lower_bound)) {
321
1.61k
        return false;
322
1.61k
      }
323
2.80M
    }
324
2.73M
    return true;
325
2.74M
  }
326
 private:
327
  std::vector<KeyBytes> lower_bounds_;
328
  std::vector<KeyBytes> upper_bounds_;
329
};
330
331
} // namespace
332
333
3.70M
std::shared_ptr<rocksdb::ReadFileFilter> DocQLScanSpec::CreateFileFilter() const {
334
3.70M
  auto lower_bound = range_components(true);
335
3.70M
  auto upper_bound = range_components(false);
336
3.70M
  if (lower_bound.empty() && upper_bound.empty()) {
337
0
    return std::shared_ptr<rocksdb::ReadFileFilter>();
338
3.70M
  } else {
339
3.70M
    return std::make_shared<RangeBasedFileFilter>(std::move(lower_bound), std::move(upper_bound));
340
3.70M
  }
341
3.70M
}
342
343
3.72M
Result<KeyBytes> DocQLScanSpec::LowerBound() const {
344
3.72M
  return Bound(true /* lower_bound */);
345
3.72M
}
346
347
3.69M
Result<KeyBytes> DocQLScanSpec::UpperBound() const {
348
3.69M
  return Bound(false /* upper_bound */);
349
3.69M
}
350
351
26.2k
const DocKey& DocQLScanSpec::DefaultStartDocKey() {
352
26.2k
  static const DocKey result;
353
26.2k
  return result;
354
26.2k
}
355
356
}  // namespace docdb
357
}  // namespace yb