YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_ql_scanspec.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_ql_scanspec.h"
15
16
#include "yb/common/common.pb.h"
17
#include "yb/common/ql_value.h"
18
#include "yb/common/schema.h"
19
20
#include "yb/docdb/doc_expr.h"
21
#include "yb/docdb/doc_key.h"
22
#include "yb/docdb/doc_scanspec_util.h"
23
#include "yb/docdb/value_type.h"
24
25
#include "yb/rocksdb/db/compaction.h"
26
27
#include "yb/util/result.h"
28
#include "yb/util/status_format.h"
29
30
DECLARE_bool(disable_hybrid_scan);
31
32
using std::vector;
33
34
namespace yb {
35
namespace docdb {
36
37
DocQLScanSpec::DocQLScanSpec(const Schema& schema,
38
                             const DocKey& doc_key,
39
                             const rocksdb::QueryId query_id,
40
                             const bool is_forward_scan)
41
    : QLScanSpec(nullptr, nullptr, is_forward_scan, std::make_shared<DocExprExecutor>()),
42
      range_bounds_(nullptr),
43
      schema_(schema),
44
      hashed_components_(nullptr),
45
      include_static_columns_(false),
46
      doc_key_(doc_key.Encode()),
47
29.9k
      query_id_(query_id) {
48
29.9k
}
49
50
DocQLScanSpec::DocQLScanSpec(
51
    const Schema& schema,
52
    const boost::optional<int32_t> hash_code,
53
    const boost::optional<int32_t> max_hash_code,
54
    std::reference_wrapper<const std::vector<PrimitiveValue>> hashed_components,
55
    const QLConditionPB* condition,
56
    const QLConditionPB* if_condition,
57
    const rocksdb::QueryId query_id,
58
    const bool is_forward_scan,
59
    const bool include_static_columns,
60
    const DocKey& start_doc_key)
61
    : QLScanSpec(condition, if_condition, is_forward_scan, std::make_shared<DocExprExecutor>()),
62
      range_bounds_(condition ? new QLScanRange(schema, *condition) : nullptr),
63
      schema_(schema),
64
      hash_code_(hash_code),
65
      max_hash_code_(max_hash_code),
66
      hashed_components_(&hashed_components.get()),
67
      include_static_columns_(include_static_columns),
68
      start_doc_key_(start_doc_key.empty() ? KeyBytes() : start_doc_key.Encode()),
69
      lower_doc_key_(bound_key(true)),
70
      upper_doc_key_(bound_key(false)),
71
7.29M
      query_id_(query_id) {
72
73
7.29M
    if (range_bounds_) {
74
62.1k
        range_bounds_indexes_ = range_bounds_->GetColIds();
75
62.1k
    }
76
77
78
  // If the hash key is fixed and we have range columns with IN condition, try to construct the
79
  // exact list of range options to scan for.
80
7.29M
  if (!hashed_components_->empty() && 
schema_.num_range_key_columns() > 07.23M
&&
81
7.29M
      
range_bounds_7.21M
&&
range_bounds_->has_in_range_options()15.2k
) {
82
540
    DCHECK(condition);
83
540
    range_options_ =
84
540
        std::make_shared<std::vector<std::vector<PrimitiveValue>>>(schema_.num_range_key_columns());
85
540
    InitRangeOptions(*condition);
86
87
540
    if (FLAGS_disable_hybrid_scan) {
88
        // Range options are only valid if all range columns
89
        // are set (i.e. have one or more options).
90
0
        for (size_t i = 0; i < schema_.num_range_key_columns(); i++) {
91
0
            if ((*range_options_)[i].empty()) {
92
0
                range_options_ = nullptr;
93
0
                break;
94
0
            }
95
0
        }
96
0
    }
97
540
  }
98
7.29M
}
99
100
1.14k
void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) {
101
1.14k
  size_t num_hash_cols = schema_.num_hash_key_columns();
102
1.14k
  switch (condition.op()) {
103
539
    case QLOperator::QL_OP_AND:
104
604
      for (const auto& operand : condition.operands()) {
105
604
        DCHECK(operand.has_condition());
106
604
        InitRangeOptions(operand.condition());
107
604
      }
108
539
      break;
109
110
1
    case QLOperator::QL_OP_EQUAL:
111
588
    case QLOperator::QL_OP_IN: {
112
588
      DCHECK_EQ(condition.operands_size(), 2);
113
      // Skip any condition where LHS is not a column (e.g. subscript columns: 'map[k] = v')
114
588
      if (condition.operands(0).expr_case() != QLExpressionPB::kColumnId) {
115
0
        return;
116
0
      }
117
118
      // Skip any RHS expressions that are not evaluated yet.
119
588
      if (condition.operands(1).expr_case() != QLExpressionPB::kValue) {
120
0
        return;
121
0
      }
122
123
588
      ColumnId col_id = ColumnId(condition.operands(0).column_id());
124
588
      int col_idx = schema_.find_column_by_id(col_id);
125
126
      // Skip any non-range columns.
127
588
      if (!schema_.is_range_column(col_idx)) {
128
0
        return;
129
0
      }
130
131
588
      SortingType sortingType = schema_.column(col_idx).sorting_type();
132
588
      range_options_indexes_.emplace_back(condition.operands(0).column_id());
133
134
588
      if (condition.op() == QL_OP_EQUAL) {
135
1
        auto pv = PrimitiveValue::FromQLValuePB(condition.operands(1).value(), sortingType);
136
1
        (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv));
137
587
      } else { // QL_OP_IN
138
587
        DCHECK_EQ(condition.op(), QL_OP_IN);
139
587
        DCHECK(condition.operands(1).value().has_list_value());
140
587
        const auto &options = condition.operands(1).value().list_value();
141
587
        int opt_size = options.elems_size();
142
587
        (*range_options_)[col_idx - num_hash_cols].reserve(opt_size);
143
144
        // IN arguments should have been de-duplicated and ordered ascendingly by the executor.
145
587
        bool is_reverse_order = is_forward_scan_ ^ (sortingType == SortingType::kAscending);
146
2.20k
        for (int i = 0; i < opt_size; 
i++1.61k
) {
147
1.61k
          int elem_idx = is_reverse_order ? 
opt_size - i - 1249
:
i1.36k
;
148
1.61k
          const auto &elem = options.elems(elem_idx);
149
1.61k
          auto pv = PrimitiveValue::FromQLValuePB(elem, sortingType);
150
1.61k
          (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv));
151
1.61k
        }
152
587
      }
153
154
588
      break;
155
588
    }
156
157
15
    default:
158
      // We don't support any other operators at this level.
159
15
      break;
160
1.14k
  }
161
1.14k
}
162
163
14.5M
KeyBytes DocQLScanSpec::bound_key(const bool lower_bound) const {
164
14.5M
  KeyBytes result;
165
14.5M
  auto encoder = DocKeyEncoder(&result).CotableId(Uuid::Nil());
166
167
  // If no hashed_component use hash lower/upper bounds if set.
168
14.5M
  if (hashed_components_->empty()) {
169
    // use lower bound hash code if set in request (for scans using token)
170
170k
    if (lower_bound && 
hash_code_85.4k
) {
171
34.7k
      encoder.HashAndRange(*hash_code_, {PrimitiveValue(ValueType::kLowest)}, {});
172
34.7k
    }
173
    // use upper bound hash code if set in request (for scans using token)
174
170k
    if (!lower_bound && 
max_hash_code_85.4k
) {
175
138
      encoder.HashAndRange(*max_hash_code_, {PrimitiveValue(ValueType::kHighest)}, {});
176
138
    }
177
170k
    return result;
178
170k
  }
179
180
  // If hash_components are non-empty then hash_code and max_hash_code must both be set and equal.
181
14.3M
  DCHECK(hash_code_);
182
14.3M
  DCHECK(max_hash_code_);
183
14.3M
  DCHECK_EQ(*hash_code_, *max_hash_code_);
184
14.3M
  auto hash_code = static_cast<DocKeyHash>(*hash_code_);
185
14.3M
  encoder.HashAndRange(hash_code, *hashed_components_, range_components(lower_bound));
186
14.3M
  return result;
187
14.5M
}
188
189
28.9M
std::vector<PrimitiveValue> DocQLScanSpec::range_components(const bool lower_bound) const {
190
28.9M
  return GetRangeKeyScanSpec(
191
28.9M
      schema_, nullptr /* prefixed_range_components */,
192
28.9M
      range_bounds_.get(), lower_bound, include_static_columns_);
193
28.9M
}
194
195
namespace {
196
197
template <class Predicate>
198
380
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
380
  if (bound_key.empty()) {
200
24
    return true;
201
24
  }
202
356
  return predicate(bound_key, key);
203
380
}
doc_ql_scanspec.cc:bool yb::docdb::(anonymous namespace)::KeySatisfiesBound<std::__1::less_equal<void> >(yb::docdb::KeyBytes const&, yb::docdb::KeyBytes const&, std::__1::less_equal<void> const&)
Line
Count
Source
198
190
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
190
  if (bound_key.empty()) {
200
0
    return true;
201
0
  }
202
190
  return predicate(bound_key, key);
203
190
}
doc_ql_scanspec.cc:bool yb::docdb::(anonymous namespace)::KeySatisfiesBound<std::__1::greater_equal<void> >(yb::docdb::KeyBytes const&, yb::docdb::KeyBytes const&, std::__1::greater_equal<void> const&)
Line
Count
Source
198
190
bool KeySatisfiesBound(const KeyBytes& key, const KeyBytes& bound_key, const Predicate& predicate) {
199
190
  if (bound_key.empty()) {
200
24
    return true;
201
24
  }
202
166
  return predicate(bound_key, key);
203
190
}
204
205
190
bool KeyWithinRange(const KeyBytes& key, const KeyBytes& lower_key, const KeyBytes& upper_key) {
206
  // Verify that the key is within the lower/upper bound, which is either:
207
  // 1. the bound is empty,
208
  // 2. the key is <= or >= the fully-specified bound.
209
190
  return KeySatisfiesBound(key, lower_key, std::less_equal<>()) &&
210
190
         KeySatisfiesBound(key, upper_key, std::greater_equal<>());
211
190
}
212
213
} // namespace
214
215
14.5M
Result<KeyBytes> DocQLScanSpec::Bound(const bool lower_bound) const {
216
  // If a full doc key is specified, that is the exactly doc to scan. Otherwise, compute the
217
  // lower/upper bound doc keys to scan from the range.
218
14.5M
  if (!doc_key_.empty()) {
219
59.7k
    if (lower_bound) {
220
29.8k
      return doc_key_;
221
29.8k
    }
222
29.9k
    KeyBytes result = doc_key_;
223
    // We add +inf as an extra component to make sure this is greater than all keys in range.
224
    // For lower bound, this is true already, because dockey + suffix is > dockey
225
29.9k
    result.AppendValueTypeBeforeGroupEnd(ValueType::kHighest);
226
29.9k
    return std::move(result);
227
59.7k
  }
228
229
  // Otherwise, if we do not have a paging state (start_doc_key) just use the lower/upper bounds.
230
14.5M
  if (start_doc_key_.empty()) {
231
14.5M
    if (lower_bound) {
232
      // For lower-bound key, if static columns should be included in the scan, the lower-bound key
233
      // should be the hash key with no range components in order to include the static columns.
234
7.28M
      if (
!include_static_columns_7.26M
) {
235
7.28M
        return lower_doc_key_;
236
7.28M
      }
237
238
18.4E
      KeyBytes result = lower_doc_key_;
239
240
      // For lower-bound key, if static columns should be included in the scan, the lower-bound key
241
      // should be the hash key with no range components in order to include the static columns.
242
18.4E
      RETURN_NOT_OK(ClearRangeComponents(&result, AllowSpecial::kTrue));
243
244
18.4E
      return result;
245
18.4E
    } else {
246
7.24M
      return upper_doc_key_;
247
7.24M
    }
248
14.5M
  }
249
250
  // If we have a start_doc_key, we need to use it as a starting point (lower bound for forward
251
  // scan, upper bound for reverse scan).
252
12.4k
  if (range_bounds_ != nullptr &&
253
12.4k
        
!KeyWithinRange(start_doc_key_, lower_doc_key_, upper_doc_key_)190
) {
254
0
      return STATUS_FORMAT(
255
0
          Corruption, "Invalid start_doc_key: $0. Range: $1, $2",
256
0
          start_doc_key_, lower_doc_key_, upper_doc_key_);
257
0
  }
258
259
  // Paging state + forward scan.
260
12.4k
  if (is_forward_scan_) {
261
1.35k
    return lower_bound ? 
start_doc_key_678
:
upper_doc_key_678
;
262
1.35k
  }
263
264
  // Paging state + reverse scan.
265
  // For reverse scans static columns should be read by a separate iterator.
266
11.0k
  DCHECK(!include_static_columns_);
267
11.0k
  if (lower_bound) {
268
72
    return lower_doc_key_;
269
72
  }
270
271
  // If using start_doc_key_ as upper bound append +inf as extra component to ensure it includes
272
  // the target start_doc_key itself (dockey + suffix < dockey + kHighest).
273
  // For lower bound, this is true already, because dockey + suffix is > dockey.
274
11.0k
  KeyBytes result = start_doc_key_;
275
11.0k
  result.AppendValueTypeBeforeGroupEnd(ValueType::kHighest);
276
11.0k
  return result;
277
11.0k
}
278
279
rocksdb::UserBoundaryTag TagForRangeComponent(size_t index);
280
281
namespace {
282
283
std::vector<KeyBytes> EncodePrimitiveValues(const std::vector<PrimitiveValue>& source,
284
14.6M
    size_t min_size) {
285
14.6M
  size_t size = source.size();
286
14.6M
  std::vector<KeyBytes> result(std::max(min_size, size));
287
22.2M
  for (size_t i = 0; i != size; 
++i7.56M
) {
288
7.56M
    if (source[i].value_type() != ValueType::kTombstone) {
289
7.54M
      source[i].AppendToKey(&result[i]);
290
7.54M
    }
291
7.56M
  }
292
14.6M
  return result;
293
14.6M
}
294
295
11.6M
Slice ValueOrEmpty(const Slice* slice) { return slice ? 
*slice11.4M
:
Slice()201k
; }
296
297
// Checks that lhs >= rhs, empty values means positive and negative infinity appropriately.
298
11.5M
bool GreaterOrEquals(const Slice& lhs, const Slice& rhs) {
299
11.5M
  if (lhs.empty() || 
rhs.empty()11.4M
) {
300
5.70M
    return true;
301
5.70M
  }
302
5.85M
  return lhs.compare(rhs) >= 0;
303
11.5M
}
304
305
class RangeBasedFileFilter : public rocksdb::ReadFileFilter {
306
 public:
307
  RangeBasedFileFilter(const std::vector<PrimitiveValue>& lower_bounds,
308
      const std::vector<PrimitiveValue>& upper_bounds)
309
      : lower_bounds_(EncodePrimitiveValues(lower_bounds, upper_bounds.size())),
310
7.33M
      upper_bounds_(EncodePrimitiveValues(upper_bounds, lower_bounds.size())) {
311
7.33M
  }
312
313
5.67M
  bool Filter(const rocksdb::FdWithBoundaries& file) const override {
314
11.4M
    for (size_t i = 0; i != lower_bounds_.size(); 
++i5.73M
) {
315
5.80M
      auto lower_bound = lower_bounds_[i].AsSlice();
316
5.80M
      auto upper_bound = upper_bounds_[i].AsSlice();
317
5.80M
      rocksdb::UserBoundaryTag tag = TagForRangeComponent(i);
318
5.80M
      auto smallest = ValueOrEmpty(file.smallest.user_value_with_tag(tag));
319
5.80M
      auto largest = ValueOrEmpty(file.largest.user_value_with_tag(tag));
320
5.80M
      if (!GreaterOrEquals(upper_bound, smallest) || 
!GreaterOrEquals(largest, lower_bound)5.77M
) {
321
71.3k
        return false;
322
71.3k
      }
323
5.80M
    }
324
5.60M
    return true;
325
5.67M
  }
326
 private:
327
  std::vector<KeyBytes> lower_bounds_;
328
  std::vector<KeyBytes> upper_bounds_;
329
};
330
331
} // namespace
332
333
7.30M
std::shared_ptr<rocksdb::ReadFileFilter> DocQLScanSpec::CreateFileFilter() const {
334
7.30M
  auto lower_bound = range_components(true);
335
7.30M
  auto upper_bound = range_components(false);
336
7.30M
  if (lower_bound.empty() && 
upper_bound.empty()7.28M
) {
337
0
    return std::shared_ptr<rocksdb::ReadFileFilter>();
338
7.30M
  } else {
339
7.30M
    return std::make_shared<RangeBasedFileFilter>(std::move(lower_bound), std::move(upper_bound));
340
7.30M
  }
341
7.30M
}
342
343
7.31M
Result<KeyBytes> DocQLScanSpec::LowerBound() const {
344
7.31M
  return Bound(true /* lower_bound */);
345
7.31M
}
346
347
7.30M
Result<KeyBytes> DocQLScanSpec::UpperBound() const {
348
7.30M
  return Bound(false /* upper_bound */);
349
7.30M
}
350
351
48.3k
const DocKey& DocQLScanSpec::DefaultStartDocKey() {
352
48.3k
  static const DocKey result;
353
48.3k
  return result;
354
48.3k
}
355
356
}  // namespace docdb
357
}  // namespace yb