YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/yql/cql/ql/exec/eval_where.cc
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//--------------------------------------------------------------------------------------------------
15
16
#include "yb/common/ql_rowblock.h"
17
#include "yb/common/ql_value.h"
18
19
#include "yb/common/schema.h"
20
21
#include "yb/util/result.h"
22
#include "yb/util/status_format.h"
23
#include "yb/util/yb_partition.h"
24
25
#include "yb/yql/cql/ql/exec/exec_context.h"
26
#include "yb/yql/cql/ql/exec/executor.h"
27
#include "yb/yql/cql/ql/ptree/column_arg.h"
28
#include "yb/yql/cql/ql/ptree/column_desc.h"
29
#include "yb/yql/cql/ql/ptree/pt_expr.h"
30
#include "yb/yql/cql/ql/ptree/pt_select.h"
31
32
namespace yb {
33
namespace ql {
34
35
//--------------------------------------------------------------------------------------------------
36
37
// Fills a write request from the WHERE clause of a DML statement.
//
// Every entry of key_where_ops is converted to an expression, evaluated against an empty row and
// appended to the request as a hashed or a range column value. Entries of where_ops are AND-ed
// together into the request's where expression; they are only accepted for DELETE requests.
// subcol_where_ops must be empty. Violations of these two rules abort through CHECK.
//
// Returns the first non-OK status produced while converting or evaluating an expression,
// Status::OK() otherwise.
Status Executor::WhereClauseToPB(QLWriteRequestPB *req,
                                 const MCVector<ColumnOp>& key_where_ops,
                                 const MCList<ColumnOp>& where_ops,
                                 const MCList<SubscriptedColumnOp>& subcol_where_ops) {
  // Primary key columns: hash columns and range columns go to separate lists of the request.
  for (const auto& key_op : key_where_ops) {
    const ColumnDesc *column = key_op.desc();
    QLExpressionPB *target_pb = nullptr;
    if (column->is_hash()) {
      target_pb = req->add_hashed_column_values();
    } else {
      if (!column->is_primary()) {
        LOG(FATAL) << "Unexpected non primary key column in this context";
      }
      target_pb = req->add_range_column_values();
    }
    RETURN_NOT_OK(PTExprToPB(key_op.expr(), target_pb));
    RETURN_NOT_OK(EvalExpr(target_pb, QLTableRow::empty_row()));
  }

  // Conditions on the remaining columns are restricted for write requests.
  CHECK(where_ops.empty() || req->type() == QLWriteRequestPB::QL_STMT_DELETE)
      << "Server only supports range operations in write requests for deletes";

  CHECK(subcol_where_ops.empty())
      << "Server doesn't support sub-column conditions in where clause for write requests";

  // Nothing else to do unless there are extra conditions (deletes only, as checked above).
  if (where_ops.empty()) {
    return Status::OK();
  }

  // Build "cond_1 AND cond_2 AND ..." out of the remaining column conditions.
  QLConditionPB *and_pb = req->mutable_where_expr()->mutable_condition();
  and_pb->set_op(QL_OP_AND);
  for (const auto& column_op : where_ops) {
    QLConditionPB *operand_pb = and_pb->add_operands()->mutable_condition();
    RETURN_NOT_OK(WhereOpToPB(operand_pb, column_op));
  }
  return Status::OK();
}
75
76
// Fills a read request from the WHERE clause of a SELECT statement.
//
// The request is populated in three stages:
//   1. partition_key_ops: each operand is evaluated to a hash code and turned into the inclusive
//      [hash_code, max_hash_code] interval of the request.
//   2. key_where_ops ('=' and 'IN' on hash columns): values are appended to the request's hashed
//      column values until the first 'IN' is met. From that point on, the options of that column
//      and of every following hash column are stored in tnode_context->hash_values_options(), and
//      the number of option combinations is recorded with tnode_context->set_partitions_count().
//   3. where_ops, subcol_where_ops, jsoncol_where_ops and func_ops are AND-ed together into the
//      request's where expression.
//
// Returns an upper bound estimate for the number of selected rows:
//   - 0 when a condition selects nothing (empty 'IN' list, or a partition key bound that lies
//     outside of the hash code range),
//   - std::numeric_limits<uint64_t>::max() when no reliable estimate is available,
//   - otherwise the number of partitions multiplied by the sizes of the 'IN' lists on primary key
//     columns found in where_ops.
// Returns a non-OK status if converting or evaluating an expression fails, if a 32 bit partition
// hash value does not fit into 16 bits, or if the number of partitions to read overflows 64 bits.
Result<uint64_t> Executor::WhereClauseToPB(QLReadRequestPB *req,
                                           const MCVector<ColumnOp>& key_where_ops,
                                           const MCList<ColumnOp>& where_ops,
                                           const MCList<SubscriptedColumnOp>& subcol_where_ops,
                                           const MCList<JsonColumnOp>& jsoncol_where_ops,
                                           const MCList<PartitionKeyOp>& partition_key_ops,
                                           const MCList<FuncOp>& func_ops,
                                           TnodeContext* tnode_context) {
  uint64_t max_rows_estimate = std::numeric_limits<uint64_t>::max();

  // Setup the lower/upper bounds on the partition key -- if any
  for (const auto& op : partition_key_ops) {
    QLExpressionPB expr_pb;
    RETURN_NOT_OK(PTExprToPB(op.expr(), &expr_pb));
    QLExprResult result;
    RETURN_NOT_OK(EvalExpr(expr_pb, QLTableRow::empty_row(), result.Writer()));
    const auto& value = result.Value();
    DCHECK(value.has_int64_value() || value.has_int32_value())
        << "Partition key operations are expected to return 64/16 bit integer";
    uint16_t hash_code = 0;
    // 64 bits for token and 32 bits for partition_hash.
    if (value.has_int32_value()) {
      // Validate bounds for uint16_t.
      const int32_t val = value.int32_value();
      if (val < std::numeric_limits<uint16_t>::min() ||
          val > std::numeric_limits<uint16_t>::max()) {
        return STATUS_SUBSTITUTE(InvalidArgument, "$0 out of bounds for unsigned 16 bit integer",
                                 val);
      }
      hash_code = val;
    } else {
      hash_code = YBPartition::CqlToYBHashCode(value.int64_value());
    }

    // We always use inclusive intervals [start, end] for hash_code
    switch (op.yb_op()) {
      case QL_OP_GREATER_THAN:
        if (hash_code < YBPartition::kMaxHashCode) {
          req->set_hash_code(hash_code + 1);
        } else {
          // Token hash greater than max implies no results.
          return 0;
        }
        break;
      case QL_OP_GREATER_THAN_EQUAL:
        req->set_hash_code(hash_code);
        break;
      case QL_OP_LESS_THAN:
        // Cassandra treats INT64_MIN upper bound as special case that includes everything (i.e. it
        // adds no real restriction). So we skip (do nothing) in that case.
        if (!value.has_int64_value() || value.int64_value() != INT64_MIN) {
          if (hash_code > YBPartition::kMinHashCode) {
            req->set_max_hash_code(hash_code - 1);
          } else {
            // Token hash smaller than min implies no results.
            return 0;
          }
        }
        break;
      case QL_OP_LESS_THAN_EQUAL:
        // Cassandra treats INT64_MIN upper bound as special case that includes everything (i.e. it
        // adds no real restriction). So we skip (do nothing) in that case.
        if (!value.has_int64_value() || value.int64_value() != INT64_MIN) {
          req->set_max_hash_code(hash_code);
        }
        break;
      case QL_OP_EQUAL:
        req->set_hash_code(hash_code);
        req->set_max_hash_code(hash_code);
        break;

      default:
        LOG(FATAL) << "Unsupported operator for token-based partition key condition";
    }
  }

  // Try to set up key_where_ops as the requests' hash key columns.
  // For selects with 'IN' conditions on the hash keys we may need to read several partitions.
  // If we find an 'IN', we add subsequent hash column values options to the execution context.
  // Then, the executor will use them to produce the partitions that need to be read.
  bool is_multi_partition = false;
  uint64_t partitions_count = 1;
  for (const auto& op : key_where_ops) {
    const ColumnDesc *col_desc = op.desc();
    CHECK(col_desc->is_hash()) << "Unexpected non-partition column in this context";

    VLOG(3) << "READ request, column id = " << col_desc->id();

    switch (op.yb_op()) {
      case QL_OP_EQUAL: {
        if (!is_multi_partition) {
          // No 'IN' seen so far: the value goes straight into the request.
          QLExpressionPB *col_pb = req->add_hashed_column_values();
          col_pb->set_column_id(col_desc->id());
          RETURN_NOT_OK(PTExprToPB(op.expr(), col_pb));
          RETURN_NOT_OK(EvalExpr(col_pb, QLTableRow::empty_row()));
        } else {
          // After an 'IN', an equality is recorded as a column with a single option.
          QLExpressionPB col_pb;
          col_pb.set_column_id(col_desc->id());
          RETURN_NOT_OK(PTExprToPB(op.expr(), &col_pb));
          RETURN_NOT_OK(EvalExpr(&col_pb, QLTableRow::empty_row()));
          tnode_context->hash_values_options().push_back({col_pb});
        }
        break;
      }

      case QL_OP_IN: {
        is_multi_partition = true;

        // De-duplicating and ordering values from the 'IN' expression.
        QLExpressionPB col_pb;
        RETURN_NOT_OK(PTExprToPB(op.expr(), &col_pb));

        // Fast path for returning no results when 'IN' list is empty.
        if (col_pb.value().list_value().elems_size() == 0) {
          return 0;
        }

        std::set<QLValuePB> set_values;
        bool has_null = false;
        for (QLValuePB& value_pb : *col_pb.mutable_value()->mutable_list_value()->mutable_elems()) {
          if (QLValue::IsNull(value_pb)) {
            has_null = true;
          } else {
            set_values.insert(std::move(value_pb));
          }
        }

        // Special case: WHERE x IN (null)
        if (has_null && set_values.empty() && req->hashed_column_values().empty()) {
          req->add_hashed_column_values();
        }

        // The number of partitions is the product of the option counts of all hash columns. Refuse
        // the request instead of letting the unsigned product wrap around, because a wrapped count
        // would describe fewer partitions than the statement actually selects.
        const uint64_t num_options = set_values.size();
        if (num_options != 0 &&
            partitions_count > std::numeric_limits<uint64_t>::max() / num_options) {
          return STATUS_SUBSTITUTE(InvalidArgument,
                                   "Too many partitions to read: $0 * $1 out of bounds for "
                                   "unsigned 64 bit integer",
                                   partitions_count, num_options);
        }

        // Adding partition options information to the execution context.
        partitions_count *= num_options;
        tnode_context->hash_values_options().emplace_back();
        auto& options = tnode_context->hash_values_options().back();
        for (const QLValuePB& value_pb : set_values) {
          options.emplace_back();
          options.back().set_column_id(col_desc->id());
          // Elements of a std::set are const and cannot be moved from, so this is a copy.
          *options.back().mutable_value() = value_pb;
        }
        break;
      }

      default:
        // This should be caught by the analyzer before getting here.
        LOG(FATAL) << "Only '=' and 'IN' operators allowed on hash keys";
    }
  }

  if (!key_where_ops.empty()) {
    // If this is a multi-partition select, set the partitions count in the execution context.
    if (is_multi_partition) {
      tnode_context->set_partitions_count(partitions_count);
    }
    max_rows_estimate = partitions_count;
  }

  // Generate query condition if where clause is not empty.
  if (!where_ops.empty() || !subcol_where_ops.empty() || !func_ops.empty() ||
      !jsoncol_where_ops.empty()) {

    // Setup the where clause.
    QLConditionPB *where_pb = req->mutable_where_expr()->mutable_condition();
    where_pb->set_op(QL_OP_AND);
    for (const auto& col_op : where_ops) {
      QLConditionPB* cond = where_pb->add_operands()->mutable_condition();
      RETURN_NOT_OK(WhereOpToPB(cond, col_op));
      // Update the estimate for the number of selected rows if needed.
      if (col_op.desc()->is_primary()) {
        if (cond->op() == QL_OP_IN) {
          // Operand 1 of the condition holds the list of 'IN' values.
          int in_size = cond->operands(1).value().list_value().elems_size();
          if (in_size == 0) {  // Fast path for returning no results when 'IN' list is empty.
            return 0;
          } else if (max_rows_estimate <= std::numeric_limits<uint64_t>::max() / in_size) {
            max_rows_estimate *= in_size;
          } else {
            // The product would overflow: saturate the estimate.
            max_rows_estimate = std::numeric_limits<uint64_t>::max();
          }
        } else if (cond->op() == QL_OP_EQUAL) {
          // Nothing to do (equality condition implies one option).
        } else {
          // Cannot yet estimate num rows for inequality (and other) conditions.
          max_rows_estimate = std::numeric_limits<uint64_t>::max();
        }
      }
    }

    for (const auto& col_op : subcol_where_ops) {
      RETURN_NOT_OK(WhereSubColOpToPB(where_pb->add_operands()->mutable_condition(), col_op));
    }
    for (const auto& col_op : jsoncol_where_ops) {
      RETURN_NOT_OK(WhereJsonColOpToPB(where_pb->add_operands()->mutable_condition(), col_op));
    }
    for (const auto& func_op : func_ops) {
      RETURN_NOT_OK(FuncOpToPB(where_pb->add_operands()->mutable_condition(), func_op));
    }
  }

  // If not all primary keys have '=' or 'IN' conditions, the max rows estimate is not reliable.
  if (!static_cast<const PTSelectStmt*>(tnode_context->tnode())->HasPrimaryKeysSet()) {
    return std::numeric_limits<uint64_t>::max();
  }

  return max_rows_estimate;
}
284
285
20.2k
// Converts a column condition "<column> <operator> <expression>" into a condition message.
//
// The operator is taken from col_op; the first operand is set to the column id and the second
// operand to the converted expression. For an 'IN' condition on a primary key column the second
// operand is a list holding the non-null values of the expression, without duplicates and in the
// iteration order of std::set<QLValuePB>.
//
// Returns the status of the expression conversion.
Status Executor::WhereOpToPB(QLConditionPB *condition, const ColumnOp& col_op) {
  condition->set_op(col_op.yb_op());

  // First operand: the column being tested.
  const ColumnDesc *column = col_op.desc();
  QLExpressionPB *column_pb = condition->add_operands();
  VLOG(3) << "WHERE condition, column id = " << column->id();
  column_pb->set_column_id(column->id());

  // Second operand: the expression the column is compared with.
  QLExpressionPB *rhs_pb = condition->add_operands();

  const bool is_primary_key_in = col_op.yb_op() == QL_OP_IN && col_op.desc()->is_primary();
  if (!is_primary_key_in) {
    return PTExprToPB(col_op.expr(), rhs_pb);
  }

  // 'IN' arguments on a primary key column are de-duplicated and ordered to match Cassandra
  // semantics. The expression is converted into a scratch message first and its non-null values
  // are collected into an ordered set.
  QLExpressionPB scratch_pb;
  RETURN_NOT_OK(PTExprToPB(col_op.expr(), &scratch_pb));
  std::set<QLValuePB> unique_values;
  auto* raw_elems = scratch_pb.mutable_value()->mutable_list_value()->mutable_elems();
  for (QLValuePB& elem : *raw_elems) {
    if (QLValue::IsNull(elem)) {
      continue;
    }
    unique_values.insert(std::move(elem));
  }

  // Requesting the mutable list marks the value as a list even when no element is added.
  auto* list_pb = rhs_pb->mutable_value()->mutable_list_value();
  for (const QLValuePB& unique_value : unique_values) {
    *list_pb->add_elems() = unique_value;
  }
  return Status::OK();
}
320
321
// Fills a read request so that it selects the row identified by the given primary key.
//
// The first schema.num_hash_key_columns() columns of key are appended as hashed column values.
// Each remaining key column, if any, is added to the request's where expression as an equality
// condition on the column id found at the same index in the schema.
//
// Always returns Status::OK().
Status Executor::WhereKeyToPB(QLReadRequestPB *req,
                              const Schema& schema,
                              const QLRow& key) {
  const auto hash_col_count = schema.num_hash_key_columns();
  const auto key_col_count = schema.num_key_columns();

  // Hash columns come first in the key.
  DCHECK(req->hashed_column_values().empty());
  for (size_t col = 0; col < hash_col_count; ++col) {
    QLExpressionPB *hashed_pb = req->add_hashed_column_values();
    *hashed_pb->mutable_value() = key.column(col).value();
  }

  if (!(key_col_count > hash_col_count)) {
    VLOG(3) << "there is no range column for " << schema.ToString();
    return Status::OK();
  }

  // Range columns are expressed as "column = value" conditions that are AND-ed into the where
  // expression, which may already hold an AND condition.
  QLConditionPB *and_pb = req->mutable_where_expr()->mutable_condition();
  if (!and_pb->has_op()) {
    and_pb->set_op(QL_OP_AND);
  }
  DCHECK_EQ(and_pb->op(), QL_OP_AND);
  for (size_t col = hash_col_count; col < key_col_count; ++col) {
    QLConditionPB *equals_pb = and_pb->add_operands()->mutable_condition();
    equals_pb->set_op(QL_OP_EQUAL);
    QLExpressionPB *lhs_pb = equals_pb->add_operands();
    lhs_pb->set_column_id(schema.column_id(col));
    QLExpressionPB *rhs_pb = equals_pb->add_operands();
    *rhs_pb->mutable_value() = key.column(col).value();
  }

  return Status::OK();
}
349
350
96
// Converts a condition on a JSON column into a condition message.
//
// The first operand is a JSON column reference made of the column id followed by one JSON
// operation per argument of col_op; the second operand is the converted expression of col_op.
//
// Returns the first non-OK status produced by a conversion, or the status of converting the
// expression.
Status Executor::WhereJsonColOpToPB(QLConditionPB *condition, const JsonColumnOp& col_op) {
  condition->set_op(col_op.yb_op());

  // First operand: the column together with its chain of JSON operators.
  const ColumnDesc *column = col_op.desc();
  QLExpressionPB *lhs_pb = condition->add_operands();
  VLOG(3) << "WHERE condition, sub-column with id = " << column->id();
  auto* json_col_pb = lhs_pb->mutable_json_column();
  json_col_pb->set_column_id(column->id());
  for (const auto& json_arg : col_op.args()->node_list()) {
    auto* json_op_pb = json_col_pb->add_json_operations();
    RETURN_NOT_OK(PTJsonOperatorToPB(std::dynamic_pointer_cast<PTJsonOperator>(json_arg),
                                     json_op_pb));
  }

  // Second operand: the expression.
  QLExpressionPB *rhs_pb = condition->add_operands();
  return PTExprToPB(col_op.expr(), rhs_pb);
}
368
369
85
// Converts a condition on a subscripted column into a condition message.
//
// The first operand is a subscripted column reference made of the column id followed by one
// converted subscript argument per argument of col_op; the second operand is the converted
// expression of col_op.
//
// Returns the first non-OK status produced by a conversion, or the status of converting the
// expression.
Status Executor::WhereSubColOpToPB(QLConditionPB *condition, const SubscriptedColumnOp& col_op) {
  condition->set_op(col_op.yb_op());

  // First operand: the column together with its subscript arguments.
  const ColumnDesc *column = col_op.desc();
  QLExpressionPB *lhs_pb = condition->add_operands();
  VLOG(3) << "WHERE condition, sub-column with id = " << column->id();
  auto* subscripted_pb = lhs_pb->mutable_subscripted_col();
  subscripted_pb->set_column_id(column->id());
  for (auto& subscript : col_op.args()->node_list()) {
    auto* subscript_pb = subscripted_pb->add_subscript_args();
    RETURN_NOT_OK(PTExprToPB(subscript, subscript_pb));
  }

  // Second operand: the expression.
  QLExpressionPB *rhs_pb = condition->add_operands();
  return PTExprToPB(col_op.expr(), rhs_pb);
}
386
387
46
// Converts a condition "<function call> <operator> <expression>" into a condition message.
//
// The first operand is the converted function call of func_op and the second operand its
// converted value expression.
//
// Returns the first non-OK conversion status, or the status of converting the value expression.
Status Executor::FuncOpToPB(QLConditionPB *condition, const FuncOp& func_op) {
  condition->set_op(func_op.yb_op());

  // First operand: the function call.
  const auto func_call = func_op.func_expr();
  QLExpressionPB *lhs_pb = condition->add_operands();
  RETURN_NOT_OK(PTExprToPB(func_call.get(), lhs_pb));

  // Second operand: the expression.
  QLExpressionPB *rhs_pb = condition->add_operands();
  return PTExprToPB(func_op.value_expr(), rhs_pb);
}
400
401
}  // namespace ql
402
}  // namespace yb