YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/yql/cql/ql/ptree/pt_select.cc
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
// Treenode definitions for SELECT statements.
16
//--------------------------------------------------------------------------------------------------
17
18
#include "yb/yql/cql/ql/ptree/pt_select.h"
19
20
#include <functional>
21
22
#include "yb/client/schema.h"
23
#include "yb/client/table.h"
24
25
#include "yb/common/common.pb.h"
26
#include "yb/common/index.h"
27
#include "yb/common/index_column.h"
28
#include "yb/common/ql_type.h"
29
#include "yb/common/schema.h"
30
31
#include "yb/gutil/casts.h"
32
33
#include "yb/master/master_defaults.h"
34
35
#include "yb/util/flag_tags.h"
36
#include "yb/util/memory/mc_types.h"
37
#include "yb/util/result.h"
38
#include "yb/util/status.h"
39
#include "yb/util/status_format.h"
40
41
#include "yb/yql/cql/ql/ptree/column_arg.h"
42
#include "yb/yql/cql/ql/ptree/column_desc.h"
43
#include "yb/yql/cql/ql/ptree/pt_expr.h"
44
#include "yb/yql/cql/ql/ptree/pt_option.h"
45
#include "yb/yql/cql/ql/ptree/sem_context.h"
46
#include "yb/yql/cql/ql/ptree/yb_location.h"
47
#include "yb/yql/cql/ql/ptree/ycql_predtest.h"
48
49
DEFINE_bool(enable_uncovered_index_select, true,
50
            "Enable executing select statements using uncovered index");
51
TAG_FLAG(enable_uncovered_index_select, advanced);
52
53
namespace yb {
54
namespace ql {
55
56
using std::make_shared;
57
using std::string;
58
using std::unordered_map;
59
using std::vector;
60
using yb::bfql::TSOpcode;
61
62
//--------------------------------------------------------------------------------------------------
63
64
namespace {
65
66
// Selectivity of a column operator.
67
YB_DEFINE_ENUM(OpSelectivity, (kEqual)(kRange)(kNone));
68
69
// Returns the selectivity of a column operator.
70
106k
OpSelectivity GetOperatorSelectivity(const QLOperator op) {
71
106k
  switch (op) {
72
105k
    case QL_OP_EQUAL: FALLTHROUGH_INTENDED;
73
105k
    case QL_OP_IN:
74
105k
      return OpSelectivity::kEqual;
75
440
    case QL_OP_GREATER_THAN: FALLTHROUGH_INTENDED;
76
592
    case QL_OP_GREATER_THAN_EQUAL: FALLTHROUGH_INTENDED;
77
956
    case QL_OP_LESS_THAN: FALLTHROUGH_INTENDED;
78
1.10k
    case QL_OP_LESS_THAN_EQUAL:
79
1.10k
      return OpSelectivity::kRange;
80
0
    case QL_OP_NOOP: FALLTHROUGH_INTENDED;
81
26
    case QL_OP_NOT_IN: FALLTHROUGH_INTENDED;
82
106
    case QL_OP_NOT_EQUAL:
83
106
      break;
84
0
    default:
85
      // Should have been caught beforehand (as this is called in pt_select after analyzing the
86
      // where clause).
87
0
      break;
88
106k
  }
89
106
  return OpSelectivity::kNone;
90
106k
}
91
92
// Class to compare selectivity of an index for a SELECT statement.
93
class Selectivity {
94
 public:
95
  // Selectivity of the PRIMARY index.
96
  Selectivity(MemoryContext *memctx, const PTSelectStmt& stmt, bool is_forward_scan)
97
      : is_local_(true),
98
        covers_fully_(true),
99
268k
        is_forward_scan_(is_forward_scan) {
100
268k
    const client::YBSchema& schema = stmt.table()->schema();
101
268k
    MCIdToIndexMap id_to_idx(memctx);
102
734k
    for (size_t i = 0; i < schema.num_key_columns(); 
i++465k
) {
103
465k
      id_to_idx.emplace(schema.ColumnId(i), i);
104
465k
    }
105
268k
    Analyze(memctx, stmt, id_to_idx, schema.num_key_columns(), schema.num_hash_key_columns());
106
268k
  }
107
108
  // Selectivity of a SECONDARY index.
109
  Selectivity(MemoryContext *memctx,
110
              const PTSelectStmt& stmt,
111
              const IndexInfo& index_info,
112
              bool is_forward_scan,
113
              int predicate_len,
114
              const MCUnorderedMap<int32, uint16> &column_ref_cnts)
115
      : index_id_(index_info.table_id()),
116
        is_local_(index_info.is_local()),
117
        covers_fully_(stmt.CoversFully(index_info, column_ref_cnts)),
118
        index_info_(&index_info),
119
        is_forward_scan_(is_forward_scan),
120
16.9k
        predicate_len_(predicate_len) {
121
122
16.9k
    MCIdToIndexMap id_to_idx(memctx);
123
85.2k
    for (size_t i = 0; i < index_info.key_column_count(); 
i++68.2k
) {
124
      // Map the column id if the index expression is just a column-ref.
125
68.2k
      if (index_info.column(i).colexpr.expr_case() == QLExpressionPB::ExprCase::EXPR_NOT_SET ||
126
68.2k
          index_info.column(i).colexpr.expr_case() == QLExpressionPB::ExprCase::kColumnId) {
127
64.9k
        id_to_idx.emplace(index_info.column(i).indexed_column_id, i);
128
64.9k
      }
129
68.2k
    }
130
16.9k
    Analyze(memctx, stmt, id_to_idx, index_info.key_column_count(), index_info.hash_column_count());
131
16.9k
  }
132
133
148k
  bool is_primary_index() const { return index_id_.empty(); }
134
135
268k
  bool covers_fully() const { return covers_fully_; }
136
137
269k
  const TableId& index_id() const { return index_id_; }
138
139
269k
  bool is_forward_scan() const { return is_forward_scan_; }
140
141
194
  bool supporting_orderby() const { return !full_table_scan_; }
142
143
268k
  size_t prefix_length() const { return prefix_length_; }
144
145
  // Comparison operator to sort the selectivity of an index.
146
44.0k
  bool operator>(const Selectivity& other) const {
147
    // Preference of properties:
148
    // - If both indexes have that property, check the sub-bullets for finer checks. If all
149
    // sub-bullets match, move to next point.
150
    // - If both indexes don't have that property, check next point.
151
    // - If one of them has and the other doesn't, pick the one that has the property.
152
    //
153
    //   1. Single key read
154
    //      i) Primary index
155
    //
156
    //   2. Single tserver scan
157
    //      i) Partial index
158
    //        a) Choose one with larger predicate len
159
    //      ii) Prefix length
160
    //      iii) Ends with range
161
    //      iv) Num non-key ops
162
    //      v) Covers fully
163
    //      vi) Local
164
    //      vii) Primary index
165
    //
166
    //   3. Reaching here means both are full scans
167
    //      i) Partial index
168
    //        a) Choose one with larger predicate len
169
    //      ii) Primary index
170
    //      iii) Prefix length
171
    //      iv) Ends with range
172
    //      v) Num non-key ops
173
    //      vi) Covers fully
174
    //      vii) Local
175
176
    // If one is a single-key read and the other is not, prefer the one that is.
177
44.0k
    if (single_key_read_ != other.single_key_read_) {
178
23
      return single_key_read_ > other.single_key_read_;
179
23
    }
180
181
    // If both are single read, choose primary index.
182
44.0k
    if (single_key_read_ && 
is_primary_index() != other.is_primary_index()6
) {
183
6
      return is_primary_index() > other.is_primary_index();
184
6
    }
185
186
    // If one is a full-table scan and the other is not, prefer the one that is not.
187
44.0k
    if (full_table_scan_ != other.full_table_scan_) {
188
679
      return full_table_scan_ < other.full_table_scan_;
189
679
    }
190
191
    // If one of the indexes is partial pick that index.
192
    // If both are partial, pick one which has a longer predicate.
193
    // If both are partial and have same predicate len, we defer to non-partial index rules.
194
43.3k
    if (predicate_len_ != other.predicate_len_)
195
390
      return predicate_len_ > other.predicate_len_;
196
197
42.9k
    if (false) {
198
      // TODO(Piyush) There are tests that expect secondary index to be chosen in this case, so
199
      // some discussion is needed with this change. This issue is filed in
200
      //   https://github.com/yugabyte/yugabyte-db/issues/6821.
201
      // The following formulas also need review as they do not look right.
202
      //   prefix_length (primary-scan) = number of key values.
203
      //   prefix_length (secondary-scan) = number of key values + prefix_length(primary-scan)
204
205
      // If they are both full scan or both range scan, choose primary index.
206
0
      if (is_primary_index() != other.is_primary_index()) {
207
0
        return is_primary_index() > other.is_primary_index();
208
0
      }
209
0
    }
210
211
    // If they are both full scan, choose primary index.
212
42.9k
    if (full_table_scan_ && 
is_primary_index() != other.is_primary_index()42.9k
) {
213
5.90k
      return is_primary_index() > other.is_primary_index();
214
5.90k
    }
215
216
    // When neither is a primary scan, compare the scan ranges.
217
    // If the fully-specified prefixes are different, prefer the one with longer prefix.
218
37.0k
    if (prefix_length_ != other.prefix_length_) {
219
9
      return prefix_length_ > other.prefix_length_;
220
9
    }
221
222
    // If one has a range clause after the fully specified prefix and the other does not, prefer
223
    // the one that does.
224
37.0k
    if (ends_with_range_ != other.ends_with_range_) {
225
0
      return ends_with_range_ > other.ends_with_range_;
226
0
    }
227
228
    // If the numbers of non-primary-key column operators needs to be evaluated are different,
229
    // prefer the one with less.
230
37.0k
    if (num_non_key_ops_ != other.num_non_key_ops_) {
231
0
      return num_non_key_ops_ < other.num_non_key_ops_;
232
0
    }
233
234
    // If one covers the read fully and the other does not, prefer the one that does.
235
37.0k
    if (covers_fully_ != other.covers_fully_) {
236
11.5k
      return covers_fully_ > other.covers_fully_;
237
11.5k
    }
238
239
    // If one is local read and the other is not, prefer the one that is.
240
25.5k
    if (is_local_ != other.is_local_) {
241
0
      return is_local_ > other.is_local_;
242
0
    }
243
244
    // When all the above are equal, prefer scanning the table over the index.
245
25.5k
    return is_primary_index() > other.is_primary_index();
246
25.5k
  }
247
248
0
  string ToString() const {
249
0
    return strings::Substitute("Selectivity: index_id $0 is_local $1 prefix_length $2 "
250
0
                               "single_key_read $3 full_table_scan $4 ends_with_range $5 "
251
0
                               "covers_fully $6 predicate_len $7", index_id_, is_local_,
252
0
                               prefix_length_, single_key_read_, full_table_scan_, ends_with_range_,
253
0
                               covers_fully_, predicate_len_);
254
0
  }
255
256
 private:
257
  // Analyze selectivity, currently defined as length of longest fully specified prefix and
258
  // whether there is a range operator immediately after the prefix.
259
  using MCIdToIndexMap = MCUnorderedMap<int, size_t>;
260
261
  void Analyze(MemoryContext *memctx,
262
               const PTSelectStmt& stmt,
263
               const MCIdToIndexMap& id_to_idx,
264
               const size_t num_key_columns,
265
286k
               const size_t num_hash_key_columns) {
266
267
    // There must be at least one secondary index.
268
286k
    const SelectScanInfo *scan_info = stmt.select_scan_info();
269
18.4E
    DCHECK(scan_info != nullptr) << "There is not any secondary indexes";
270
271
    // NOTE: Instead of "id_to_idx" mapping, we can also use "index_info_->FindKeyIndex()" for
272
    // ColumnRef expressions, the same way as JsonRef and SubscriptRef expressions.  However,
273
    // "id_to_idx" mapping is more efficient, so don't remove this map.
274
275
    // The operator on each column, in the order of the columns in the table or index we analyze.
276
286k
    MCVector<OpSelectivity> ops(id_to_idx.size(), OpSelectivity::kNone, memctx);
277
286k
    for (const ColumnOp& col_op : scan_info->col_ops()) {
278
109k
      const auto iter = id_to_idx.find(col_op.desc()->id());
279
109k
      if (iter != id_to_idx.end()) {
280
107k
        ops[iter->second] = GetOperatorSelectivity(col_op.yb_op());
281
107k
      } else {
282
1.93k
        num_non_key_ops_++;
283
1.93k
      }
284
109k
    }
285
286
286k
    if (index_info_) {
287
16.9k
      for (const JsonColumnOp& col_op : scan_info->col_json_ops()) {
288
84
        auto idx = index_info_->FindKeyIndex(col_op.IndexExprToColumnName());
289
84
        if (idx) {
290
25
          ops[*idx] = GetOperatorSelectivity(col_op.yb_op());
291
59
        } else {
292
59
          num_non_key_ops_++;
293
59
        }
294
84
      }
295
296
      // Enable the following code-block when allowing INDEX of collection fields.
297
16.9k
      if (false) {
298
0
        for (const SubscriptedColumnOp& col_op : scan_info->col_subscript_ops()) {
299
0
          auto idx = index_info_->FindKeyIndex(col_op.IndexExprToColumnName());
300
0
          if (idx) {
301
0
            ops[*idx] = GetOperatorSelectivity(col_op.yb_op());
302
0
          } else {
303
0
            num_non_key_ops_++;
304
0
          }
305
0
        }
306
0
      }
307
16.9k
    }
308
309
    // Find the length of fully specified prefix in index or indexed table.
310
391k
    while (prefix_length_ < ops.size() && 
ops[prefix_length_] == OpSelectivity::kEqual312k
) {
311
105k
      prefix_length_++;
312
105k
    }
313
314
    // Determine if it is a single-key read, a full-table scan, if there is a range clause after
315
    // prefix.
316
286k
    single_key_read_ = prefix_length_ >= num_key_columns;
317
286k
    full_table_scan_ = prefix_length_ < num_hash_key_columns;
318
286k
    ends_with_range_ = prefix_length_ < ops.size() && 
ops[prefix_length_] == OpSelectivity::kRange207k
;
319
286k
  }
320
321
  TableId index_id_;      // Index table id (null for indexed table).
322
  bool is_local_ = false; // Is the index local? (true for indexed table)
323
  size_t prefix_length_ = 0;     // Length of fully-specified prefix in index or indexed table.
324
  bool single_key_read_ = false; // Will this be a single-key read?
325
  bool full_table_scan_ = false; // Will this be a full table scan?
326
  bool ends_with_range_ = false; // Is there a range clause after prefix?
327
  size_t num_non_key_ops_ = 0; // How many non-primary-key column operators needs to be evaluated?
328
  bool covers_fully_ = false;  // Does the index cover the read fully? (true for indexed table)
329
  const IndexInfo* index_info_ = nullptr;
330
  bool is_forward_scan_ = true;
331
  int predicate_len_ = 0; // Length of index predicate. 0 if not a partial index.
332
};
333
334
} // namespace
335
336
//--------------------------------------------------------------------------------------------------
337
338
PTSelectStmt::PTSelectStmt(MemoryContext *memctx,
339
                           YBLocationPtr loc,
340
                           const bool distinct,
341
                           PTExprListNode::SharedPtr selected_exprs,
342
                           PTTableRefListNode::SharedPtr from_clause,
343
                           PTExpr::SharedPtr where_clause,
344
                           PTExpr::SharedPtr if_clause,
345
                           PTListNode::SharedPtr group_by_clause,
346
                           PTListNode::SharedPtr having_clause,
347
                           PTOrderByListNode::SharedPtr order_by_clause,
348
                           PTExpr::SharedPtr limit_clause,
349
                           PTExpr::SharedPtr offset_clause)
350
    : PTDmlStmt(memctx, loc, where_clause, if_clause),
351
      distinct_(distinct),
352
      selected_exprs_(selected_exprs),
353
      from_clause_(from_clause),
354
      group_by_clause_(group_by_clause),
355
      having_clause_(having_clause),
356
      order_by_clause_(order_by_clause),
357
      limit_clause_(limit_clause),
358
      offset_clause_(offset_clause),
359
      covering_exprs_(memctx),
360
      filtering_exprs_(memctx),
361
271k
      referenced_index_colnames_(memctx) {
362
271k
}
363
364
// Construct a nested select tnode to select from the index. Only the syntactic information
365
// populated by the parser should be cloned or set here. Semantic information should be left in
366
// the initial state to be populated when this tnode is analyzed.
367
// NOTE:
368
//   Only copy and execute IF clause on IndexTable if all expressions are fully covered.
369
PTSelectStmt::PTSelectStmt(MemoryContext *memctx,
370
                           const PTSelectStmt& other,
371
                           PTExprListNode::SharedPtr selected_exprs,
372
                           const SelectScanSpec& scan_spec)
373
    : PTDmlStmt(memctx, other, scan_spec.covers_fully()),
374
      distinct_(other.distinct_),
375
      selected_exprs_(selected_exprs),
376
      from_clause_(other.from_clause_),
377
      group_by_clause_(other.group_by_clause_),
378
      having_clause_(other.having_clause_),
379
      order_by_clause_(other.order_by_clause_),
380
      limit_clause_(other.limit_clause_),
381
      offset_clause_(other.offset_clause_),
382
      covering_exprs_(memctx),
383
      filtering_exprs_(memctx),
384
      is_forward_scan_(scan_spec.is_forward_scan()),
385
      index_id_(scan_spec.index_id()),
386
      covers_fully_(scan_spec.covers_fully()),
387
      referenced_index_colnames_(memctx),
388
765
      is_top_level_(false) {
389
765
}
390
391
270k
PTSelectStmt::~PTSelectStmt() {
392
270k
}
393
394
765
Status PTSelectStmt::LookupIndex(SemContext *sem_context) {
395
765
  VLOG
(3) << "Loading table descriptor for index " << index_id_0
;
396
765
  table_ = sem_context->GetTableDesc(index_id_);
397
765
  if (!table_ || !table_->IsIndex() ||
398
      // Only looking for CQL Indexes.
399
765
      (table_->table_type() != client::YBTableType::YQL_TABLE_TYPE)) {
400
0
    return sem_context->Error(table_loc(), ErrorCode::OBJECT_NOT_FOUND);
401
0
  }
402
403
765
  VLOG
(3) << "Found index. name = " << table_->name().ToString() << ", id = " << index_id_0
;
404
765
  LoadSchema(sem_context, table_, &column_map_, true /* is_index */);
405
765
  return Status::OK();
406
765
}
407
408
269k
CHECKED_STATUS PTSelectStmt::Analyze(SemContext *sem_context) {
409
  // If use_cassandra_authentication is set, permissions are checked in PTDmlStmt::Analyze.
410
269k
  RETURN_NOT_OK(PTDmlStmt::Analyze(sem_context));
411
412
  // Load and check <table> reference - FROM Clause.
413
  //   SELECT ... FROM <table> ...
414
269k
  Status s = AnalyzeFromClause(sem_context);
415
269k
  if (PREDICT_FALSE(!s.ok())) {
416
    // If it is a system table and it does not exist, do not analyze further. We will return
417
    // void result when the SELECT statement is executed.
418
2.14k
    return (is_system() && 
table_ == nullptr2.11k
) ?
Status::OK()2.11k
:
s39
;
419
2.14k
  }
420
421
  // Analyze column references against the loaded <table> - SELECT List.
422
  //   SELECT <select_list> FROM ...
423
267k
  RETURN_NOT_OK(AnalyzeSelectList(sem_context));
424
425
  // Find the optimal scan path.
426
269k
  
if (267k
is_top_level_267k
) {
427
    // Variable "scan_spec" will contain specification for the optimal scan path.
428
269k
    SelectScanSpec scan_spec;
429
430
    // select_scan_info_ is used to collect information on references for columns, operators, etc.
431
269k
    SelectScanInfo select_scan_info(sem_context->PTempMem(),
432
269k
                                    num_columns(),
433
269k
                                    &filtering_exprs_,
434
269k
                                    &column_map_);
435
269k
    select_scan_info_ = &select_scan_info;
436
437
    // Collect column references from SELECT parse tree.
438
269k
    RETURN_NOT_OK(AnalyzeReferences(sem_context));
439
440
    // Analyze column references and available indexes to find the best scan path.
441
    // Save the optimal scan-path to scan spec.
442
269k
    RETURN_NOT_OK(AnalyzeIndexes(sem_context, &scan_spec));
443
444
    // Reset select_scan_info_ to make sure it is not used elsewhere as scan analysis is done.
445
269k
    select_scan_info_ = nullptr;
446
447
    // Setup this primary select parse tree or create nested one with the chosen scan_spec.
448
269k
    RETURN_NOT_OK(SetupScanPath(sem_context, scan_spec));
449
450
    // Decide if reading primary table is needed.
451
    // - Skip the analysis for top-level SELECT if its nested select can complete the execution.
452
    // - When a nested select is used but does not have all data, querying the primary table is
453
    //   needed. However, the analysis should void all primary key conditions in WHERE & IF clauses
454
    //   because the condition is pushed down to the nested query.
455
    //
456
    // User Statement:
457
    //   SELECT <select_list> FROM <table>
458
    //     WHERE <user's primary key cond> AND <user's regular column cond>
459
    // Translated Statement:
460
    //   SELECT <select-list> FROM <table>
461
    //   WHERE
462
    //      primary_key IN (SELECT primary_key FROM <index> WHERE <user's primary key cond>)
463
    //      AND
464
    //      <user's regular column cond>
465
269k
    if (child_select_) {
466
765
      if (child_select_->covers_fully_) {
467
223
        return Status::OK();
468
223
      }
469
542
      sem_context->set_void_primary_key_condition(true);
470
542
    }
471
269k
  }
472
473
  // Run error checking on the WHERE conditions.
474
267k
  RETURN_NOT_OK(AnalyzeWhereClause(sem_context));
475
476
  // Run error checking on the IF conditions.
477
267k
  RETURN_NOT_OK(AnalyzeIfClause(sem_context));
478
479
  // Run error checking on the LIMIT clause.
480
267k
  RETURN_NOT_OK(AnalyzeLimitClause(sem_context));
481
482
  // Run error checking on the OFFSET clause.
483
267k
  RETURN_NOT_OK(AnalyzeOffsetClause(sem_context));
484
485
  // Constructing the schema of the result set.
486
267k
  RETURN_NOT_OK(ConstructSelectedSchema());
487
488
267k
  return Status::OK();
489
267k
}
490
491
268k
CHECKED_STATUS PTSelectStmt::AnalyzeFromClause(SemContext *sem_context) {
492
  // Table / index reference.
493
268k
  if (index_id_.empty()) {
494
    // Get the table descriptor.
495
264k
    if (from_clause_->size() > 1) {
496
0
      return sem_context->Error(from_clause_, "Only one selected table is allowed",
497
0
                                ErrorCode::CQL_STATEMENT_INVALID);
498
0
    }
499
264k
    RETURN_NOT_OK(from_clause_->Analyze(sem_context));
500
501
    // Collect table's schema for semantic analysis.
502
264k
    RETURN_NOT_OK(LookupTable(sem_context));
503
264k
  } else {
504
3.74k
    RETURN_NOT_OK(LookupIndex(sem_context));
505
3.74k
  }
506
507
266k
  return Status::OK();
508
268k
}
509
510
270k
CHECKED_STATUS PTSelectStmt::AnalyzeSelectList(SemContext *sem_context) {
511
  // Create state variable to compile references.
512
270k
  SemState sem_state(sem_context);
513
514
  // Analyze expressions in selected-list and check that references to columns and operators.
515
  //   SELECT <selected-list> FROM ...
516
270k
  sem_state.set_allowing_aggregate(true);
517
270k
  sem_state.set_allowing_column_refs(true);
518
519
270k
  RETURN_NOT_OK(selected_exprs_->Analyze(sem_context));
520
521
270k
  sem_state.set_allowing_aggregate(false);
522
270k
  sem_state.set_allowing_column_refs(false);
523
524
270k
  if (distinct_) {
525
25
    RETURN_NOT_OK(AnalyzeDistinctClause(sem_context));
526
25
  }
527
528
  // Collect covering expression.
529
543k
  
for (auto expr_node : selected_exprs_->node_list())270k
{
530
543k
    covering_exprs_.push_back(expr_node.get());
531
543k
  }
532
533
  // Check if this is an aggregate read.
534
270k
  bool has_aggregate_expr = false;
535
270k
  bool has_singular_expr = false;
536
543k
  for (auto expr_node : selected_exprs_->node_list()) {
537
543k
    if (expr_node->IsAggregateCall()) {
538
168
      has_aggregate_expr = true;
539
543k
    } else {
540
543k
      has_singular_expr = true;
541
543k
    }
542
543k
  }
543
270k
  if (has_aggregate_expr && 
has_singular_expr83
) {
544
0
    return sem_context->Error(
545
0
        selected_exprs_,
546
0
        "Selecting aggregate together with rows of non-aggregate values is not allowed",
547
0
        ErrorCode::CQL_STATEMENT_INVALID);
548
0
  }
549
270k
  is_aggregate_ = has_aggregate_expr;
550
551
270k
  return Status::OK();
552
270k
}
553
554
//--------------------------------------------------------------------------------------------------
555
556
0
void PTSelectStmt::PrintSemanticAnalysisResult(SemContext *sem_context) {
557
0
  VLOG(3) << "SEMANTIC ANALYSIS RESULT (" << *loc_ << "):\n" << "Not yet avail";
558
0
}
559
560
161
ExplainPlanPB PTSelectStmt::AnalysisResultToPB() {
561
161
  ExplainPlanPB explain_plan;
562
161
  SelectPlanPB *select_plan = explain_plan.mutable_select_plan();
563
  // Determines scan_type, child_select_ != null means an index is being used.
564
161
  if (child_select_) {
565
147
    string index_type = (child_select_->covers_fully() ? 
"Index Only"26
:
"Index"121
);
566
147
    string lookup_type = (child_select_->select_has_primary_keys_set_ ? 
"Key Lookup"0
: "Scan");
567
147
    select_plan->set_select_type(index_type + " " + lookup_type + " using " +
568
147
      child_select()->table()->name().ToString() + " on " + table_name().ToString());
569
  // Index is not being used, query only uses main table.
570
147
  } else 
if (14
select_has_primary_keys_set_14
) {
571
3
    select_plan->set_select_type("Primary Key Lookup on " + table_name().ToString());
572
11
  } else if (!(key_where_ops().empty() && 
partition_key_ops().empty()5
)) {
573
8
    select_plan->set_select_type("Range Scan on " + table_name().ToString());
574
8
  } else {
575
3
    select_plan->set_select_type("Seq Scan on " + table_name().ToString());
576
3
  }
577
161
  string key_conditions = "  Key Conditions: ";
578
161
  string filter = "  Filter: ";
579
161
  size_t longest = 0;
580
  // If overarching information( "Aggregate" | "Limit") then rest of the explain plan output needs
581
  // to be indented.
582
161
  if (is_aggregate() || limit_clause_) {
583
0
    string aggr = (is_aggregate()) ? "Aggregate" : "Limit";
584
0
    select_plan->set_aggregate(aggr);
585
0
    key_conditions = "      " + key_conditions;
586
0
    filter = "      " + filter;
587
0
    select_plan->set_select_type("  ->  " + select_plan->select_type());
588
0
    longest = max(longest, aggr.length());
589
0
  }
590
161
  longest = max(longest, select_plan->select_type().length());
591
  // If index is being used, change the split of key conditions and filters to that of the index.
592
161
  const auto& keys = child_select_ ? 
child_select_->key_where_ops()147
:
key_where_ops()14
;
593
161
  const auto& filters = child_select_ ? 
child_select_->where_ops()147
:
where_ops()14
;
594
  // Rebuild the conditions and filter into strings from internal format.
595
161
  string filled_key_conds = ConditionsToString<MCVector<ColumnOp>>(keys);
596
161
  string filled_filter = ConditionsToString<MCList<ColumnOp>>(filters);
597
598
161
  filled_key_conds += PartitionKeyToString(partition_key_ops());
599
600
  // If the query has key conditions or filters on either the index or the main table, then output
601
  // to query plan.
602
161
  if (!filled_key_conds.empty()) {
603
26
    key_conditions += filled_key_conds;
604
26
    longest = max(longest, key_conditions.length());
605
26
    select_plan->set_key_conditions(key_conditions);
606
26
  }
607
161
  if (!filled_filter.empty()) {
608
142
    filter += filled_filter;
609
142
    longest = max(longest, filter.length());
610
142
    select_plan->set_filter(filter);
611
142
  }
612
613
  // Set the output_width that has been calculated throughout the construction of the query plan.
614
161
  select_plan->set_output_width(narrow_cast<int32_t>(longest));
615
161
  return explain_plan;
616
161
}
617
618
//--------------------------------------------------------------------------------------------------
619
620
268k
CHECKED_STATUS PTSelectStmt::AnalyzeReferences(SemContext *sem_context) {
621
  // Create state variable to compile references.
622
268k
  SemState clause_state(sem_context);
623
268k
  clause_state.SetScanState(select_scan_info_);
624
625
  // Analyze expression in WHERE, IF, and ORDER BY.
626
  //     SELECT ... WHERE <where_expr> IF <if_expr> ORDER BY <orderby_expr>;
627
  // - Only need to run once for each SELECT. There's no need to run these on duplicated and nested
628
  //   SELECT statement tree.
629
  // - Validate the expressions without processing clauses. This check is to verify that columns
630
  //   exist, and the columns' datatypes allow comparison. The clauses' semantics will be analyzed
631
  //   after an INDEX is chosen.
632
268k
  if (where_clause_) {
633
    // Walk the <where_expr> tree, which is expected to be of BOOL type.
634
96.1k
    SemState sem_state(sem_context, QLType::Create(BOOL), InternalType::kBoolValue);
635
96.1k
    select_scan_info_->set_analyze_where(true);
636
96.1k
    RETURN_NOT_OK(where_clause_->Analyze(sem_context));
637
96.0k
    select_scan_info_->set_analyze_where(false);
638
96.0k
  }
639
640
268k
  if (if_clause_) {
641
    // Walk the <if_expr> tree, which is expected to be of BOOL type.
642
177
    SemState sem_state(sem_context, QLType::Create(BOOL), InternalType::kBoolValue);
643
177
    select_scan_info_->set_analyze_if(true);
644
177
    RETURN_NOT_OK(if_clause_->Analyze(sem_context));
645
150
    select_scan_info_->set_analyze_if(false);
646
150
  }
647
648
  // Walk the <orderby_expr> tree to make sure all column references are valid.
649
268k
  if (order_by_clause_) {
650
212
    SemState sem_state(sem_context);
651
212
    RETURN_NOT_OK(order_by_clause_->Analyze(sem_context));
652
212
  }
653
654
  // WORKAROUND for bug #4881 on secondary index.
655
268k
  if (!table_->index_map().empty() && 
partition_key_ops_.empty()2.80k
) {
656
    // TODO(neil) Remove this construction if "referenced_index_colnames_" is no longer needed.
657
    // Constructing a list of index column names that is being referenced.
658
    // - This is similar to the list "column_refs_", but this is a list of column names instead of
659
    //   column ids. When indexing by expression, a mangled-name of the expression is used to
660
    //   represent the column, so column id cannot be used to identify coverage.
661
    //
662
    // - This list is to support a quick fix for github #4881. Once column and expression names
663
    //   are mangled correctly, this code should be removed.
664
    //
665
    // - In CQL semantics, ORDER BY must used only indexed column, so not need to check for its
666
    //   coverage. Neither "column_refs_" nor "referenced_index_colnames_" has ORDER BY columns.
667
11.0k
    for (const PTExpr *expr : covering_exprs_) {
668
11.0k
      if (!expr->HaveColumnRef()) {
669
21
        continue;
670
21
      }
671
11.0k
      if (expr->opcode() == TreeNodeOpcode::kPTAllColumns) {
672
1.21k
        for (const ColumnDesc& coldesc : static_cast<const PTAllColumns*>(expr)->columns()) {
673
1.21k
          referenced_index_colnames_.insert(coldesc.MangledName());
674
1.21k
        }
675
10.8k
      } else {
676
10.8k
        expr->CollectReferencedIndexColnames(&referenced_index_colnames_);
677
10.8k
      }
678
11.0k
    }
679
2.80k
    for (const PTExpr *expr : filtering_exprs_) {
680
1.72k
      expr->CollectReferencedIndexColnames(&referenced_index_colnames_);
681
1.72k
    }
682
2.80k
  }
683
684
268k
  return Status::OK();
685
268k
}
686
687
269k
CHECKED_STATUS PTSelectStmt::AnalyzeIndexes(SemContext *sem_context, SelectScanSpec *scan_spec) {
688
269k
  VLOG
(3) << "AnalyzeIndexes: " << sem_context->stmt()289
;
689
690
269k
  SemState index_state(sem_context);
691
269k
  index_state.SetScanState(select_scan_info_);
692
693
  // Construct a list of scan-paths - "selectivities", sort the list for best to worst match.
694
269k
  bool is_forward_scan = true;
695
269k
  MCVector<Selectivity> selectivities(sem_context->PTempMem());
696
269k
  selectivities.reserve(table_->index_map().size() + 1);
697
698
  // Add entry for the PRIMARY scan.
699
269k
  Status orderby_status = AnalyzeOrderByClause(sem_context, "", &is_forward_scan);
700
269k
  if (orderby_status.ok()) {
701
268k
    Selectivity sel(sem_context->PTempMem(), *this, is_forward_scan);
702
269k
    if (
!order_by_clause_268k
||
sel.supporting_orderby()184
) {
703
269k
      selectivities.push_back(std::move(sel));
704
18.4E
    } else {
705
18.4E
      orderby_status = STATUS(InvalidArgument,
706
18.4E
                              "All hash columns must be set if order by clause is present");
707
18.4E
    }
708
268k
  }
709
710
  // Append entries for secondary INDEX scans. Skip this step for the following scenarios.
711
  // - When there is no secondary index, selection-list would have only primary index.
712
  // - When SELECT statement uses token(), querying by partition_key_ops_ on the <primary table> is
713
  //   more efficient than using secondary index scan.
714
269k
  if (!table_->index_map().empty() && 
partition_key_ops_.empty()2.80k
) {
715
17.1k
    for (const std::pair<TableId, IndexInfo> index : table_->index_map()) {
716
17.1k
      if (!index.second.HasReadPermission()) {
717
148
        continue;
718
148
      }
719
720
17.0k
      int predicate_len = 0;
721
17.0k
      MCUnorderedMap<int32, uint16> column_ref_cnts = column_ref_cnts_;
722
      // TODO(Piyush): We don't support json/collection cols with subscripted args in partial index
723
      // predicates yet. They are blocked on index creation.
724
17.0k
      if (index.second.where_predicate_spec() &&
725
17.0k
          
!432
VERIFY_RESULT432
(WhereClauseImpliesPred(select_scan_info_->col_ops(),
726
17.0k
            index.second.where_predicate_spec()->where_expr(), &predicate_len,
727
17.0k
            &column_ref_cnts))) {
728
        // For a partial index, if WHERE clause doesn't imply index predicate, don't use this index.
729
15
        VLOG
(3) << "where_clause_implies_predicate_=false for index_id=" << index.second.table_id()0
;
730
15
        continue;
731
15
      }
732
733
16.9k
      if (AnalyzeOrderByClause(sem_context, index.second.table_id(), &is_forward_scan).ok()) {
734
16.9k
        Selectivity sel(sem_context->PTempMem(), *this, index.second,
735
16.9k
          is_forward_scan, predicate_len, column_ref_cnts);
736
16.9k
        if (!order_by_clause_ || 
sel.supporting_orderby()10
) {
737
16.9k
          selectivities.push_back(std::move(sel));
738
16.9k
        }
739
16.9k
      }
740
16.9k
    }
741
2.80k
  }
742
743
  // Raise error if a scan path does not exist.
744
269k
  if (selectivities.empty()) {
745
    // There is no scanning path for this query that can satisfy the ORDER BY restriction.
746
17
    return sem_context->Error(order_by_clause_, orderby_status, ErrorCode::INVALID_ARGUMENTS);
747
17
  }
748
749
  // Sort the selection list from best to worst.
750
269k
  std::sort(selectivities.begin(), selectivities.end(), std::greater<Selectivity>());
751
269k
  if (VLOG_IS_ON(3)) {
752
0
    for (const auto& selectivity : selectivities) {
753
0
      VLOG(3) << selectivity.ToString();
754
0
    }
755
0
  }
756
757
  // Find the best selectivity and save it.
758
269k
  for (const Selectivity& selectivity : selectivities) {
759
269k
    if (
FLAGS_enable_uncovered_index_select268k
||
selectivity.covers_fully()0
) {
760
18.4E
      VLOG(3) << "Selected = " << selectivity.ToString();
761
762
      // Save the best scan path.
763
269k
      scan_spec->set_index_id(selectivity.index_id());
764
269k
      scan_spec->set_covers_fully(selectivity.covers_fully());
765
269k
      scan_spec->set_is_forward_scan(selectivity.is_forward_scan());
766
269k
      scan_spec->set_prefix_length(selectivity.prefix_length());
767
269k
      break;
768
269k
    }
769
268k
  }
770
771
269k
  return Status::OK();
772
269k
}
773
774
269k
Status PTSelectStmt::SetupScanPath(SemContext *sem_context, const SelectScanSpec& scan_spec) {
775
269k
  if (scan_spec.use_primary_scan()) {
776
    // Only need to set scan flag if the primary index is the best option.
777
268k
    is_forward_scan_ = scan_spec.is_forward_scan();
778
268k
    return Status::OK();
779
268k
  }
780
781
  // Sanity check that we have not analyzed WHERE clause semantics and create operator for protobuf
782
  // code generation yet at this point.
783
269k
  RSTATUS_DCHECK(key_where_ops_.empty() && where_ops_.empty(),
784
1.05k
                 IllegalState,
785
1.05k
                 "WHERE clause semantics should not have been analyzed at this point");
786
787
  // A secondary index is the best option.
788
1.05k
  MemoryContext* memctx = sem_context->PTreeMem();
789
1.05k
  auto selected_exprs = selected_exprs_;
790
791
  // If the index does not cover the query fully, select the primary key from the index.
792
1.05k
  if (!scan_spec.covers_fully()) {
793
542
    const auto& loc = selected_exprs_->loc_ptr();
794
542
    selected_exprs = PTExprListNode::MakeShared(memctx, loc);
795
2.13k
    for (size_t i = 0; i < num_key_columns(); 
i++1.59k
) {
796
1.59k
      const client::YBColumnSchema& column = table_->schema().Column(i);
797
1.59k
      auto column_name_str = MCMakeShared<MCString>(memctx, column.name().c_str());
798
1.59k
      auto column_name = PTQualifiedName::MakeShared(memctx, loc, column_name_str);
799
1.59k
      selected_exprs->Append(PTRef::MakeShared(memctx, loc, column_name));
800
1.59k
    }
801
802
    // Add ref for all primary key columns to indicate that they must be read for comparison.
803
    //   SELECT ... FROM <table> WHERE <primary_key> IN (nested-select).
804
542
    const client::YBSchema& schema = table_->schema();
805
2.13k
    for (size_t i = 0; i < schema.num_key_columns(); 
i++1.59k
) {
806
1.59k
      column_refs_.insert(schema.ColumnId(i));
807
1.59k
    }
808
542
  }
809
810
  // Create a child select statement for nested index query and compile it again.
811
  // NOTE: This parse-tree-duo is a very bad design. If the language is extented to support more
812
  // advance features, we should redo this work.
813
1.05k
  SemState select_state(sem_context);
814
1.05k
  child_select_ = MakeShared(memctx, *this, selected_exprs, scan_spec);
815
1.05k
  select_state.set_selecting_from_index(true);
816
1.05k
  select_state.set_index_select_prefix_length(scan_spec.prefix_length());
817
818
  // Analyze whether or not the INDEX scan should execute LIMIT & OFFSET clause execution.
819
1.05k
  if (scan_spec.covers_fully()) {
820
    // If the index covers all requested columns, all rows are read directly from the index,
821
    // so LIMIT and OFFSET should be applied to the INDEX ReadRequest.
822
    //
823
    // TODO(neil) We should not modify the original tree, but we have to do it here because some
824
    // code rely on "!limit_clause_" condition. When cleaning up code, a boolean predicate for not
825
    // using limit clause should be used in place of "!limit_clause_" check.
826
223
    limit_clause_ = nullptr;
827
223
    offset_clause_ = nullptr;
828
829
  } else {
829
    // If the index does not cover fully, the data will be read from PRIMARY table. Therefore,
830
    // the LIMIT and OFFSET should be applied to the PRIMARY ReadRequest.
831
829
    child_select_->limit_clause_ = nullptr;
832
829
    child_select_->offset_clause_ = nullptr;
833
829
  }
834
835
  // Compile the child tree.
836
1.05k
  RETURN_NOT_OK(child_select_->Analyze(sem_context));
837
1.05k
  return Status::OK();
838
1.05k
}
839
840
// Return whether the index covers the read fully.
841
// INDEXes that were created before v2.0 are defined by column IDs instead of mangled_names.
842
// - Returns TRUE if a list of column refs of a statement is a subset of INDEX columns.
843
// - Use ColumnID to check if a column in a query is covered by the index.
844
// - The list "column_refs_" contains IDs of all columns that are referred to by SELECT.
845
// - The list "IndexInfo::columns_" contains the IDs of all columns in the INDEX.
846
bool PTSelectStmt::CoversFully(const IndexInfo& index_info,
847
16.9k
                               const MCUnorderedMap<int32, uint16> &column_ref_cnts) const {
848
  // First, check covering by ID.
849
16.9k
  bool all_ref_id_covered = true;
850
78.7k
  for (const int32 table_col_id : column_refs_) {
851
78.7k
    DCHECK(column_ref_cnts.find(table_col_id) != column_ref_cnts.end());
852
78.7k
    if (column_ref_cnts.find(table_col_id) == column_ref_cnts.end() ||
853
78.7k
        column_ref_cnts.at(table_col_id) == 0)
854
32
      continue; // All occurrences of the column's ref were part of the partial index predicate.
855
856
78.7k
    if (!index_info.IsColumnCovered(ColumnId(table_col_id))) {
857
12.9k
      all_ref_id_covered = false;
858
12.9k
    }
859
78.7k
  }
860
861
  // Return result if name-resolution for covering is NOT needed.
862
  // - If all column references are found by ID, return true without further resolution.
863
  // - Index is not by expression, so name resolution for covering is not needed.
864
  // - Index uses older Protobuf versions, which doesn't use column name (id only).
865
16.9k
  if (all_ref_id_covered ||
866
16.9k
      
!index_info.has_index_by_expr()8.66k
||
867
16.9k
      
!index_info.use_mangled_column_name()2.79k
) {
868
14.1k
    return all_ref_id_covered;
869
14.1k
  }
870
871
  // Now, check for covering by column names to support index by expression.
872
  //
873
  // TODO(neil) We use a quick fix for now, but eventually we should mangle column name correctly
874
  // and remove the following quick fix and its associated code. See github #4881 for the bug
875
  // in column name mangling.
876
  //
877
  // Iterating names in "referenced_index_colnames_", which consists of names of all columns that
878
  // are referenced by SELECT and check if the name is covered in the index.
879
5.83k
  
for (const string &column_name : referenced_index_colnames_)2.79k
{
880
5.83k
    if (!index_info.IsColumnCovered(column_name)) {
881
1.96k
      return false;
882
1.96k
    }
883
5.83k
  }
884
885
  // TODO(neil) Change INDEX's metadata for column name-mangling to support the following code.
886
  // As shown in github #4881, the following is not working correctly.
887
  //   CREATE INDEX idx ON a_table(v);
888
  //   - Index column "v" is named as "$C_v"
889
  //   - Index column "v1" is named as "$C_v1"
890
  // As a result, IsExprCovered("$C_v1") would return true as it thought "$C_v1" is just an
891
  // expression for "$C_v" column.
892
893
  // Correct fix for #4881 would be adding a postfix when mangling column name. For example:
894
  // - column name "v" is mangled to "$C_v_E$"
895
  // - column name "v1" is mangled to "$C_v1_E$"
896
  // That way, IsExprCovered() would know that "$C_v1_E$" is NOT an expression of "$C_v_E$".
897
  // However, this changes metadata for INDEX, so compatible flag must be added to distinguish
898
  // between different mangling method, such as
899
  //   "use_mangled_column_name" versus "use_mangled_column_name_2"
900
823
  if (false) {
901
    // Check all ColumnRef in selected list.
902
0
    for (const PTExpr *expr : covering_exprs_) {
903
      // If this expression does not have column reference, it is considered "coverred".
904
0
      if (!expr->HaveColumnRef()) {
905
0
        continue;
906
0
      }
907
908
0
      if (expr->opcode() == TreeNodeOpcode::kPTAllColumns) {
909
0
        for (const ColumnDesc& coldesc : static_cast<const PTAllColumns*>(expr)->columns()) {
910
0
          if (index_info.IsExprCovered(coldesc.MangledName()) < 0) {
911
0
            return false;
912
0
          }
913
0
        }
914
0
      } else if (index_info.IsExprCovered(expr->MangledName()) < 0) {
915
0
        return false;
916
0
      }
917
0
    }
918
919
    // Check all ColumnRef in filtering list.
920
0
    for (const PTExpr *expr : filtering_exprs_) {
921
0
      if (index_info.IsExprCovered(expr->MangledName()) < 0) {
922
0
        return false;
923
0
      }
924
0
    }
925
0
  }
926
927
  // All referenced columns are covered.
928
823
  return true;
929
823
}
930
931
// -------------------------------------------------------------------------------------------------
932
933
25
CHECKED_STATUS PTSelectStmt::AnalyzeDistinctClause(SemContext *sem_context) {
934
  // Only partition and static columns are allowed to be used with distinct clause.
935
25
  size_t key_count = 0;
936
139
  for (const auto& pair : column_map_) {
937
139
    const ColumnDesc& desc = pair.second;
938
139
    if (desc.is_hash()) {
939
37
      if (column_refs_.find(desc.id()) != column_refs_.end()) {
940
25
        key_count++;
941
25
      }
942
102
    } else if (!desc.is_static()) {
943
68
      if (column_refs_.find(desc.id()) != column_refs_.end()) {
944
0
        return sem_context->Error(
945
0
            selected_exprs_,
946
0
            "Selecting distinct must request only partition keys and static columns",
947
0
            ErrorCode::CQL_STATEMENT_INVALID);
948
0
      }
949
68
    }
950
139
  }
951
952
25
  if (key_count != 0 && 
key_count != num_hash_key_columns()17
) {
953
0
    return sem_context->Error(selected_exprs_,
954
0
                              "Selecting distinct must request all or none of partition keys",
955
0
                              ErrorCode::CQL_STATEMENT_INVALID);
956
0
  }
957
25
  return Status::OK();
958
25
}
959
960
233k
bool PTSelectStmt::IsReadableByAllSystemTable() const {
961
233k
  const client::YBTableName t = table_name();
962
233k
  const string& keyspace = t.namespace_name();
963
233k
  const string& table = t.table_name();
964
233k
  if (keyspace == master::kSystemSchemaNamespaceName) {
965
110k
    return true;
966
122k
  } else if (keyspace == master::kSystemNamespaceName) {
967
117k
    if (table == master::kSystemLocalTableName ||
968
117k
        
table == master::kSystemPeersTableName54.7k
||
969
117k
        
table == master::kSystemPeersV2TableName15.2k
||
970
118k
        
table == master::kSystemPartitionsTableName12.1k
) {
971
118k
      return true;
972
118k
    }
973
117k
  }
974
4.01k
  return false;
975
233k
}
976
977
//--------------------------------------------------------------------------------------------------
978
979
namespace {
980
981
296
PTOrderBy::Direction directionFromSortingType(SortingType sorting_type) {
982
296
  return sorting_type == SortingType::kDescending ?
983
209
      
PTOrderBy::Direction::kDESC87
: PTOrderBy::Direction::kASC;
984
296
}
985
986
} // namespace
987
988
CHECKED_STATUS PTSelectStmt::AnalyzeOrderByClause(SemContext *sem_context,
989
                                                  const TableId& index_id,
990
285k
                                                  bool *is_forward_scan) {
991
285k
  if (order_by_clause_ == nullptr) {
992
285k
    return Status::OK();
993
285k
  }
994
995
  // Set state variables for processing order by.
996
323
  SemState orderby_state(sem_context);
997
323
  orderby_state.set_validate_orderby_expr(true);
998
999
  // Setup column_map to analyze the ORDER BY clause.
1000
323
  MCColumnMap index_column_map(sem_context->PTempMem());
1001
323
  MCColumnMap *scan_column_map = &index_column_map;
1002
1003
  // Load the index.
1004
323
  client::YBTablePtr scan_index;
1005
323
  if (index_id.empty()) {
1006
211
    scan_index = table_;
1007
211
    scan_column_map = &column_map_;
1008
211
  } else {
1009
112
    orderby_state.set_selecting_from_index(true);
1010
112
    scan_index = sem_context->GetTableDesc(index_id);
1011
112
    LoadSchema(sem_context, scan_index, scan_column_map, true /* is_index */);
1012
112
  }
1013
1014
  // Analyze ORDER BY against the index.
1015
323
  select_scan_info_->StartOrderbyAnalysis(scan_column_map);
1016
1017
323
  unordered_map<string, PTOrderBy::Direction> order_by_map;
1018
326
  for (auto& order_by : order_by_clause_->node_list()) {
1019
326
    RETURN_NOT_OK(order_by->Analyze(sem_context));
1020
315
    order_by_map[order_by->order_expr()->MetadataName()] = order_by->direction();
1021
315
  }
1022
1023
312
  const auto& scan_schema = scan_index->schema();
1024
312
  vector<bool> is_column_forward;
1025
312
  is_column_forward.reserve(scan_schema.num_range_key_columns());
1026
312
  bool last_column_order_specified = true;
1027
701
  for (size_t i = scan_schema.num_hash_key_columns(); i < scan_schema.num_key_columns(); 
i++389
) {
1028
395
    const auto& column = scan_schema.Column(i);
1029
395
    if (order_by_map.find(column.name()) != order_by_map.end()) {
1030
302
      if (!last_column_order_specified) {
1031
6
        return STATUS(InvalidArgument,
1032
6
                      "Order by currently only support the ordering of columns following their "
1033
6
                      "declared order in the PRIMARY KEY");
1034
6
      }
1035
296
      is_column_forward.push_back(
1036
296
          directionFromSortingType(column.sorting_type()) == order_by_map[column.name()]);
1037
296
      order_by_map.erase(column.name());
1038
296
    } else {
1039
93
      last_column_order_specified = false;
1040
93
      is_column_forward.push_back(is_column_forward.empty() || 
is_column_forward.back()86
);
1041
93
    }
1042
395
  }
1043
306
  if (!order_by_map.empty()) {
1044
13
    return STATUS(InvalidArgument, "Order by clause should only contain clustering columns");
1045
13
  }
1046
293
  *is_forward_scan = is_column_forward[0];
1047
368
  for (auto&& b : is_column_forward) {
1048
368
    if (b != *is_forward_scan) {
1049
6
      return STATUS(InvalidArgument, "Unsupported order by relation");
1050
6
    }
1051
368
  }
1052
1053
287
  select_scan_info_->FinishOrderbyAnalysis();
1054
287
  return Status::OK();
1055
293
}
1056
1057
//--------------------------------------------------------------------------------------------------
1058
1059
269k
CHECKED_STATUS PTSelectStmt::AnalyzeLimitClause(SemContext *sem_context) {
1060
269k
  if (limit_clause_ == nullptr) {
1061
268k
    return Status::OK();
1062
268k
  }
1063
1064
1.07k
  RETURN_NOT_OK(limit_clause_->CheckRhsExpr(sem_context));
1065
1066
1.07k
  SemState sem_state(sem_context, QLType::Create(INT32), InternalType::kInt32Value);
1067
1.07k
  sem_state.set_bindvar_name(PTBindVar::limit_bindvar_name());
1068
1.07k
  RETURN_NOT_OK(limit_clause_->Analyze(sem_context));
1069
1070
1.07k
  return Status::OK();
1071
1.07k
}
1072
1073
269k
CHECKED_STATUS PTSelectStmt::AnalyzeOffsetClause(SemContext *sem_context) {
1074
269k
  if (offset_clause_ == nullptr) {
1075
268k
    return Status::OK();
1076
268k
  }
1077
1078
865
  RETURN_NOT_OK(offset_clause_->CheckRhsExpr(sem_context));
1079
1080
865
  SemState sem_state(sem_context, QLType::Create(INT32), InternalType::kInt32Value);
1081
865
  sem_state.set_bindvar_name(PTBindVar::offset_bindvar_name());
1082
865
  RETURN_NOT_OK(offset_clause_->Analyze(sem_context));
1083
1084
865
  return Status::OK();
1085
865
}
1086
1087
//--------------------------------------------------------------------------------------------------
1088
1089
269k
CHECKED_STATUS PTSelectStmt::ConstructSelectedSchema() {
1090
269k
  const MCList<PTExpr::SharedPtr>& exprs = selected_exprs();
1091
269k
  selected_schemas_ = make_shared<vector<ColumnSchema>>();
1092
269k
  selected_schemas_->reserve(exprs.size());
1093
541k
  for (auto expr : exprs) {
1094
541k
    if (expr->opcode() == TreeNodeOpcode::kPTAllColumns) {
1095
155k
      const PTAllColumns *ref = static_cast<const PTAllColumns*>(expr.get());
1096
1.73M
      for (const auto& col_desc : ref->columns()) {
1097
1.73M
        selected_schemas_->emplace_back(col_desc.name(), col_desc.ql_type());
1098
1.73M
      }
1099
385k
    } else {
1100
385k
      selected_schemas_->emplace_back(expr->QLName(), expr->ql_type());
1101
385k
    }
1102
541k
  }
1103
269k
  return Status::OK();
1104
269k
}
1105
1106
2.13k
const std::shared_ptr<client::YBTable>& PTSelectStmt::bind_table() const {
1107
2.13k
  return child_select_ ? 
child_select_->bind_table()228
:
PTDmlStmt::bind_table()1.90k
;
1108
2.13k
}
1109
1110
//--------------------------------------------------------------------------------------------------
1111
1112
PTOrderBy::PTOrderBy(MemoryContext *memctx,
1113
                     YBLocation::SharedPtr loc,
1114
                     const PTExpr::SharedPtr& order_expr,
1115
                     const Direction direction,
1116
                     const NullPlacement null_placement)
1117
  : TreeNode(memctx, loc),
1118
    order_expr_(order_expr),
1119
    direction_(direction),
1120
308
    null_placement_(null_placement) {
1121
308
}
1122
1123
634
Status PTOrderBy::Analyze(SemContext *sem_context) {
1124
634
  RETURN_NOT_OK(order_expr_->Analyze(sem_context));
1125
1126
  // Validate the expression in SELECT statement.
1127
626
  if (sem_context->validate_orderby_expr()) {
1128
319
    if (order_expr_->expr_op() == ExprOperator::kRef) {
1129
      // This check is for clause ORDER BY <column> that is not part of the index (NULL desc).
1130
      // Example
1131
      //   Statement: CREATE INDEX ON table ( x ) INCLUDE ( y );
1132
      //   Columns "x" and "y" would be part of the index.
1133
312
      auto colref = dynamic_cast<PTRef*>(order_expr_.get());
1134
312
      if (!colref || !colref->desc()) {
1135
0
        return STATUS(InvalidArgument, "Order by clause contains invalid columns");
1136
0
      }
1137
1138
312
    } else 
if (7
!order_expr_->index_desc()7
) {
1139
      // This check is for clause ORDER BY <expression> that is not part of the index (NULL desc).
1140
      // Example
1141
      //   Statement: CREATE INDEX ON table ( <expr> );
1142
      //   Column "<expr>" would be part of the index.
1143
4
      return STATUS(InvalidArgument, "Order by clause contains invalid expression");
1144
4
    }
1145
319
  }
1146
1147
622
  return Status::OK();
1148
626
}
1149
1150
306
PTOrderBy::~PTOrderBy() {
1151
306
}
1152
1153
//--------------------------------------------------------------------------------------------------
1154
1155
PTTableRef::PTTableRef(MemoryContext *memctx,
1156
                       YBLocation::SharedPtr loc,
1157
                       const PTQualifiedName::SharedPtr& name,
1158
                       MCSharedPtr<MCString> alias)
1159
    : TreeNode(memctx, loc),
1160
      name_(name),
1161
276k
      alias_(alias) {
1162
276k
}
1163
1164
274k
PTTableRef::~PTTableRef() {
1165
274k
}
1166
1167
269k
CHECKED_STATUS PTTableRef::Analyze(SemContext *sem_context) {
1168
269k
  if (alias_ != nullptr) {
1169
0
    return sem_context->Error(this, "Alias is not allowed", ErrorCode::CQL_STATEMENT_INVALID);
1170
0
  }
1171
269k
  return name_->AnalyzeName(sem_context, ObjectType::TABLE);
1172
269k
}
1173
1174
//--------------------------------------------------------------------------------------------------
1175
1176
SelectScanInfo::SelectScanInfo(MemoryContext *memctx,
1177
                               size_t num_columns,
1178
                               MCVector<const PTExpr*> *scan_filtering_exprs,
1179
                               MCMap<MCString, ColumnDesc> *scan_column_map)
1180
    : col_ops_(memctx),
1181
      col_op_counters_(memctx),
1182
      col_json_ops_(memctx),
1183
      col_subscript_ops_(memctx),
1184
      scan_filtering_exprs_(scan_filtering_exprs),
1185
269k
      scan_column_map_(scan_column_map) {
1186
269k
  col_op_counters_.resize(num_columns);
1187
269k
}
1188
1189
const ColumnDesc* SelectScanInfo::GetColumnDesc(const SemContext *sem_context,
1190
109k
                                                const MCString& col_name) {
1191
109k
  if (scan_column_map_) {
1192
108k
    const auto iter = scan_column_map_->find(col_name);
1193
109k
    if (
iter != scan_column_map_->end()108k
) {
1194
109k
      sem_context->current_dml_stmt()->AddColumnRef(iter->second);
1195
109k
      return &iter->second;
1196
109k
    }
1197
108k
  }
1198
1199
18.4E
  return nullptr;
1200
109k
}
1201
1202
Status SelectScanInfo::AddWhereExpr(SemContext *sem_context,
1203
                                    const PTRelationExpr *expr,
1204
                                    const ColumnDesc *col_desc,
1205
                                    PTExpr::SharedPtr value,
1206
107k
                                    PTExprListNode::SharedPtr col_args) {
1207
107k
  SCHECK(analyze_where_, Corruption, "Expect state variable is setup for where clause");
1208
1209
  // Append filtering expression.
1210
107k
  RETURN_NOT_OK(AddFilteringExpr(sem_context, expr));
1211
1212
  // Append operator to appropriate list.
1213
107k
  switch (expr->ql_op()) {
1214
105k
    case QL_OP_EQUAL: {
1215
105k
      if (!col_args) {
1216
104k
        col_ops_.emplace_back(col_desc, value, QLOperator::QL_OP_EQUAL);
1217
1218
104k
      } else 
if (204
col_desc->ql_type()->IsJson()204
) {
1219
125
        col_json_ops_.emplace_back(col_desc, col_args, value, expr->ql_op());
1220
1221
125
      } else {
1222
79
        col_subscript_ops_.emplace_back(col_desc, col_args, value, expr->ql_op());
1223
79
      }
1224
105k
      break;
1225
0
    }
1226
1227
419
    case QL_OP_LESS_THAN: FALLTHROUGH_INTENDED;
1228
583
    case QL_OP_LESS_THAN_EQUAL: FALLTHROUGH_INTENDED;
1229
775
    case QL_OP_GREATER_THAN_EQUAL: FALLTHROUGH_INTENDED;
1230
1.41k
    case QL_OP_GREATER_THAN: {
1231
1.41k
      if (!col_args) {
1232
1.41k
        col_ops_.emplace_back(col_desc, value, expr->ql_op());
1233
1.41k
      } else 
if (5
col_desc->ql_type()->IsJson()5
) {
1234
0
        col_json_ops_.emplace_back(col_desc, col_args, value, expr->ql_op());
1235
1236
5
      } else {
1237
5
        col_subscript_ops_.emplace_back(col_desc, col_args, value, expr->ql_op());
1238
5
      }
1239
1.41k
      break;
1240
775
    }
1241
1242
144
    case QL_OP_NOT_EQUAL: FALLTHROUGH_INTENDED;
1243
180
    case QL_OP_NOT_IN: FALLTHROUGH_INTENDED;
1244
469
    case QL_OP_IN: {
1245
469
      if (!col_args) {
1246
451
        col_ops_.emplace_back(col_desc, value, expr->ql_op());
1247
451
      }
1248
469
      break;
1249
180
    }
1250
1251
0
    default:
1252
      // This function only needs to check for references and collects them. However, since CQL
1253
      // definitely does not allow this operator, just raise error right away.
1254
0
      return sem_context->Error(expr, "Operator is not supported in where clause",
1255
0
                                ErrorCode::CQL_STATEMENT_INVALID);
1256
107k
  }
1257
1258
107k
  return Status::OK();
1259
107k
}
1260
1261
108k
Status SelectScanInfo::AddFilteringExpr(SemContext *sem_context, const PTRelationExpr *expr) {
1262
108k
  SCHECK(analyze_where_ || analyze_if_,
1263
108k
         Corruption, "Expect state variable is setup for where clause");
1264
1265
  // Collecting all filtering expressions to help choosing INDEX when processing a DML.
1266
108k
  scan_filtering_exprs_->push_back(expr);
1267
108k
  return Status::OK();
1268
108k
}
1269
1270
//--------------------------------------------------------------------------------------------------
1271
1272
}  // namespace ql
1273
}  // namespace yb