YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/yql/cql/ql/ptree/pt_dml.h
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
// Tree node definitions shared by DML statements (PTDmlStmt and its WHERE-clause helpers).
16
//--------------------------------------------------------------------------------------------------
17
18
#ifndef YB_YQL_CQL_QL_PTREE_PT_DML_H_
19
#define YB_YQL_CQL_QL_PTREE_PT_DML_H_
20
21
#include <iosfwd>
22
23
#include "yb/client/client_fwd.h"
24
25
#include "yb/common/common_fwd.h"
26
27
#include "yb/util/memory/arena.h"
28
#include "yb/util/memory/mc_types.h"
29
30
#include "yb/yql/cql/ql/ptree/ptree_fwd.h"
31
#include "yb/yql/cql/ql/ptree/tree_node.h"
32
33
namespace yb {
34
namespace ql {
35
36
//--------------------------------------------------------------------------------------------------
37
// Counter of operators on each column. "gt" includes ">" and ">=". "lt" includes "<" and "<=".
38
class ColumnOpCounter {
 public:
  ColumnOpCounter() = default;

  // Number of conditions of each kind recorded against the column as a whole.
  // Conditions recorded against a partial column (col_arg == true) are not included.
  int gt_count() const { return gt_count_; }
  int lt_count() const { return lt_count_; }
  int eq_count() const { return eq_count_; }
  int in_count() const { return in_count_; }

  // Record one more condition of the given kind. When col_arg is true the condition is counted
  // against a partial column (such as col[1] or col->'a') instead of the whole column.
  void increase_gt(bool col_arg = false) { ++(col_arg ? partial_col_gt_count_ : gt_count_); }
  void increase_lt(bool col_arg = false) { ++(col_arg ? partial_col_lt_count_ : lt_count_); }
  void increase_eq(bool col_arg = false) { ++(col_arg ? partial_col_eq_count_ : eq_count_); }
  void increase_in(bool col_arg = false) { ++(col_arg ? partial_col_in_count_ : in_count_); }

  // Returns true when the recorded combination of conditions is allowed:
  // A. At most one condition can be set for a column ("gt" and "lt" are bounded separately, so
  //    one lower bound together with one upper bound is accepted).
  // B. More than one condition can be set for a partial column such as col[1] or col->'a'.
  // C. Conditions on a column and its partial member cannot co-exist in the same statement.
  // D. Two inequalities are only accepted as exactly one "gt" plus one "lt".
  bool is_valid() const {
    const int lower_bound_ops = in_count_ + eq_count_ + gt_count_;
    const int upper_bound_ops = in_count_ + eq_count_ + lt_count_;
    if (lower_bound_ops > 1 || upper_bound_ops > 1) {
      return false;
    }

    const int whole_col_ops = in_count_ + eq_count_ + gt_count_ + lt_count_;
    const int partial_col_ops = partial_col_eq_count_ + partial_col_gt_count_ +
                                partial_col_in_count_ + partial_col_lt_count_;
    if (whole_col_ops > 0 && partial_col_ops > 0) {
      return false;
    }

    // Rule D. Once rule A has passed, gt_count_ and lt_count_ are each at most 1, so this cannot
    // fire with the current rules; it is kept as a guard in case rule A is ever relaxed.
    const int range_ops = gt_count_ + lt_count_;
    if (range_ops > 2 || (range_ops == 2 && gt_count_ != lt_count_)) {
      return false;
    }
    return true;
  }

  // True when no whole-column condition has been recorded. Partial-column counts are ignored.
  bool IsEmpty() const {
    return gt_count_ == 0 && lt_count_ == 0 && eq_count_ == 0 && in_count_ == 0;
  }

 private:
  // These are counts for regular columns.
  int gt_count_ = 0;
  int lt_count_ = 0;
  int eq_count_ = 0;
  int in_count_ = 0;
  // These are counts for partial columns like json(c1->'a') and collection(c1[0]) operators.
  int partial_col_gt_count_ = 0;
  int partial_col_lt_count_ = 0;
  int partial_col_eq_count_ = 0;
  int partial_col_in_count_ = 0;
};
84
85
// State variables for where clause.
86
class WhereExprState {
87
 public:
88
  WhereExprState(MCList<ColumnOp> *ops,
89
                 MCVector<ColumnOp> *key_ops,
90
                 MCList<SubscriptedColumnOp> *subscripted_col_ops,
91
                 MCList<JsonColumnOp> *json_col_ops,
92
                 MCList<PartitionKeyOp> *partition_key_ops,
93
                 MCVector<ColumnOpCounter> *op_counters,
94
                 ColumnOpCounter *partition_key_counter,
95
                 TreeNodeOpcode statement_type,
96
                 MCList<FuncOp> *func_ops)
97
    : ops_(ops),
98
      key_ops_(key_ops),
99
      subscripted_col_ops_(subscripted_col_ops),
100
      json_col_ops_(json_col_ops),
101
      partition_key_ops_(partition_key_ops),
102
      op_counters_(op_counters),
103
      partition_key_counter_(partition_key_counter),
104
      statement_type_(statement_type),
105
100k
      func_ops_(func_ops) {
106
100k
  }
107
108
  CHECKED_STATUS AnalyzeColumnOp(SemContext *sem_context,
109
                                 const PTRelationExpr *expr,
110
                                 const ColumnDesc *col_desc,
111
                                 PTExprPtr value,
112
                                 PTExprListNodePtr args = nullptr);
113
114
  CHECKED_STATUS AnalyzeColumnFunction(SemContext *sem_context,
115
                                       const PTRelationExpr *expr,
116
                                       PTExprPtr value,
117
                                       PTBcallPtr call);
118
119
  CHECKED_STATUS AnalyzePartitionKeyOp(SemContext *sem_context,
120
                                       const PTRelationExpr *expr,
121
                                       PTExprPtr value);
122
123
0
  MCList<FuncOp> *func_ops() {
124
0
    return func_ops_;
125
0
  }
126
127
 private:
128
  MCList<ColumnOp> *ops_;
129
130
  // Operators on key columns.
131
  MCVector<ColumnOp> *key_ops_;
132
133
  // Operators on subscripted columns (e.g. mp['x'] or lst[2]['x'])
134
  MCList<SubscriptedColumnOp> *subscripted_col_ops_;
135
136
  // Operators on json columns (e.g. c1->'a'->'b'->>'c')
137
  MCList<JsonColumnOp> *json_col_ops_;
138
139
  MCList<PartitionKeyOp> *partition_key_ops_;
140
141
  // Counters of '=', '<', and '>' operators for each column in the where expression.
142
  MCVector<ColumnOpCounter> *op_counters_;
143
144
  // Counters on conditions on the partition key (i.e. using `token`)
145
  ColumnOpCounter *partition_key_counter_;
146
147
  // update, insert, delete, select.
148
  TreeNodeOpcode statement_type_;
149
150
  MCList<FuncOp> *func_ops_;
151
};
152
153
//--------------------------------------------------------------------------------------------------
154
// This class represents the data of collection type. PostgreSQL syntax rules dictate how we form
155
// the hierarchy of our C++ classes, so classes for VALUES and SELECT clause must share the same
156
// base class.
157
// - VALUES (x, y, z)
158
// - (SELECT x, y, z FROM tab)
159
// Functionalities of this class should be "protected" to make sure that PTCollection instances are
160
// not created and used by application.
161
class PTCollection : public TreeNode {
162
 public:
163
  //------------------------------------------------------------------------------------------------
164
  // Public types.
165
  typedef MCSharedPtr<PTCollection> SharedPtr;
166
  typedef MCSharedPtr<const PTCollection> SharedPtrConst;
167
168
  // Node type.
169
0
  virtual TreeNodeOpcode opcode() const override {
170
0
    return TreeNodeOpcode::kPTCollection;
171
0
  }
172
173
 protected:
174
  //------------------------------------------------------------------------------------------------
175
  // Constructor and destructor. Define them in protected section to prevent application from
176
  // declaring them.
177
  PTCollection(MemoryContext *memctx, YBLocationPtr loc)
178
370k
      : TreeNode(memctx, loc) {
179
370k
  }
180
372k
  virtual ~PTCollection() {
181
372k
  }
182
};
183
184
//--------------------------------------------------------------------------------------------------
185
186
class PTDmlStmt : public PTCollection {
187
 public:
188
  // Table column name to description map.
189
  using MCColumnMap = MCMap<MCString, ColumnDesc>;
190
191
  //------------------------------------------------------------------------------------------------
192
  // Constructor and destructor.
193
  PTDmlStmt(MemoryContext *memctx,
194
            YBLocationPtr loc,
195
            PTExprPtr where_clause = nullptr,
196
            PTExprPtr if_clause = nullptr,
197
            bool else_error = false,
198
            PTDmlUsingClausePtr using_clause = nullptr,
199
            bool returns_status = false);
200
  // Clone a DML tnode for re-analysis.
201
  PTDmlStmt(MemoryContext *memctx, const PTDmlStmt& other, bool copy_if_clause);
202
  virtual ~PTDmlStmt();
203
204
  template<typename... TypeArgs>
205
  inline static PTDmlStmt::SharedPtr MakeShared(MemoryContext *memctx, TypeArgs&&... args) {
206
    return MCMakeShared<PTDmlStmt>(memctx, std::forward<TypeArgs>(args)...);
207
  }
208
209
  // Node semantics analysis.
210
  virtual CHECKED_STATUS Analyze(SemContext *sem_context) override;
211
212
  virtual ExplainPlanPB AnalysisResultToPB() = 0;
213
214
  // Find column descriptor. From the context, the column value will be marked to be read if
215
  // necessary when executing the QL statement.
216
  const ColumnDesc *GetColumnDesc(const SemContext *sem_context, const MCString& col_name);
217
218
28.8M
  virtual bool IsDml() const override {
219
28.8M
    return true;
220
28.8M
  }
221
222
  // Table name.
223
  virtual client::YBTableName table_name() const = 0;
224
225
  // Returns location of table name.
226
  virtual const YBLocation& table_loc() const = 0;
227
228
  // Access functions.
229
2.03M
  const std::shared_ptr<client::YBTable>& table() const {
230
2.03M
    return table_;
231
2.03M
  }
232
233
14.9M
  bool is_system() const {
234
14.9M
    return is_system_;
235
14.9M
  }
236
237
315k
  const MCColumnMap& column_map() const {
238
315k
    return column_map_;
239
315k
  }
240
241
  size_t num_columns() const;
242
243
  size_t num_key_columns() const;
244
245
  size_t num_hash_key_columns() const;
246
247
  std::string hash_key_columns() const;
248
249
7.48M
  const MCVector<ColumnOp>& key_where_ops() const {
250
7.48M
    return key_where_ops_;
251
7.48M
  }
252
253
7.49M
  const MCList<ColumnOp>& where_ops() const {
254
7.49M
    return where_ops_;
255
7.49M
  }
256
257
7.49M
  const MCList<SubscriptedColumnOp>& subscripted_col_where_ops() const {
258
7.49M
    return subscripted_col_where_ops_;
259
7.49M
  }
260
261
7.49M
  const MCList<JsonColumnOp>& json_col_where_ops() const {
262
7.49M
    return json_col_where_ops_;
263
7.49M
  }
264
265
7.50M
  const MCList<PartitionKeyOp>& partition_key_ops() const {
266
7.50M
    return partition_key_ops_;
267
7.50M
  }
268
269
7.50M
  const MCList <yb::ql::FuncOp>& func_ops() const {
270
7.50M
    return func_ops_;
271
7.50M
  }
272
273
188
  bool else_error() const {
274
188
    return else_error_;
275
188
  }
276
277
2.39M
  bool returns_status() const {
278
2.39M
    return returns_status_;
279
2.39M
  }
280
281
0
  const PTExprPtr& where_clause() const {
282
0
    return where_clause_;
283
0
  }
284
285
9.89M
  const PTExprPtr& if_clause() const {
286
9.89M
    return if_clause_;
287
9.89M
  }
288
289
  PTExprPtr ttl_seconds() const;
290
291
  PTExprPtr user_timestamp_usec() const;
292
293
7.94M
  virtual const std::shared_ptr<client::YBTable>& bind_table() const {
294
7.94M
    return table_;
295
7.94M
  }
296
297
9.99k
  virtual const MCVector<PTBindVar*> &bind_variables() const {
298
9.99k
    return bind_variables_;
299
9.99k
  }
300
320k
  virtual MCVector<PTBindVar*> &bind_variables() {
301
320k
    return bind_variables_;
302
320k
  }
303
304
  virtual std::vector<int64_t> hash_col_indices() const;
305
306
  // Access for column_args.
307
2.02M
  const MCVector<ColumnArg>& column_args() const {
308
2.02M
    return *CHECK_NOTNULL(column_args_.get());
309
2.02M
  }
310
311
  // Mutable acccess to column_args, used in PreExec phase
312
0
  MCVector<ColumnArg>& column_args() {
313
0
    return *CHECK_NOTNULL(column_args_.get());
314
0
  }
315
316
  // Add column ref to be read by DocDB.
317
  void AddColumnRef(const ColumnDesc& col_desc);
318
319
  // Add column ref to be read.
320
  void AddHashColumnBindVar(PTBindVar* bindvar);
321
322
  // Add all column refs to be read by DocDB.
323
  void AddRefForAllColumns();
324
325
  // Access for column_args.
326
9.52M
  const MCSet<int32>& column_refs() const {
327
9.52M
    return column_refs_;
328
9.52M
  }
329
330
  // Access for column_args.
331
9.50M
  const MCSet<int32>& static_column_refs() const {
332
9.50M
    return static_column_refs_;
333
9.50M
  }
334
335
  // Access for column_args.
336
2.02M
  const MCVector<SubscriptedColumnArg>& subscripted_col_args() const {
337
2.02M
    CHECK
(subscripted_col_args_ != nullptr) << "subscripted-column arguments not set up"100
;
338
2.02M
    return *subscripted_col_args_;
339
2.02M
  }
340
341
2.02M
  const MCVector<JsonColumnArg>& json_col_args() const {
342
2.02M
    CHECK
(json_col_args_ != nullptr) << "json-column arguments not set up"2.48k
;
343
2.02M
    return *json_col_args_;
344
2.02M
  }
345
346
  // Access for selected result.
347
5.05k
  const std::shared_ptr<vector<ColumnSchema>>& selected_schemas() const {
348
5.05k
    return selected_schemas_;
349
5.05k
  }
350
351
  virtual bool IsWriteOp() const = 0;
352
353
  bool RequiresTransaction() const;
354
355
69.5k
  const MCUnorderedSet<std::shared_ptr<client::YBTable>>& pk_only_indexes() const {
356
69.5k
    return pk_only_indexes_;
357
69.5k
  }
358
359
69.4k
  const MCUnorderedSet<TableId>& non_pk_only_indexes() const {
360
69.4k
    return non_pk_only_indexes_;
361
69.4k
  }
362
363
  // Does this DML modify the static or primary or multiple rows?
364
2.02M
  bool ModifiesStaticRow() const {
365
2.02M
    return modifies_static_row_;
366
2.02M
  }
367
2.02M
  bool ModifiesPrimaryRow() const {
368
2.02M
    return modifies_primary_row_;
369
2.02M
  }
370
369k
  bool ModifiesMultipleRows() const {
371
369k
    return modifies_multiple_rows_;
372
369k
  }
373
374
7.47M
  bool HasPrimaryKeysSet() const {
375
7.47M
    DCHECK(!IsWriteOp());
376
7.47M
    return select_has_primary_keys_set_;
377
7.47M
  }
378
379
 protected:
380
381
  template <typename T>
382
326
  static std::string ConditionsToString(T conds) {
383
326
    std::stringstream s;
384
326
    bool first = true;
385
326
    for (const auto& col_op : conds) {
386
194
      if (first) {
387
170
        first = false;
388
170
      } else {
389
24
        s << " AND ";
390
24
      }
391
194
      col_op.OutputTo(&s);
392
194
    }
393
326
    return s.str();
394
326
  }
std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > yb::ql::PTDmlStmt::ConditionsToString<std::__1::vector<yb::ql::ColumnOp, yb::internal::ArenaAllocatorBase<yb::ql::ColumnOp, yb::internal::ArenaTraits> > >(std::__1::vector<yb::ql::ColumnOp, yb::internal::ArenaAllocatorBase<yb::ql::ColumnOp, yb::internal::ArenaTraits> >)
Line
Count
Source
382
164
  static std::string ConditionsToString(T conds) {
383
164
    std::stringstream s;
384
164
    bool first = true;
385
164
    for (const auto& col_op : conds) {
386
44
      if (first) {
387
27
        first = false;
388
27
      } else {
389
17
        s << " AND ";
390
17
      }
391
44
      col_op.OutputTo(&s);
392
44
    }
393
164
    return s.str();
394
164
  }
std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > yb::ql::PTDmlStmt::ConditionsToString<std::__1::list<yb::ql::ColumnOp, yb::internal::ArenaAllocatorBase<yb::ql::ColumnOp, yb::internal::ArenaTraits> > >(std::__1::list<yb::ql::ColumnOp, yb::internal::ArenaAllocatorBase<yb::ql::ColumnOp, yb::internal::ArenaTraits> >)
Line
Count
Source
382
162
  static std::string ConditionsToString(T conds) {
383
162
    std::stringstream s;
384
162
    bool first = true;
385
162
    for (const auto& col_op : conds) {
386
150
      if (first) {
387
143
        first = false;
388
143
      } else {
389
7
        s << " AND ";
390
7
      }
391
150
      col_op.OutputTo(&s);
392
150
    }
393
162
    return s.str();
394
162
  }
395
396
  std::string PartitionKeyToString(const MCList<PartitionKeyOp>& conds);
397
398
  // Lookup table from the metadata database.
399
  CHECKED_STATUS LookupTable(SemContext *sem_context);
400
401
  // Load table schema into symbol table.
402
  static void LoadSchema(SemContext *sem_context,
403
                         const client::YBTablePtr& table,
404
                         MCColumnMap* column_map,
405
                         bool is_index);
406
407
  // Semantic-analyzing the where clause.
408
  CHECKED_STATUS AnalyzeWhereClause(SemContext *sem_context);
409
410
  // Semantic-analyzing the if clause.
411
  CHECKED_STATUS AnalyzeIfClause(SemContext *sem_context);
412
413
  // Semantic-analyzing the USING TTL clause.
414
  CHECKED_STATUS AnalyzeUsingClause(SemContext *sem_context);
415
416
  // Semantic-analyzing the indexes for write operations.
417
  CHECKED_STATUS AnalyzeIndexesForWrites(SemContext *sem_context);
418
419
  // Protected functions.
420
  CHECKED_STATUS AnalyzeWhereExpr(SemContext *sem_context, PTExpr *expr);
421
422
  // Semantic-analyzing the bind variables for hash columns.
423
  CHECKED_STATUS AnalyzeHashColumnBindVars(SemContext *sem_context);
424
425
  // Semantic-analyzing the modified columns for inter-statement dependency.
426
  CHECKED_STATUS AnalyzeColumnArgs(SemContext *sem_context);
427
428
  // Does column_args_ contain static columns only (i.e. writing static column only)?
429
  bool StaticColumnArgsOnly() const;
430
431
  // --- The parser will decorate this node with the following information --
432
433
  const PTExprPtr where_clause_;
434
  const PTExprPtr if_clause_;
435
  const bool else_error_ = false;
436
  const PTDmlUsingClausePtr using_clause_;
437
  const bool returns_status_ = false;
438
  MCVector<PTBindVar*> bind_variables_;
439
440
  // -- The semantic analyzer will decorate this node with the following information --
441
442
  // Is the target table a system table?
443
  bool is_system_ = false;
444
445
  // Target table and column name->description map.
446
  client::YBTablePtr table_;
447
  MCColumnMap column_map_;
448
449
  // Where operator list.
450
  // - When reading (SELECT), key_where_ops_ has only HASH (partition) columns.
451
  // - When writing (UPDATE & DELETE), key_where_ops_ has both has (partition) & range columns.
452
  // This is just a workaround for UPDATE and DELETE. Backend supports only single row. It also
453
  // requires that conditions on columns are ordered the same way as they were defined in
454
  // CREATE TABLE statement.
455
  MCList<FuncOp> func_ops_;
456
  MCVector<ColumnOp> key_where_ops_;
457
  MCList<ColumnOp> where_ops_;
458
  MCList<SubscriptedColumnOp> subscripted_col_where_ops_;
459
  MCList<JsonColumnOp> json_col_where_ops_;
460
461
  // restrictions involving all hash/partition columns -- i.e. read requests using Token builtin
462
  MCList<PartitionKeyOp> partition_key_ops_;
463
464
  // Compare 2 bind variables for their hash column ids.
465
  struct HashColCmp {
466
    bool operator()(const PTBindVar* v1, const PTBindVar* v2) const;
467
  };
468
469
  // List of bind variables associated with hash columns ordered by their column ids.
470
  MCSet<PTBindVar*, HashColCmp> hash_col_bindvars_;
471
472
  MCSharedPtr<MCVector<ColumnArg>> column_args_;
473
  MCSharedPtr<MCVector<SubscriptedColumnArg>> subscripted_col_args_;
474
  MCSharedPtr<MCVector<JsonColumnArg>> json_col_args_;
475
476
  // Columns that are being referenced by this statement. The tservers will need to read these
477
  // columns when processing the statements. These are different from selected columns whose values
478
  // must be sent back to the proxy from the tservers.
479
  MCSet<int32> column_refs_;
480
  MCSet<int32> static_column_refs_;
481
482
  // Ref count of occurrences of cols in where/if clauses. This is used to check, in case of a
483
  // partial index scan, if there are more refs of a column after partial index predicate covers
484
  // some refs of the col.
485
  MCUnorderedMap<int32, uint16> column_ref_cnts_;
486
487
  // TODO(neil) This should have been a resultset's row descriptor. However, because rowblock is
488
  // using schema, this must be declared as vector<ColumnSchema>.
489
  //
490
  // Selected schema - a vector pair<name, datatype> - is used when describing the result set.
491
  // NOTE: Only SELECT and DML with RETURN clause statements have outputs.
492
  //       We prepare this vector once at compile time and use it at execution times.
493
  std::shared_ptr<vector<ColumnSchema>> selected_schemas_;
494
495
  // The set of indexes that index primary key columns of the indexed table only and the set of
496
  // indexes that do not.
497
  MCUnorderedSet<client::YBTablePtr> pk_only_indexes_;
498
  MCUnorderedSet<TableId> non_pk_only_indexes_;
499
500
  // For inter-dependency analysis of DMLs in a batch/transaction
501
  bool modifies_primary_row_ = false;
502
  bool modifies_static_row_ = false;
503
  bool modifies_multiple_rows_ = false; // Currently only allowed for (range) deletes.
504
505
  // For optimizing SELECT queries with IN condition on hash key: does this SELECT have all primary
506
  // key columns set with '=' or 'IN' conditions.
507
  bool select_has_primary_keys_set_ = false;
508
  bool has_incomplete_hash_ = false;
509
};
510
511
}  // namespace ql
512
}  // namespace yb
513
514
#endif  // YB_YQL_CQL_QL_PTREE_PT_DML_H_