/Users/deen/code/yugabyte-db/src/yb/yql/cql/ql/ptree/pt_dml.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //-------------------------------------------------------------------------------------------------- |
2 | | // Copyright (c) YugaByte, Inc. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
5 | | // in compliance with the License. You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
10 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
11 | | // or implied. See the License for the specific language governing permissions and limitations |
12 | | // under the License. |
13 | | // |
14 | | // |
15 | | // Treenode implementation for DML including SELECT statements. |
16 | | //-------------------------------------------------------------------------------------------------- |
17 | | |
18 | | #include "yb/yql/cql/ql/ptree/pt_dml.h" |
19 | | |
20 | | #include <unordered_map> |
21 | | |
22 | | #include "yb/client/schema.h" |
23 | | #include "yb/client/table.h" |
24 | | |
25 | | #include "yb/common/common.pb.h" |
26 | | #include "yb/common/index.h" |
27 | | #include "yb/common/index_column.h" |
28 | | #include "yb/common/ql_type.h" |
29 | | #include "yb/common/schema.h" |
30 | | |
31 | | #include "yb/gutil/casts.h" |
32 | | |
33 | | #include "yb/util/result.h" |
34 | | #include "yb/util/status_format.h" |
35 | | #include "yb/util/status_log.h" |
36 | | |
37 | | #include "yb/yql/cql/ql/ptree/column_arg.h" |
38 | | #include "yb/yql/cql/ql/ptree/column_desc.h" |
39 | | #include "yb/yql/cql/ql/ptree/pt_dml_using_clause.h" |
40 | | #include "yb/yql/cql/ql/ptree/pt_expr.h" |
41 | | #include "yb/yql/cql/ql/ptree/pt_select.h" |
42 | | #include "yb/yql/cql/ql/ptree/sem_context.h" |
43 | | #include "yb/yql/cql/ql/ptree/ycql_predtest.h" |
44 | | |
45 | | DECLARE_bool(allow_index_table_read_write); |
46 | | DECLARE_bool(use_cassandra_authentication); |
47 | | |
48 | | namespace yb { |
49 | | namespace ql { |
50 | | |
51 | | using strings::Substitute; |
52 | | |
53 | | PTDmlStmt::PTDmlStmt(MemoryContext *memctx, |
54 | | YBLocationPtr loc, |
55 | | PTExpr::SharedPtr where_clause, |
56 | | PTExpr::SharedPtr if_clause, |
57 | | const bool else_error, |
58 | | PTDmlUsingClausePtr using_clause, |
59 | | const bool returns_status) |
60 | | : PTCollection(memctx, loc), |
61 | | where_clause_(where_clause), |
62 | | if_clause_(if_clause), |
63 | | else_error_(else_error), |
64 | | using_clause_(using_clause), |
65 | | returns_status_(returns_status), |
66 | | bind_variables_(memctx), |
67 | | column_map_(memctx), |
68 | | func_ops_(memctx), |
69 | | key_where_ops_(memctx), |
70 | | where_ops_(memctx), |
71 | | subscripted_col_where_ops_(memctx), |
72 | | json_col_where_ops_(memctx), |
73 | | partition_key_ops_(memctx), |
74 | | hash_col_bindvars_(memctx), |
75 | | column_refs_(memctx), |
76 | | static_column_refs_(memctx), |
77 | | column_ref_cnts_(memctx), |
78 | | pk_only_indexes_(memctx), |
79 | 323k | non_pk_only_indexes_(memctx) { |
80 | 323k | } |
81 | | |
82 | | // Clone a DML tnode for re-analysis. Only the syntactic information populated by the parser should |
83 | | // be cloned here. Semantic information should be left in the initial state to be populated when |
84 | | // this tnode is analyzed. |
85 | | PTDmlStmt::PTDmlStmt(MemoryContext *memctx, const PTDmlStmt& other, bool copy_if_clause) |
86 | | : PTCollection(memctx, other.loc_ptr()), |
87 | | where_clause_(other.where_clause_), |
88 | | if_clause_(copy_if_clause ? other.if_clause_ : nullptr), |
89 | | else_error_(other.else_error_), |
90 | | using_clause_(other.using_clause_), |
91 | | returns_status_(other.returns_status_), |
92 | | bind_variables_(other.bind_variables_, memctx), |
93 | | column_map_(memctx), |
94 | | func_ops_(memctx), |
95 | | key_where_ops_(memctx), |
96 | | where_ops_(memctx), |
97 | | subscripted_col_where_ops_(memctx), |
98 | | json_col_where_ops_(memctx), |
99 | | partition_key_ops_(memctx), |
100 | | hash_col_bindvars_(memctx), |
101 | | column_refs_(memctx), |
102 | | static_column_refs_(memctx), |
103 | | column_ref_cnts_(memctx), |
104 | | pk_only_indexes_(memctx), |
105 | 765 | non_pk_only_indexes_(memctx) { |
106 | 765 | } |
107 | | |
108 | 323k | PTDmlStmt::~PTDmlStmt() { |
109 | 323k | } |
110 | | |
111 | 423k | size_t PTDmlStmt::num_columns() const { |
112 | 423k | return table_->schema().num_columns(); |
113 | 423k | } |
114 | | |
115 | 607k | size_t PTDmlStmt::num_key_columns() const { |
116 | 607k | return table_->schema().num_key_columns(); |
117 | 607k | } |
118 | | |
119 | 827k | size_t PTDmlStmt::num_hash_key_columns() const { |
120 | 827k | return table_->schema().num_hash_key_columns(); |
121 | 827k | } |
122 | | |
123 | 4 | string PTDmlStmt::hash_key_columns() const { |
124 | 4 | std::stringstream s; |
125 | 4 | auto &schema = table_->schema(); |
126 | 8 | for (size_t i = 0; i < schema.num_hash_key_columns(); ++i4 ) { |
127 | 4 | if (i != 0) s << ", "0 ; |
128 | 4 | s << schema.Column(i).name(); |
129 | 4 | } |
130 | 4 | return s.str(); |
131 | 4 | } |
132 | | |
133 | 322k | Status PTDmlStmt::LookupTable(SemContext *sem_context) { |
134 | 322k | if (FLAGS_use_cassandra_authentication) { |
135 | 116k | switch (opcode()) { |
136 | 115k | case TreeNodeOpcode::kPTSelectStmt: { |
137 | 115k | if (!internal_115k ) { |
138 | 115k | if (down_cast<PTSelectStmt *>(this)->IsReadableByAllSystemTable()) { |
139 | 113k | break; |
140 | 113k | } |
141 | 2.56k | RETURN_NOT_OK(sem_context->CheckHasTablePermission(loc(), |
142 | 2.56k | PermissionType::SELECT_PERMISSION, this->table_name())); |
143 | 2.56k | } |
144 | 2.37k | break; |
145 | 115k | } |
146 | 2.37k | case TreeNodeOpcode::kPTUpdateStmt: 35 FALLTHROUGH_INTENDED35 ; |
147 | 194 | case TreeNodeOpcode::kPTInsertStmt: FALLTHROUGH_INTENDED; |
148 | 204 | case TreeNodeOpcode::kPTDeleteStmt: { |
149 | 204 | RETURN_NOT_OK(sem_context->CheckHasTablePermission(loc(), |
150 | 204 | PermissionType::MODIFY_PERMISSION, this->table_name())); |
151 | 174 | break; |
152 | 204 | } |
153 | 174 | default: |
154 | 0 | DFATAL_OR_RETURN_NOT_OK(STATUS_FORMAT(InternalError, "Unexpected operation $0", opcode())); |
155 | 116k | } |
156 | 116k | } |
157 | 322k | is_system_ = table_name().is_system(); |
158 | 322k | if (is_system_ && IsWriteOp()256k && client::FLAGS_yb_system_namespace_readonly3 ) { |
159 | 3 | return sem_context->Error(table_loc(), ErrorCode::SYSTEM_NAMESPACE_READONLY); |
160 | 3 | } |
161 | | |
162 | 322k | VLOG(3) << "Loading table descriptor for " << table_name().ToString()3.20k ; |
163 | 322k | table_ = sem_context->GetTableDesc(table_name()); |
164 | 323k | if (!table_322k || (table_->IsIndex() && !FLAGS_allow_index_table_read_write3.96k ) || |
165 | | // Only looking for CQL tables. |
166 | 323k | (table_->table_type() != client::YBTableType::YQL_TABLE_TYPE)) { |
167 | 2.26k | return sem_context->Error(table_loc(), ErrorCode::OBJECT_NOT_FOUND); |
168 | 2.26k | } |
169 | 319k | LoadSchema(sem_context, table_, &column_map_, false /* is_index */); |
170 | 319k | return Status::OK(); |
171 | 322k | } |
172 | | |
173 | | void PTDmlStmt::LoadSchema(SemContext *sem_context, |
174 | | const client::YBTablePtr& table, |
175 | | MCColumnMap* column_map, |
176 | 324k | bool is_index) { |
177 | 324k | column_map->clear(); |
178 | 324k | const client::YBSchema& schema = table->schema(); |
179 | 3.70M | for (size_t idx = 0; idx < schema.num_columns(); idx++3.37M ) { |
180 | 3.37M | const client::YBColumnSchema col = schema.Column(idx); |
181 | 3.37M | string colname = col.name(); |
182 | 3.37M | if (is_index && !schema.table_properties().use_mangled_column_name()3.43k ) { |
183 | | // This is an OLD INDEX. We need to mangle its column name to work with the new implementation. |
184 | 0 | colname = YcqlName::MangleColumnName(colname); |
185 | 0 | } |
186 | 3.37M | column_map->emplace(MCString(colname.c_str(), sem_context->PSemMem()), |
187 | 3.37M | ColumnDesc(idx, |
188 | 3.37M | schema.ColumnId(idx), |
189 | 3.37M | col.name(), |
190 | 3.37M | idx < schema.num_hash_key_columns(), |
191 | 3.37M | idx < schema.num_key_columns(), |
192 | 3.37M | col.is_static(), |
193 | 3.37M | col.is_counter(), |
194 | 3.37M | col.type(), |
195 | 3.37M | client::YBColumnSchema::ToInternalDataType(col.type()), |
196 | 3.37M | is_index)); |
197 | 3.37M | } |
198 | 324k | } |
199 | | |
200 | | // Node semantics analysis. |
201 | 325k | Status PTDmlStmt::Analyze(SemContext *sem_context) { |
202 | 325k | sem_context->set_current_dml_stmt(this); |
203 | 325k | MemoryContext *psem_mem = sem_context->PSemMem(); |
204 | 325k | column_args_ = MCMakeShared<MCVector<ColumnArg>>(psem_mem); |
205 | 325k | subscripted_col_args_ = MCMakeShared<MCVector<SubscriptedColumnArg>>(psem_mem); |
206 | 325k | json_col_args_ = MCMakeShared<MCVector<JsonColumnArg>>(psem_mem); |
207 | 325k | return Status::OK(); |
208 | 325k | } |
209 | | |
210 | | const ColumnDesc* PTDmlStmt::GetColumnDesc(const SemContext *sem_context, |
211 | 751k | const MCString& col_name) { |
212 | 751k | const auto iter = column_map_.find(col_name); |
213 | 751k | if (iter == column_map_.end()) { |
214 | 191 | return nullptr; |
215 | 191 | } |
216 | | |
217 | 750k | const ColumnDesc* column_desc = &iter->second; |
218 | | |
219 | | // To indicate that DocDB must read a column value to execute an expression, the column is added |
220 | | // to the column_refs list. |
221 | 750k | bool reading_column = false; |
222 | | |
223 | 750k | switch (opcode()) { |
224 | 495k | case TreeNodeOpcode::kPTSelectStmt: |
225 | 495k | reading_column = true; |
226 | 495k | break; |
227 | 18.8k | case TreeNodeOpcode::kPTUpdateStmt: |
228 | 18.8k | if (sem_context->sem_state() != nullptr && |
229 | 18.8k | sem_context->processing_set_clause() && |
230 | 18.8k | !sem_context->processing_assignee()7.23k ) { |
231 | 198 | reading_column = true; |
232 | 198 | break; |
233 | 198 | } |
234 | 18.8k | FALLTHROUGH_INTENDED18.7k ;18.7k |
235 | 252k | case TreeNodeOpcode::kPTInsertStmt: |
236 | 254k | case TreeNodeOpcode::kPTDeleteStmt: |
237 | 254k | if (sem_context->sem_state() != nullptr && |
238 | 254k | sem_context->processing_if_clause()254k ) { |
239 | 116 | reading_column = true; |
240 | 116 | break; |
241 | 116 | } |
242 | 254k | break; |
243 | 254k | default: |
244 | 0 | break; |
245 | 750k | } |
246 | | |
247 | 751k | if (reading_column) { |
248 | | // TODO(neil) Currently AddColumnRef() relies on MCSet datatype to guarantee that we have a |
249 | | // unique list of IDs, but we should take advantage of the "symbol table" when collecting data |
250 | | // for execution. Symbol table and "column_read_count_" need to be corrected so that we can |
251 | | // use MCList instead. |
252 | | |
253 | | // Indicate that this column must be read for the statement execution. |
254 | 496k | AddColumnRef(*column_desc); |
255 | 496k | } |
256 | | |
257 | 751k | return column_desc; |
258 | 750k | } |
259 | | |
260 | 273k | Status PTDmlStmt::AnalyzeWhereClause(SemContext *sem_context) { |
261 | 273k | if (!where_clause_) { |
262 | 172k | if (IsWriteOp()) { |
263 | 6 | return sem_context->Error(this, "Missing partition key", ErrorCode::CQL_STATEMENT_INVALID); |
264 | 6 | } |
265 | 172k | return Status::OK(); |
266 | 172k | } |
267 | | |
268 | | // Analyze where expression. |
269 | 100k | if (IsWriteOp()) { |
270 | 3.95k | key_where_ops_.resize(num_key_columns()); |
271 | 96.1k | } else { |
272 | 96.1k | key_where_ops_.resize(num_hash_key_columns()); |
273 | 96.1k | } |
274 | 100k | RETURN_NOT_OK(AnalyzeWhereExpr(sem_context, where_clause_.get())); |
275 | 100k | return Status::OK(); |
276 | 100k | } |
277 | | |
278 | 99.9k | Status PTDmlStmt::AnalyzeWhereExpr(SemContext *sem_context, PTExpr *expr) { |
279 | | // Construct the state variables and analyze the expression. |
280 | 99.9k | MCVector<ColumnOpCounter> op_counters(sem_context->PTempMem()); |
281 | 99.9k | op_counters.resize(num_columns()); |
282 | 99.9k | ColumnOpCounter partition_key_counter; |
283 | 99.9k | WhereExprState where_state(&where_ops_, &key_where_ops_, &subscripted_col_where_ops_, |
284 | 99.9k | &json_col_where_ops_, &partition_key_ops_, &op_counters, |
285 | 99.9k | &partition_key_counter, opcode(), &func_ops_); |
286 | | |
287 | 99.9k | SemState sem_state(sem_context, QLType::Create(BOOL), InternalType::kBoolValue); |
288 | 99.9k | sem_state.SetWhereState(&where_state); |
289 | 99.9k | RETURN_NOT_OK(expr->Analyze(sem_context)); |
290 | | |
291 | 99.8k | if (IsWriteOp()) { |
292 | | // Make sure that all hash entries are referenced in where expression. |
293 | 10.9k | for (size_t idx = 0; idx < num_hash_key_columns(); idx++7.08k ) { |
294 | 7.08k | if (op_counters[idx].eq_count() == 0) { |
295 | 8 | return sem_context->Error(expr, "Missing condition on key columns in WHERE clause", |
296 | 8 | ErrorCode::CQL_STATEMENT_INVALID); |
297 | 8 | } |
298 | 7.08k | } |
299 | | |
300 | | // If writing static columns only, check that either all range key entries are referenced in the |
301 | | // where expression or none is referenced. Else, check that all range keys are referenced. |
302 | 3.88k | size_t range_keys = 0; |
303 | 10.5k | for (auto idx = num_hash_key_columns(); idx < num_key_columns(); idx++6.63k ) { |
304 | 6.63k | if (op_counters[idx].eq_count() != 0) { |
305 | 6.55k | range_keys++; |
306 | 6.55k | } |
307 | 6.63k | } |
308 | 3.88k | if (StaticColumnArgsOnly()) { |
309 | 25 | if (range_keys != num_key_columns() - num_hash_key_columns() && range_keys != 020 ) |
310 | 0 | return sem_context->Error(expr, "Missing condition on key columns in WHERE clause", |
311 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
312 | 25 | if (range_keys == 0) { |
313 | 20 | key_where_ops_.resize(num_hash_key_columns()); |
314 | 20 | } |
315 | 3.86k | } else { |
316 | 3.86k | if (range_keys != num_key_columns() - num_hash_key_columns()) { |
317 | 30 | if (opcode() == TreeNodeOpcode::kPTDeleteStmt) { |
318 | | // Range expressions in write requests are allowed for deletes only. |
319 | 89 | for (auto idx = num_hash_key_columns(); idx < num_key_columns(); idx++59 ) { |
320 | 59 | if (op_counters[idx].eq_count() != 0) { |
321 | 4 | where_ops_.push_front(key_where_ops_[idx]); |
322 | 4 | } |
323 | 59 | } |
324 | 30 | key_where_ops_.resize(num_hash_key_columns()); |
325 | 30 | } else { |
326 | 0 | return sem_context->Error(expr, "Missing condition on key columns in WHERE clause", |
327 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
328 | 0 | } |
329 | 30 | } |
330 | 3.86k | } |
331 | 95.9k | } else { // ReadOp |
332 | | // Add the hash to the where clause if the list is incomplete. Clear key_where_ops_ to do |
333 | | // whole-table scan. |
334 | 191k | for (size_t idx = 0; idx < num_hash_key_columns(); idx++96.0k ) { |
335 | 97.5k | if (!key_where_ops_[idx].IsInitialized()) { |
336 | 1.51k | has_incomplete_hash_ = true; |
337 | 1.51k | break; |
338 | 1.51k | } |
339 | 97.5k | } |
340 | 95.9k | if (has_incomplete_hash_) { |
341 | 3.98k | for (auto idx = num_hash_key_columns(); idx > 0;) { |
342 | 2.47k | --idx; |
343 | 2.47k | if (key_where_ops_[idx].IsInitialized()) { |
344 | 164 | where_ops_.push_front(key_where_ops_[idx]); |
345 | 164 | } |
346 | 2.47k | } |
347 | 1.51k | key_where_ops_.clear(); |
348 | 94.4k | } else { |
349 | 94.4k | select_has_primary_keys_set_ = true; |
350 | | // Unset if there is a range key without a condition. |
351 | 103k | for (auto idx = num_hash_key_columns(); idx < num_key_columns(); idx++9.15k ) { |
352 | 25.1k | if (op_counters[idx].IsEmpty()) { |
353 | 16.0k | select_has_primary_keys_set_ = false; |
354 | 16.0k | break; |
355 | 16.0k | } |
356 | 25.1k | } |
357 | 94.4k | } |
358 | 95.9k | } |
359 | | |
360 | | // Analyze bind variables for hash columns in the WHERE clause. |
361 | 99.8k | RETURN_NOT_OK(AnalyzeHashColumnBindVars(sem_context)); |
362 | | |
363 | 99.8k | return Status::OK(); |
364 | 99.8k | } |
365 | | |
366 | 323k | Status PTDmlStmt::AnalyzeIfClause(SemContext *sem_context) { |
367 | 323k | if (if_clause_) { |
368 | 327 | SemState sem_state(sem_context, QLType::Create(BOOL), InternalType::kBoolValue); |
369 | 327 | sem_state.set_processing_if_clause(true); |
370 | 327 | return if_clause_->Analyze(sem_context); |
371 | 327 | } |
372 | 323k | return Status::OK(); |
373 | 323k | } |
374 | | |
375 | 53.7k | Status PTDmlStmt::AnalyzeUsingClause(SemContext *sem_context) { |
376 | 53.7k | if (using_clause_ == nullptr) { |
377 | 53.5k | return Status::OK(); |
378 | 53.5k | } |
379 | | |
380 | 201 | RETURN_NOT_OK(using_clause_->Analyze(sem_context)); |
381 | 191 | return Status::OK(); |
382 | 201 | } |
383 | | |
384 | 53.4k | Status PTDmlStmt::AnalyzeIndexesForWrites(SemContext *sem_context) { |
385 | 53.4k | const Schema& indexed_schema = table_->InternalSchema(); |
386 | 53.4k | for (const auto& itr : table_->index_map()) { |
387 | 15.8k | const TableId& index_id = itr.first; |
388 | 15.8k | const IndexInfo& index = itr.second; |
389 | | |
390 | 15.8k | bool primary_key_cols_only = index.PrimaryKeyColumnsOnly(indexed_schema); |
391 | | |
392 | 15.8k | std::shared_ptr<const IndexInfoPB::WherePredicateSpecPB>& where_predicate_spec_pb = |
393 | 15.8k | index.where_predicate_spec(); |
394 | | |
395 | | // If the index has primary key columns only and doesn't reference non-pk columns in |
396 | | // predicate, index updates can be issued from the CQL proxy side without reading the current |
397 | | // row as long as the DML does not delete the column (including setting the value to null). |
398 | | // Otherwise, the updates needed can only be determined from the tserver side after the current |
399 | | // values are read. |
400 | | |
401 | | // TODO (Piyush) - Right now we are not distinguishing between - |
402 | | // 1. primary columns only in index where clause (if it is present) and |
403 | | // 2. non-primary columns in index where clause. |
404 | | // This distinction can help in optimizing the index write path as per discussion |
405 | | // in the Partial Indexes design doc. |
406 | 15.8k | if (primary_key_cols_only && !where_predicate_spec_pb650 ) { |
407 | 650 | std::shared_ptr<client::YBTable> index_table = sem_context->GetTableDesc(index_id); |
408 | 650 | if (index_table == nullptr) { |
409 | 0 | return sem_context->Error(this, Substitute("Index table $0 not found", index_id).c_str(), |
410 | 0 | ErrorCode::OBJECT_NOT_FOUND); |
411 | 0 | } |
412 | 650 | pk_only_indexes_.insert(index_table); |
413 | 15.1k | } else { |
414 | 15.1k | non_pk_only_indexes_.insert(index_id); |
415 | 51.1k | for (const auto& column : index.columns()) { |
416 | 51.1k | const ColumnId indexed_column_id = column.indexed_column_id; |
417 | 51.1k | if (!indexed_schema.is_key_column(indexed_column_id)) { |
418 | 18.5k | column_refs_.insert(indexed_column_id); |
419 | 18.5k | } |
420 | 51.1k | } |
421 | | |
422 | | // In case non-pk columns are present in the index predicate (valid only for a partial index), |
423 | | // add those references as well. |
424 | 15.1k | if (where_predicate_spec_pb) { |
425 | 4.51k | for (auto column_id : where_predicate_spec_pb->column_ids()) |
426 | 5.59k | column_refs_.insert(column_id); |
427 | 4.51k | } |
428 | 15.1k | } |
429 | 15.8k | } |
430 | 53.4k | return Status::OK(); |
431 | 53.4k | } |
432 | | |
433 | 9.59M | bool PTDmlStmt::RequiresTransaction() const { |
434 | 9.59M | return IsWriteOp() && !DCHECK_NOTNULL(table_.get())->index_map().empty()2.09M && |
435 | 9.59M | table_->InternalSchema().table_properties().is_transactional()138k ; |
436 | 9.59M | } |
437 | | |
438 | 149k | Status PTDmlStmt::AnalyzeHashColumnBindVars(SemContext *sem_context) { |
439 | | // If not all hash columns are bound, clear hash_col_bindvars_ because the client driver will not |
440 | | // be able to compute the full hash key unless it parses the SQL statement and extracts the |
441 | | // literals also, which is not currently supported and unlikely to be. |
442 | 149k | if (hash_col_bindvars_.size() != num_hash_key_columns()) { |
443 | 144k | hash_col_bindvars_.clear(); |
444 | 144k | } |
445 | | |
446 | 149k | return Status::OK(); |
447 | 149k | } |
448 | | |
449 | 53.5k | Status PTDmlStmt::AnalyzeColumnArgs(SemContext *sem_context) { |
450 | | |
451 | | // If we have no args, this must be a delete modifying primary key only. |
452 | 53.5k | if (column_args_->empty() && subscripted_col_args_->empty()741 && json_col_args_->empty()741 ) { |
453 | 741 | modifies_primary_row_ = true; |
454 | 741 | return Status::OK(); |
455 | 741 | } |
456 | | |
457 | | // If we have range keys we modify the primary row. |
458 | 58.6k | for (auto idx = num_hash_key_columns(); 52.7k idx < num_key_columns(); idx++5.89k ) { |
459 | 47.3k | if (column_args_->at(idx).IsInitialized()) { |
460 | 41.5k | modifies_primary_row_ = true; |
461 | 41.5k | break; |
462 | 41.5k | } |
463 | 47.3k | } |
464 | | |
465 | | // If we have column args: |
466 | | // - Writing to static columns => modify static row. |
467 | | // - Writing to non-static columns -> modify primary row. |
468 | | |
469 | | // Check plain column args. |
470 | 151k | for (auto idx = num_key_columns(); idx < column_args_->size(); idx++98.5k ) { |
471 | 98.6k | if (column_args_->at(idx).IsInitialized()) { |
472 | 93.6k | if (column_args_->at(idx).desc()->is_static()) { |
473 | 140 | modifies_static_row_ = true; |
474 | 93.5k | } else { |
475 | 93.5k | modifies_primary_row_ = true; |
476 | 93.5k | } |
477 | 93.6k | if (modifies_static_row_ && modifies_primary_row_151 ) { |
478 | 62 | return Status::OK(); |
479 | 62 | } |
480 | 93.6k | } |
481 | 98.6k | } |
482 | | |
483 | | // Check subscripted column args (e.g. map['k'] or list[1]) |
484 | 52.7k | for (auto& arg : *subscripted_col_args_) { |
485 | 50 | if (arg.desc()->is_static()) { |
486 | 10 | modifies_static_row_ = true; |
487 | 40 | } else { |
488 | 40 | modifies_primary_row_ = true; |
489 | 40 | } |
490 | 50 | if (modifies_static_row_ && modifies_primary_row_10 ) { |
491 | 0 | return Status::OK(); |
492 | 0 | } |
493 | 50 | } |
494 | | |
495 | | // Check json column args (e.g. json->'key' or json->1) |
496 | 52.7k | for (auto& arg : *json_col_args_) { |
497 | 201 | if (arg.desc()->is_static()) { |
498 | 0 | modifies_static_row_ = true; |
499 | 201 | } else { |
500 | 201 | modifies_primary_row_ = true; |
501 | 201 | } |
502 | 201 | if (modifies_static_row_ && modifies_primary_row_0 ) { |
503 | 0 | return Status::OK(); |
504 | 0 | } |
505 | 201 | } |
506 | | |
507 | 52.7k | return Status::OK(); |
508 | 52.7k | } |
509 | | |
510 | | // Are we writing to static columns only, i.e. no range columns or non-static columns. |
511 | 53.4k | bool PTDmlStmt::StaticColumnArgsOnly() const { |
512 | 53.4k | return modifies_static_row_ && !modifies_primary_row_117 ; |
513 | 53.4k | } |
514 | | |
515 | | //-------------------------------------------------------------------------------------------------- |
516 | | |
517 | | Status WhereExprState::AnalyzeColumnOp(SemContext *sem_context, |
518 | | const PTRelationExpr *expr, |
519 | | const ColumnDesc *col_desc, |
520 | | PTExpr::SharedPtr value, |
521 | 122k | PTExprListNode::SharedPtr col_args) { |
522 | | // If this is a nested select from an uncovered index/partial index, |
523 | | // ignore column that is uncovered/only in partial index predicate and not in index cols. |
524 | 122k | if (col_desc == nullptr) { |
525 | 156 | if (sem_context->IsUncoveredIndexSelect() || sem_context->IsPartialIndexSelect()32 ) |
526 | 156 | return Status::OK(); |
527 | | |
528 | 0 | return STATUS(InternalError, "Column does not exist"); |
529 | 156 | } |
530 | | |
531 | | // If a SELECT involves a partial index scan, we can safely ignore some sub-clauses of WHERE |
532 | | // clause if they are taken care of by the index predicate. |
533 | | // |
534 | | // Preserve the sub-clause in case of below exceptions for child SELECT: |
535 | | // For col = val type of sub-clauses on a prefix of the index's primary key, it is better |
536 | | // to again take note of the sub-clauses because it will help decide - |
537 | | // i) the tserver to land on (if all hash cols are specified) |
538 | | // ii) the range to scan (if range col is specified and is part of a prefix found in WHERE) |
539 | | |
540 | 122k | if (statement_type_ == TreeNodeOpcode::kPTSelectStmt) { |
541 | 108k | PTSelectStmt* select_stmt = static_cast<PTSelectStmt*>(sem_context->current_dml_stmt()); |
542 | 108k | if (select_stmt->child_select()) { |
543 | | // Parent SELECT (of a nested select). |
544 | 1.02k | std::shared_ptr<client::YBTable> table = select_stmt->table(); |
545 | 1.02k | std::unordered_map<TableId, IndexInfo>::const_iterator it = |
546 | 1.02k | table->index_map().find(select_stmt->child_select()->index_id()); |
547 | | |
548 | 1.02k | RSTATUS_DCHECK(it != table->index_map().end(), InternalError, "Index should be present"); |
549 | 1.02k | const IndexInfo& idx_info = it->second; |
550 | | |
551 | 1.02k | if (idx_info.where_predicate_spec()) { |
552 | | // It is a partial index. |
553 | 678 | ColumnOp op(col_desc, value, expr->ql_op()); |
554 | 678 | if (VERIFY_RESULT(OpInExpr(idx_info.where_predicate_spec()->where_expr(), op))) { |
555 | 363 | return Status::OK(); |
556 | 363 | } |
557 | 678 | } |
558 | 107k | } else if (!select_stmt->index_id().empty()) { |
559 | | // Child SELECT. |
560 | 1.23k | std::shared_ptr<client::YBTable> table = select_stmt->table(); |
561 | 1.23k | const IndexInfo& idx_info = table->index_info(); |
562 | | |
563 | 1.23k | if (idx_info.where_predicate_spec()) { |
564 | | // First attempt to preserve the sub-clause if it might be useful. |
565 | 780 | bool preserve_col_op = false; |
566 | 780 | auto prefix_len = sem_context->index_select_prefix_length(); |
567 | | // Only in case all hash cols are set, we can even attempt to preserve the sub-clause. |
568 | 780 | bool all_hash_cols_set = prefix_len >= idx_info.hash_column_count(); |
569 | 780 | if (all_hash_cols_set) { |
570 | 67 | if (col_desc->index() < prefix_len && expr->ql_op() == QL_OP_EQUAL) |
571 | 67 | preserve_col_op = true; |
572 | 0 | else if (col_desc->index() == prefix_len + 1 && |
573 | 0 | col_desc->index() < idx_info.range_column_count() && |
574 | 0 | (expr->ql_op() == QL_OP_LESS_THAN || |
575 | 0 | expr->ql_op() == QL_OP_LESS_THAN_EQUAL || |
576 | 0 | expr->ql_op() == QL_OP_GREATER_THAN || |
577 | 0 | expr->ql_op() == QL_OP_GREATER_THAN_EQUAL) |
578 | 0 | ) |
579 | 0 | preserve_col_op = true; |
580 | 67 | } |
581 | | |
582 | 780 | if (!preserve_col_op) { |
583 | 713 | const auto& idx_col = idx_info.column(col_desc->index()); |
584 | | // Change to the id in the indexed table because we have those ids in the index predicate |
585 | | // as well. |
586 | 713 | ColumnDesc translated_col_desc(*col_desc); |
587 | 713 | translated_col_desc.set_id(idx_col.indexed_column_id); |
588 | | |
589 | 713 | ColumnOp op(&translated_col_desc, value, expr->ql_op()); |
590 | 713 | if (VERIFY_RESULT(OpInExpr(idx_info.where_predicate_spec()->where_expr(), op))) { |
591 | 366 | return Status::OK(); |
592 | 366 | } |
593 | 713 | } |
594 | 780 | } |
595 | 1.23k | } |
596 | 108k | } |
597 | | |
598 | 121k | if (sem_context->void_primary_key_condition() && col_desc->is_primary()663 ) { |
599 | | // Drop the key condition from where clause as instructed. |
600 | 354 | return Status::OK(); |
601 | 354 | } |
602 | | |
603 | 121k | ColumnOpCounter& counter = op_counters_->at(col_desc->index()); |
604 | 121k | switch (expr->ql_op()) { |
605 | 118k | case QL_OP_EQUAL: { |
606 | 118k | counter.increase_eq(col_args != nullptr); |
607 | 118k | if (!counter.is_valid()) { |
608 | 12 | return sem_context->Error(expr, "Illogical condition for where clause", |
609 | 12 | ErrorCode::CQL_STATEMENT_INVALID); |
610 | 12 | } |
611 | | |
612 | | // Check that the column is being used correctly. |
613 | 118k | switch (statement_type_) { |
614 | 0 | case TreeNodeOpcode::kPTInsertStmt: |
615 | 11.5k | case TreeNodeOpcode::kPTUpdateStmt: |
616 | 13.6k | case TreeNodeOpcode::kPTDeleteStmt: { |
617 | 13.6k | if (!col_desc->is_primary()) { |
618 | 0 | return sem_context->Error(expr, |
619 | 0 | "Non primary key cannot be used in where clause for write requests", |
620 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
621 | 0 | } |
622 | 13.6k | (*key_ops_)[col_desc->index()].Init(col_desc, value, QLOperator::QL_OP_EQUAL); |
623 | 13.6k | break; |
624 | 13.6k | } |
625 | 105k | case TreeNodeOpcode::kPTSelectStmt: { |
626 | 105k | if (col_desc->is_hash()) { |
627 | 94.9k | (*key_ops_)[col_desc->index()].Init(col_desc, value, QLOperator::QL_OP_EQUAL); |
628 | 94.9k | } else if (10.8k col_args != nullptr10.8k ) { |
629 | 185 | if (col_desc->ql_type()->IsJson()) { |
630 | 100 | json_col_ops_->emplace_back(col_desc, col_args, value, expr->ql_op()); |
631 | 100 | } else { |
632 | 85 | subscripted_col_ops_->emplace_back(col_desc, col_args, value, expr->ql_op()); |
633 | 85 | } |
634 | 10.7k | } else { |
635 | 10.7k | ops_->emplace_back(col_desc, value, QLOperator::QL_OP_EQUAL); |
636 | 10.7k | } |
637 | 105k | break; |
638 | 13.6k | } |
639 | 0 | default: |
640 | 0 | return sem_context->Error(expr, "Statement type cannot have where condition", |
641 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
642 | 118k | } |
643 | 118k | break; |
644 | 118k | } |
645 | | |
646 | 118k | case QL_OP_LESS_THAN: 427 FALLTHROUGH_INTENDED427 ; |
647 | 591 | case QL_OP_LESS_THAN_EQUAL: FALLTHROUGH_INTENDED; |
648 | 785 | case QL_OP_GREATER_THAN_EQUAL: FALLTHROUGH_INTENDED; |
649 | 1.23k | case QL_OP_GREATER_THAN: { |
650 | | // Inequality conditions on hash columns are not allowed. |
651 | | // - Ignore the error if key condition is not needed. |
652 | 1.23k | if (col_desc->is_hash()) { |
653 | 2 | return sem_context->Error(expr, "Partition column cannot be used in this expression", |
654 | 2 | ErrorCode::CQL_STATEMENT_INVALID); |
655 | 2 | } |
656 | | |
657 | | // Check for illogical conditions. |
658 | 1.23k | if (col_args == nullptr) { // subcolumn conditions don't affect the condition counter. |
659 | 1.23k | if (expr->ql_op() == QL_OP_LESS_THAN || expr->ql_op() == QL_OP_LESS_THAN_EQUAL805 ) { |
660 | 589 | counter.increase_lt(col_args != nullptr); |
661 | 643 | } else { |
662 | 643 | counter.increase_gt(col_args != nullptr); |
663 | 643 | } |
664 | | |
665 | 1.23k | if (!counter.is_valid()) { |
666 | 18 | return sem_context->Error(expr, "Illogical condition for where clause", |
667 | 18 | ErrorCode::CQL_STATEMENT_INVALID); |
668 | 18 | } |
669 | 1.23k | } |
670 | | |
671 | | // Check that the column is being used correctly. |
672 | 1.21k | switch (statement_type_) { |
673 | 0 | case TreeNodeOpcode::kPTInsertStmt: |
674 | 0 | case TreeNodeOpcode::kPTUpdateStmt: |
675 | 0 | if (col_desc->is_primary()) { |
676 | 0 | return sem_context->Error(expr, |
677 | 0 | "Range expressions are not supported for inserts and updates", |
678 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
679 | 0 | } |
680 | 0 | FALLTHROUGH_INTENDED; |
681 | 21 | case TreeNodeOpcode::kPTDeleteStmt: { |
682 | 21 | if (!col_desc->is_primary()) { |
683 | 0 | return sem_context->Error(expr, |
684 | 0 | "Non primary key cannot be used in where clause for write requests", |
685 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
686 | 0 | } |
687 | 21 | ops_->emplace_back(col_desc, value, expr->ql_op()); |
688 | 21 | break; |
689 | 21 | } |
690 | 1.19k | case TreeNodeOpcode::kPTSelectStmt: { |
691 | | // Cache the column operator for execution. |
692 | 1.19k | if (col_args != nullptr) { |
693 | 3 | if (col_desc->ql_type()->IsJson()) { |
694 | 0 | json_col_ops_->emplace_back(col_desc, col_args, value, expr->ql_op()); |
695 | 3 | } else { |
696 | 3 | subscripted_col_ops_->emplace_back(col_desc, col_args, value, expr->ql_op()); |
697 | 3 | } |
698 | 1.19k | } else { |
699 | 1.19k | ops_->emplace_back(col_desc, value, expr->ql_op()); |
700 | 1.19k | } |
701 | 1.19k | break; |
702 | 21 | } |
703 | 0 | default: |
704 | 0 | return sem_context->Error(expr, "Statement type cannot have where condition", |
705 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
706 | 1.21k | } |
707 | 1.21k | break; |
708 | 1.21k | } |
709 | | |
710 | 1.21k | case QL_OP_NOT_EQUAL: 68 FALLTHROUGH_INTENDED68 ; |
711 | 116 | case QL_OP_NOT_IN: FALLTHROUGH_INTENDED; |
712 | 451 | case QL_OP_IN: { |
713 | 451 | if (statement_type_ != TreeNodeOpcode::kPTSelectStmt) { |
714 | 26 | return sem_context->Error(expr, "Operator not supported for write operations", |
715 | 26 | ErrorCode::FEATURE_NOT_YET_IMPLEMENTED); |
716 | 26 | } |
717 | | |
718 | 425 | if (col_args != nullptr) { |
719 | 18 | return sem_context->Error(expr, "Operator not supported for subscripted column", |
720 | 18 | ErrorCode::CQL_STATEMENT_INVALID); |
721 | 18 | } |
722 | | |
723 | 407 | if(!value->has_no_column_ref()) { |
724 | 0 | return sem_context->Error(expr, |
725 | 0 | "Argument of this opreator cannot reference a column", |
726 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
727 | 0 | } |
728 | | |
729 | 407 | counter.increase_in(col_args != nullptr); |
730 | 407 | if (!counter.is_valid()) { |
731 | 24 | return sem_context->Error(expr, "Illogical condition for where clause", |
732 | 24 | ErrorCode::CQL_STATEMENT_INVALID); |
733 | 24 | } |
734 | | |
735 | 383 | if (expr->ql_op() == QL_OP_IN && col_desc->is_hash()313 ) { |
736 | 161 | (*key_ops_)[col_desc->index()].Init(col_desc, value, QLOperator::QL_OP_IN); |
737 | 222 | } else { |
738 | 222 | ops_->emplace_back(col_desc, value, expr->ql_op()); |
739 | 222 | } |
740 | 383 | break; |
741 | 407 | } |
742 | | |
743 | 0 | default: |
744 | 0 | return sem_context->Error(expr, "Operator is not supported in where clause", |
745 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
746 | 121k | } |
747 | | |
748 | 120k | return Status::OK(); |
749 | 121k | } |
750 | | |
751 | | Status WhereExprState::AnalyzeColumnFunction(SemContext *sem_context, |
752 | | const PTRelationExpr *expr, |
753 | | PTExpr::SharedPtr value, |
754 | 46 | PTBcallPtr call) { |
755 | 46 | switch (expr->ql_op()) { |
756 | 6 | case QL_OP_LESS_THAN: |
757 | 11 | case QL_OP_LESS_THAN_EQUAL: |
758 | 25 | case QL_OP_EQUAL: |
759 | 31 | case QL_OP_GREATER_THAN_EQUAL: |
760 | 37 | case QL_OP_IN: |
761 | 39 | case QL_OP_NOT_IN: |
762 | 46 | case QL_OP_GREATER_THAN: { |
763 | 46 | func_ops_->emplace_back(value, call, expr->ql_op()); |
764 | 46 | break; |
765 | 39 | } |
766 | | |
767 | 0 | default: |
768 | 0 | return sem_context->Error(expr, "Operator is not supported in where clause", |
769 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
770 | 46 | } |
771 | | |
772 | | // Check that if where clause is present, it must follow CQL rules. |
773 | 46 | return Status::OK(); |
774 | 46 | } |
775 | | |
776 | | Status WhereExprState::AnalyzePartitionKeyOp(SemContext *sem_context, |
777 | | const PTRelationExpr *expr, |
778 | 188 | PTExpr::SharedPtr value) { |
779 | 188 | switch (expr->ql_op()) { |
780 | 69 | case QL_OP_LESS_THAN: { |
781 | 69 | partition_key_counter_->increase_lt(); |
782 | 69 | break; |
783 | 0 | } |
784 | 4 | case QL_OP_LESS_THAN_EQUAL: { |
785 | 4 | partition_key_counter_->increase_lt(); |
786 | 4 | break; |
787 | 0 | } |
788 | 26 | case QL_OP_EQUAL: { |
789 | 26 | partition_key_counter_->increase_eq(); |
790 | 26 | break; |
791 | 0 | } |
792 | 89 | case QL_OP_GREATER_THAN_EQUAL: { |
793 | 89 | partition_key_counter_->increase_gt(); |
794 | 89 | break; |
795 | 0 | } |
796 | 0 | case QL_OP_GREATER_THAN: { |
797 | 0 | partition_key_counter_->increase_gt(); |
798 | 0 | break; |
799 | 0 | } |
800 | | |
801 | 0 | default: |
802 | 0 | return sem_context->Error(expr, "Operator is not supported for token in where clause", |
803 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
804 | 188 | } |
805 | | |
806 | 188 | if (!partition_key_counter_->is_valid()) { |
807 | 0 | return sem_context->Error(expr, "Illogical where condition for token in where clause", |
808 | 0 | ErrorCode::CQL_STATEMENT_INVALID); |
809 | 0 | } |
810 | | |
811 | 188 | partition_key_ops_->emplace_back(expr->ql_op(), value); |
812 | 188 | return Status::OK(); |
813 | 188 | } |
814 | | |
815 | 5.02k | std::vector<int64_t> PTDmlStmt::hash_col_indices() const { |
816 | 5.02k | std::vector<int64_t> indices; |
817 | 5.02k | indices.reserve(hash_col_bindvars_.size()); |
818 | 6.49k | for (const PTBindVar* bindvar : hash_col_bindvars_) { |
819 | 6.49k | indices.emplace_back(bindvar->pos()); |
820 | 6.49k | } |
821 | 5.02k | return indices; |
822 | 5.02k | } |
823 | | |
824 | 2.34M | void PTDmlStmt::AddColumnRef(const ColumnDesc& col_desc) { |
825 | 2.34M | if (col_desc.is_static()) { |
826 | 151 | static_column_refs_.insert(col_desc.id()); |
827 | 2.34M | } else { |
828 | 2.34M | column_refs_.insert(col_desc.id()); |
829 | 2.34M | } |
830 | | |
831 | 2.34M | if (column_ref_cnts_.find(col_desc.id()) == column_ref_cnts_.end()) |
832 | 2.18M | column_ref_cnts_[col_desc.id()] = 0; |
833 | 2.34M | column_ref_cnts_[col_desc.id()]++; |
834 | 2.34M | } |
835 | | |
836 | 161 | std::string PTDmlStmt::PartitionKeyToString(const MCList<PartitionKeyOp>& conds) { |
837 | 161 | std::string str; |
838 | 165 | for (auto col_op = conds.begin(); col_op != conds.end(); ++col_op4 ) { |
839 | 4 | std::stringstream s; |
840 | 4 | if (col_op != conds.begin()) { |
841 | 2 | s << " AND "; |
842 | 2 | } |
843 | | // Partition_hash is stored as INT32, token is stored as INT64, unless you specify the |
844 | | // rhs expression e.g partition_hash(h1, h2) >= 3 in which case it's stored as an VARINT. |
845 | | // So setting the default to the yql partition_hash in that case seems reasonable. |
846 | 4 | string label = (col_op->expr()->expected_internal_type() == InternalType::kInt64Value) ? |
847 | 2 | "token" : "partition_hash"; |
848 | 4 | s << "(" << label << "(" << hash_key_columns() << ") " << QLOperatorAsString(col_op->yb_op()) |
849 | 4 | << " " << col_op->expr()->QLName() << ")"; |
850 | 4 | str += s.str(); |
851 | 4 | } |
852 | 161 | return str; |
853 | 161 | } |
854 | | |
855 | 2.02M | PTExprPtr PTDmlStmt::ttl_seconds() const { |
856 | 2.02M | return using_clause_ ? using_clause_->ttl_seconds()233 : nullptr2.02M ; |
857 | 2.02M | } |
858 | | |
859 | 2.02M | PTExprPtr PTDmlStmt::user_timestamp_usec() const { |
860 | 2.02M | return using_clause_ ? using_clause_->user_timestamp_usec()217 : nullptr2.02M ; |
861 | 2.02M | } |
862 | | |
863 | 157k | void PTDmlStmt::AddRefForAllColumns() { |
864 | 1.74M | for (const auto& pair : column_map_) { |
865 | 1.74M | AddColumnRef(pair.second); |
866 | 1.74M | } |
867 | 157k | } |
868 | | |
869 | 7.80k | void PTDmlStmt::AddHashColumnBindVar(PTBindVar* bindvar) { |
870 | 7.80k | hash_col_bindvars_.insert(bindvar); |
871 | 7.80k | } |
872 | | |
873 | 5.28k | bool PTDmlStmt::HashColCmp::operator()(const PTBindVar* v1, const PTBindVar* v2) const { |
874 | 5.28k | DCHECK(v1->hash_col() != nullptr) << "bindvar pos " << v1->pos() << " is not a hash column"0 ; |
875 | 5.28k | DCHECK(v2->hash_col() != nullptr) << "bindvar pos " << v2->pos() << " is not a hash column"0 ; |
876 | 5.28k | return v1->hash_col()->id() < v2->hash_col()->id(); |
877 | 5.28k | } |
878 | | |
879 | | } // namespace ql |
880 | | } // namespace yb |