/Users/deen/code/yugabyte-db/src/yb/yql/cql/ql/exec/eval_json.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //-------------------------------------------------------------------------------------------------- |
2 | | // Copyright (c) YugaByte, Inc. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
5 | | // in compliance with the License. You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
10 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
11 | | // or implied. See the License for the specific language governing permissions and limitations |
12 | | // under the License. |
13 | | //-------------------------------------------------------------------------------------------------- |
14 | | |
15 | | #include <numeric> // NOLINT - needed because header name mismatch source name |
16 | | |
17 | | #include <boost/algorithm/string.hpp> |
18 | | #include <rapidjson/document.h> |
19 | | #include <rapidjson/error/en.h> |
20 | | |
21 | | #include "yb/client/schema.h" |
22 | | |
23 | | #include "yb/common/ql_value.h" |
24 | | |
25 | | #include "yb/util/result.h" |
26 | | #include "yb/util/string_util.h" |
27 | | |
28 | | #include "yb/yql/cql/ql/exec/exec_context.h" |
29 | | #include "yb/yql/cql/ql/exec/executor.h" |
30 | | #include "yb/yql/cql/ql/ptree/column_desc.h" |
31 | | #include "yb/yql/cql/ql/ptree/pt_expr.h" |
32 | | #include "yb/yql/cql/ql/ptree/pt_insert.h" |
33 | | #include "yb/yql/cql/ql/ptree/pt_insert_json_clause.h" |
34 | | #include "yb/yql/cql/ql/ptree/pt_name.h" |
35 | | |
36 | | namespace yb { |
37 | | namespace ql { |
38 | | |
39 | | namespace { |
40 | | // General error that should be shown when outermost JSON decoding fails |
41 | | const std::string kJsonMapDecodeErrMsg("Could not decode JSON string as a map"); |
42 | | |
43 | | // Error message stating that the given JSON string could not be parsed as a given type |
44 | | std::string GetInnerParseErr(const Slice& string_value, |
45 | 1 | const QLType::SharedPtr& dst_type) { |
46 | 1 | return Format("Unable to make $0 from '$1'", *dst_type, string_value); |
47 | 1 | } |
48 | | |
49 | | // Error message stating that types are not compatible |
50 | | std::string GetCoercionErr(const DataType src_type, |
51 | 0 | const DataType dst_type) { |
52 | 0 | return Format("Unable to make $0 from $1", |
53 | 0 | QLType::ToCQLString(dst_type), |
54 | 0 | QLType::ToCQLString(src_type)); |
55 | 0 | } |
56 | | |
57 | | // Parses JSON string as rapidjson document |
58 | | Result<rapidjson::Document> ParseJsonString(const char* json_string, |
59 | | ExecContext* exec_context, |
60 | 108 | const YBLocationPtr& loc) { |
61 | | // Special case: boolean strings in CQL are case-insensitive, but rapidjson disagrees |
62 | 108 | if (strcasecmp(json_string, "true") == 0) { |
63 | 4 | json_string = "true"; |
64 | 104 | } else if (strcasecmp(json_string, "false") == 0) { |
65 | 5 | json_string = "false"; |
66 | 5 | } |
67 | | |
68 | 108 | rapidjson::Document document; |
69 | 108 | document.Parse<rapidjson::ParseFlag::kParseNumbersAsStringsFlag>(json_string); |
70 | 108 | if (document.HasParseError()) { |
71 | | // TODO: Location offset to pinpoint an error? |
72 | 1 | return exec_context->Error(*loc, |
73 | 1 | rapidjson::GetParseError_En(document.GetParseError()), |
74 | 1 | ErrorCode::INVALID_ARGUMENTS); |
75 | 1 | } |
76 | 107 | return document; |
77 | 108 | } |
78 | | } // anonymous namespace |
79 | | |
80 | 167 | std::string NormalizeJsonKey(const std::string& key) { |
81 | 167 | if (boost::starts_with(key, "\"") && boost::ends_with(key, "\"")6 ) { |
82 | 6 | return key.substr(1, key.size() - 2); |
83 | 161 | } else { |
84 | 161 | return boost::algorithm::to_lower_copy(key); |
85 | 161 | } |
86 | 167 | } |
87 | | |
88 | | Result<PTExpr::SharedPtr> Executor::ConvertJsonToExpr(const rapidjson::Value& json_value, |
89 | | const QLType::SharedPtr& type, |
90 | 336 | const YBLocationPtr& loc) { |
91 | 336 | CHECK_NOTNULL(type.get()); |
92 | | |
93 | | // Strip FROZEN wrapping and process underlying type |
94 | 336 | if (type->main() == DataType::FROZEN) { |
95 | 22 | auto result = VERIFY_RESULT(ConvertJsonToExpr(json_value, type->param_type(0), loc)); |
96 | 0 | result->set_ql_type(type); // Execution expects explicit FROZEN type |
97 | 22 | return result; |
98 | 22 | } |
99 | | |
100 | 314 | PTExpr::SharedPtr value_expr = VERIFY_RESULT313 (ConvertJsonToExprInner(json_value, type, loc));313 |
101 | 0 | value_expr->set_expected_internal_type(client::YBColumnSchema::ToInternalDataType(type)); |
102 | | |
103 | 313 | if (!QLType::IsImplicitlyConvertible(type, value_expr->ql_type())) { |
104 | 0 | return exec_context_->Error(value_expr, |
105 | 0 | GetCoercionErr(value_expr->ql_type_id(), type->main()), |
106 | 0 | ErrorCode::DATATYPE_MISMATCH); |
107 | 0 | } |
108 | 313 | return value_expr; |
109 | 313 | } |
110 | | |
111 | | Result<PTExpr::SharedPtr> Executor::ConvertJsonToExprInner(const rapidjson::Value& json_value, |
112 | | const QLType::SharedPtr& type, |
113 | 314 | const YBLocationPtr& loc) { |
114 | 314 | MemoryContext* memctx = exec_context_->PTempMem(); |
115 | 314 | switch (json_value.GetType()) { |
116 | 3 | case rapidjson::Type::kNullType: { |
117 | 3 | return PTNull::MakeShared(memctx, loc, nullptr); |
118 | 0 | } |
119 | 21 | case rapidjson::Type::kTrueType: FALLTHROUGH_INTENDED; |
120 | 43 | case rapidjson::Type::kFalseType: { |
121 | 43 | return PTConstBool::MakeShared(memctx, loc, json_value.GetBool()); |
122 | 21 | } |
123 | 222 | case rapidjson::Type::kStringType: { |
124 | | // |
125 | | // Process strings |
126 | | // |
127 | | // Things to keep in mind here: |
128 | | // 1) INSERT JSON allows string for every single type of value, |
129 | | // which requires additional layer of JSON parsing. |
130 | | // 2) We specifically instruct JSON parser to parse numerics as strings |
131 | | // to avoid precision loss and overflow. |
132 | | // |
133 | 222 | const char* json_value_string = json_value.GetString(); |
134 | 222 | const auto mc_string = MCMakeShared<MCString>(memctx, json_value_string); |
135 | 222 | if (QLType::IsImplicitlyConvertible(type->main(), DataType::STRING)) { |
136 | 20 | return PTConstText::MakeShared(memctx, loc, mc_string); |
137 | 202 | } else if (yb::IsBigInteger(json_value_string)) { |
138 | 155 | return PTConstVarInt::MakeShared(memctx, loc, mc_string); |
139 | 155 | } else if (47 yb::IsDecimal(json_value_string)47 ) { |
140 | 19 | return PTConstDecimal::MakeShared(memctx, loc, mc_string); |
141 | 28 | } else { |
142 | | // Parse string as JSON |
143 | 28 | auto json_expr_result = ParseJsonString(json_value_string, exec_context_, loc); |
144 | 28 | if (json_expr_result.ok()) { |
145 | 27 | return VERIFY_RESULT(ConvertJsonToExpr(*json_expr_result, type, loc)); |
146 | 27 | } else { |
147 | 1 | return exec_context_->Error(*loc, |
148 | 1 | GetInnerParseErr(json_value_string, type), |
149 | 1 | ErrorCode::DATATYPE_MISMATCH); |
150 | 1 | } |
151 | 28 | } |
152 | 222 | } |
153 | 15 | case rapidjson::Type::kArrayType: { |
154 | | // All of these collections are represented as JSON lists |
155 | 15 | if (type->main() != LIST && type->main() != SET10 ) { // TODO: Add tuple during #936 |
156 | 0 | return exec_context_->Error(*loc, |
157 | 0 | GetCoercionErr(DataType::LIST, type->main()), |
158 | 0 | ErrorCode::DATATYPE_MISMATCH); |
159 | 0 | } |
160 | 15 | auto result = PTCollectionExpr::MakeShared(memctx, loc, type); |
161 | 45 | for (const auto& member : json_value.GetArray()) { |
162 | 45 | result->AddElement(VERIFY_RESULT(ConvertJsonToExpr(member, type->values_type(), loc))); |
163 | 45 | } |
164 | 15 | return result; |
165 | 15 | } |
166 | 31 | case rapidjson::Type::kObjectType: { |
167 | | // Could be either Map or UDT |
168 | 31 | if (type->main() != MAP && type->main() != USER_DEFINED_TYPE10 ) { |
169 | 0 | return exec_context_->Error(*loc, |
170 | 0 | GetCoercionErr(DataType::MAP, type->main()), |
171 | 0 | ErrorCode::DATATYPE_MISMATCH); |
172 | 0 | } |
173 | 31 | auto result = PTCollectionExpr::MakeShared(memctx, loc, type); |
174 | 52 | for (const auto& member : json_value.GetObject()) { |
175 | 52 | if (type->main() == MAP) { |
176 | 38 | const PTExpr::SharedPtr processed_key = |
177 | 38 | VERIFY_RESULT(ConvertJsonToExpr(member.name, type->keys_type(), loc)); |
178 | 0 | const PTExpr::SharedPtr processed_value = |
179 | 38 | VERIFY_RESULT(ConvertJsonToExpr(member.value, type->values_type(), loc)); |
180 | 0 | result->AddKeyValuePair(processed_key, processed_value); |
181 | 38 | } else { // UDT |
182 | 14 | const auto key_string = NormalizeJsonKey(member.name.GetString()); |
183 | 14 | const auto value_type = VERIFY_RESULT(type->GetUDTFieldTypeByName(key_string)); |
184 | 14 | if (!value_type) { |
185 | 0 | return exec_context_->Error(*loc, |
186 | 0 | "Key '" + key_string + "' not found in user-defined type", |
187 | 0 | ErrorCode::DATATYPE_MISMATCH); |
188 | 0 | } |
189 | 14 | const auto name_node = |
190 | 14 | PTQualifiedName::MakeShared(memctx, |
191 | 14 | loc, |
192 | 14 | MCMakeShared<MCString>(memctx, key_string.c_str())); |
193 | 14 | const PTExpr::SharedPtr processed_key = |
194 | 14 | PTRef::MakeShared(memctx, loc, name_node); |
195 | 14 | const PTExpr::SharedPtr processed_value = |
196 | 14 | VERIFY_RESULT(ConvertJsonToExpr(member.value, value_type, loc)); |
197 | 0 | result->AddKeyValuePair(processed_key, processed_value); |
198 | 14 | } |
199 | 52 | } |
200 | 31 | if (type->main() == USER_DEFINED_TYPE) { |
201 | 10 | RETURN_NOT_OK(result->InitializeUDTValues(type, exec_context_)); |
202 | 10 | } |
203 | 31 | return result; |
204 | 31 | } |
205 | 0 | case rapidjson::Type::kNumberType: { |
206 | | // We're using kParseNumbersAsStringsFlag flag, so this shouldn't be possible |
207 | 0 | return exec_context_->Error(*loc, |
208 | 0 | "Unexpected numeric type in JSON processing", |
209 | 0 | ErrorCode::SERVER_ERROR); |
210 | 31 | } |
211 | 314 | } |
212 | 0 | FATAL_INVALID_ENUM_VALUE(rapidjson::Type, json_value.GetType()); |
213 | 0 | } |
214 | | |
215 | 80 | CHECKED_STATUS Executor::PreExecTreeNode(PTInsertJsonClause* json_clause) { |
216 | | |
217 | | // |
218 | | // Resolve JSON string |
219 | | // |
220 | 80 | QLValuePB json_expr_pb; |
221 | 80 | RETURN_NOT_OK(PTConstToPB(json_clause->Expr(), &json_expr_pb)); |
222 | 80 | const std::string& json_string = json_expr_pb.string_value(); |
223 | | |
224 | | // |
225 | | // Parse JSON and store the result |
226 | | // |
227 | 80 | auto json_document = VERIFY_RESULT(ParseJsonString(json_string.c_str(), |
228 | 80 | exec_context_, |
229 | 80 | json_clause->Expr()->loc_ptr())); |
230 | 80 | if (json_document.GetType() != rapidjson::Type::kObjectType) { |
231 | 3 | return exec_context_->Error(json_clause->Expr(), |
232 | 3 | kJsonMapDecodeErrMsg, |
233 | 3 | ErrorCode::INVALID_ARGUMENTS); |
234 | 3 | } |
235 | 77 | return json_clause->PreExecInit(json_string, std::move(json_document)); |
236 | 80 | } |
237 | | |
238 | | CHECKED_STATUS Executor::InsertJsonClauseToPB(const PTInsertStmt* insert_stmt, |
239 | | const PTInsertJsonClause* json_clause, |
240 | 77 | QLWriteRequestPB* req) { |
241 | 77 | const auto& column_map = insert_stmt->column_map(); |
242 | 77 | const auto& loc = json_clause->Expr()->loc_ptr(); |
243 | | |
244 | | // Processed columns with their associated QL expressions |
245 | 77 | std::map<const ColumnDesc*, QLExpressionPB*> processed_cols; |
246 | | |
247 | | // Process all columns in JSON clause |
248 | 153 | for (const auto& member : json_clause->JsonDocument().GetObject()) { |
249 | 153 | const rapidjson::Value& key = member.name; |
250 | 153 | const rapidjson::Value& value = member.value; |
251 | | |
252 | 153 | SCHECK(key.IsString(), InvalidArgument, "JSON root object key must be a string"); |
253 | 153 | const MCSharedPtr<MCString>& mc_col_name = |
254 | 153 | MCMakeShared<MCString>(exec_context_->PTempMem(), |
255 | 153 | NormalizeJsonKey(key.GetString()).c_str()); |
256 | 153 | auto found_column_entry = column_map.find(*mc_col_name); |
257 | | // Check that the column exists. |
258 | 153 | if (found_column_entry == column_map.end()) { |
259 | 1 | return exec_context_->Error(*loc, mc_col_name->c_str(), ErrorCode::UNDEFINED_COLUMN); |
260 | 1 | } |
261 | | |
262 | 152 | const ColumnDesc& col_desc = found_column_entry->second; |
263 | 152 | QLExpressionPB* expr_pb = processed_cols[&col_desc]; |
264 | 152 | if (!expr_pb) { |
265 | 149 | expr_pb = CreateQLExpression(req, col_desc); |
266 | 149 | processed_cols[&col_desc] = expr_pb; |
267 | 149 | } |
268 | 152 | QLValuePB* value_pb = expr_pb->mutable_value(); |
269 | 152 | PTExpr::SharedPtr expr = VERIFY_RESULT151 (ConvertJsonToExpr(value, col_desc.ql_type(), loc));151 |
270 | 151 | RETURN_NOT_OK(PTConstToPB(expr, value_pb, false)); |
271 | 151 | } |
272 | | |
273 | | // Perform checks and init columns not mentioned in JSON |
274 | 514 | for (auto iter = column_map.begin(); 74 iter != column_map.end(); iter++440 ) { |
275 | 445 | const ColumnDesc& col_desc = iter->second; |
276 | | |
277 | 445 | const auto& found_col_entry = processed_cols.find(&col_desc); |
278 | 445 | bool not_found = found_col_entry == processed_cols.end(); |
279 | | |
280 | | // Null values not allowed for primary key |
281 | 445 | if (col_desc.is_primary() |
282 | 445 | && (76 not_found76 |
283 | 76 | || !found_col_entry->second->has_value()71 |
284 | 76 | || IsNull(found_col_entry->second->value())71 )) { |
285 | 5 | LOG(INFO) << "Unexpected null value. Current request: " << req->DebugString(); |
286 | 5 | return exec_context_->Error(*loc, ErrorCode::NULL_ARGUMENT_FOR_PRIMARY_KEY); |
287 | 5 | } |
288 | | |
289 | | // All non-mentioned columns should be set to NULL |
290 | 440 | if (not_found && json_clause->IsDefaultNull()298 ) { |
291 | 287 | QLExpressionPB* expr_pb = CreateQLExpression(req, col_desc); |
292 | 287 | SetNull(expr_pb->mutable_value()); |
293 | 287 | } |
294 | 440 | } |
295 | | |
296 | 69 | return Status::OK(); |
297 | 74 | } |
298 | | |
299 | | } // namespace ql |
300 | | } // namespace yb |