/Users/deen/code/yugabyte-db/src/yb/common/index.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //-------------------------------------------------------------------------------------------------- |
2 | | // Copyright (c) YugaByte, Inc. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
5 | | // in compliance with the License. You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
10 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
11 | | // or implied. See the License for the specific language governing permissions and limitations |
12 | | // under the License. |
13 | | // |
14 | | // Classes that implement secondary index. |
15 | | //-------------------------------------------------------------------------------------------------- |
16 | | |
17 | | #include "yb/common/index.h" |
18 | | |
19 | | #include "yb/common/common.pb.h" |
20 | | #include "yb/common/index_column.h" |
21 | | #include "yb/common/schema.h" |
22 | | |
23 | | #include "yb/gutil/casts.h" |
24 | | |
25 | | #include "yb/util/result.h" |
26 | | |
27 | | using std::vector; |
28 | | using std::unordered_map; |
29 | | using google::protobuf::RepeatedField; |
30 | | using google::protobuf::RepeatedPtrField; |
31 | | using google::protobuf::uint32; |
32 | | |
33 | | namespace yb { |
34 | | |
35 | | // When DocDB receive messages from older clients, those messages won't have "column_name" and |
36 | | // "colexpr" attributes. |
37 | | IndexColumn::IndexColumn(const IndexInfoPB::IndexColumnPB& pb) |
38 | | : column_id(ColumnId(pb.column_id())), |
39 | | column_name(pb.column_name()), // Default to empty. |
40 | | indexed_column_id(ColumnId(pb.indexed_column_id())), |
41 | 309k | colexpr(pb.colexpr()) /* Default to empty message */ { |
42 | 309k | } |
43 | | |
44 | 430k | void IndexColumn::ToPB(IndexInfoPB::IndexColumnPB* pb) const { |
45 | 430k | pb->set_column_id(column_id); |
46 | 430k | pb->set_column_name(column_name); |
47 | 430k | pb->set_indexed_column_id(indexed_column_id); |
48 | 430k | pb->mutable_colexpr()->CopyFrom(colexpr); |
49 | 430k | } |
50 | | |
51 | 0 | std::string IndexColumn::ToString() const { |
52 | 0 | return YB_STRUCT_TO_STRING(column_id, column_name, indexed_column_id, colexpr); |
53 | 0 | } |
54 | | |
55 | | namespace { |
56 | | |
57 | | vector<IndexColumn> IndexColumnFromPB( |
58 | 119k | const RepeatedPtrField<IndexInfoPB::IndexColumnPB>& columns) { |
59 | 119k | vector<IndexColumn> cols; |
60 | 119k | cols.reserve(columns.size()); |
61 | 309k | for (const auto& column : columns) { |
62 | 309k | cols.emplace_back(column); |
63 | 309k | } |
64 | 119k | return cols; |
65 | 119k | } |
66 | | |
67 | 240k | vector<ColumnId> ColumnIdsFromPB(const RepeatedField<uint32>& ids) { |
68 | 240k | vector<ColumnId> column_ids; |
69 | 240k | column_ids.reserve(ids.size()); |
70 | 212k | for (const auto& id : ids) { |
71 | 212k | column_ids.emplace_back(id); |
72 | 212k | } |
73 | 240k | return column_ids; |
74 | 240k | } |
75 | | |
76 | | } // namespace |
77 | | |
78 | | IndexInfo::IndexInfo(const IndexInfoPB& pb) |
79 | | : table_id_(pb.table_id()), |
80 | | indexed_table_id_(pb.indexed_table_id()), |
81 | | schema_version_(pb.version()), |
82 | | is_local_(pb.is_local()), |
83 | | is_unique_(pb.is_unique()), |
84 | | columns_(IndexColumnFromPB(pb.columns())), |
85 | | hash_column_count_(pb.hash_column_count()), |
86 | | range_column_count_(pb.range_column_count()), |
87 | | indexed_hash_column_ids_(ColumnIdsFromPB(pb.indexed_hash_column_ids())), |
88 | | indexed_range_column_ids_(ColumnIdsFromPB(pb.indexed_range_column_ids())), |
89 | | index_permissions_(pb.index_permissions()), |
90 | | backfill_error_message_(pb.backfill_error_message()), |
91 | | use_mangled_column_name_(pb.use_mangled_column_name()), |
92 | | where_predicate_spec_(pb.has_where_predicate_spec() ? |
93 | 120k | std::make_shared<IndexInfoPB::WherePredicateSpecPB>(pb.where_predicate_spec()) : nullptr) { |
94 | 309k | for (const auto& index_col : columns_) { |
95 | | // Mark column as covered if the index column is the column itself. |
96 | | // Do not mark a column as covered when indexing by an expression of that column. |
97 | | // - When an expression such as "jsonb->>'field'" is used, then the "jsonb" column should not |
98 | | // be included in the covered list. |
99 | | // - Currently we only support "jsonb->>" expression, but this is true for all expressions. |
100 | 309k | if (index_col.colexpr.expr_case() == QLExpressionPB::ExprCase::kColumnId || |
101 | 304k | index_col.colexpr.expr_case() == QLExpressionPB::ExprCase::EXPR_NOT_SET) { |
102 | 304k | covered_column_ids_.insert(index_col.indexed_column_id); |
103 | 5.41k | } else { |
104 | 5.41k | has_index_by_expr_ = true; |
105 | 5.41k | } |
106 | 309k | } |
107 | 120k | } |
108 | | |
109 | 168 | IndexInfo::IndexInfo() = default; |
110 | | |
111 | 138k | IndexInfo::IndexInfo(const IndexInfo& rhs) = default; |
112 | 65.6k | IndexInfo::IndexInfo(IndexInfo&& rhs) = default; |
113 | | |
114 | 313k | IndexInfo::~IndexInfo() = default; |
115 | | |
116 | 117k | void IndexInfo::ToPB(IndexInfoPB* pb) const { |
117 | 117k | pb->set_table_id(table_id_); |
118 | 117k | pb->set_indexed_table_id(indexed_table_id_); |
119 | 117k | pb->set_version(schema_version_); |
120 | 117k | pb->set_is_local(is_local_); |
121 | 117k | pb->set_is_unique(is_unique_); |
122 | 430k | for (const auto& column : columns_) { |
123 | 430k | column.ToPB(pb->add_columns()); |
124 | 430k | } |
125 | 117k | pb->set_hash_column_count(narrow_cast<uint32_t>(hash_column_count_)); |
126 | 117k | pb->set_range_column_count(narrow_cast<uint32_t>(range_column_count_)); |
127 | 164k | for (const auto& id : indexed_hash_column_ids_) { |
128 | 164k | pb->add_indexed_hash_column_ids(id); |
129 | 164k | } |
130 | 126k | for (const auto& id : indexed_range_column_ids_) { |
131 | 126k | pb->add_indexed_range_column_ids(id); |
132 | 126k | } |
133 | 117k | pb->set_index_permissions(index_permissions_); |
134 | 117k | pb->set_backfill_error_message(backfill_error_message_); |
135 | 117k | pb->set_use_mangled_column_name(use_mangled_column_name_); |
136 | 117k | } |
137 | | |
138 | 1.93k | vector<ColumnId> IndexInfo::index_key_column_ids() const { |
139 | 1.93k | std::unordered_map<ColumnId, ColumnId, boost::hash<ColumnId>> map; |
140 | 6.68k | for (const auto& column : columns_) { |
141 | 6.68k | map[column.indexed_column_id] = column.column_id; |
142 | 6.68k | } |
143 | 1.93k | vector<ColumnId> ids; |
144 | 1.93k | ids.reserve(indexed_hash_column_ids_.size() + indexed_range_column_ids_.size()); |
145 | 2.54k | for (const auto& id : indexed_hash_column_ids_) { |
146 | 2.54k | ids.push_back(map[id]); |
147 | 2.54k | } |
148 | 2.14k | for (const auto& id : indexed_range_column_ids_) { |
149 | 2.14k | ids.push_back(map[id]); |
150 | 2.14k | } |
151 | 1.93k | return ids; |
152 | 1.93k | } |
153 | | |
154 | 15.7k | bool IndexInfo::PrimaryKeyColumnsOnly(const Schema& indexed_schema) const { |
155 | 18.4k | for (size_t i = 0; i < hash_column_count_ + range_column_count_; i++) { |
156 | 17.7k | if (!indexed_schema.is_key_column(columns_[i].indexed_column_id)) { |
157 | 15.0k | return false; |
158 | 15.0k | } |
159 | 17.7k | } |
160 | 655 | return true; |
161 | 15.7k | } |
162 | | |
163 | 78.7k | bool IndexInfo::IsColumnCovered(const ColumnId column_id) const { |
164 | 78.7k | return covered_column_ids_.find(column_id) != covered_column_ids_.end(); |
165 | 78.7k | } |
166 | | |
167 | 5.91k | bool IndexInfo::IsColumnCovered(const std::string& column_name) const { |
168 | 15.5k | for (const auto &col : columns_) { |
169 | 15.5k | if (column_name == col.column_name) { |
170 | 3.91k | return true; |
171 | 3.91k | } |
172 | 15.5k | } |
173 | 2.00k | return false; |
174 | 5.91k | } |
175 | | |
176 | 0 | int32_t IndexInfo::IsExprCovered(const string& expr_name) const { |
177 | | // CHECKING if an expression is covered. |
178 | | // - If IndexColumn name is a substring of "expr_name", the given expression is covered. That is, |
179 | | // it can be computed using the value of this column. |
180 | | // |
181 | | // - For this function to work properly, the column and expression name MUST be serialized in a |
182 | | // way that guarantees their uniqueness. Function PTExpr::MangledName() resolves this issue. |
183 | | // |
184 | | // - Example: |
185 | | // CREATE TABLE tab (pk int primary key, a int, j jsonb); |
186 | | // CREATE INDEX a_index ON tab (a); |
187 | | // SELECT pk FROM tab WHERE j->'b'->>'a' = '99'; |
188 | | // In this example, clearly "a_index" doesn't cover the seleted json expression, but the name |
189 | | // "a" is a substring of "j->b->>a", and this function would return TRUE, which is wrong. To |
190 | | // avoid this issue, <column names> and JSONB <attribute names> must be escaped uniquely and |
191 | | // differently. To cover the above SELECT, the following index must be defined. |
192 | | // CREATE INDEX jindex on tab(j->'b'->>'a'); |
193 | 0 | int32_t idx = 0; |
194 | 0 | for (const auto &col : columns_) { |
195 | 0 | if (!col.column_name.empty() && expr_name.find(col.column_name) != expr_name.npos) { |
196 | 0 | return idx; |
197 | 0 | } |
198 | 0 | idx++; |
199 | 0 | } |
200 | |
|
201 | 0 | return -1; |
202 | 0 | } |
203 | | |
204 | | // Check for dependency is used for DDL operations, so it does not need to be fast. As a result, |
205 | | // the dependency list does not need to be cached in a member id list for fast access. |
206 | 6 | bool IndexInfo::CheckColumnDependency(ColumnId column_id) const { |
207 | 10 | for (const auto& index_col : columns_) { |
208 | | // The protobuf data contains IDs of all columns that this index is referencing. |
209 | | // Examples: |
210 | | // 1. Index by column |
211 | | // - INDEX ON tab (a_column) |
212 | | // - The ID of "a_column" is included in protobuf data. |
213 | | // |
214 | | // 2. Index by expression of column: |
215 | | // - INDEX ON tab (j_column->>'field') |
216 | | // - The ID of "j_column" is included in protobuf data. |
217 | 10 | if (index_col.indexed_column_id == column_id) { |
218 | 2 | return true; |
219 | 2 | } |
220 | 10 | } |
221 | | |
222 | 4 | if (where_predicate_spec_) { |
223 | 3 | for (auto indexed_col_id : where_predicate_spec_->column_ids()) { |
224 | 3 | if (ColumnId(indexed_col_id) == column_id) return true; |
225 | 3 | } |
226 | 3 | } |
227 | | |
228 | 2 | return false; |
229 | 4 | } |
230 | | |
231 | 104 | boost::optional<size_t> IndexInfo::FindKeyIndex(const string& key_expr_name) const { |
232 | 267 | for (size_t idx = 0; idx < key_column_count(); idx++) { |
233 | 207 | const auto& col = columns_[idx]; |
234 | 207 | if (!col.column_name.empty() && key_expr_name.find(col.column_name) != key_expr_name.npos) { |
235 | | // Return the found key column that is referenced by the expression. |
236 | 44 | return idx; |
237 | 44 | } |
238 | 207 | } |
239 | | |
240 | 60 | return boost::none; |
241 | 104 | } |
242 | | |
243 | 235 | std::string IndexInfo::ToString() const { |
244 | 235 | IndexInfoPB pb; |
245 | 235 | ToPB(&pb); |
246 | 235 | return pb.ShortDebugString(); |
247 | 235 | } |
248 | | |
249 | 357k | const IndexColumn& IndexInfo::column(const size_t idx) const { |
250 | 357k | return columns_[idx]; |
251 | 357k | } |
252 | | |
253 | 2.23k | IndexMap::IndexMap(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) { |
254 | 2.23k | FromPB(indexes); |
255 | 2.23k | } |
256 | | |
257 | 701k | void IndexMap::FromPB(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) { |
258 | 701k | clear(); |
259 | 65.3k | for (const auto& index : indexes) { |
260 | 65.3k | emplace(index.table_id(), IndexInfo(index)); |
261 | 65.3k | } |
262 | 701k | } |
263 | | |
264 | 140M | void IndexMap::ToPB(google::protobuf::RepeatedPtrField<IndexInfoPB>* indexes) const { |
265 | 140M | indexes->Clear(); |
266 | 69.6k | for (const auto& itr : *this) { |
267 | 69.6k | itr.second.ToPB(indexes->Add()); |
268 | 69.6k | } |
269 | 140M | } |
270 | | |
271 | 54.9k | Result<const IndexInfo*> IndexMap::FindIndex(const TableId& index_id) const { |
272 | 54.9k | const auto itr = find(index_id); |
273 | 54.9k | if (itr == end()) { |
274 | 267 | return STATUS(NotFound, Format("Index id $0 not found", index_id)); |
275 | 267 | } |
276 | 54.6k | return &itr->second; |
277 | 54.6k | } |
278 | | |
279 | | } // namespace yb |