YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/common/index.cc
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
// Classes that implement secondary index.
15
//--------------------------------------------------------------------------------------------------
16
17
#include "yb/common/index.h"
18
19
#include "yb/common/common.pb.h"
20
#include "yb/common/index_column.h"
21
#include "yb/common/schema.h"
22
23
#include "yb/gutil/casts.h"
24
25
#include "yb/util/result.h"
26
27
using std::vector;
28
using std::unordered_map;
29
using google::protobuf::RepeatedField;
30
using google::protobuf::RepeatedPtrField;
31
using google::protobuf::uint32;
32
33
namespace yb {
34
35
// When DocDB receives messages from older clients, those messages won't have "column_name" and
36
// "colexpr" attributes.
37
// Copies every field of the protobuf column descriptor into this IndexColumn.
IndexColumn::IndexColumn(const IndexInfoPB::IndexColumnPB& pb)
    : column_id(ColumnId(pb.column_id())),
      // The name is the protobuf default (empty string) when the sender did not set it.
      column_name(pb.column_name()),
      indexed_column_id(ColumnId(pb.indexed_column_id())),
      // The expression is the protobuf default (empty message) when the sender did not set it.
      colexpr(pb.colexpr()) {
}
43
44
436k
// Serializes this column into "pb": both column ids, the column name, and a copy of the column
// expression. Any previous value of those four fields in "pb" is overwritten.
void IndexColumn::ToPB(IndexInfoPB::IndexColumnPB* pb) const {
  // Identifiers first.
  pb->set_column_id(column_id);
  pb->set_indexed_column_id(indexed_column_id);

  // Then the descriptive attributes.
  pb->set_column_name(column_name);
  *pb->mutable_colexpr() = colexpr;
}
50
51
0
std::string IndexColumn::ToString() const {
52
0
  return YB_STRUCT_TO_STRING(column_id, column_name, indexed_column_id, colexpr);
53
0
}
54
55
namespace {
56
57
// Converts a repeated protobuf field of index columns into a vector of IndexColumn, preserving
// the order of the entries.
vector<IndexColumn> IndexColumnFromPB(
    const RepeatedPtrField<IndexInfoPB::IndexColumnPB>& columns) {
  const int num_columns = columns.size();

  vector<IndexColumn> result;
  result.reserve(num_columns);
  for (int pos = 0; pos < num_columns; ++pos) {
    result.emplace_back(columns.Get(pos));
  }
  return result;
}
66
67
296k
// Converts a repeated protobuf field of raw column ids into a vector of ColumnId, preserving
// the order of the entries.
vector<ColumnId> ColumnIdsFromPB(const RepeatedField<uint32>& ids) {
  const int num_ids = ids.size();

  vector<ColumnId> result;
  result.reserve(num_ids);
  for (int pos = 0; pos < num_ids; ++pos) {
    result.emplace_back(ids.Get(pos));
  }
  return result;
}
75
76
} // namespace
77
78
// Builds an IndexInfo from its protobuf form.
//
// Besides copying the protobuf fields, this derives two pieces of state from the index columns:
// - covered_column_ids_: ids of the indexed-table columns that are indexed directly.
// - has_index_by_expr_: set when at least one index column is defined by an expression.
IndexInfo::IndexInfo(const IndexInfoPB& pb)
    : table_id_(pb.table_id()),
      indexed_table_id_(pb.indexed_table_id()),
      schema_version_(pb.version()),
      is_local_(pb.is_local()),
      is_unique_(pb.is_unique()),
      columns_(IndexColumnFromPB(pb.columns())),
      hash_column_count_(pb.hash_column_count()),
      range_column_count_(pb.range_column_count()),
      indexed_hash_column_ids_(ColumnIdsFromPB(pb.indexed_hash_column_ids())),
      indexed_range_column_ids_(ColumnIdsFromPB(pb.indexed_range_column_ids())),
      index_permissions_(pb.index_permissions()),
      backfill_error_message_(pb.backfill_error_message()),
      use_mangled_column_name_(pb.use_mangled_column_name()),
      // The predicate is kept only when the protobuf actually carries one; otherwise the
      // pointer stays null.
      where_predicate_spec_(pb.has_where_predicate_spec() ?
        std::make_shared<IndexInfoPB::WherePredicateSpecPB>(pb.where_predicate_spec()) : nullptr) {
  for (const auto& col : columns_) {
    const auto expr_case = col.colexpr.expr_case();
    const bool indexes_column_itself =
        expr_case == QLExpressionPB::ExprCase::kColumnId ||
        expr_case == QLExpressionPB::ExprCase::EXPR_NOT_SET;

    // A column indexed through an expression (for example "jsonb->>'field'") must NOT be
    // marked as covered: only the expression value is stored in the index, not the column.
    // Only "jsonb->>" expressions are supported today, but the rule holds for any expression.
    if (!indexes_column_itself) {
      has_index_by_expr_ = true;
      continue;
    }

    // The index column is the column itself, so the index covers it.
    covered_column_ids_.insert(col.indexed_column_id);
  }
}
108
109
183
// The special member functions below are compiler-generated. They are defined out of line in
// this translation unit rather than in the header.

// Default constructor.
IndexInfo::IndexInfo() = default;

// Copy and move constructors.
IndexInfo::IndexInfo(const IndexInfo& rhs) = default;
IndexInfo::IndexInfo(IndexInfo&& rhs) = default;

// Destructor.
IndexInfo::~IndexInfo() = default;
115
116
167k
// Serializes this IndexInfo into "pb".
//
// Scalar fields are overwritten. The repeated fields (columns and indexed hash/range column ids)
// are appended to, so "pb" is expected to be freshly constructed or cleared by the caller.
//
// The partial-index predicate is written only when this index has one. It is read by
// IndexInfo(const IndexInfoPB&), so it must be written here as well; otherwise a
// PB -> IndexInfo -> PB round trip would silently drop the predicate.
void IndexInfo::ToPB(IndexInfoPB* pb) const {
  pb->set_table_id(table_id_);
  pb->set_indexed_table_id(indexed_table_id_);
  pb->set_version(schema_version_);
  pb->set_is_local(is_local_);
  pb->set_is_unique(is_unique_);
  for (const auto& column : columns_) {
    column.ToPB(pb->add_columns());
  }
  // The counts are narrowed to the 32-bit width of the protobuf fields.
  pb->set_hash_column_count(narrow_cast<uint32_t>(hash_column_count_));
  pb->set_range_column_count(narrow_cast<uint32_t>(range_column_count_));
  for (const auto& id : indexed_hash_column_ids_) {
    pb->add_indexed_hash_column_ids(id);
  }
  for (const auto& id : indexed_range_column_ids_) {
    pb->add_indexed_range_column_ids(id);
  }
  pb->set_index_permissions(index_permissions_);
  pb->set_backfill_error_message(backfill_error_message_);
  pb->set_use_mangled_column_name(use_mangled_column_name_);
  // Leave the field unset for indexes without a predicate, so that has_where_predicate_spec()
  // stays false and the constructor rebuilds a null pointer.
  if (where_predicate_spec_) {
    pb->mutable_where_predicate_spec()->CopyFrom(*where_predicate_spec_);
  }
}
137
138
3.87k
// Returns the ids of the index-table columns that correspond to the indexed hash columns,
// followed by those that correspond to the indexed range columns.
//
// Each indexed-table column id is translated through the index columns. When several index
// columns refer to the same indexed column, the last one wins. An indexed id with no matching
// index column yields a default-constructed ColumnId.
vector<ColumnId> IndexInfo::index_key_column_ids() const {
  // indexed-table column id -> index-table column id.
  std::unordered_map<ColumnId, ColumnId, boost::hash<ColumnId>> index_id_by_indexed_id;
  for (const auto& col : columns_) {
    index_id_by_indexed_id[col.indexed_column_id] = col.column_id;
  }

  vector<ColumnId> key_ids;
  key_ids.reserve(indexed_hash_column_ids_.size() + indexed_range_column_ids_.size());

  const auto append_translated = [&key_ids, &index_id_by_indexed_id](const auto& indexed_ids) {
    for (const auto& indexed_id : indexed_ids) {
      key_ids.push_back(index_id_by_indexed_id[indexed_id]);
    }
  };
  append_translated(indexed_hash_column_ids_);
  append_translated(indexed_range_column_ids_);

  return key_ids;
}
153
154
15.8k
bool IndexInfo::PrimaryKeyColumnsOnly(const Schema& indexed_schema) const {
155
18.4k
  for (size_t i = 0; i < hash_column_count_ + range_column_count_; 
i++2.68k
) {
156
17.8k
    if (!indexed_schema.is_key_column(columns_[i].indexed_column_id)) {
157
15.1k
      return false;
158
15.1k
    }
159
17.8k
  }
160
650
  return true;
161
15.8k
}
162
163
78.7k
bool IndexInfo::IsColumnCovered(const ColumnId column_id) const {
164
78.7k
  return covered_column_ids_.find(column_id) != covered_column_ids_.end();
165
78.7k
}
166
167
5.83k
bool IndexInfo::IsColumnCovered(const std::string& column_name) const {
168
15.2k
  for (const auto &col : columns_) {
169
15.2k
    if (column_name == col.column_name) {
170
3.86k
      return true;
171
3.86k
    }
172
15.2k
  }
173
1.96k
  return false;
174
5.83k
}
175
176
0
int32_t IndexInfo::IsExprCovered(const string& expr_name) const {
177
  // CHECKING if an expression is covered.
178
  // - If IndexColumn name is a substring of "expr_name", the given expression is covered. That is,
179
  //   it can be computed using the value of this column.
180
  //
181
  // - For this function to work properly, the column and expression name MUST be serialized in a
182
  //   way that guarantees their uniqueness. Function PTExpr::MangledName() resolves this issue.
183
  //
184
  // - Example:
185
  //     CREATE TABLE tab (pk int primary key, a int, j jsonb);
186
  //     CREATE INDEX a_index ON tab (a);
187
  //     SELECT pk FROM tab WHERE j->'b'->>'a' = '99';
188
  //   In this example, clearly "a_index" doesn't cover the seleted json expression, but the name
189
  //   "a" is a substring of "j->b->>a", and this function would return TRUE, which is wrong. To
190
  //   avoid this issue, <column names> and JSONB <attribute names> must be escaped uniquely and
191
  //   differently. To cover the above SELECT, the following index must be defined.
192
  //     CREATE INDEX jindex on tab(j->'b'->>'a');
193
0
  int32_t idx = 0;
194
0
  for (const auto &col : columns_) {
195
0
    if (!col.column_name.empty() && expr_name.find(col.column_name) != expr_name.npos) {
196
0
      return idx;
197
0
    }
198
0
    idx++;
199
0
  }
200
201
0
  return -1;
202
0
}
203
204
// Check for dependency is used for DDL operations, so it does not need to be fast. As a result,
205
// the dependency list does not need to be cached in a member id list for fast access.
206
6
// Returns true when this index depends on the indexed-table column "column_id": either an
// index column refers to it, or the WHERE predicate of the index (if any) lists it.
bool IndexInfo::CheckColumnDependency(ColumnId column_id) const {
  // The protobuf data contains the IDs of all columns that this index is referencing.
  // Examples:
  // 1. Index by column
  //    - INDEX ON tab (a_column)
  //    - The ID of "a_column" is included in protobuf data.
  //
  // 2. Index by expression of column:
  //    - INDEX ON tab (j_column->>'field')
  //    - The ID of "j_column" is included in protobuf data.
  for (const auto& col : columns_) {
    if (col.indexed_column_id == column_id) {
      return true;
    }
  }

  // No predicate means there is nothing else that could reference the column.
  if (!where_predicate_spec_) {
    return false;
  }

  for (auto predicate_col_id : where_predicate_spec_->column_ids()) {
    if (ColumnId(predicate_col_id) == column_id) {
      return true;
    }
  }
  return false;
}
230
231
84
// Searches the key columns of this index (the first key_column_count() entries of columns_)
// for one whose non-empty name is a substring of "key_expr_name".
// Returns the position of the first such column, or boost::none when there is none.
boost::optional<size_t> IndexInfo::FindKeyIndex(const string& key_expr_name) const {
  size_t pos = 0;
  while (pos < key_column_count()) {
    const auto& name = columns_[pos].column_name;
    const bool referenced =
        !name.empty() && key_expr_name.find(name) != key_expr_name.npos;
    if (referenced) {
      // This key column is referenced by the expression.
      return pos;
    }
    ++pos;
  }

  return boost::none;
}
242
243
1.93k
std::string IndexInfo::ToString() const {
244
1.93k
  IndexInfoPB pb;
245
1.93k
  ToPB(&pb);
246
1.93k
  return pb.ShortDebugString();
247
1.93k
}
248
249
344k
// Returns the index column at position "idx". No bounds check is performed, so "idx" must be
// smaller than the number of columns.
const IndexColumn& IndexInfo::column(const size_t idx) const {
  const IndexColumn& requested = columns_[idx];
  return requested;
}
252
253
3.77k
// Builds an IndexMap holding one entry per protobuf index descriptor in "indexes".
IndexMap::IndexMap(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) {
  this->FromPB(indexes);
}
256
257
1.25M
// Replaces the content of this map with the indexes described by "indexes". Each entry is keyed
// by the table id of the index.
void IndexMap::FromPB(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) {
  clear();
  const int num_indexes = indexes.size();
  for (int pos = 0; pos < num_indexes; ++pos) {
    const auto& index_pb = indexes.Get(pos);
    emplace(index_pb.table_id(), IndexInfo(index_pb));
  }
}
263
264
301M
// Replaces the content of "indexes" with the protobuf form of every index in this map, in the
// map's iteration order.
void IndexMap::ToPB(google::protobuf::RepeatedPtrField<IndexInfoPB>* indexes) const {
  indexes->Clear();
  for (auto entry = begin(); entry != end(); ++entry) {
    const auto& index_info = entry->second;
    index_info.ToPB(indexes->Add());
  }
}
270
271
59.2k
// Looks up the index whose table id is "index_id".
// Returns a pointer to the entry stored in this map, or a NotFound status when the id is
// unknown.
Result<const IndexInfo*> IndexMap::FindIndex(const TableId& index_id) const {
  const auto found = find(index_id);
  if (found != end()) {
    return &found->second;
  }
  return STATUS(NotFound, Format("Index id $0 not found", index_id));
}
278
279
}  // namespace yb