YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/common/index.cc
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
// Classes that implement secondary index.
15
//--------------------------------------------------------------------------------------------------
16
17
#include "yb/common/index.h"
18
19
#include "yb/common/common.pb.h"
20
#include "yb/common/index_column.h"
21
#include "yb/common/schema.h"
22
23
#include "yb/gutil/casts.h"
24
25
#include "yb/util/result.h"
26
27
using std::vector;
28
using std::unordered_map;
29
using google::protobuf::RepeatedField;
30
using google::protobuf::RepeatedPtrField;
31
using google::protobuf::uint32;
32
33
namespace yb {
34
35
// When DocDB receive messages from older clients, those messages won't have "column_name" and
36
// "colexpr" attributes.
37
IndexColumn::IndexColumn(const IndexInfoPB::IndexColumnPB& pb)
38
    : column_id(ColumnId(pb.column_id())),
39
      column_name(pb.column_name()), // Default to empty.
40
      indexed_column_id(ColumnId(pb.indexed_column_id())),
41
309k
      colexpr(pb.colexpr()) /* Default to empty message */ {
42
309k
}
43
44
430k
// Writes this index column into "pb": both column ids, the column name and a copy of the
// column expression.
void IndexColumn::ToPB(IndexInfoPB::IndexColumnPB* pb) const {
  pb->set_column_id(column_id);
  pb->set_indexed_column_id(indexed_column_id);
  pb->set_column_name(column_name);
  // Protobuf message assignment replaces the target's content with a copy of the source.
  *pb->mutable_colexpr() = colexpr;
}
50
51
0
// Returns a human-readable rendering of this index column, produced by YB_STRUCT_TO_STRING from
// the column_id, column_name, indexed_column_id and colexpr members.
std::string IndexColumn::ToString() const {
52
0
  return YB_STRUCT_TO_STRING(column_id, column_name, indexed_column_id, colexpr);
53
0
}
54
55
namespace {
56
57
// Converts a repeated IndexColumnPB field into a vector of IndexColumn, preserving order.
vector<IndexColumn> IndexColumnFromPB(
    const RepeatedPtrField<IndexInfoPB::IndexColumnPB>& columns) {
  // The iterator-range constructor builds every IndexColumn in place from its protobuf message.
  return vector<IndexColumn>(columns.begin(), columns.end());
}
66
67
240k
// Converts a repeated uint32 field of raw column ids into a vector of ColumnId, preserving order.
vector<ColumnId> ColumnIdsFromPB(const RepeatedField<uint32>& ids) {
  // The iterator-range constructor builds every ColumnId in place from its raw value.
  return vector<ColumnId>(ids.begin(), ids.end());
}
75
76
} // namespace
77
78
// Builds an IndexInfo from its protobuf description.
//
// All attributes are copied from "pb". The where-predicate spec is copied only when "pb" has
// one; otherwise where_predicate_spec_ stays null. After the members are initialized, the index
// columns are scanned once to fill covered_column_ids_ and to set has_index_by_expr_.
IndexInfo::IndexInfo(const IndexInfoPB& pb)
    : table_id_(pb.table_id()),
      indexed_table_id_(pb.indexed_table_id()),
      schema_version_(pb.version()),
      is_local_(pb.is_local()),
      is_unique_(pb.is_unique()),
      columns_(IndexColumnFromPB(pb.columns())),
      hash_column_count_(pb.hash_column_count()),
      range_column_count_(pb.range_column_count()),
      indexed_hash_column_ids_(ColumnIdsFromPB(pb.indexed_hash_column_ids())),
      indexed_range_column_ids_(ColumnIdsFromPB(pb.indexed_range_column_ids())),
      index_permissions_(pb.index_permissions()),
      backfill_error_message_(pb.backfill_error_message()),
      use_mangled_column_name_(pb.use_mangled_column_name()),
      where_predicate_spec_(
          pb.has_where_predicate_spec()
              ? std::make_shared<IndexInfoPB::WherePredicateSpecPB>(pb.where_predicate_spec())
              : nullptr) {
  for (const auto& index_col : columns_) {
    // A column counts as covered only when the index column is the column itself, i.e. its
    // expression is a plain column reference or is not set at all.
    const auto expr_case = index_col.colexpr.expr_case();
    const bool indexes_plain_column =
        expr_case == QLExpressionPB::ExprCase::kColumnId ||
        expr_case == QLExpressionPB::ExprCase::EXPR_NOT_SET;

    if (!indexes_plain_column) {
      // Indexing by an expression of a column (for example "jsonb->>'field'") must not put the
      // underlying column into the covered list. Only "jsonb->>" expressions are currently
      // supported, but the rule holds for every expression.
      has_index_by_expr_ = true;
      continue;
    }
    covered_column_ids_.insert(index_col.indexed_column_id);
  }
}
108
109
168
// Compiler-generated default constructor, defined out of line in this translation unit.
IndexInfo::IndexInfo() = default;
110
111
138k
// Compiler-generated member-wise copy constructor, defined out of line in this translation unit.
IndexInfo::IndexInfo(const IndexInfo& rhs) = default;
112
65.6k
// Compiler-generated member-wise move constructor, defined out of line in this translation unit.
IndexInfo::IndexInfo(IndexInfo&& rhs) = default;
113
114
313k
// Compiler-generated destructor, defined out of line in this translation unit.
IndexInfo::~IndexInfo() = default;
115
116
117k
// Serializes this IndexInfo into "pb".
//
// Repeated fields (columns and the indexed hash/range column ids) are appended to "pb", not
// replaced, so "pb" is expected to be freshly constructed or cleared by the caller.
//
// The where-predicate spec is written only when this index has one. This mirrors the
// has_where_predicate_spec() check in the IndexInfo(const IndexInfoPB&) constructor, so that
// converting a protobuf to IndexInfo and back does not drop the predicate.
void IndexInfo::ToPB(IndexInfoPB* pb) const {
  pb->set_table_id(table_id_);
  pb->set_indexed_table_id(indexed_table_id_);
  pb->set_version(schema_version_);
  pb->set_is_local(is_local_);
  pb->set_is_unique(is_unique_);
  for (const auto& column : columns_) {
    column.ToPB(pb->add_columns());
  }
  // narrow_cast converts the in-memory counts to the protobuf's uint32 fields.
  pb->set_hash_column_count(narrow_cast<uint32_t>(hash_column_count_));
  pb->set_range_column_count(narrow_cast<uint32_t>(range_column_count_));
  for (const auto& id : indexed_hash_column_ids_) {
    pb->add_indexed_hash_column_ids(id);
  }
  for (const auto& id : indexed_range_column_ids_) {
    pb->add_indexed_range_column_ids(id);
  }
  pb->set_index_permissions(index_permissions_);
  pb->set_backfill_error_message(backfill_error_message_);
  pb->set_use_mangled_column_name(use_mangled_column_name_);
  // Leave the field unset when there is no predicate, so has_where_predicate_spec() stays false
  // for indexes that never had one.
  if (where_predicate_spec_) {
    pb->mutable_where_predicate_spec()->CopyFrom(*where_predicate_spec_);
  }
}
137
138
1.93k
// Returns the ids of the index columns built on the indexed hash columns, followed by those built
// on the indexed range columns, in the order the indexed ids are stored.
vector<ColumnId> IndexInfo::index_key_column_ids() const {
  // Maps an indexed column id to the id of the index column built on it. When several index
  // columns share the same indexed column id, the last one in columns_ wins.
  std::unordered_map<ColumnId, ColumnId, boost::hash<ColumnId>> index_id_by_indexed_id;
  for (const auto& column : columns_) {
    index_id_by_indexed_id[column.indexed_column_id] = column.column_id;
  }

  vector<ColumnId> key_ids;
  key_ids.reserve(indexed_hash_column_ids_.size() + indexed_range_column_ids_.size());

  // Lookup uses operator[], so an indexed id without a matching index column contributes a
  // default-constructed ColumnId.
  const auto append_mapped = [&key_ids, &index_id_by_indexed_id](const auto& indexed_ids) {
    for (const auto& indexed_id : indexed_ids) {
      key_ids.push_back(index_id_by_indexed_id[indexed_id]);
    }
  };
  append_mapped(indexed_hash_column_ids_);
  append_mapped(indexed_range_column_ids_);
  return key_ids;
}
153
154
15.7k
bool IndexInfo::PrimaryKeyColumnsOnly(const Schema& indexed_schema) const {
155
18.4k
  for (size_t i = 0; i < hash_column_count_ + range_column_count_; i++) {
156
17.7k
    if (!indexed_schema.is_key_column(columns_[i].indexed_column_id)) {
157
15.0k
      return false;
158
15.0k
    }
159
17.7k
  }
160
655
  return true;
161
15.7k
}
162
163
78.7k
bool IndexInfo::IsColumnCovered(const ColumnId column_id) const {
164
78.7k
  return covered_column_ids_.find(column_id) != covered_column_ids_.end();
165
78.7k
}
166
167
5.91k
bool IndexInfo::IsColumnCovered(const std::string& column_name) const {
168
15.5k
  for (const auto &col : columns_) {
169
15.5k
    if (column_name == col.column_name) {
170
3.91k
      return true;
171
3.91k
    }
172
15.5k
  }
173
2.00k
  return false;
174
5.91k
}
175
176
0
int32_t IndexInfo::IsExprCovered(const string& expr_name) const {
177
  // CHECKING if an expression is covered.
178
  // - If IndexColumn name is a substring of "expr_name", the given expression is covered. That is,
179
  //   it can be computed using the value of this column.
180
  //
181
  // - For this function to work properly, the column and expression name MUST be serialized in a
182
  //   way that guarantees their uniqueness. Function PTExpr::MangledName() resolves this issue.
183
  //
184
  // - Example:
185
  //     CREATE TABLE tab (pk int primary key, a int, j jsonb);
186
  //     CREATE INDEX a_index ON tab (a);
187
  //     SELECT pk FROM tab WHERE j->'b'->>'a' = '99';
188
  //   In this example, clearly "a_index" doesn't cover the seleted json expression, but the name
189
  //   "a" is a substring of "j->b->>a", and this function would return TRUE, which is wrong. To
190
  //   avoid this issue, <column names> and JSONB <attribute names> must be escaped uniquely and
191
  //   differently. To cover the above SELECT, the following index must be defined.
192
  //     CREATE INDEX jindex on tab(j->'b'->>'a');
193
0
  int32_t idx = 0;
194
0
  for (const auto &col : columns_) {
195
0
    if (!col.column_name.empty() && expr_name.find(col.column_name) != expr_name.npos) {
196
0
      return idx;
197
0
    }
198
0
    idx++;
199
0
  }
200
201
0
  return -1;
202
0
}
203
204
// Check for dependency is used for DDL operations, so it does not need to be fast. As a result,
205
// the dependency list does not need to be cached in a member id list for fast access.
206
6
bool IndexInfo::CheckColumnDependency(ColumnId column_id) const {
207
10
  for (const auto& index_col : columns_) {
208
    // The protobuf data contains IDs of all columns that this index is referencing.
209
    // Examples:
210
    // 1. Index by column
211
    // - INDEX ON tab (a_column)
212
    // - The ID of "a_column" is included in protobuf data.
213
    //
214
    // 2. Index by expression of column:
215
    // - INDEX ON tab (j_column->>'field')
216
    // - The ID of "j_column" is included in protobuf data.
217
10
    if (index_col.indexed_column_id == column_id) {
218
2
      return true;
219
2
    }
220
10
  }
221
222
4
  if (where_predicate_spec_) {
223
3
    for (auto indexed_col_id : where_predicate_spec_->column_ids()) {
224
3
      if (ColumnId(indexed_col_id) == column_id) return true;
225
3
    }
226
3
  }
227
228
2
  return false;
229
4
}
230
231
104
// Returns the position of the first key column whose non-empty name is a substring of
// "key_expr_name", i.e. the key column referenced by the expression. Returns boost::none when no
// key column matches.
boost::optional<size_t> IndexInfo::FindKeyIndex(const string& key_expr_name) const {
  const auto key_count = key_column_count();
  for (size_t idx = 0; idx < key_count; ++idx) {
    const auto& name = columns_[idx].column_name;
    if (name.empty() || key_expr_name.find(name) == std::string::npos) {
      continue;
    }
    return idx;
  }
  return boost::none;
}
242
243
235
// Returns a debug string for this index: the index is serialized with ToPB() into a temporary
// IndexInfoPB, which is then rendered with ShortDebugString(). Only what ToPB() writes appears
// in the result.
std::string IndexInfo::ToString() const {
244
235
  IndexInfoPB pb;
245
235
  ToPB(&pb);
246
235
  return pb.ShortDebugString();
247
235
}
248
249
357k
// Returns the index column at position "idx". The access is an unchecked subscript, so the
// caller must pass an idx within the bounds of columns_.
const IndexColumn& IndexInfo::column(const size_t idx) const {
250
357k
  return columns_[idx];
251
357k
}
252
253
2.23k
// Builds an IndexMap from a repeated IndexInfoPB field by delegating to FromPB().
IndexMap::IndexMap(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) {
254
2.23k
  FromPB(indexes);
255
2.23k
}
256
257
701k
// Replaces the content of this map with one IndexInfo per entry of "indexes", keyed by the
// entry's table id. Existing entries are discarded first. Insertion uses emplace(), so if two
// entries share a table id, the first one is kept.
void IndexMap::FromPB(const google::protobuf::RepeatedPtrField<IndexInfoPB>& indexes) {
  clear();
  for (auto it = indexes.begin(); it != indexes.end(); ++it) {
    emplace(it->table_id(), IndexInfo(*it));
  }
}
263
264
140M
// Replaces the content of "indexes" with the serialized form of every IndexInfo in this map.
// "indexes" is cleared first; entries are written in the map's iteration order.
void IndexMap::ToPB(google::protobuf::RepeatedPtrField<IndexInfoPB>* indexes) const {
  indexes->Clear();
  for (auto it = begin(); it != end(); ++it) {
    it->second.ToPB(indexes->Add());
  }
}
270
271
54.9k
// Looks up the index with table id "index_id". Returns a pointer to the IndexInfo stored in this
// map, or a NotFound status when the map has no such entry.
Result<const IndexInfo*> IndexMap::FindIndex(const TableId& index_id) const {
  const auto itr = find(index_id);
  if (itr != end()) {
    return &itr->second;
  }
  return STATUS(NotFound, Format("Index id $0 not found", index_id));
}
278
279
}  // namespace yb