YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/master/catalog_manager_util.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/master/catalog_manager_util.h"
15
16
#include "yb/master/catalog_entity_info.h"
17
18
#include "yb/util/flag_tags.h"
19
#include "yb/util/math_util.h"
20
#include "yb/util/string_util.h"
21
22
// Threshold compared against the per-placement standard deviation of tserver load in
// CatalogManagerUtil::IsLoadBalanced.
DEFINE_double(balancer_load_max_standard_deviation, 2.0,
              "The standard deviation among the tserver load, above which that distribution "
              "is considered not balanced.");
TAG_FLAG(balancer_load_max_standard_deviation, advanced);
26
27
namespace yb {
28
namespace master {
29
30
using strings::Substitute;
31
32
7
// Checks whether live replica load is evenly spread across the given tablet servers.
//
// Tablet servers are grouped by placement id (via GetPerZoneTSDesc) and then, inside each group,
// by placement uuid. For every resulting bucket the standard deviation of num_live_replicas() is
// computed and logged.
//
// Returns:
//   - the error from GetPerZoneTSDesc if grouping fails;
//   - IllegalState for the first bucket whose deviation is greater than or equal to
//     FLAGS_balancer_load_max_standard_deviation;
//   - OK otherwise.
Status CatalogManagerUtil::IsLoadBalanced(const master::TSDescriptorVector& ts_descs) {
  ZoneToDescMap ts_by_zone;
  RETURN_NOT_OK(GetPerZoneTSDesc(ts_descs, &ts_by_zone));

  for (const auto& zone_entry : ts_by_zone) {
    const auto& placement_id = zone_entry.first;
    const auto& zone_tservers = zone_entry.second;

    // Placements holding at most one tserver are not checked.
    if (zone_tservers.size() <= 1) {
      continue;
    }

    // Map from placement uuid to tserver load vector.
    std::map<string, vector<double>> load_by_uuid;
    for (const auto& tserver : zone_tservers) {
      load_by_uuid[tserver->placement_uuid()].push_back(tserver->num_live_replicas());
    }

    for (const auto& bucket : load_by_uuid) {
      const auto& placement_uuid = bucket.first;
      const auto& loads = bucket.second;

      const double deviation = yb::standard_deviation(loads);
      LOG(INFO) << "Load standard deviation is " << deviation << " for "
                << loads.size() << " tservers in placement " << placement_id
                << " for placement uuid " << placement_uuid;

      if (deviation >= FLAGS_balancer_load_max_standard_deviation) {
        return STATUS(IllegalState, Substitute("Load not balanced: deviation=$0 in $1 for "
                                               "placement uuid $2.",
                                               deviation, placement_id, placement_uuid));
      }
    }
  }
  return Status::OK();
}
62
63
// Verifies that tablet leaders are placed only where they are allowed to be.
//
// Two checks are made for every tablet server in ts_descs:
//   1. When FLAGS_transaction_tables_use_preferred_zones is false, the number of transaction
//      status tablet leaders on the server must lie between floor(N / S) and ceil(N / S), where N
//      is the number of transaction status tablets and S the number of servers.
//   2. A server for which IsAcceptingLeaderLoad(replication_info) is false must not hold more
//      leaders than the transaction status leaders counted for it.
//
// Returns OK when ts_descs is empty or all checks pass, IllegalState for the first violation.
Status CatalogManagerUtil::AreLeadersOnPreferredOnly(
    const TSDescriptorVector& ts_descs,
    const ReplicationInfoPB& replication_info,
    const vector<scoped_refptr<TableInfo>>& tables) {
  if (PREDICT_FALSE(ts_descs.empty())) {
    return Status::OK();
  }

  // Bookkeeping for the transaction leader spread check. When
  // transaction_tables_use_preferred_zones is true everything below stays empty/zero, so the
  // spread comparisons in the loop can never fire (0 > 0 and 0 < 0 are both false).
  std::map<std::string, int> txn_leaders_by_ts;
  int num_txn_tablets = 0;
  int max_txn_leaders_per_node = 0;
  int min_txn_leaders_per_node = 0;

  if (!FLAGS_transaction_tables_use_preferred_zones) {
    const auto num_servers = ts_descs.size();
    CalculateTxnLeaderMap(&txn_leaders_by_ts, &num_txn_tablets, tables);
    min_txn_leaders_per_node = num_txn_tablets / num_servers;
    max_txn_leaders_per_node = min_txn_leaders_per_node;
    // An uneven split lets some servers carry one extra leader.
    if (num_txn_tablets % num_servers) {
      ++max_txn_leaders_per_node;
    }
  }

  for (const auto& ts_desc : ts_descs) {
    const auto txn_it = txn_leaders_by_ts.find(ts_desc->permanent_uuid());
    const int txn_leaders_on_ts = (txn_it == txn_leaders_by_ts.end()) ? 0 : txn_it->second;

    // If enabled, check if transaction tablet leaders are evenly spread.
    if (txn_leaders_on_ts > max_txn_leaders_per_node) {
      return STATUS(
          IllegalState,
          Substitute("Too many txn status leaders found on tserver $0. Found $1, Expected $2.",
                      ts_desc->permanent_uuid(),
                      txn_leaders_on_ts,
                      max_txn_leaders_per_node));
    }
    if (txn_leaders_on_ts < min_txn_leaders_per_node) {
      return STATUS(
          IllegalState,
          Substitute("Tserver $0 expected to have at least $1 txn status leader(s), but has $2.",
                      ts_desc->permanent_uuid(),
                      min_txn_leaders_per_node,
                      txn_leaders_on_ts));
    }

    // Servers that accept leader load need no further checking.
    if (ts_desc->IsAcceptingLeaderLoad(replication_info)) {
      continue;
    }

    // This ts shouldn't have leader load apart from the txn leaders counted above. If transaction
    // tables follow preferred nodes that count is 0, so any leader at all is an error.
    if (ts_desc->leader_count() > txn_leaders_on_ts) {
      return STATUS(
          IllegalState,
          Substitute("Expected no leader load on tserver $0, found $1.",
                     ts_desc->permanent_uuid(), ts_desc->leader_count() - txn_leaders_on_ts));
    }
  }
  return Status::OK();
}
131
132
void CatalogManagerUtil::CalculateTxnLeaderMap(std::map<std::string, int>* txn_map,
133
                                               int* num_txn_tablets,
134
162
                                               vector<scoped_refptr<TableInfo>> tables) {
135
3.31k
  for (const auto& table : tables) {
136
3.31k
    bool is_txn_table = table->GetTableType() == TRANSACTION_STATUS_TABLE_TYPE;
137
3.31k
    if (!is_txn_table) {
138
3.31k
      continue;
139
3.31k
    }
140
1
    TabletInfos tablets = table->GetTablets();
141
1
    (*num_txn_tablets) += tablets.size();
142
24
    for (const auto& tablet : tablets) {
143
24
      auto replication_locations = tablet->GetReplicaLocations();
144
160
      for (const auto& replica : *replication_locations) {
145
160
        if (replica.second.role == PeerRole::LEADER) {
146
24
          (*txn_map)[replica.first]++;
147
24
        }
148
160
      }
149
24
    }
150
1
  }
151
162
}
152
153
// Groups tablet server descriptors by their placement id.
//
// `zone_to_ts` is cleared and then filled so that each placement id maps to the descriptors that
// reported it, in the order they appear in `ts_descs`.
//
// Returns InvalidArgument if `zone_to_ts` is null, OK otherwise.
Status CatalogManagerUtil::GetPerZoneTSDesc(const TSDescriptorVector& ts_descs,
                                            ZoneToDescMap* zone_to_ts) {
  if (zone_to_ts == nullptr) {
    return STATUS(InvalidArgument, "Need a non-null zone to tsdesc map that will be filled in.");
  }
  zone_to_ts->clear();
  for (const auto& ts_desc : ts_descs) {
    // operator[] creates an empty entry the first time a placement id is seen, so a single lookup
    // covers both the "new zone" and "existing zone" cases.
    (*zone_to_ts)[ts_desc->placement_id()].push_back(ts_desc);
  }
  return Status::OK();
}
170
171
391
bool CatalogManagerUtil::IsCloudInfoEqual(const CloudInfoPB& lhs, const CloudInfoPB& rhs) {
172
391
  return (lhs.placement_cloud() == rhs.placement_cloud() &&
173
105
          lhs.placement_region() == rhs.placement_region() &&
174
105
          lhs.placement_zone() == rhs.placement_zone());
175
391
}
176
177
bool CatalogManagerUtil::DoesPlacementInfoContainCloudInfo(const PlacementInfoPB& placement_info,
178
1.94k
                                                           const CloudInfoPB& cloud_info) {
179
19
  for (const auto& placement_block : placement_info.placement_blocks()) {
180
19
    if (IsCloudInfoEqual(placement_block.cloud_info(), cloud_info)) {
181
10
      return true;
182
10
    }
183
19
  }
184
1.93k
  return false;
185
1.94k
}
186
187
bool CatalogManagerUtil::DoesPlacementInfoSpanMultipleRegions(
188
1.99k
    const PlacementInfoPB& placement_info) {
189
1.99k
  int num_blocks = placement_info.placement_blocks_size();
190
1.99k
  if (num_blocks < 2) {
191
1.92k
    return false;
192
1.92k
  }
193
73
  const auto& first_block = placement_info.placement_blocks(0).cloud_info();
194
73
  for (int i = 1; i < num_blocks; ++i) {
195
0
    const auto& cur_block = placement_info.placement_blocks(i).cloud_info();
196
0
    if (first_block.placement_cloud() != cur_block.placement_cloud() ||
197
0
        first_block.placement_region() != cur_block.placement_region()) {
198
0
      return true;
199
0
    }
200
0
  }
201
73
  return false;
202
73
}
203
204
// Determines the placement uuid a Raft peer belongs to.
//
//   - PRE_VOTER / VOTER: the peer is a live replica, so the placement uuid of
//     replication_info.live_replicas() is returned.
//   - PRE_OBSERVER / OBSERVER: the peer is a read replica. Every read replica placement whose
//     blocks contain the peer's cloud info is a candidate; exactly one candidate must exist and
//     its placement uuid is returned, otherwise IllegalState is returned.
//   - UNKNOWN_MEMBER_TYPE or any other value: IllegalState.
Result<std::string> CatalogManagerUtil::GetPlacementUuidFromRaftPeer(
    const ReplicationInfoPB& replication_info, const consensus::RaftPeerPB& peer) {
  switch (peer.member_type()) {
    case consensus::PeerMemberType::PRE_VOTER:
    case consensus::PeerMemberType::VOTER: {
      // This peer is a live replica.
      return replication_info.live_replicas().placement_uuid();
    }
    case consensus::PeerMemberType::PRE_OBSERVER:
    case consensus::PeerMemberType::OBSERVER: {
      // This peer is a read replica.
      std::vector<std::string> placement_uuid_matches;
      for (const auto& placement_info : replication_info.read_replicas()) {
        if (CatalogManagerUtil::DoesPlacementInfoContainCloudInfo(
            placement_info, peer.cloud_info())) {
          placement_uuid_matches.push_back(placement_info.placement_uuid());
        }
      }

      if (placement_uuid_matches.size() != 1) {
        return STATUS(IllegalState, Format("Expect 1 placement match for peer $0, found $1: $2",
                                           peer.ShortDebugString(), placement_uuid_matches.size(),
                                           VectorToString(placement_uuid_matches)));
      }

      return placement_uuid_matches.front();
    }
    case consensus::PeerMemberType::UNKNOWN_MEMBER_TYPE: {
      return STATUS(IllegalState, Format("Member type unknown for peer $0",
                                         peer.ShortDebugString()));
    }
    default:
      // The message must go through Format(); passing the pattern and the argument separately
      // to STATUS leaves the "$0" placeholder unsubstituted in the resulting status.
      return STATUS(IllegalState, Format("Unhandled raft state for peer $0",
                                         peer.ShortDebugString()));
  }
}
239
240
// Checks whether `tablet` can be deleted because its key range is fully covered by other
// RUNNING tablets of the same table.
//
// The tablets returned by GetTabletsInRange for this tablet's partition are walked in the order
// returned (presumably sorted by partition start key -- TODO confirm against GetTabletsInRange).
// The tablet itself and tablets that are not RUNNING are skipped; every remaining tablet must
// start exactly where the previous one ended, beginning at this tablet's partition start, and the
// walk must finish exactly at this tablet's partition end.
//
// Returns:
//   - NotFound if the tablet is already in DELETED state;
//   - IllegalState describing the first uncovered gap;
//   - OK if the range is fully covered.
CHECKED_STATUS CatalogManagerUtil::CheckIfCanDeleteSingleTablet(
    const scoped_refptr<TabletInfo>& tablet) {
  static const auto stringify_partition_key = [](const Slice& key) {
    return key.empty() ? "{empty}" : key.ToDebugString();
  };
  const auto& tablet_id = tablet->tablet_id();

  // The read lock is held until the function returns, so the protobuf and its partition can be
  // used by reference instead of being deep-copied.
  const auto tablet_lock = tablet->LockForRead();
  const auto& tablet_pb = tablet_lock.data().pb;
  if (tablet_pb.state() == SysTabletsEntryPB::DELETED) {
    return STATUS_FORMAT(NotFound, "Tablet $0 has been already deleted", tablet_id);
  }
  const auto& partition = tablet_pb.partition();

  TabletInfos tablets_in_range;
  VLOG(3) << "Tablet " << tablet_id << " " << AsString(partition);
  tablet->table()->GetTabletsInRange(
      partition.partition_key_start(), partition.partition_key_end(), &tablets_in_range);

  // Key up to which the range has been covered so far.
  std::string partition_key = partition.partition_key_start();
  for (const auto& inner_tablet : tablets_in_range) {
    if (inner_tablet->tablet_id() == tablet_id) {
      continue;
    }
    // Copy what is needed out of the inner tablet so its lock is released before logging and
    // comparing.
    PartitionPB inner_partition;
    SysTabletsEntryPB::State inner_tablet_state;
    {
      const auto inner_tablet_lock = inner_tablet->LockForRead();
      const auto& pb = inner_tablet_lock.data().pb;
      inner_partition = pb.partition();
      inner_tablet_state = pb.state();
    }
    VLOG(3) << "Inner tablet " << inner_tablet->tablet_id()
            << " partition: " << AsString(inner_partition)
            << " state: " << SysTabletsEntryPB_State_Name(inner_tablet_state);
    if (inner_tablet_state != SysTabletsEntryPB::RUNNING) {
      continue;
    }
    if (partition_key != inner_partition.partition_key_start()) {
      return STATUS_FORMAT(
          IllegalState,
          "Can't delete tablet $0 not covered by child tablets. Partition gap: $1 ... $2",
          tablet_id,
          stringify_partition_key(partition_key),
          stringify_partition_key(inner_partition.partition_key_start()));
    }
    partition_key = inner_partition.partition_key_end();
    // An empty end key on the tablet being deleted never triggers this early exit; the remaining
    // tablets are then walked to the end of the list.
    if (!partition.partition_key_end().empty() && partition_key >= partition.partition_key_end()) {
      break;
    }
  }
  if (partition_key != partition.partition_key_end()) {
    return STATUS_FORMAT(
        IllegalState,
        "Can't delete tablet $0 not covered by child tablets. Partition gap: $1 ... $2",
        tablet_id,
        stringify_partition_key(partition_key),
        stringify_partition_key(partition.partition_key_end()));
  }
  return Status::OK();
}
301
302
// Classifies how closely two cloud infos match, comparing cloud, then region, then zone.
//
// A level counts as a mismatch only when both sides have that field set and the values differ; a
// field missing on either side is treated as matching. The result is the deepest level reached
// before the first mismatch: NO_MATCH, CLOUD_MATCH, REGION_MATCH, or ZONE_MATCH when no level
// mismatches.
CatalogManagerUtil::CloudInfoSimilarity CatalogManagerUtil::ComputeCloudInfoSimilarity(
    const CloudInfoPB& ci1, const CloudInfoPB& ci2) {
  const bool cloud_differs = ci1.has_placement_cloud() && ci2.has_placement_cloud() &&
                             ci1.placement_cloud() != ci2.placement_cloud();
  if (cloud_differs) {
    return NO_MATCH;
  }

  const bool region_differs = ci1.has_placement_region() && ci2.has_placement_region() &&
                              ci1.placement_region() != ci2.placement_region();
  if (region_differs) {
    return CLOUD_MATCH;
  }

  const bool zone_differs = ci1.has_placement_zone() && ci2.has_placement_zone() &&
                            ci1.placement_zone() != ci2.placement_zone();
  return zone_differs ? REGION_MATCH : ZONE_MATCH;
}
323
324
173k
bool CatalogManagerUtil::IsCloudInfoPrefix(const CloudInfoPB& ci1, const CloudInfoPB& ci2) {
325
173k
  return ComputeCloudInfoSimilarity(ci1, ci2) == ZONE_MATCH;
326
173k
}
327
328
81
// Validates the placement blocks of `placement_info`. Blocks without a cloud info are ignored.
//
// Three checks run in order, each over all blocks, and the first failure is returned as
// IllegalState:
//   1. No two blocks may produce the same TSDescriptor::generate_placement_id().
//   2. Each cloud info must be a prefix of cloud.region.zone: a region requires a cloud and a
//      zone requires a region.
//   3. No block may overlap another, as decided by IsCloudInfoPrefix.
// Returns OK when every check passes.
CHECKED_STATUS CatalogManagerUtil::IsPlacementInfoValid(const PlacementInfoPB& placement_info) {
  const int num_blocks = placement_info.placement_blocks_size();

  // Check for duplicates.
  std::unordered_set<string> seen_placement_ids;
  for (int i = 0; i < num_blocks; i++) {
    const auto& block = placement_info.placement_blocks(i);
    if (!block.has_cloud_info()) {
      continue;
    }

    const CloudInfoPB& ci = block.cloud_info();
    // insert() reports through `.second` whether the id was newly added.
    if (!seen_placement_ids.insert(TSDescriptor::generate_placement_id(ci)).second) {
      return STATUS(IllegalState,
                    Substitute("Placement information specified should not contain duplicates. "
                               "Given placement block: $0 is a duplicate",
                               ci.ShortDebugString()));
    }
  }

  // Validate the placement blocks to be prefixes.
  for (int i = 0; i < num_blocks; i++) {
    const auto& block = placement_info.placement_blocks(i);
    if (!block.has_cloud_info()) {
      continue;
    }

    const CloudInfoPB& pb = block.cloud_info();
    const bool contains_cloud = pb.has_placement_cloud();
    const bool contains_region = pb.has_placement_region();
    const bool contains_zone = pb.has_placement_zone();

    // The accepted shapes are *.*.*, C.*.*, C.R.* and C.R.Z, which is exactly: a region is only
    // present together with a cloud, and a zone only together with a region.
    const bool is_prefix =
        (contains_cloud || !contains_region) && (contains_region || !contains_zone);
    if (!is_prefix) {
      return STATUS(IllegalState,
                    Substitute("Placement information specified should be prefixes. "
                               "Given placement block: $0 isn't a prefix",
                               pb.ShortDebugString()));
    }
  }

  // No two prefixes should overlap.
  for (int i = 0; i < num_blocks; i++) {
    const auto& block_i = placement_info.placement_blocks(i);
    for (int j = 0; j < num_blocks; j++) {
      if (i == j) {
        continue;
      }
      const auto& block_j = placement_info.placement_blocks(j);
      if (!block_i.has_cloud_info() || !block_j.has_cloud_info()) {
        continue;
      }

      const CloudInfoPB& pb1 = block_i.cloud_info();
      const CloudInfoPB& pb2 = block_j.cloud_info();
      // pb1 shouldn't be prefix of pb2.
      if (CatalogManagerUtil::IsCloudInfoPrefix(pb1, pb2)) {
        return STATUS(IllegalState,
                      Substitute("Placement information specified should not overlap. $0 and"
                                 " $1 overlap. For instance, c1.r1.z1,c1.r1 is invalid while "
                                 "c1.r1.z1,c1.r1.z2 is valid.",
                                 pb1.ShortDebugString(), pb2.ShortDebugString()));
      }
    }
  }
  return Status::OK();
}
403
404
288k
bool CMPerTableLoadState::CompareLoads(const TabletServerId &ts1, const TabletServerId &ts2) {
405
288k
  if (per_ts_load_[ts1] != per_ts_load_[ts2]) {
406
112k
    return per_ts_load_[ts1] < per_ts_load_[ts2];
407
112k
  }
408
175k
  if (global_load_state_->GetGlobalLoad(ts1) == global_load_state_->GetGlobalLoad(ts2)) {
409
175k
    return ts1 < ts2;
410
175k
  }
411
65
  return global_load_state_->GetGlobalLoad(ts1) < global_load_state_->GetGlobalLoad(ts2);
412
65
}
413
414
94.8k
void CMPerTableLoadState::SortLoad() {
415
94.8k
  Comparator comp(this);
416
94.8k
  std::sort(sorted_load_.begin(), sorted_load_.end(), comp);
417
94.8k
}
418
419
} // namespace master
420
} // namespace yb