YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/master/catalog_manager_util.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/master/catalog_manager_util.h"
15
16
#include "yb/master/catalog_entity_info.h"
17
18
#include "yb/util/flag_tags.h"
19
#include "yb/util/math_util.h"
20
#include "yb/util/string_util.h"
21
22
DEFINE_double(balancer_load_max_standard_deviation, 2.0,
23
              "The standard deviation among the tserver load, above which that distribution "
24
              "is considered not balanced.");
25
TAG_FLAG(balancer_load_max_standard_deviation, advanced);
26
27
namespace yb {
28
namespace master {
29
30
using strings::Substitute;
31
32
7
Status CatalogManagerUtil::IsLoadBalanced(const master::TSDescriptorVector& ts_descs) {
33
7
  ZoneToDescMap zone_to_ts;
34
7
  RETURN_NOT_OK(GetPerZoneTSDesc(ts_descs, &zone_to_ts));
35
36
12
  
for (const auto& zone : zone_to_ts)7
{
37
12
    if (zone.second.size() <= 1) {
38
5
      continue;
39
5
    }
40
41
    // Map from placement uuid to tserver load vector.
42
7
    std::map<string, vector<double>> load;
43
23
    for (const auto &ts_desc : zone.second) {
44
23
      (load[ts_desc->placement_uuid()]).push_back(ts_desc->num_live_replicas());
45
23
    }
46
47
9
    for (const auto& entry : load) {
48
9
      double std_dev = yb::standard_deviation(entry.second);
49
9
      LOG(INFO) << "Load standard deviation is " << std_dev << " for "
50
9
                << entry.second.size() << " tservers in placement " << zone.first
51
9
                << " for placement uuid " << entry.first;
52
53
9
      if (std_dev >= FLAGS_balancer_load_max_standard_deviation) {
54
2
        return STATUS(IllegalState, Substitute("Load not balanced: deviation=$0 in $1 for "
55
2
                                               "placement uuid $2.",
56
2
                                               std_dev, zone.first, entry.first));
57
2
      }
58
9
    }
59
7
  }
60
5
  return Status::OK();
61
7
}
62
63
Status CatalogManagerUtil::AreLeadersOnPreferredOnly(
64
    const TSDescriptorVector& ts_descs,
65
    const ReplicationInfoPB& replication_info,
66
156
    const vector<scoped_refptr<TableInfo>>& tables) {
67
156
  if (PREDICT_FALSE(ts_descs.empty())) {
68
0
    return Status::OK();
69
0
  }
70
71
  // Variables for checking transaction leader spread.
72
156
  auto num_servers = ts_descs.size();
73
156
  std::map<std::string, int> txn_map;
74
156
  int num_txn_tablets = 0;
75
156
  int max_txn_leaders_per_node = 0;
76
156
  int min_txn_leaders_per_node = 0;
77
78
156
  if (!FLAGS_transaction_tables_use_preferred_zones) {
79
156
    CalculateTxnLeaderMap(&txn_map, &num_txn_tablets, tables);
80
156
    max_txn_leaders_per_node = num_txn_tablets / num_servers;
81
156
    min_txn_leaders_per_node = max_txn_leaders_per_node;
82
156
    if (num_txn_tablets % num_servers) {
83
0
      ++max_txn_leaders_per_node;
84
0
    }
85
156
  }
86
87
  // If transaction_tables_use_preferred_zones = true, don't check for transaction leader spread.
88
  // This results in txn_map being empty, num_txn_tablets = 0, max_txn_leaders_per_node = 0, and
89
  // system_tablets_leaders = 0.
90
  // Thus all comparisons for transaction leader spread will be ignored (all 0 < 0, etc).
91
92
340
  for (const auto& ts_desc : ts_descs) {
93
340
    auto tserver = txn_map.find(ts_desc->permanent_uuid());
94
340
    int system_tablets_leaders = 0;
95
340
    if (!(tserver == txn_map.end())) {
96
4
      system_tablets_leaders = tserver->second;
97
4
    }
98
99
    // If enabled, check if transaction tablet leaders are evenly spread.
100
340
    if (system_tablets_leaders > max_txn_leaders_per_node) {
101
0
      return STATUS(
102
0
          IllegalState,
103
0
          Substitute("Too many txn status leaders found on tserver $0. Found $1, Expected $2.",
104
0
                      ts_desc->permanent_uuid(),
105
0
                      system_tablets_leaders,
106
0
                      max_txn_leaders_per_node));
107
0
    }
108
340
    if (system_tablets_leaders < min_txn_leaders_per_node) {
109
0
      return STATUS(
110
0
          IllegalState,
111
0
          Substitute("Tserver $0 expected to have at least $1 txn status leader(s), but has $2.",
112
0
                      ts_desc->permanent_uuid(),
113
0
                      min_txn_leaders_per_node,
114
0
                      system_tablets_leaders));
115
0
    }
116
117
    // Check that leaders are on preferred ts only.
118
    // If transaction tables follow preferred nodes, then we verify that there are 0 leaders.
119
    // Otherwise, we need to check that there are 0 non-txn leaders on the ts.
120
340
    if (!ts_desc->IsAcceptingLeaderLoad(replication_info) &&
121
340
        
ts_desc->leader_count() > system_tablets_leaders212
) {
122
      // This is a ts that shouldn't have leader load (asides from txn leaders) but does.
123
134
      return STATUS(
124
134
          IllegalState,
125
134
          Substitute("Expected no leader load on tserver $0, found $1.",
126
134
                     ts_desc->permanent_uuid(), ts_desc->leader_count() - system_tablets_leaders));
127
134
    }
128
340
  }
129
22
  return Status::OK();
130
156
}
131
132
void CatalogManagerUtil::CalculateTxnLeaderMap(std::map<std::string, int>* txn_map,
133
                                               int* num_txn_tablets,
134
156
                                               vector<scoped_refptr<TableInfo>> tables) {
135
3.20k
  for (const auto& table : tables) {
136
3.20k
    bool is_txn_table = table->GetTableType() == TRANSACTION_STATUS_TABLE_TYPE;
137
3.20k
    if (!is_txn_table) {
138
3.20k
      continue;
139
3.20k
    }
140
1
    TabletInfos tablets = table->GetTablets();
141
1
    (*num_txn_tablets) += tablets.size();
142
24
    for (const auto& tablet : tablets) {
143
24
      auto replication_locations = tablet->GetReplicaLocations();
144
150
      for (const auto& replica : *replication_locations) {
145
150
        if (replica.second.role == PeerRole::LEADER) {
146
24
          (*txn_map)[replica.first]++;
147
24
        }
148
150
      }
149
24
    }
150
1
  }
151
156
}
152
153
Status CatalogManagerUtil::GetPerZoneTSDesc(const TSDescriptorVector& ts_descs,
154
11
                                            ZoneToDescMap* zone_to_ts) {
155
11
  if (zone_to_ts == nullptr) {
156
0
    return STATUS(InvalidArgument, "Need a non-null zone to tsdesc map that will be filled in.");
157
0
  }
158
11
  zone_to_ts->clear();
159
45
  for (const auto& ts_desc : ts_descs) {
160
45
    string placement_id = ts_desc->placement_id();
161
45
    auto iter = zone_to_ts->find(placement_id);
162
45
    if (iter == zone_to_ts->end()) {
163
20
      (*zone_to_ts)[placement_id] = {ts_desc};
164
25
    } else {
165
25
      iter->second.push_back(ts_desc);
166
25
    }
167
45
  }
168
11
  return Status::OK();
169
11
}
170
171
723
bool CatalogManagerUtil::IsCloudInfoEqual(const CloudInfoPB& lhs, const CloudInfoPB& rhs) {
172
723
  return (lhs.placement_cloud() == rhs.placement_cloud() &&
173
723
          
lhs.placement_region() == rhs.placement_region()342
&&
174
723
          
lhs.placement_zone() == rhs.placement_zone()338
);
175
723
}
176
177
bool CatalogManagerUtil::DoesPlacementInfoContainCloudInfo(const PlacementInfoPB& placement_info,
178
3.58k
                                                           const CloudInfoPB& cloud_info) {
179
3.58k
  for (const auto& placement_block : placement_info.placement_blocks()) {
180
195
    if (IsCloudInfoEqual(placement_block.cloud_info(), cloud_info)) {
181
171
      return true;
182
171
    }
183
195
  }
184
3.41k
  return false;
185
3.58k
}
186
187
bool CatalogManagerUtil::DoesPlacementInfoSpanMultipleRegions(
188
3.75k
    const PlacementInfoPB& placement_info) {
189
3.75k
  int num_blocks = placement_info.placement_blocks_size();
190
3.75k
  if (num_blocks < 2) {
191
3.56k
    return false;
192
3.56k
  }
193
192
  const auto& first_block = placement_info.placement_blocks(0).cloud_info();
194
192
  for (int i = 1; i < num_blocks; 
++i0
) {
195
9
    const auto& cur_block = placement_info.placement_blocks(i).cloud_info();
196
9
    if (first_block.placement_cloud() != cur_block.placement_cloud() ||
197
9
        
first_block.placement_region() != cur_block.placement_region()0
) {
198
9
      return true;
199
9
    }
200
9
  }
201
183
  return false;
202
192
}
203
204
Result<std::string> CatalogManagerUtil::GetPlacementUuidFromRaftPeer(
205
38
    const ReplicationInfoPB& replication_info, const consensus::RaftPeerPB& peer) {
206
38
  switch (peer.member_type()) {
207
1
    case consensus::PeerMemberType::PRE_VOTER:
208
33
    case consensus::PeerMemberType::VOTER: {
209
      // This peer is a live replica.
210
33
      return replication_info.live_replicas().placement_uuid();
211
1
    }
212
2
    case consensus::PeerMemberType::PRE_OBSERVER:
213
5
    case consensus::PeerMemberType::OBSERVER: {
214
      // This peer is a read replica.
215
5
      std::vector<std::string> placement_uuid_matches;
216
9
      for (const auto& placement_info : replication_info.read_replicas()) {
217
9
        if (CatalogManagerUtil::DoesPlacementInfoContainCloudInfo(
218
9
            placement_info, peer.cloud_info())) {
219
5
          placement_uuid_matches.push_back(placement_info.placement_uuid());
220
5
        }
221
9
      }
222
223
5
      if (placement_uuid_matches.size() != 1) {
224
2
        return STATUS(IllegalState, Format("Expect 1 placement match for peer $0, found $1: $2",
225
2
                                           peer.ShortDebugString(), placement_uuid_matches.size(),
226
2
                                           VectorToString(placement_uuid_matches)));
227
2
      }
228
229
3
      return placement_uuid_matches.front();
230
5
    }
231
0
    case consensus::PeerMemberType::UNKNOWN_MEMBER_TYPE: {
232
0
      return STATUS(IllegalState, Format("Member type unknown for peer $0",
233
5
                                         peer.ShortDebugString()));
234
5
    }
235
0
    default:
236
0
      return STATUS(IllegalState, "Unhandled raft state for peer $0", peer.ShortDebugString());
237
38
  }
238
38
}
239
240
CHECKED_STATUS CatalogManagerUtil::CheckIfCanDeleteSingleTablet(
241
62
    const scoped_refptr<TabletInfo>& tablet) {
242
62
  static const auto stringify_partition_key = [](const Slice& key) {
243
56
    return key.empty() ? 
"{empty}"8
:
key.ToDebugString()48
;
244
56
  };
245
62
  const auto& tablet_id = tablet->tablet_id();
246
247
62
  const auto tablet_lock = tablet->LockForRead();
248
62
  const auto tablet_pb = tablet_lock.data().pb;
249
62
  if (tablet_pb.state() == SysTabletsEntryPB::DELETED) {
250
12
    return STATUS_FORMAT(NotFound, "Tablet $0 has been already deleted", tablet_id);
251
12
  }
252
50
  const auto partition = tablet_pb.partition();
253
254
50
  TabletInfos tablets_in_range;
255
50
  VLOG
(3) << "Tablet " << tablet_id << " " << AsString(partition)0
;
256
50
  tablet->table()->GetTabletsInRange(
257
50
      partition.partition_key_start(), partition.partition_key_end(), &tablets_in_range);
258
259
50
  std::string partition_key = partition.partition_key_start();
260
115
  for (const auto& inner_tablet : tablets_in_range) {
261
115
    if (inner_tablet->tablet_id() == tablet_id) {
262
16
      continue;
263
16
    }
264
99
    PartitionPB inner_partition;
265
99
    SysTabletsEntryPB::State inner_tablet_state;
266
99
    {
267
99
      const auto inner_tablet_lock = inner_tablet->LockForRead();
268
99
      const auto& pb = inner_tablet_lock.data().pb;
269
99
      inner_partition = pb.partition();
270
99
      inner_tablet_state = pb.state();
271
99
    }
272
99
    VLOG(3) << "Inner tablet " << inner_tablet->tablet_id()
273
0
            << " partition: " << AsString(inner_partition)
274
0
            << " state: " << SysTabletsEntryPB_State_Name(inner_tablet_state);
275
99
    if (inner_tablet_state != SysTabletsEntryPB::RUNNING) {
276
20
      continue;
277
20
    }
278
79
    if (partition_key != inner_partition.partition_key_start()) {
279
23
      return STATUS_FORMAT(
280
23
          IllegalState,
281
23
          "Can't delete tablet $0 not covered by child tablets. Partition gap: $1 ... $2",
282
23
          tablet_id,
283
23
          stringify_partition_key(partition_key),
284
23
          stringify_partition_key(inner_partition.partition_key_start()));
285
23
    }
286
56
    partition_key = inner_partition.partition_key_end();
287
56
    if (!partition.partition_key_end().empty() && 
partition_key >= partition.partition_key_end()42
) {
288
13
      break;
289
13
    }
290
56
  }
291
27
  if (partition_key != partition.partition_key_end()) {
292
5
    return STATUS_FORMAT(
293
5
        IllegalState,
294
5
        "Can't delete tablet $0 not covered by child tablets. Partition gap: $1 ... $2",
295
5
        tablet_id,
296
5
        stringify_partition_key(partition_key),
297
5
        stringify_partition_key(partition.partition_key_end()));
298
5
  }
299
22
  return Status::OK();
300
27
}
301
302
CatalogManagerUtil::CloudInfoSimilarity CatalogManagerUtil::ComputeCloudInfoSimilarity(
303
4.97M
    const CloudInfoPB& ci1, const CloudInfoPB& ci2) {
304
4.97M
  if (ci1.has_placement_cloud() &&
305
4.97M
      ci2.has_placement_cloud() &&
306
4.97M
      ci1.placement_cloud() != ci2.placement_cloud()) {
307
124k
      return NO_MATCH;
308
124k
  }
309
310
4.84M
  if (ci1.has_placement_region() &&
311
4.84M
      ci2.has_placement_region() &&
312
4.84M
      ci1.placement_region() != ci2.placement_region()) {
313
855k
      return CLOUD_MATCH;
314
855k
  }
315
316
3.99M
  if (ci1.has_placement_zone() &&
317
3.99M
      
ci2.has_placement_zone()3.77M
&&
318
3.99M
      
ci1.placement_zone() != ci2.placement_zone()3.77M
) {
319
2.24M
      return REGION_MATCH;
320
2.24M
  }
321
1.74M
  return ZONE_MATCH;
322
3.99M
}
323
324
4.96M
bool CatalogManagerUtil::IsCloudInfoPrefix(const CloudInfoPB& ci1, const CloudInfoPB& ci2) {
325
4.96M
  return ComputeCloudInfoSimilarity(ci1, ci2) == ZONE_MATCH;
326
4.96M
}
327
328
134
CHECKED_STATUS CatalogManagerUtil::IsPlacementInfoValid(const PlacementInfoPB& placement_info) {
329
  // Check for duplicates.
330
134
  std::unordered_set<string> cloud_info_string;
331
332
218
  for (int i = 0; i < placement_info.placement_blocks_size(); 
i++84
) {
333
84
    if (!placement_info.placement_blocks(i).has_cloud_info()) {
334
1
      continue;
335
1
    }
336
337
83
    const CloudInfoPB& ci = placement_info.placement_blocks(i).cloud_info();
338
83
    string ci_string = TSDescriptor::generate_placement_id(ci);
339
340
83
    if (!cloud_info_string.count(ci_string)) {
341
83
      cloud_info_string.insert(ci_string);
342
83
    } else {
343
0
      return STATUS(IllegalState,
344
0
                    Substitute("Placement information specified should not contain duplicates."
345
0
                    "Given placement block: $0 isn't a prefix", ci.ShortDebugString()));
346
0
    }
347
83
  }
348
349
  // Validate the placement blocks to be prefixes.
350
218
  
for (int i = 0; 134
i < placement_info.placement_blocks_size();
i++84
) {
351
84
    if (!placement_info.placement_blocks(i).has_cloud_info()) {
352
1
      continue;
353
1
    }
354
355
83
    const CloudInfoPB& pb = placement_info.placement_blocks(i).cloud_info();
356
357
    // Four cases for pb to be a prefix.
358
83
    bool contains_cloud = pb.has_placement_cloud();
359
83
    bool contains_region = pb.has_placement_region();
360
83
    bool contains_zone = pb.has_placement_zone();
361
    // *.*.*
362
83
    bool star_star_star = !contains_cloud && 
!contains_region1
&&
!contains_zone1
;
363
    // C.*.*
364
83
    bool c_star_star = contains_cloud && 
!contains_region82
&&
!contains_zone1
;
365
    // C.R.*
366
83
    bool c_r_star = contains_cloud && 
contains_region82
&&
!contains_zone81
;
367
    // C.R.Z
368
83
    bool c_r_z = contains_cloud && 
contains_region82
&&
contains_zone81
;
369
370
83
    if (!star_star_star && 
!c_star_star82
&&
!c_r_star81
&&
!c_r_z80
) {
371
0
      return STATUS(IllegalState,
372
0
                        Substitute("Placement information specified should be prefixes."
373
0
                        "Given placement block: $0 isn't a prefix", pb.ShortDebugString()));
374
0
    }
375
83
  }
376
377
  // No two prefixes should overlap.
378
218
  
for (int i = 0; 134
i < placement_info.placement_blocks_size();
i++84
) {
379
312
    for (int j = 0; j < placement_info.placement_blocks_size(); 
j++228
) {
380
228
      if (i == j) {
381
84
        continue;
382
144
      } else {
383
144
        if (!placement_info.placement_blocks(i).has_cloud_info() ||
384
144
        !placement_info.placement_blocks(j).has_cloud_info()) {
385
0
          continue;
386
0
        }
387
388
144
        const CloudInfoPB& pb1 = placement_info.placement_blocks(i).cloud_info();
389
144
        const CloudInfoPB& pb2 = placement_info.placement_blocks(j).cloud_info();
390
        // pb1 shouldn't be prefix of pb2.
391
144
        if (CatalogManagerUtil::IsCloudInfoPrefix(pb1, pb2)) {
392
0
          return STATUS(IllegalState,
393
0
                        Substitute("Placement information specified should not overlap. $0 and"
394
0
                        " $1 overlap. For instance, c1.r1.z1,c1.r1 is invalid while "
395
0
                        "c1.r1.z1,c1.r1.z2 is valid. Also note that c1.r1,c1.r1 is valid.",
396
0
                        pb1.ShortDebugString(), pb2.ShortDebugString()));
397
0
        }
398
144
      }
399
228
    }
400
84
  }
401
134
  return Status::OK();
402
134
}
403
404
499k
bool CMPerTableLoadState::CompareLoads(const TabletServerId &ts1, const TabletServerId &ts2) {
405
499k
  if (per_ts_load_[ts1] != per_ts_load_[ts2]) {
406
194k
    return per_ts_load_[ts1] < per_ts_load_[ts2];
407
194k
  }
408
304k
  if (global_load_state_->GetGlobalLoad(ts1) == global_load_state_->GetGlobalLoad(ts2)) {
409
299k
    return ts1 < ts2;
410
299k
  }
411
4.53k
  return global_load_state_->GetGlobalLoad(ts1) < global_load_state_->GetGlobalLoad(ts2);
412
304k
}
413
414
161k
void CMPerTableLoadState::SortLoad() {
415
161k
  Comparator comp(this);
416
161k
  std::sort(sorted_load_.begin(), sorted_load_.end(), comp);
417
161k
}
418
419
} // namespace master
420
} // namespace yb