YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/master/backfill_index.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_MASTER_BACKFILL_INDEX_H
15
#define YB_MASTER_BACKFILL_INDEX_H
16
17
#include <float.h>
18
19
#include <chrono>
20
#include <set>
21
#include <sstream>
22
#include <string>
23
#include <type_traits>
24
#include <utility>
25
#include <vector>
26
27
#include <boost/mpl/and.hpp>
28
#include <gflags/gflags_declare.h>
29
30
#include "yb/common/entity_ids.h"
31
#include "yb/common/index.h"
32
#include "yb/common/partition.h"
33
34
#include "yb/gutil/integral_types.h"
35
#include "yb/gutil/ref_counted.h"
36
37
#include "yb/master/async_rpc_tasks.h"
38
#include "yb/master/catalog_entity_info.h"
39
40
#include "yb/server/monitored_task.h"
41
42
#include "yb/util/status_fwd.h"
43
#include "yb/util/format.h"
44
#include "yb/util/locks.h"
45
#include "yb/util/monotime.h"
46
#include "yb/util/shared_lock.h"
47
#include "yb/util/tostring.h"
48
#include "yb/util/type_traits.h"
49
50
namespace yb {
51
namespace master {
52
53
// Forward declaration: within this header CatalogManager is only referred to
// through pointers (see the static helpers of MultiStageAlterTable).
class CatalogManager;
54
55
// Implements a multi-stage alter table. As of Dec 30 2019, used for adding an
56
// index to an existing table, such that the index can be backfilled with
57
// historic data in an online manner.
58
//
// The class declares only static member functions and holds no data members;
// each helper receives the CatalogManager and the table it operates on as arguments.
//
// NOTE(review): boost::optional / boost::none and std::unordered_map are used below,
// but neither <boost/optional.hpp> nor <unordered_map> appears in this header's
// include list -- presumably they arrive transitively; TODO confirm.
59
class MultiStageAlterTable {
60
 public:
61
  // Launches the next stage of the multi stage schema change. Updates the
62
  // table info, upon the completion of an alter table round if we are in the
63
  // middle of an index backfill. Will update the IndexPermission from
64
  // INDEX_PERM_DELETE_ONLY -> INDEX_PERM_WRITE_AND_DELETE -> BACKFILL
  //
  // respect_backfill_deferrals defaults to true; its exact effect is defined by the
  // out-of-line implementation and is not visible from this header.
  // NOTE(review): the parameter name `Info` is capitalized, unlike every other
  // parameter in this class.
65
  static Status LaunchNextTableInfoVersionIfNecessary(
66
      CatalogManager* mgr, const scoped_refptr<TableInfo>& Info, uint32_t current_version,
67
      bool respect_backfill_deferrals = true);
68
69
  // Clears the fully_applied_* state for the given table and optionally sets it to RUNNING.
70
  // If the version has changed and does not match the expected version no
71
  // change is made.
  // expected_version is optional; presumably an unset value skips the version
  // check -- TODO confirm against the implementation.
72
  static Status ClearFullyAppliedAndUpdateState(
73
      CatalogManager* mgr, const scoped_refptr<TableInfo>& table,
74
      boost::optional<uint32_t> expected_version, bool update_state_to_running);
75
76
  // Copies the current schema, schema_version, indexes and index_info
77
  // into their fully_applied_* equivalents. This is useful to ensure
78
  // that the master returns the fully applied version of the table schema
79
  // while the next alter table is in progress.
  // The copy is made in place on the SysTablesEntryPB passed via `state`.
80
  static void CopySchemaDetailsToFullyApplied(SysTablesEntryPB* state);
81
82
  // Updates and persists the IndexPermission corresponding to the index_table_id for
83
  // the indexed_table's TableInfo.
84
  // Returns whether any permissions were actually updated (leading to a version being incremented).
  // perm_mapping maps an index table id to the permission requested for it.
  // current_version defaults to boost::none.
85
  static Result<bool> UpdateIndexPermission(
86
      CatalogManager* mgr, const scoped_refptr<TableInfo>& indexed_table,
87
      const std::unordered_map<TableId, IndexPermissions>& perm_mapping,
88
      boost::optional<uint32_t> current_version = boost::none);
89
90
  // TODO(jason): make this private when closing issue #6218.
91
  // Start Index Backfill process/step for the specified table/index.
  // idx_infos lists the indexes to backfill on indexed_table.
92
  static Status
93
  StartBackfillingData(CatalogManager *catalog_manager,
94
                       const scoped_refptr<TableInfo> &indexed_table,
95
                       const std::vector<IndexInfoPB>& idx_infos,
96
                       boost::optional<uint32_t> expected_version);
97
};
98
99
// Forward declarations of the backfill helper classes. BackfillTablet, BackfillChunk
// and BackfillTableJob are all fully defined further down in this header.
class BackfillTablet;
100
class BackfillChunk;
101
class BackfillTableJob;
102
103
// This class is responsible for backfilling the specified indexes on the
104
// indexed_table.
//
// Instances derive from std::enable_shared_from_this, and the helper classes in this
// header (BackfillTableJob, BackfillTablet, GetSafeTimeForTablet) hold the table via
// std::shared_ptr<BackfillTable>.
//
// NOTE(review): std::atomic, std::shared_ptr, std::lock_guard and std::unordered_set are
// used below, but <atomic>, <memory>, <mutex> and <unordered_set> are not in this header's
// include list -- presumably they arrive transitively; TODO confirm.
105
class BackfillTable : public std::enable_shared_from_this<BackfillTable> {
106
 public:
  // `indexes` is taken by value; the remaining arguments are pointers or const references.
  // The constructor body is defined out of line.
107
  BackfillTable(Master *master, ThreadPool *callback_pool,
108
                const scoped_refptr<TableInfo> &indexed_table,
109
                std::vector<IndexInfoPB> indexes,
110
                const scoped_refptr<NamespaceInfo> &ns_info);
111
112
  void Launch();
113
114
  Status UpdateSafeTime(const Status& s, HybridTime ht);
115
116
  void Done(const Status& s, const std::unordered_set<TableId>& failed_indexes);
117
118
16.9k
  // Accessor for master_.
  Master* master() { return master_; }
119
120
5.38k
  // Accessor for callback_pool_.
  ThreadPool* threadpool() { return callback_pool_; }
121
122
5.76k
  // Returns a reference to the const member requested_index_names_.
  const std::string& requested_index_names() const { return requested_index_names_; }
123
124
2.72k
  int32_t schema_version() const { return schema_version_; }
125
126
  std::string LogPrefix() const;
127
128
  std::string description() const;
129
130
5.25k
  // Acquire-load of the done_ flag.
  bool done() const {
131
5.25k
    return done_.load(std::memory_order_acquire);
132
5.25k
  }
133
134
2.65k
  // Acquire-load of the timestamp_chosen_ flag.
  bool timestamp_chosen() const {
135
2.65k
    return timestamp_chosen_.load(std::memory_order_acquire);
136
2.65k
  }
137
138
2.72k
  // Returns a copy of read_time_for_backfill_, read while holding mutex_.
  HybridTime read_time_for_backfill() const {
139
2.72k
    std::lock_guard<simple_spinlock> l(mutex_);
140
2.72k
    return read_time_for_backfill_;
141
2.72k
  }
142
143
7.82k
  // Plain read of leader_term_; no lock is taken here.
  int64_t leader_term() const {
144
7.82k
    return leader_term_;
145
7.82k
  }
146
147
  // NOTE(review): returns `const std::string` by value; the top-level const stops
  // callers from moving out of the returned temporary.
  const std::string GetNamespaceName() const;
148
149
2.72k
  const std::vector<IndexInfoPB>& index_infos() const { return index_infos_; }
150
151
  // Returns a set by (const) value, so each call produces a fresh container.
  const std::unordered_set<TableId> indexes_to_build() const;
152
153
2.72k
  // Forwards to indexed_table_->id().
  const TableId& indexed_table_id() const { return indexed_table_->id(); }
154
155
  Status UpdateRowsProcessedForIndexTable(const uint64_t number_rows_processed);
156
157
 private:
158
  void LaunchComputeSafeTimeForRead();
159
  void LaunchBackfill();
160
161
  CHECKED_STATUS MarkAllIndexesAsFailed();
162
  CHECKED_STATUS MarkAllIndexesAsSuccess();
163
164
  CHECKED_STATUS MarkIndexesAsFailed(
165
      const std::unordered_set<TableId>& indexes, const std::string& message);
  // NOTE(review): `message` is declared as unqualified `string` and passed by const
  // value, whereas MarkIndexesAsFailed above takes `const std::string&`. The unqualified
  // name presumably resolves through a using-declaration in an included header --
  // TODO confirm.
166
  CHECKED_STATUS MarkIndexesAsDesired(
167
      const std::unordered_set<TableId>& index_ids, BackfillJobPB_State state,
168
      const string message);
169
170
  CHECKED_STATUS AlterTableStateToAbort();
171
  CHECKED_STATUS AlterTableStateToSuccess();
172
173
  void CheckIfDone();
174
  CHECKED_STATUS UpdateIndexPermissionsForIndexes();
175
  CHECKED_STATUS ClearCheckpointStateInTablets();
176
177
  // We want to prevent major compactions from garbage collecting delete markers
178
  // on an index table, until the backfill process is complete.
179
  // This API is used at the end of a successful backfill to enable major compactions
180
  // to gc delete markers on an index table.
181
  CHECKED_STATUS AllowCompactionsToGCDeleteMarkers(const TableId& index_table_id);
182
183
  // Send the "backfill done request" to all tablets of the specified table.
184
  CHECKED_STATUS SendRpcToAllowCompactionsToGCDeleteMarkers(
185
      const scoped_refptr<TableInfo> &index_table);
186
  // Send the "backfill done request" to the specified tablet.
187
  CHECKED_STATUS SendRpcToAllowCompactionsToGCDeleteMarkers(
188
      const scoped_refptr<TabletInfo> &index_table_tablet, const std::string &table_id);
189
190
  // Raw pointers returned by master() / threadpool(); ownership is not expressed in
  // this header.
  Master* master_;
191
  ThreadPool* callback_pool_;
192
  const scoped_refptr<TableInfo> indexed_table_;
193
  const std::vector<IndexInfoPB> index_infos_;
194
  // schema_version_ and leader_term_ have no in-class initializer; presumably the
  // out-of-line constructor sets them -- TODO confirm.
  int32_t schema_version_;
195
  int64_t leader_term_;
196
  // No in-class initializer; presumably set by the constructor -- TODO confirm.
  std::atomic<uint64> number_rows_processed_;
197
198
  // Both flags start out false and are read with acquire loads by done() and
  // timestamp_chosen().
  std::atomic_bool done_{false};
199
  std::atomic_bool timestamp_chosen_{false};
200
  // No in-class initializers for these two counters; presumably set before use
  // by code outside this header -- TODO confirm.
  std::atomic<size_t> tablets_pending_;
201
  std::atomic<size_t> num_tablets_;
202
  std::shared_ptr<BackfillTableJob> backfill_job_;
203
  // Declared mutable so that const accessors such as read_time_for_backfill() can lock it.
  mutable simple_spinlock mutex_;
204
  // Guarded by mutex_; starts at HybridTime::kMin.
  HybridTime read_time_for_backfill_ GUARDED_BY(mutex_){HybridTime::kMin};
205
  const std::unordered_set<TableId> requested_index_ids_;
206
  const std::string requested_index_names_;
207
208
  const scoped_refptr<NamespaceInfo> ns_info_;
209
};
210
211
// server::MonitoredTask implementation associated with a BackfillTable. It records a
// start and a completion timestamp, keeps an atomic task state, and caches the
// requested index names taken from the BackfillTable at construction time.
class BackfillTableJob : public server::MonitoredTask {
212
 public:
  // Stamps start_timestamp_ with MonoTime::Now() and keeps a shared reference to
  // backfill_table.
213
  explicit BackfillTableJob(std::shared_ptr<BackfillTable> backfill_table)
214
      : start_timestamp_(MonoTime::Now()),
215
        backfill_table_(backfill_table),
216
452
        // Reads through the member backfill_table_, which is declared before
        // requested_index_names_ and is therefore already initialized at this point.
        requested_index_names_(backfill_table_->requested_index_names()) {}
217
218
0
  Type type() const override { return BACKFILL_TABLE; }
219
220
0
  std::string type_name() const override { return "Backfill Table"; }
221
222
0
  MonoTime start_timestamp() const override { return start_timestamp_; }
223
224
0
  // Returns completion_timestamp_, which is only assigned in MarkDone().
  MonoTime completion_timestamp() const override {
225
0
    return completion_timestamp_;
226
0
  }
227
228
  std::string description() const override;
229
230
872
  // Acquire-load of the atomic task state.
  server::MonitoredTaskState state() const override {
231
872
    return state_.load(std::memory_order_acquire);
232
872
  }
233
234
  void SetState(server::MonitoredTaskState new_state);
235
236
  server::MonitoredTaskState AbortAndReturnPrevState(const Status& status) override;
237
238
419
  // Records the completion time and releases this job's reference to the BackfillTable.
  // BackfillTable in turn holds a shared_ptr to its job (backfill_job_), so dropping the
  // reference here presumably breaks that ownership cycle -- TODO confirm.
  // NOTE(review): completion_timestamp_ and backfill_table_ are plain (non-atomic)
  // members and no lock is taken here; confirm callers cannot race with readers.
  void MarkDone() {
239
419
    completion_timestamp_ = MonoTime::Now();
240
419
    backfill_table_ = nullptr;
241
419
  }
242
243
 private:
244
  MonoTime start_timestamp_, completion_timestamp_;
245
  // Starts in kWaiting.
  std::atomic<server::MonitoredTaskState> state_{server::MonitoredTaskState::kWaiting};
246
  // Reset to nullptr by MarkDone().
  std::shared_ptr<BackfillTable> backfill_table_;
247
  // Copy of the index names taken at construction; it stays valid after
  // backfill_table_ has been reset.
  const std::string requested_index_names_;
248
};
249
250
// A background task which is responsible for backfilling rows from a given
251
// tablet in the indexed table.
//
// Most accessors below simply forward to the owning BackfillTable held in
// backfill_table_.
// NOTE(review): several of these forwarding accessors are not const-qualified even
// though they do not modify this object.
252
class BackfillTablet : public std::enable_shared_from_this<BackfillTablet> {
253
 public:
254
  BackfillTablet(
255
      std::shared_ptr<BackfillTable> backfill_table, const scoped_refptr<TabletInfo>& tablet);
256
257
2.62k
  // Entry point; simply delegates to LaunchNextChunkOrDone().
  void Launch() { LaunchNextChunkOrDone(); }
258
259
  void LaunchNextChunkOrDone();
  // NOTE(review): `boost::optional<string>` uses unqualified `string`, while the rest
  // of this class spells std::string -- presumably a using-declaration from an
  // included header; TODO confirm.
260
  void Done(
261
      const Status& status,
262
      const boost::optional<string>& backfilled_until,
263
      const uint64_t number_rows_processed,
264
      const std::unordered_set<TableId>& failed_indexes);
265
266
8.16k
  Master* master() { return backfill_table_->master(); }
267
268
2.72k
  ThreadPool* threadpool() { return backfill_table_->threadpool(); }
269
270
2.72k
  HybridTime read_time_for_backfill() {
271
2.72k
    return backfill_table_->read_time_for_backfill();
272
2.72k
  }
273
274
2.72k
  // Returns the set by value, as BackfillTable::indexes_to_build() does.
  const std::unordered_set<TableId> indexes_to_build() {
275
2.72k
    return backfill_table_->indexes_to_build();
276
2.72k
  }
277
2.72k
  const TableId& indexed_table_id() { return backfill_table_->indexed_table_id(); }
278
2.72k
  const std::vector<IndexInfoPB>& index_infos() const { return backfill_table_->index_infos(); }
279
280
0
  const std::string& requested_index_names() { return backfill_table_->requested_index_names(); }
281
282
2.72k
  int32_t schema_version() { return backfill_table_->schema_version(); }
283
284
26.9k
  // Returns tablet_ by value, i.e. every call copies the scoped_refptr.
  const scoped_refptr<TabletInfo> tablet() { return tablet_; }
285
286
7.87k
  // Acquire-load of the done_ flag.
  bool done() const {
287
7.87k
    return done_.load(std::memory_order_acquire);
288
7.87k
  }
289
290
  std::string LogPrefix() const;
291
292
235
  const std::string GetNamespaceName() const { return backfill_table_->GetNamespaceName(); }
293
294
 private:
295
  CHECKED_STATUS UpdateBackfilledUntil(
296
      const string& backfilled_until, const uint64_t number_rows_processed);
297
298
  // The table-level backfill this tablet task belongs to.
  std::shared_ptr<BackfillTable> backfill_table_;
299
  const scoped_refptr<TabletInfo> tablet_;
300
  Partition partition_;
301
302
  // If non-empty, corresponds to the row in the tablet up to which
303
  // backfill has been already processed (non-inclusive). The next
304
  // request to backfill has to start backfilling from this row till
305
  // the end of the tablet range.
306
  std::string backfilled_until_;
307
  // Starts out false; read by done().
  std::atomic_bool done_{false};
308
};
309
310
// RetryingTSRpcTask bound to a single tablet on behalf of a BackfillTable. Judging by
// the response member's type (tserver::GetSafeTimeResponsePB) it asks a tablet server
// for the tablet's safe time; the request and response handling live in the
// out-of-line SendRequest() / HandleResponse().
class GetSafeTimeForTablet : public RetryingTSRpcTask {
311
 public:
  // Builds the base task from the BackfillTable's master and thread pool, a
  // PickLeaderReplica picker for `tablet`, and the tablet's table. The deadline is
  // then set to MonoTime::Max() so that the task never times out.
  // NOTE(review): the picker is created with a raw `new` wrapped in a std::unique_ptr.
312
  GetSafeTimeForTablet(
313
      std::shared_ptr<BackfillTable> backfill_table,
314
      const scoped_refptr<TabletInfo>& tablet,
315
      HybridTime min_cutoff)
316
      : RetryingTSRpcTask(
317
            backfill_table->master(), backfill_table->threadpool(),
318
            std::unique_ptr<TSPicker>(new PickLeaderReplica(tablet)), tablet->table().get()),
319
        backfill_table_(backfill_table),
320
        tablet_(tablet),
321
2.66k
        min_cutoff_(min_cutoff) {
322
2.66k
    deadline_ = MonoTime::Max();  // Never time out.
323
2.66k
  }
324
325
  void Launch();
326
327
3.65k
  Type type() const override { return ASYNC_GET_SAFE_TIME; }
328
329
10.6k
  std::string type_name() const override { return "Get SafeTime for Tablet"; }
330
331
5.31k
  // Human-readable description naming the tablet id and the indexes being backfilled.
  std::string description() const override {
332
5.31k
    return yb::Format("GetSafeTime for $0 Backfilling index tables $1",
333
5.31k
                      tablet_id(), backfill_table_->requested_index_names());
334
5.31k
  }
335
336
 private:
337
5.31k
  // Id of the tablet this task targets.
  TabletId tablet_id() const override { return tablet_->id(); }
338
339
  void HandleResponse(int attempt) override;
340
341
  bool SendRequest(int attempt) override;
342
343
  void UnregisterAsyncTaskCallback() override;
344
345
  TabletServerId permanent_uuid();
346
347
  tserver::GetSafeTimeResponsePB resp_;
348
  const std::shared_ptr<BackfillTable> backfill_table_;
349
  const scoped_refptr<TabletInfo> tablet_;
350
  // Stored from the constructor argument; how it is used is defined out of line
  // (presumably as a lower bound sent with the request -- TODO confirm).
  const HybridTime min_cutoff_;
351
};
352
353
// A background task which is responsible for backfilling rows in the partitions
354
// [start, end) on the indexed table.
//
// Implemented as a RetryingTSRpcTask that belongs to a BackfillTablet; the tablet it
// targets is obtained through backfill_tablet_.
355
class BackfillChunk : public RetryingTSRpcTask {
356
 public:
  // Defined out of line; start_key presumably initializes start_key_ -- TODO confirm.
357
  BackfillChunk(std::shared_ptr<BackfillTablet> backfill_tablet,
358
                const std::string& start_key);
359
360
  void Launch();
361
362
2.08k
  Type type() const override { return ASYNC_BACKFILL_TABLET_CHUNK; }
363
364
10.6k
  std::string type_name() const override { return "Backfill Index Table"; }
365
366
  std::string description() const override;
367
368
  MonoTime ComputeDeadline() override;
369
370
 private:
371
8.08k
  // Id of the tablet owned by the parent BackfillTablet.
  TabletId tablet_id() const override { return backfill_tablet_->tablet()->id(); }
372
373
  void HandleResponse(int attempt) override;
374
375
  bool SendRequest(int attempt) override;
376
377
  void UnregisterAsyncTaskCallback() override;
378
379
  TabletServerId permanent_uuid();
380
381
  // Retry tuning overrides; the values are computed out of line.
  int num_max_retries() override;
382
  int max_delay_ms() override;
383
384
7.93k
  // Table type of the table that owns the tablet being backfilled.
  TableType GetTableType() const {
385
7.93k
    return backfill_tablet_->tablet()->table()->GetTableType();
386
7.93k
  }
387
388
  const std::unordered_set<TableId> indexes_being_backfilled_;
389
  tserver::BackfillIndexResponsePB resp_;
390
  std::shared_ptr<BackfillTablet> backfill_tablet_;
391
  std::string start_key_;
392
  const std::string requested_index_names_;
393
};
394
395
}  // namespace master
396
}  // namespace yb
397
398
#endif  // YB_MASTER_BACKFILL_INDEX_H