YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/tserver/remote_bootstrap_client.cc
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include "yb/tserver/remote_bootstrap_client.h"
34
35
#include <glog/logging.h>
36
37
#include "yb/common/index.h"
38
#include "yb/common/schema.h"
39
#include "yb/common/wire_protocol.h"
40
41
#include "yb/consensus/consensus.h"
42
#include "yb/consensus/consensus_meta.h"
43
#include "yb/consensus/metadata.pb.h"
44
45
#include "yb/fs/fs_manager.h"
46
47
#include "yb/gutil/strings/substitute.h"
48
#include "yb/gutil/walltime.h"
49
50
#include "yb/rpc/rpc_controller.h"
51
52
#include "yb/tablet/tablet.pb.h"
53
#include "yb/tablet/tablet_bootstrap_if.h"
54
#include "yb/tablet/tablet_metadata.h"
55
56
#include "yb/tserver/remote_bootstrap.pb.h"
57
#include "yb/tserver/remote_bootstrap.proxy.h"
58
#include "yb/tserver/remote_bootstrap_snapshots.h"
59
#include "yb/tserver/ts_tablet_manager.h"
60
61
#include "yb/util/env.h"
62
#include "yb/util/env_util.h"
63
#include "yb/util/fault_injection.h"
64
#include "yb/util/flag_tags.h"
65
#include "yb/util/logging.h"
66
#include "yb/util/net/net_util.h"
67
#include "yb/util/result.h"
68
#include "yb/util/scope_exit.h"
69
#include "yb/util/size_literals.h"
70
#include "yb/util/status_log.h"
71
72
using namespace yb::size_literals;
73
74
DEFINE_int32(remote_bootstrap_begin_session_timeout_ms, 5000,
75
             "Tablet server RPC client timeout for BeginRemoteBootstrapSession calls.");
76
TAG_FLAG(remote_bootstrap_begin_session_timeout_ms, hidden);
77
78
DEFINE_int32(remote_bootstrap_end_session_timeout_sec, 15,
79
             "Tablet server RPC client timeout for EndRemoteBootstrapSession calls. "
80
             "The timeout is usually a large value because we have to wait for the remote server "
81
             "to get a CHANGE_ROLE config change accepted.");
82
TAG_FLAG(remote_bootstrap_end_session_timeout_sec, hidden);
83
84
DEFINE_bool(remote_bootstrap_save_downloaded_metadata, false,
85
            "Save copies of the downloaded remote bootstrap files for debugging purposes. "
86
            "Note: This is only intended for debugging and should not be normally used!");
87
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, advanced);
88
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, hidden);
89
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, runtime);
90
91
DEFINE_int32(committed_config_change_role_timeout_sec, 30,
92
             "Number of seconds to wait for the CHANGE_ROLE to be in the committed config before "
93
             "timing out. ");
94
TAG_FLAG(committed_config_change_role_timeout_sec, hidden);
95
96
DEFINE_test_flag(double, fault_crash_bootstrap_client_before_changing_role, 0.0,
97
                 "The remote bootstrap client will crash before closing the session with the "
98
                 "leader. Because the session won't be closed successfully, the leader won't issue "
99
                 "a ChangeConfig request to change this tserver role *(from PRE_VOTER or "
100
                 "PRE_OBSERVER to VOTER or OBSERVER respectively).");
101
102
DEFINE_test_flag(int32, simulate_long_remote_bootstrap_sec, 0,
103
                 "The remote bootstrap client will take at least this number of seconds to finish. "
104
                 "We use this for testing a scenario where a remote bootstrap takes longer than "
105
                 "follower_unavailable_considered_failed_sec seconds.");
106
107
DEFINE_test_flag(bool, download_partial_wal_segments, false, "");
108
109
DECLARE_int32(bytes_remote_bootstrap_durable_write_mb);
110
111
namespace yb {
112
namespace tserver {
113
114
using consensus::ConsensusMetadata;
115
using consensus::ConsensusStatePB;
116
using consensus::PeerMemberType;
117
using consensus::RaftConfigPB;
118
using consensus::RaftPeerPB;
119
using env_util::CopyFile;
120
using rpc::Messenger;
121
using std::shared_ptr;
122
using std::string;
123
using std::vector;
124
using strings::Substitute;
125
using tablet::TabletDataState;
126
using tablet::TabletDataState_Name;
127
using tablet::RaftGroupMetadata;
128
using tablet::RaftGroupMetadataPtr;
129
using tablet::TabletStatusListener;
130
using tablet::RaftGroupReplicaSuperBlockPB;
131
132
std::atomic<int32_t> remote_bootstrap_clients_started_{0};
133
134
RemoteBootstrapClient::RemoteBootstrapClient(std::string tablet_id, FsManager* fs_manager)
135
    : tablet_id_(std::move(tablet_id)),
136
      log_prefix_(Format("T $0 P $1: Remote bootstrap client: ", tablet_id_, fs_manager->uuid())),
137
2.02k
      downloader_(&log_prefix_, fs_manager) {
138
2.02k
  AddComponent<RemoteBootstrapSnapshotsComponent>();
139
2.02k
}
140
141
2.00k
RemoteBootstrapClient::~RemoteBootstrapClient() {
142
  // Note: Ending the remote bootstrap session releases anchors on the remote.
143
  // This assumes that succeeded_ only gets set to true in Finish() just before calling
144
  // EndRemoteSession. If this didn't happen, then close the session here.
145
2.00k
  if (!succeeded_) {
146
11
    LOG_WITH_PREFIX(INFO) << "Closing remote bootstrap session " << session_id()
147
11
                          << " in RemoteBootstrapClient destructor.";
148
11
    WARN_NOT_OK(EndRemoteSession(),
149
11
                LogPrefix() + "Unable to close remote bootstrap session " + session_id());
150
11
  }
151
2.00k
  if (started_) {
152
1.99k
    auto old_count = remote_bootstrap_clients_started_.fetch_sub(1, std::memory_order_acq_rel);
153
1.99k
    if (old_count < 1) {
154
0
      LOG_WITH_PREFIX(DFATAL) << "Invalid number of remote bootstrap sessions: " << old_count;
155
0
    }
156
1.99k
  }
157
2.00k
}
158
159
Status RemoteBootstrapClient::SetTabletToReplace(const RaftGroupMetadataPtr& meta,
160
153
                                                 int64_t caller_term) {
161
153
  CHECK_EQ(tablet_id_, meta->raft_group_id());
162
153
  TabletDataState data_state = meta->tablet_data_state();
163
153
  if (data_state != tablet::TABLET_DATA_TOMBSTONED) {
164
0
    return STATUS(IllegalState, Substitute("Tablet $0 not in tombstoned state: $1 ($2)",
165
0
                                           tablet_id_,
166
0
                                           TabletDataState_Name(data_state),
167
0
                                           data_state));
168
0
  }
169
170
153
  replace_tombstoned_tablet_ = true;
171
153
  meta_ = meta;
172
173
153
  int64_t last_logged_term = meta->tombstone_last_logged_opid().term;
174
153
  if (last_logged_term > caller_term) {
175
0
    return STATUS(InvalidArgument,
176
0
        Substitute("Leader has term $0 but the last log entry written by the tombstoned replica "
177
0
                   "for tablet $1 has higher term $2. Refusing remote bootstrap from leader",
178
0
                   caller_term, tablet_id_, last_logged_term));
179
0
  }
180
181
  // Load the old consensus metadata, if it exists.
182
153
  std::unique_ptr<ConsensusMetadata> cmeta;
183
153
  Status s = ConsensusMetadata::Load(
184
153
      &fs_manager(), tablet_id_, permanent_uuid(), &cmeta);
185
153
  if (s.IsNotFound()) {
186
    // The consensus metadata was not written to disk, possibly due to a failed
187
    // remote bootstrap.
188
0
    return Status::OK();
189
0
  }
190
153
  RETURN_NOT_OK(s);
191
153
  cmeta_ = std::move(cmeta);
192
153
  return Status::OK();
193
153
}
194
195
Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid,
196
                                    rpc::ProxyCache* proxy_cache,
197
                                    const HostPort& bootstrap_peer_addr,
198
                                    RaftGroupMetadataPtr* meta,
199
2.02k
                                    TSTabletManager* ts_manager) {
200
2.02k
  CHECK(!started_);
201
2.02k
  start_time_micros_ = GetCurrentTimeMicros();
202
203
2.02k
  LOG_WITH_PREFIX(INFO) << "Beginning remote bootstrap session"
204
2.02k
                        << " from remote peer at address " << bootstrap_peer_addr.ToString();
205
206
  // Set up an RPC proxy for the RemoteBootstrapService.
207
2.02k
  proxy_.reset(new RemoteBootstrapServiceProxy(proxy_cache, bootstrap_peer_addr));
208
209
2.02k
  BeginRemoteBootstrapSessionRequestPB req;
210
2.02k
  req.set_requestor_uuid(permanent_uuid());
211
2.02k
  req.set_tablet_id(tablet_id_);
212
213
2.02k
  rpc::RpcController controller;
214
2.02k
  controller.set_timeout(MonoDelta::FromMilliseconds(
215
2.02k
      FLAGS_remote_bootstrap_begin_session_timeout_ms));
216
217
  // Begin the remote bootstrap session with the remote peer.
218
2.02k
  BeginRemoteBootstrapSessionResponsePB resp;
219
2.02k
  auto status =
220
2.02k
      UnwindRemoteError(proxy_->BeginRemoteBootstrapSession(req, &resp, &controller), controller);
221
222
2.02k
  if (!status.ok()) {
223
3
    status = status.CloneAndPrepend("Unable to begin remote bootstrap session");
224
3
    LOG_WITH_PREFIX(WARNING) << status;
225
3
    return status;
226
3
  }
227
228
2.02k
  remote_tablet_data_state_ = resp.superblock().tablet_data_state();
229
2.02k
  if (!CanServeTabletData(remote_tablet_data_state_)) {
230
0
    Status s = STATUS(IllegalState, "Remote peer (" + bootstrap_peer_uuid + ")" +
231
0
                                    " is currently remotely bootstrapping itself!",
232
0
                                    resp.superblock().ShortDebugString());
233
0
    LOG_WITH_PREFIX(WARNING) << s.ToString();
234
0
    return s;
235
0
  }
236
237
2.02k
  LOG_WITH_PREFIX(INFO) << "Received superblock: " << resp.superblock().ShortDebugString();
238
2.02k
  RETURN_NOT_OK(MigrateSuperblock(resp.mutable_superblock()));
239
240
2.02k
  auto* kv_store = resp.mutable_superblock()->mutable_kv_store();
241
2.02k
  LOG_WITH_PREFIX(INFO) << "RocksDB files: " << yb::ToString(kv_store->rocksdb_files());
242
2.02k
  LOG_WITH_PREFIX(INFO) << "Snapshot files: " << yb::ToString(kv_store->snapshot_files());
243
2.02k
  if (first_wal_seqno_) {
244
0
    LOG_WITH_PREFIX(INFO) << "First WAL segment: " << first_wal_seqno_;
245
2.02k
  } else {
246
2.02k
    LOG_WITH_PREFIX(INFO) << "Log files: " << yb::ToString(resp.deprecated_wal_segment_seqnos());
247
2.02k
  }
248
249
2.02k
  const TableId table_id = resp.superblock().primary_table_id();
250
2.02k
  const bool colocated = resp.superblock().colocated();
251
2.02k
  const tablet::TableInfoPB* table_ptr = nullptr;
252
2.02k
  for (auto& table_pb : kv_store->tables()) {
253
2.02k
    if (table_pb.table_id() == table_id) {
254
2.02k
      table_ptr = &table_pb;
255
2.02k
      break;
256
2.02k
    }
257
2.02k
  }
258
2.02k
  if (!table_ptr) {
259
0
    return STATUS(InvalidArgument, Format(
260
0
        "Tablet $0: Superblock's KV-store doesn't contain primary table $1", tablet_id_,
261
0
        table_id));
262
0
  }
263
2.02k
  const auto& table = *table_ptr;
264
265
2.02k
  downloader_.Start(
266
2.02k
      proxy_, resp.session_id(), MonoDelta::FromMilliseconds(resp.session_idle_timeout_millis()));
267
2.02k
  LOG_WITH_PREFIX(INFO) << "Began remote bootstrap session " << session_id();
268
269
2.02k
  superblock_.reset(resp.release_superblock());
270
271
  // Clear fields rocksdb_dir and wal_dir so we get an error if we try to use them without setting
272
  // them to the right path.
273
2.02k
  kv_store->clear_rocksdb_dir();
274
2.02k
  superblock_->clear_wal_dir();
275
276
2.02k
  superblock_->set_tablet_data_state(tablet::TABLET_DATA_COPYING);
277
2.02k
  wal_seqnos_.assign(resp.deprecated_wal_segment_seqnos().begin(),
278
2.02k
                     resp.deprecated_wal_segment_seqnos().end());
279
2.02k
  if (
resp.has_first_wal_segment_seqno()2.02k
) {
280
2.02k
    first_wal_seqno_ = resp.first_wal_segment_seqno();
281
18.4E
  } else {
282
18.4E
    first_wal_seqno_ = 0;
283
18.4E
  }
284
2.02k
  remote_committed_cstate_.reset(resp.release_initial_committed_cstate());
285
286
2.02k
  Schema schema;
287
2.02k
  RETURN_NOT_OK_PREPEND(SchemaFromPB(
288
2.02k
      table.schema(), &schema), "Cannot deserialize schema from remote superblock");
289
2.02k
  string data_root_dir;
290
2.02k
  string wal_root_dir;
291
2.02k
  if (replace_tombstoned_tablet_) {
292
    // Also validate the term of the bootstrap source peer, in case they are
293
    // different. This is a sanity check that protects us in case a bug or
294
    // misconfiguration causes us to attempt to bootstrap from an out-of-date
295
    // source peer, even after passing the term check from the caller in
296
    // SetTabletToReplace().
297
151
    int64_t last_logged_term = meta_->tombstone_last_logged_opid().term;
298
151
    if (last_logged_term > remote_committed_cstate_->current_term()) {
299
0
      return STATUS(InvalidArgument,
300
0
          Substitute("Tablet $0: Bootstrap source has term $1 but "
301
0
                     "tombstoned replica has last-logged opid with higher term $2. "
302
0
                      "Refusing remote bootstrap from source peer $3",
303
0
                      tablet_id_,
304
0
                      remote_committed_cstate_->current_term(),
305
0
                      last_logged_term,
306
0
                      bootstrap_peer_uuid));
307
0
    }
308
    // Replace rocksdb_dir in the received superblock with our rocksdb_dir.
309
151
    kv_store->set_rocksdb_dir(meta_->rocksdb_dir());
310
311
    // Replace wal_dir in the received superblock with our assigned wal_dir.
312
151
    superblock_->set_wal_dir(meta_->wal_dir());
313
314
    // This will flush to disk, but we set the data state to COPYING above.
315
151
    RETURN_NOT_OK_PREPEND(meta_->ReplaceSuperBlock(*superblock_),
316
151
                          "Remote bootstrap unable to replace superblock on tablet " +
317
151
                          tablet_id_);
318
    // Update the directory assignment mapping.
319
151
    data_root_dir = meta_->data_root_dir();
320
151
    wal_root_dir = meta_->wal_root_dir();
321
152
    if (
ts_manager != nullptr151
) {
322
152
      ts_manager->RegisterDataAndWalDir(&fs_manager(),
323
152
                                        table_id,
324
152
                                        meta_->raft_group_id(),
325
152
                                        data_root_dir,
326
152
                                        wal_root_dir);
327
152
    }
328
1.87k
  } else {
329
1.87k
    Partition partition;
330
1.87k
    Partition::FromPB(superblock_->partition(), &partition);
331
1.87k
    PartitionSchema partition_schema;
332
1.87k
    RETURN_NOT_OK(PartitionSchema::FromPB(table.partition_schema(), schema, &partition_schema));
333
    // Create the superblock on disk.
334
1.87k
    if (ts_manager != nullptr) {
335
1.81k
      ts_manager->GetAndRegisterDataAndWalDir(&fs_manager(),
336
1.81k
                                              table_id,
337
1.81k
                                              tablet_id_,
338
1.81k
                                              &data_root_dir,
339
1.81k
                                              &wal_root_dir);
340
1.81k
    }
341
1.87k
    auto table_info = std::make_shared<tablet::TableInfo>(
342
1.87k
        table_id, table.namespace_name(), table.table_name(), table.table_type(), schema,
343
1.87k
        IndexMap(table.indexes()),
344
1.87k
        table.has_index_info() ? 
boost::optional<IndexInfo>(table.index_info())18
:
boost::none1.85k
,
345
1.87k
        table.schema_version(), partition_schema);
346
1.87k
    auto create_result = RaftGroupMetadata::CreateNew(tablet::RaftGroupMetadataData {
347
1.87k
        .fs_manager = &fs_manager(),
348
1.87k
        .table_info = table_info,
349
1.87k
        .raft_group_id = tablet_id_,
350
1.87k
        .partition = partition,
351
1.87k
        .tablet_data_state = tablet::TABLET_DATA_COPYING,
352
1.87k
        .colocated = colocated }, data_root_dir, wal_root_dir);
353
1.87k
    if (ts_manager != nullptr && 
!create_result.ok()1.81k
) {
354
0
      ts_manager->UnregisterDataWalDir(table_id, tablet_id_, data_root_dir, wal_root_dir);
355
0
    }
356
1.87k
    RETURN_NOT_OK(create_result);
357
1.87k
    meta_ = std::move(*create_result);
358
359
1.87k
    vector<DeletedColumn> deleted_cols;
360
1.87k
    for (const DeletedColumnPB& col_pb : table.deleted_cols()) {
361
0
      DeletedColumn col;
362
0
      RETURN_NOT_OK(DeletedColumn::FromPB(col_pb, &col));
363
0
      deleted_cols.push_back(col);
364
0
    }
365
1.87k
    meta_->SetSchema(schema,
366
1.87k
                     IndexMap(table.indexes()),
367
1.87k
                     deleted_cols,
368
1.87k
                     table.schema_version());
369
370
    // Replace rocksdb_dir in the received superblock with our rocksdb_dir.
371
1.87k
    kv_store->set_rocksdb_dir(meta_->rocksdb_dir());
372
373
    // Replace wal_dir in the received superblock with our assigned wal_dir.
374
1.87k
    superblock_->set_wal_dir(meta_->wal_dir());
375
1.87k
  }
376
377
2.02k
  started_ = true;
378
2.02k
  auto old_count = remote_bootstrap_clients_started_.fetch_add(1, std::memory_order_acq_rel);
379
2.02k
  if (old_count < 0) {
380
0
    LOG_WITH_PREFIX(DFATAL) << "Invalid number of remote bootstrap sessions: " << old_count;
381
0
    remote_bootstrap_clients_started_.store(0, std::memory_order_release);
382
0
  }
383
384
2.02k
  if (
meta2.02k
) {
385
2.02k
    *meta = meta_;
386
2.02k
  }
387
2.02k
  return Status::OK();
388
2.02k
}
389
390
2.02k
Status RemoteBootstrapClient::FetchAll(TabletStatusListener* status_listener) {
391
2.02k
  CHECK(started_);
392
2.02k
  status_listener_ = CHECK_NOTNULL(status_listener);
393
394
2.02k
  
VLOG_WITH_PREFIX0
(2) << "Fetching table_type: " << TableType_Name(meta_->table_type())0
;
395
396
2.02k
  new_superblock_ = *superblock_;
397
  // Replace rocksdb_dir with our rocksdb_dir
398
2.02k
  new_superblock_.mutable_kv_store()->set_rocksdb_dir(meta_->rocksdb_dir());
399
400
2.02k
  RETURN_NOT_OK(DownloadRocksDBFiles());
401
2.02k
  RETURN_NOT_OK(DownloadWALs());
402
2.02k
  for (const auto& component : components_) {
403
2.02k
    RETURN_NOT_OK(component->Download());
404
2.02k
  }
405
406
  // We sleep here to simulate the transfer of very large files.
407
2.02k
  if (PREDICT_FALSE(FLAGS_TEST_simulate_long_remote_bootstrap_sec > 0)) {
408
53
    LOG_WITH_PREFIX(INFO) << "Sleeping " << FLAGS_TEST_simulate_long_remote_bootstrap_sec
409
53
                          << " seconds to simulate the transfer of very large files";
410
53
    SleepFor(MonoDelta::FromSeconds(FLAGS_TEST_simulate_long_remote_bootstrap_sec));
411
53
  }
412
2.02k
  return Status::OK();
413
2.02k
}
414
415
2.01k
Status RemoteBootstrapClient::Finish() {
416
2.01k
  CHECK(meta_);
417
2.01k
  CHECK(started_);
418
419
2.01k
  CHECK(downloaded_wal_);
420
18.4E
  CHECK(downloaded_rocksdb_files_) << "files not downloaded";
421
422
2.01k
  RETURN_NOT_OK(WriteConsensusMetadata());
423
424
  // Replace tablet metadata superblock. This will set the tablet metadata state
425
  // to remote_tablet_data_state_.
426
2.01k
  LOG_WITH_PREFIX(INFO) << "Remote bootstrap complete. Replacing tablet superblock.";
427
2.01k
  UpdateStatusMessage("Replacing tablet superblock");
428
2.01k
  new_superblock_.set_tablet_data_state(remote_tablet_data_state_);
429
2.01k
  RETURN_NOT_OK(meta_->ReplaceSuperBlock(new_superblock_));
430
431
2.01k
  if (FLAGS_remote_bootstrap_save_downloaded_metadata) {
432
0
    string meta_path = fs_manager().GetRaftGroupMetadataPath(tablet_id_);
433
0
    string meta_copy_path = Substitute("$0.copy.$1.tmp", meta_path, start_time_micros_);
434
0
    RETURN_NOT_OK_PREPEND(CopyFile(Env::Default(), meta_path, meta_copy_path,
435
0
                                   WritableFileOptions()),
436
0
                          "Unable to make copy of tablet metadata");
437
0
  }
438
439
2.01k
  succeeded_ = true;
440
441
2.01k
  MAYBE_FAULT(FLAGS_TEST_fault_crash_bootstrap_client_before_changing_role);
442
443
2.01k
  RETURN_NOT_OK_PREPEND(
444
2.01k
      EndRemoteSession(), "Error closing remote bootstrap session " + session_id());
445
446
2.01k
  return Status::OK();
447
2.01k
}
448
449
Status RemoteBootstrapClient::VerifyChangeRoleSucceeded(
450
2.01k
    const shared_ptr<consensus::Consensus>& shared_consensus) {
451
452
2.01k
  if (!shared_consensus) {
453
0
    return STATUS(InvalidArgument, "Invalid consensus object");
454
0
  }
455
456
2.01k
  auto start = MonoTime::Now();
457
2.01k
  auto timeout = MonoDelta::FromSeconds(FLAGS_committed_config_change_role_timeout_sec);
458
2.01k
  int backoff_ms = 1;
459
2.01k
  const int kMaxBackoffMs = 256;
460
2.01k
  RaftConfigPB committed_config;
461
462
77.7M
  do {
463
77.7M
    committed_config = shared_consensus->CommittedConfig();
464
197M
    for (const auto &peer : committed_config.peers()) {
465
197M
      if (peer.permanent_uuid() != permanent_uuid()) {
466
197M
        continue;
467
197M
      }
468
469
20.6k
      if (peer.member_type() == PeerMemberType::VOTER ||
470
20.6k
          
peer.member_type() == PeerMemberType::OBSERVER18.7k
) {
471
1.98k
        return Status::OK();
472
18.6k
      } else {
473
18.6k
        SleepFor(MonoDelta::FromMilliseconds(backoff_ms));
474
18.6k
        backoff_ms = min(backoff_ms << 1, kMaxBackoffMs);
475
18.6k
        break;
476
18.6k
      }
477
20.6k
    }
478
77.7M
  } while (
MonoTime::Now().GetDeltaSince(start).LessThan(timeout)77.7M
);
479
480
28
  return STATUS(TimedOut,
481
2.01k
                Substitute("Timed out waiting member type of peer $0 to change in the committed "
482
2.01k
                           "config $1", permanent_uuid(),
483
2.01k
                           committed_config.ShortDebugString()));
484
2.01k
}
485
486
6.72k
void RemoteBootstrapClient::UpdateStatusMessage(const string& message) {
487
6.72k
  if (status_listener_ != nullptr) {
488
6.72k
    status_listener_->StatusMessage("RemoteBootstrap: " + message);
489
6.72k
  }
490
6.72k
}
491
492
2.02k
Status RemoteBootstrapClient::EndRemoteSession() {
493
2.02k
  if (!started_) {
494
3
    return Status::OK();
495
3
  }
496
497
2.01k
  rpc::RpcController controller;
498
2.01k
  controller.set_timeout(MonoDelta::FromSeconds(FLAGS_remote_bootstrap_end_session_timeout_sec));
499
500
2.01k
  EndRemoteBootstrapSessionRequestPB req;
501
2.01k
  req.set_session_id(session_id());
502
2.01k
  req.set_is_success(succeeded_);
503
2.01k
  req.set_keep_session(succeeded_);
504
2.01k
  EndRemoteBootstrapSessionResponsePB resp;
505
506
2.01k
  LOG_WITH_PREFIX(INFO) << "Ending remote bootstrap session " << session_id();
507
2.01k
  auto status = proxy_->EndRemoteBootstrapSession(req, &resp, &controller);
508
2.01k
  if (
status.ok()2.01k
) {
509
2.01k
    remove_required_ = resp.session_kept();
510
2.01k
    LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id()
511
2.01k
                          << " ended successfully";
512
2.01k
    return Status::OK();
513
2.01k
  }
514
515
18.4E
  if (status.IsTimedOut()) {
516
    // Ignore timeout errors since the server could have sent the ChangeConfig request and died
517
    // before replying. We need to check the config to verify that this server's role changed as
518
    // expected, in which case, the remote bootstrap was completed successfully.
519
0
    LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id() << " timed out";
520
0
    return Status::OK();
521
0
  }
522
523
18.4E
  status = UnwindRemoteError(status, controller);
524
18.4E
  return status.CloneAndPrepend(
525
18.4E
      Format("Failed to end remote bootstrap session $0", session_id()));
526
18.4E
}
527
528
1.93k
Status RemoteBootstrapClient::Remove() {
529
1.93k
  if (!remove_required_) {
530
0
    return Status::OK();
531
0
  }
532
533
1.93k
  rpc::RpcController controller;
534
1.93k
  controller.set_timeout(MonoDelta::FromSeconds(FLAGS_remote_bootstrap_end_session_timeout_sec));
535
536
1.93k
  RemoveSessionRequestPB req;
537
1.93k
  req.set_session_id(session_id());
538
1.93k
  RemoveSessionResponsePB resp;
539
540
1.93k
  LOG_WITH_PREFIX(INFO) << "Removing remote bootstrap session " << session_id();
541
1.93k
  const auto status = proxy_->RemoveSession(req, &resp, &controller);
542
1.93k
  if (status.ok()) {
543
1.93k
    LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id() << " removed successfully";
544
1.93k
    return Status::OK();
545
1.93k
  }
546
547
0
  return UnwindRemoteError(status, controller).CloneAndPrepend(
548
0
      Format("Failure removing remote bootstrap session $0", session_id()));
549
1.93k
}
550
551
2.02k
Status RemoteBootstrapClient::DownloadWALs() {
552
2.02k
  CHECK(started_);
553
554
  // Delete and recreate WAL dir if it already exists, to ensure stray files are
555
  // not kept from previous bootstraps and runs.
556
2.02k
  const string& wal_dir = meta_->wal_dir();
557
2.02k
  if (env().FileExists(wal_dir)) {
558
0
    RETURN_NOT_OK(env().DeleteRecursively(wal_dir));
559
0
  }
560
2.02k
  auto wal_table_top_dir = DirName(wal_dir);
561
2.02k
  RETURN_NOT_OK_PREPEND(fs_manager().CreateDirIfMissing(wal_table_top_dir),
562
2.02k
                        Substitute("Failed to create WAL table directory $0", wal_table_top_dir));
563
564
  // fsync() parent dir.
565
2.02k
  RETURN_NOT_OK_PREPEND(env().SyncDir(DirName(wal_table_top_dir)),
566
2.02k
                        Substitute("Failed to sync WAL root directory $0",
567
2.02k
                                   DirName(wal_table_top_dir)));
568
569
2.02k
  RETURN_NOT_OK_PREPEND(env().CreateDir(wal_dir),
570
2.02k
                        Substitute("Failed to create WAL tablet directory $0", wal_dir));
571
572
  // fsync() parent dir.
573
2.02k
  RETURN_NOT_OK_PREPEND(env().SyncDir(wal_table_top_dir),
574
2.02k
                        Substitute("Failed to sync WAL table directory $0", wal_table_top_dir));
575
576
  // Download the WAL segments.
577
2.02k
  uint64_t counter = 0;
578
2.02k
  if (first_wal_seqno_) {
579
2.02k
    LOG_WITH_PREFIX(INFO) << "Starting download of WAL segments starting from sequence number "
580
2.02k
                          << first_wal_seqno_;
581
4.71k
    for (;;) {
582
4.71k
      uint64_t segment_seqno = first_wal_seqno_ + counter;
583
4.71k
      UpdateStatusMessage(
584
4.71k
          Format("Downloading WAL segment with seq. number $0 (#$1 in this session)",
585
4.71k
                 segment_seqno, counter + 1));
586
4.71k
      auto download_status = DownloadWAL(segment_seqno);
587
4.71k
      if (!download_status.ok()) {
588
2.02k
        std::string message_suffix;
589
2.02k
        if (counter > 0) {
590
2.01k
          message_suffix = Format(", downloaded segments in range: $0..$1",
591
2.01k
                                      first_wal_seqno_, segment_seqno - 1);
592
2.01k
        } else {
593
1
          message_suffix = ", no segments were downloaded";
594
1
        }
595
2.02k
        if (download_status.IsNotFound()) {
596
2.01k
          LOG_WITH_PREFIX(INFO) << "Stopped downloading WAL segments" << message_suffix;
597
2.01k
          break;
598
2.01k
        }
599
2
        LOG_WITH_PREFIX(WARNING) << "Downloading WAL segments failed: "
600
2
                                 << download_status << message_suffix;
601
2
        return download_status;
602
2.02k
      }
603
2.69k
      ++counter;
604
2.69k
      if (PREDICT_FALSE(FLAGS_TEST_download_partial_wal_segments) && 
counter > 00
) {
605
0
        LOG(INFO) << "Flag TEST_download_partial_wal_segments set to true. "
606
0
                  << "Stopping WAL files download after one file has been downloaded.";
607
0
        break;
608
0
      }
609
2.69k
    }
610
2.02k
  } else {
611
0
    auto num_segments = wal_seqnos_.size();
612
0
    LOG_WITH_PREFIX(INFO) << "Starting download of " << num_segments << " WAL segments...";
613
0
    for (uint64_t seg_seqno : wal_seqnos_) {
614
0
      UpdateStatusMessage(Substitute("Downloading WAL segment with seq. number $0 ($1/$2)",
615
0
                                     seg_seqno, counter + 1, num_segments));
616
0
      RETURN_NOT_OK(DownloadWAL(seg_seqno));
617
0
      ++counter;
618
0
    }
619
0
  }
620
621
2.02k
  
if (2.01k
FLAGS_bytes_remote_bootstrap_durable_write_mb != 02.01k
) {
622
    // Persist directory so that recently downloaded files are accessible.
623
2.02k
    RETURN_NOT_OK_PREPEND(env().SyncDir(wal_table_top_dir),
624
2.02k
                          Substitute("Failed to sync WAL table directory $0", wal_table_top_dir));
625
2.02k
  }
626
627
2.01k
  downloaded_wal_ = true;
628
2.01k
  return Status::OK();
629
2.01k
}
630
631
2.02k
Status RemoteBootstrapClient::CreateTabletDirectories(const string& db_dir, FsManager* fs) {
632
  // Create the directory table-uuid first.
633
2.02k
  RETURN_NOT_OK_PREPEND(fs->CreateDirIfMissing(DirName(db_dir)),
634
2.02k
                        Substitute("Failed to create RocksDB table directory $0",
635
2.02k
                                   DirName(db_dir)));
636
637
2.02k
  RETURN_NOT_OK_PREPEND(fs->CreateDirIfMissing(db_dir),
638
2.02k
                        Substitute("Failed to create RocksDB tablet directory $0",
639
2.02k
                                   db_dir));
640
641
2.02k
  for (const auto& component : components_) {
642
2.02k
    RETURN_NOT_OK(component->CreateDirectories(db_dir, fs));
643
2.02k
  }
644
645
2.02k
  return Status::OK();
646
2.02k
}
647
648
2.02k
Status RemoteBootstrapClient::DownloadRocksDBFiles() {
649
2.02k
  const auto& rocksdb_dir = meta_->rocksdb_dir();
650
651
2.02k
  RETURN_NOT_OK(CreateTabletDirectories(rocksdb_dir, meta_->fs_manager()));
652
653
2.02k
  DataIdPB data_id;
654
2.02k
  data_id.set_type(DataIdPB::ROCKSDB_FILE);
655
6.53k
  for (auto const& file_pb : new_superblock_.kv_store().rocksdb_files()) {
656
6.53k
    auto start = MonoTime::Now();
657
6.53k
    RETURN_NOT_OK(downloader_.DownloadFile(file_pb, rocksdb_dir, &data_id));
658
6.52k
    auto elapsed = MonoTime::Now().GetDeltaSince(start);
659
6.52k
    LOG_WITH_PREFIX(INFO)
660
6.52k
        << "Downloaded file " << file_pb.name() << " of size " << file_pb.size_bytes()
661
6.52k
        << " in " << elapsed.ToSeconds() << " seconds";
662
6.52k
  }
663
664
  // To avoid adding new file type to remote bootstrap we move intents as subdir of regular DB.
665
2.02k
  auto intents_tmp_dir = JoinPathSegments(rocksdb_dir, tablet::kIntentsSubdir);
666
2.02k
  if (env().FileExists(intents_tmp_dir)) {
667
103
    auto intents_dir = rocksdb_dir + tablet::kIntentsDBSuffix;
668
103
    LOG_WITH_PREFIX(INFO) << "Moving intents DB: " << intents_tmp_dir << " => " << intents_dir;
669
103
    RETURN_NOT_OK(env().RenameFile(intents_tmp_dir, intents_dir));
670
103
  }
671
2.02k
  if (FLAGS_bytes_remote_bootstrap_durable_write_mb != 0) {
672
    // Persist directory so that recently downloaded files are accessible.
673
2.02k
    RETURN_NOT_OK(env().SyncDir(rocksdb_dir));
674
2.02k
  }
675
2.02k
  downloaded_rocksdb_files_ = true;
676
2.02k
  return Status::OK();
677
2.02k
}
678
679
4.71k
Status RemoteBootstrapClient::DownloadWAL(uint64_t wal_segment_seqno) {
680
4.71k
  
VLOG_WITH_PREFIX0
(1) << "Downloading WAL segment with seqno " << wal_segment_seqno0
;
681
4.71k
  DataIdPB data_id;
682
4.71k
  data_id.set_type(DataIdPB::LOG_SEGMENT);
683
4.71k
  data_id.set_wal_segment_seqno(wal_segment_seqno);
684
4.71k
  const string dest_path = fs_manager().GetWalSegmentFilePath(meta_->wal_dir(), wal_segment_seqno);
685
4.71k
  const auto temp_dest_path = dest_path + ".tmp";
686
4.71k
  bool ok = false;
687
4.71k
  auto se = ScopeExit([this, &temp_dest_path, &ok] {
688
4.71k
    if (!ok) {
689
2.01k
      WARN_NOT_OK(env().DeleteFile(temp_dest_path),
690
2.01k
                  "Failed to delete temporary WAL segment");
691
2.01k
    }
692
4.71k
  });
693
694
4.71k
  WritableFileOptions opts;
695
4.71k
  opts.sync_on_close = true;
696
4.71k
  std::unique_ptr<WritableFile> writer;
697
4.71k
  RETURN_NOT_OK_PREPEND(env().NewWritableFile(opts, temp_dest_path, &writer),
698
4.71k
                        "Unable to open file for writing");
699
700
4.71k
  auto start = MonoTime::Now();
701
4.71k
  RETURN_NOT_OK_PREPEND(downloader_.DownloadFile(data_id, writer.get()),
702
4.71k
                        Substitute("Unable to download WAL segment with seq. number $0",
703
4.71k
                                   wal_segment_seqno));
704
2.69k
  RETURN_NOT_OK(env().RenameFile(temp_dest_path, dest_path));
705
2.69k
  auto elapsed = MonoTime::Now().GetDeltaSince(start);
706
2.69k
  LOG_WITH_PREFIX(INFO) << "Downloaded WAL segment with seq. number " << wal_segment_seqno
707
2.69k
                        << " of size " << writer->Size() << " in " << elapsed.ToSeconds()
708
2.69k
                        << " seconds";
709
2.69k
  ok = true;
710
711
2.69k
  return Status::OK();
712
2.69k
}
713
714
2.01k
Status RemoteBootstrapClient::WriteConsensusMetadata() {
715
  // If we didn't find a previous consensus meta file, create one.
716
2.01k
  if (!cmeta_) {
717
1.86k
    std::unique_ptr<ConsensusMetadata> cmeta;
718
1.86k
    return ConsensusMetadata::Create(&fs_manager(), tablet_id_, fs_manager().uuid(),
719
1.86k
                                     remote_committed_cstate_->config(),
720
1.86k
                                     remote_committed_cstate_->current_term(),
721
1.86k
                                     &cmeta);
722
1.86k
  }
723
724
  // Otherwise, update the consensus metadata to reflect the config and term
725
  // sent by the remote bootstrap source.
726
150
  cmeta_->MergeCommittedConsensusStatePB(*remote_committed_cstate_);
727
150
  RETURN_NOT_OK(cmeta_->Flush());
728
729
150
  if (FLAGS_remote_bootstrap_save_downloaded_metadata) {
730
0
    string cmeta_path = fs_manager().GetConsensusMetadataPath(tablet_id_);
731
0
    string cmeta_copy_path = Substitute("$0.copy.$1.tmp", cmeta_path, start_time_micros_);
732
0
    RETURN_NOT_OK_PREPEND(CopyFile(Env::Default(), cmeta_path, cmeta_copy_path,
733
0
                                   WritableFileOptions()),
734
0
                          "Unable to make copy of consensus metadata");
735
0
  }
736
737
150
  return Status::OK();
738
150
}
739
740
23.6k
Env& RemoteBootstrapClient::env() const {
741
23.6k
  return *fs_manager().env();
742
23.6k
}
743
744
197M
const std::string& RemoteBootstrapClient::permanent_uuid() const {
745
197M
  return fs_manager().uuid();
746
197M
}
747
748
} // namespace tserver
749
} // namespace yb