YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/tserver/remote_bootstrap_client.cc
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
33
#include "yb/tserver/remote_bootstrap_client.h"
34
35
#include <glog/logging.h>
36
37
#include "yb/common/index.h"
38
#include "yb/common/schema.h"
39
#include "yb/common/wire_protocol.h"
40
41
#include "yb/consensus/consensus.h"
42
#include "yb/consensus/consensus_meta.h"
43
#include "yb/consensus/metadata.pb.h"
44
45
#include "yb/fs/fs_manager.h"
46
47
#include "yb/gutil/strings/substitute.h"
48
#include "yb/gutil/walltime.h"
49
50
#include "yb/rpc/rpc_controller.h"
51
52
#include "yb/tablet/tablet.pb.h"
53
#include "yb/tablet/tablet_bootstrap_if.h"
54
#include "yb/tablet/tablet_metadata.h"
55
56
#include "yb/tserver/remote_bootstrap.pb.h"
57
#include "yb/tserver/remote_bootstrap.proxy.h"
58
#include "yb/tserver/remote_bootstrap_snapshots.h"
59
#include "yb/tserver/ts_tablet_manager.h"
60
61
#include "yb/util/env.h"
62
#include "yb/util/env_util.h"
63
#include "yb/util/fault_injection.h"
64
#include "yb/util/flag_tags.h"
65
#include "yb/util/logging.h"
66
#include "yb/util/net/net_util.h"
67
#include "yb/util/result.h"
68
#include "yb/util/scope_exit.h"
69
#include "yb/util/size_literals.h"
70
#include "yb/util/status_log.h"
71
72
using namespace yb::size_literals;
73
74
DEFINE_int32(remote_bootstrap_begin_session_timeout_ms, 5000,
75
             "Tablet server RPC client timeout for BeginRemoteBootstrapSession calls.");
76
TAG_FLAG(remote_bootstrap_begin_session_timeout_ms, hidden);
77
78
DEFINE_int32(remote_bootstrap_end_session_timeout_sec, 15,
79
             "Tablet server RPC client timeout for EndRemoteBootstrapSession calls. "
80
             "The timeout is usually a large value because we have to wait for the remote server "
81
             "to get a CHANGE_ROLE config change accepted.");
82
TAG_FLAG(remote_bootstrap_end_session_timeout_sec, hidden);
83
84
DEFINE_bool(remote_bootstrap_save_downloaded_metadata, false,
85
            "Save copies of the downloaded remote bootstrap files for debugging purposes. "
86
            "Note: This is only intended for debugging and should not be normally used!");
87
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, advanced);
88
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, hidden);
89
TAG_FLAG(remote_bootstrap_save_downloaded_metadata, runtime);
90
91
DEFINE_int32(committed_config_change_role_timeout_sec, 30,
92
             "Number of seconds to wait for the CHANGE_ROLE to be in the committed config before "
93
             "timing out. ");
94
TAG_FLAG(committed_config_change_role_timeout_sec, hidden);
95
96
DEFINE_test_flag(double, fault_crash_bootstrap_client_before_changing_role, 0.0,
97
                 "The remote bootstrap client will crash before closing the session with the "
98
                 "leader. Because the session won't be closed successfully, the leader won't issue "
99
                 "a ChangeConfig request to change this tserver role *(from PRE_VOTER or "
100
                 "PRE_OBSERVER to VOTER or OBSERVER respectively).");
101
102
DEFINE_test_flag(int32, simulate_long_remote_bootstrap_sec, 0,
103
                 "The remote bootstrap client will take at least this number of seconds to finish. "
104
                 "We use this for testing a scenario where a remote bootstrap takes longer than "
105
                 "follower_unavailable_considered_failed_sec seconds.");
106
107
DEFINE_test_flag(bool, download_partial_wal_segments, false, "");
108
109
DECLARE_int32(bytes_remote_bootstrap_durable_write_mb);
110
111
namespace yb {
112
namespace tserver {
113
114
using consensus::ConsensusMetadata;
115
using consensus::ConsensusStatePB;
116
using consensus::PeerMemberType;
117
using consensus::RaftConfigPB;
118
using consensus::RaftPeerPB;
119
using env_util::CopyFile;
120
using rpc::Messenger;
121
using std::shared_ptr;
122
using std::string;
123
using std::vector;
124
using strings::Substitute;
125
using tablet::TabletDataState;
126
using tablet::TabletDataState_Name;
127
using tablet::RaftGroupMetadata;
128
using tablet::RaftGroupMetadataPtr;
129
using tablet::TabletStatusListener;
130
using tablet::RaftGroupReplicaSuperBlockPB;
131
132
// Number of RemoteBootstrapClient instances that have completed Start() and have not been
// destroyed yet: incremented at the end of a successful Start() and decremented by the
// destructor of a started client.
std::atomic<int32_t> remote_bootstrap_clients_started_{0};
133
134
// Creates a client for the tablet identified by `tablet_id`.
//
// tablet_id:  id of the tablet to bootstrap; moved into the client.
// fs_manager: supplies the local server uuid for the log prefix and is handed to the
//             file downloader. It is dereferenced here, so it must not be null.
//
// The snapshots component is registered on every client.
RemoteBootstrapClient::RemoteBootstrapClient(std::string tablet_id, FsManager* fs_manager)
    : tablet_id_(std::move(tablet_id)),
      log_prefix_(
          Format("T $0 P $1: Remote bootstrap client: ", tablet_id_, fs_manager->uuid())),
      downloader_(&log_prefix_, fs_manager) {
  AddComponent<RemoteBootstrapSnapshotsComponent>();
}
140
141
1.43k
// Closes the remote session if Finish() never marked the bootstrap as succeeded, then
// gives back this client's slot in the global started-clients counter.
RemoteBootstrapClient::~RemoteBootstrapClient() {
  // Ending the session releases the anchors held on the remote. succeeded_ is only set in
  // Finish() right before it ends the session itself, so an unset flag means the session
  // still has to be closed from here.
  const bool must_close_session = !succeeded_;
  if (must_close_session) {
    LOG_WITH_PREFIX(INFO) << "Closing remote bootstrap session " << session_id()
                          << " in RemoteBootstrapClient destructor.";
    WARN_NOT_OK(EndRemoteSession(),
                LogPrefix() + "Unable to close remote bootstrap session " + session_id());
  }

  // Only clients that got through Start() were counted.
  if (!started_) {
    return;
  }

  const auto previous_count =
      remote_bootstrap_clients_started_.fetch_sub(1, std::memory_order_acq_rel);
  if (previous_count < 1) {
    LOG_WITH_PREFIX(DFATAL) << "Invalid number of remote bootstrap sessions: " << previous_count;
  }
}
158
159
// Marks an existing tombstoned tablet as the target that this bootstrap will replace.
//
// meta:        metadata of the local replica; its raft group id must equal tablet_id_.
// caller_term: term of the leader that requested the bootstrap.
//
// Returns IllegalState if the replica is not tombstoned, InvalidArgument if the replica's
// last logged term is higher than caller_term, or the error from loading the existing
// consensus metadata. A missing consensus metadata file is not an error.
Status RemoteBootstrapClient::SetTabletToReplace(const RaftGroupMetadataPtr& meta,
                                                 int64_t caller_term) {
  CHECK_EQ(tablet_id_, meta->raft_group_id());

  TabletDataState current_state = meta->tablet_data_state();
  if (current_state != tablet::TABLET_DATA_TOMBSTONED) {
    return STATUS(IllegalState, Substitute("Tablet $0 not in tombstoned state: $1 ($2)",
                                           tablet_id_,
                                           TabletDataState_Name(current_state),
                                           current_state));
  }

  replace_tombstoned_tablet_ = true;
  meta_ = meta;

  int64_t tombstone_term = meta->tombstone_last_logged_opid().term;
  if (tombstone_term > caller_term) {
    return STATUS(InvalidArgument,
        Substitute("Leader has term $0 but the last log entry written by the tombstoned replica "
                   "for tablet $1 has higher term $2. Refusing remote bootstrap from leader",
                   caller_term, tablet_id_, tombstone_term));
  }

  // Pick up the previous consensus metadata when there is one on disk.
  std::unique_ptr<ConsensusMetadata> previous_cmeta;
  Status load_status = ConsensusMetadata::Load(
      &fs_manager(), tablet_id_, permanent_uuid(), &previous_cmeta);
  if (load_status.IsNotFound()) {
    // Nothing was written to disk, possibly because an earlier remote bootstrap failed.
    return Status::OK();
  }
  RETURN_NOT_OK(load_status);

  cmeta_ = std::move(previous_cmeta);
  return Status::OK();
}
194
195
// Opens a remote bootstrap session with the source peer and prepares local tablet metadata.
//
// bootstrap_peer_uuid: uuid of the source peer; only used in error messages.
// proxy_cache:         used to build the RemoteBootstrapService proxy.
// bootstrap_peer_addr: address of the source peer.
// meta:                optional out-parameter; receives the tablet metadata on success.
// ts_manager:          optional; when non-null it is used to register (and, if metadata
//                      creation fails, unregister) the tablet's data and WAL directories.
//
// Must be called at most once per client (CHECKs !started_). On success the client is
// marked as started and counted in remote_bootstrap_clients_started_.
//
// NOTE(review): when an error is returned after downloader_.Start(), started_ is still
// false, so the EndRemoteSession() issued by the destructor returns early without contacting
// the source peer -- confirm the source expires such sessions on its own.
Status RemoteBootstrapClient::Start(const string& bootstrap_peer_uuid,
                                    rpc::ProxyCache* proxy_cache,
                                    const HostPort& bootstrap_peer_addr,
                                    RaftGroupMetadataPtr* meta,
                                    TSTabletManager* ts_manager) {
  CHECK(!started_);
  start_time_micros_ = GetCurrentTimeMicros();

  LOG_WITH_PREFIX(INFO) << "Beginning remote bootstrap session"
                        << " from remote peer at address " << bootstrap_peer_addr.ToString();

  // Set up an RPC proxy for the RemoteBootstrapService.
  proxy_.reset(new RemoteBootstrapServiceProxy(proxy_cache, bootstrap_peer_addr));

  BeginRemoteBootstrapSessionRequestPB req;
  req.set_requestor_uuid(permanent_uuid());
  req.set_tablet_id(tablet_id_);

  rpc::RpcController controller;
  controller.set_timeout(MonoDelta::FromMilliseconds(
      FLAGS_remote_bootstrap_begin_session_timeout_ms));

  // Begin the remote bootstrap session with the remote peer.
  BeginRemoteBootstrapSessionResponsePB resp;
  auto status =
      UnwindRemoteError(proxy_->BeginRemoteBootstrapSession(req, &resp, &controller), controller);

  if (!status.ok()) {
    status = status.CloneAndPrepend("Unable to begin remote bootstrap session");
    LOG_WITH_PREFIX(WARNING) << status;
    return status;
  }

  remote_tablet_data_state_ = resp.superblock().tablet_data_state();
  if (!CanServeTabletData(remote_tablet_data_state_)) {
    Status s = STATUS(IllegalState, "Remote peer (" + bootstrap_peer_uuid + ")" +
                                    " is currently remotely bootstrapping itself!",
                                    resp.superblock().ShortDebugString());
    LOG_WITH_PREFIX(WARNING) << s.ToString();
    return s;
  }

  LOG_WITH_PREFIX(INFO) << "Received superblock: " << resp.superblock().ShortDebugString();
  RETURN_NOT_OK(MigrateSuperblock(resp.mutable_superblock()));

  // First WAL segment announced by the source, or 0 when the response only carries the
  // deprecated explicit list of segments. This is read from the response because
  // first_wal_seqno_ is not assigned until further below.
  const auto remote_first_wal_seqno =
      resp.has_first_wal_segment_seqno() ? resp.first_wal_segment_seqno() : 0;

  auto* kv_store = resp.mutable_superblock()->mutable_kv_store();
  LOG_WITH_PREFIX(INFO) << "RocksDB files: " << yb::ToString(kv_store->rocksdb_files());
  LOG_WITH_PREFIX(INFO) << "Snapshot files: " << yb::ToString(kv_store->snapshot_files());
  if (remote_first_wal_seqno) {
    LOG_WITH_PREFIX(INFO) << "First WAL segment: " << remote_first_wal_seqno;
  } else {
    LOG_WITH_PREFIX(INFO) << "Log files: " << yb::ToString(resp.deprecated_wal_segment_seqnos());
  }

  // Locate the primary table among the tables listed in the KV-store.
  const TableId table_id = resp.superblock().primary_table_id();
  const bool colocated = resp.superblock().colocated();
  const tablet::TableInfoPB* table_ptr = nullptr;
  for (auto& table_pb : kv_store->tables()) {
    if (table_pb.table_id() == table_id) {
      table_ptr = &table_pb;
      break;
    }
  }
  if (!table_ptr) {
    return STATUS(InvalidArgument, Format(
        "Tablet $0: Superblock's KV-store doesn't contain primary table $1", tablet_id_,
        table_id));
  }
  const auto& table = *table_ptr;

  downloader_.Start(
      proxy_, resp.session_id(), MonoDelta::FromMilliseconds(resp.session_idle_timeout_millis()));
  LOG_WITH_PREFIX(INFO) << "Began remote bootstrap session " << session_id();

  // NOTE(review): kv_store and table were obtained from resp's superblock and keep being
  // used after ownership of that message moves to superblock_ -- this presumes
  // release_superblock() hands over the very same object rather than a copy; confirm.
  superblock_.reset(resp.release_superblock());

  // Clear fields rocksdb_dir and wal_dir so we get an error if we try to use them without setting
  // them to the right path.
  kv_store->clear_rocksdb_dir();
  superblock_->clear_wal_dir();

  superblock_->set_tablet_data_state(tablet::TABLET_DATA_COPYING);
  wal_seqnos_.assign(resp.deprecated_wal_segment_seqnos().begin(),
                     resp.deprecated_wal_segment_seqnos().end());
  first_wal_seqno_ = remote_first_wal_seqno;
  remote_committed_cstate_.reset(resp.release_initial_committed_cstate());

  Schema schema;
  RETURN_NOT_OK_PREPEND(SchemaFromPB(
      table.schema(), &schema), "Cannot deserialize schema from remote superblock");
  string data_root_dir;
  string wal_root_dir;
  if (replace_tombstoned_tablet_) {
    // Also validate the term of the bootstrap source peer, in case they are
    // different. This is a sanity check that protects us in case a bug or
    // misconfiguration causes us to attempt to bootstrap from an out-of-date
    // source peer, even after passing the term check from the caller in
    // SetTabletToReplace().
    int64_t last_logged_term = meta_->tombstone_last_logged_opid().term;
    if (last_logged_term > remote_committed_cstate_->current_term()) {
      return STATUS(InvalidArgument,
          Substitute("Tablet $0: Bootstrap source has term $1 but "
                     "tombstoned replica has last-logged opid with higher term $2. "
                      "Refusing remote bootstrap from source peer $3",
                      tablet_id_,
                      remote_committed_cstate_->current_term(),
                      last_logged_term,
                      bootstrap_peer_uuid));
    }
    // Replace rocksdb_dir in the received superblock with our rocksdb_dir.
    kv_store->set_rocksdb_dir(meta_->rocksdb_dir());

    // Replace wal_dir in the received superblock with our assigned wal_dir.
    superblock_->set_wal_dir(meta_->wal_dir());

    // This will flush to disk, but we set the data state to COPYING above.
    RETURN_NOT_OK_PREPEND(meta_->ReplaceSuperBlock(*superblock_),
                          "Remote bootstrap unable to replace superblock on tablet " +
                          tablet_id_);
    // Update the directory assignment mapping.
    data_root_dir = meta_->data_root_dir();
    wal_root_dir = meta_->wal_root_dir();
    if (ts_manager != nullptr) {
      ts_manager->RegisterDataAndWalDir(&fs_manager(),
                                        table_id,
                                        meta_->raft_group_id(),
                                        data_root_dir,
                                        wal_root_dir);
    }
  } else {
    Partition partition;
    Partition::FromPB(superblock_->partition(), &partition);
    PartitionSchema partition_schema;
    RETURN_NOT_OK(PartitionSchema::FromPB(table.partition_schema(), schema, &partition_schema));
    // Create the superblock on disk.
    if (ts_manager != nullptr) {
      ts_manager->GetAndRegisterDataAndWalDir(&fs_manager(),
                                              table_id,
                                              tablet_id_,
                                              &data_root_dir,
                                              &wal_root_dir);
    }
    auto table_info = std::make_shared<tablet::TableInfo>(
        table_id, table.namespace_name(), table.table_name(), table.table_type(), schema,
        IndexMap(table.indexes()),
        table.has_index_info() ? boost::optional<IndexInfo>(table.index_info()) : boost::none,
        table.schema_version(), partition_schema);
    auto create_result = RaftGroupMetadata::CreateNew(tablet::RaftGroupMetadataData {
        .fs_manager = &fs_manager(),
        .table_info = table_info,
        .raft_group_id = tablet_id_,
        .partition = partition,
        .tablet_data_state = tablet::TABLET_DATA_COPYING,
        .colocated = colocated }, data_root_dir, wal_root_dir);
    // Undo the directory registration made above when the metadata could not be created.
    if (ts_manager != nullptr && !create_result.ok()) {
      ts_manager->UnregisterDataWalDir(table_id, tablet_id_, data_root_dir, wal_root_dir);
    }
    RETURN_NOT_OK(create_result);
    meta_ = std::move(*create_result);

    vector<DeletedColumn> deleted_cols;
    for (const DeletedColumnPB& col_pb : table.deleted_cols()) {
      DeletedColumn col;
      RETURN_NOT_OK(DeletedColumn::FromPB(col_pb, &col));
      deleted_cols.push_back(col);
    }
    meta_->SetSchema(schema,
                     IndexMap(table.indexes()),
                     deleted_cols,
                     table.schema_version());

    // Replace rocksdb_dir in the received superblock with our rocksdb_dir.
    kv_store->set_rocksdb_dir(meta_->rocksdb_dir());

    // Replace wal_dir in the received superblock with our assigned wal_dir.
    superblock_->set_wal_dir(meta_->wal_dir());
  }

  started_ = true;
  auto old_count = remote_bootstrap_clients_started_.fetch_add(1, std::memory_order_acq_rel);
  if (old_count < 0) {
    LOG_WITH_PREFIX(DFATAL) << "Invalid number of remote bootstrap sessions: " << old_count;
    remote_bootstrap_clients_started_.store(0, std::memory_order_release);
  }

  if (meta) {
    *meta = meta_;
  }
  return Status::OK();
}
389
390
1.00k
// Downloads everything the session offers: RocksDB files first, then WAL segments, then
// whatever each registered component downloads.
//
// status_listener: receives progress messages; must not be null.
//
// Requires a started client. Returns the first download error encountered.
Status RemoteBootstrapClient::FetchAll(TabletStatusListener* status_listener) {
  CHECK(started_);
  status_listener_ = CHECK_NOTNULL(status_listener);

  VLOG_WITH_PREFIX(2) << "Fetching table_type: " << TableType_Name(meta_->table_type());

  // Work on a copy of the received superblock that points at the local rocksdb_dir.
  new_superblock_ = *superblock_;
  auto* local_kv_store = new_superblock_.mutable_kv_store();
  local_kv_store->set_rocksdb_dir(meta_->rocksdb_dir());

  RETURN_NOT_OK(DownloadRocksDBFiles());
  RETURN_NOT_OK(DownloadWALs());
  for (const auto& extra_component : components_) {
    RETURN_NOT_OK(extra_component->Download());
  }

  // Test hook: sleep to imitate the transfer of very large files.
  if (!PREDICT_FALSE(FLAGS_TEST_simulate_long_remote_bootstrap_sec > 0)) {
    return Status::OK();
  }
  LOG_WITH_PREFIX(INFO) << "Sleeping " << FLAGS_TEST_simulate_long_remote_bootstrap_sec
                        << " seconds to simulate the transfer of very large files";
  SleepFor(MonoDelta::FromSeconds(FLAGS_TEST_simulate_long_remote_bootstrap_sec));
  return Status::OK();
}
414
415
995
// Completes the bootstrap after all downloads are done: writes consensus metadata,
// replaces the tablet superblock and ends the remote session.
//
// Requires a started client whose WAL and RocksDB downloads have completed (CHECKed).
Status RemoteBootstrapClient::Finish() {
  CHECK(meta_);
  CHECK(started_);

  CHECK(downloaded_wal_);
  CHECK(downloaded_rocksdb_files_) << "files not downloaded";

  RETURN_NOT_OK(WriteConsensusMetadata());

  // Swapping in the new superblock moves the tablet metadata state to
  // remote_tablet_data_state_.
  LOG_WITH_PREFIX(INFO) << "Remote bootstrap complete. Replacing tablet superblock.";
  UpdateStatusMessage("Replacing tablet superblock");
  new_superblock_.set_tablet_data_state(remote_tablet_data_state_);
  RETURN_NOT_OK(meta_->ReplaceSuperBlock(new_superblock_));

  // Optional debugging copy of the freshly written tablet metadata file.
  if (FLAGS_remote_bootstrap_save_downloaded_metadata) {
    string metadata_file = fs_manager().GetRaftGroupMetadataPath(tablet_id_);
    string metadata_copy = Substitute("$0.copy.$1.tmp", metadata_file, start_time_micros_);
    RETURN_NOT_OK_PREPEND(CopyFile(Env::Default(), metadata_file, metadata_copy,
                                   WritableFileOptions()),
                          "Unable to make copy of tablet metadata");
  }

  // From here on the destructor no longer tries to close the session by itself.
  succeeded_ = true;

  MAYBE_FAULT(FLAGS_TEST_fault_crash_bootstrap_client_before_changing_role);

  RETURN_NOT_OK_PREPEND(
      EndRemoteSession(), "Error closing remote bootstrap session " + session_id());

  return Status::OK();
}
448
449
// Polls the committed Raft config until this server is listed in it as VOTER or OBSERVER.
//
// shared_consensus: consensus instance whose committed config is polled.
//
// Returns OK as soon as the local peer is found with member type VOTER or OBSERVER,
// InvalidArgument when shared_consensus is null, and TimedOut when that has not happened
// within FLAGS_committed_config_change_role_timeout_sec seconds.
Status RemoteBootstrapClient::VerifyChangeRoleSucceeded(
    const shared_ptr<consensus::Consensus>& shared_consensus) {

  if (!shared_consensus) {
    return STATUS(InvalidArgument, "Invalid consensus object");
  }

  auto start = MonoTime::Now();
  auto timeout = MonoDelta::FromSeconds(FLAGS_committed_config_change_role_timeout_sec);
  int backoff_ms = 1;
  const int kMaxBackoffMs = 256;
  const std::string& local_uuid = permanent_uuid();
  RaftConfigPB committed_config;

  do {
    committed_config = shared_consensus->CommittedConfig();
    for (const auto &peer : committed_config.peers()) {
      if (peer.permanent_uuid() != local_uuid) {
        continue;
      }

      if (peer.member_type() == PeerMemberType::VOTER ||
          peer.member_type() == PeerMemberType::OBSERVER) {
        return Status::OK();
      }
      // Found ourselves, but the role has not changed yet.
      break;
    }

    // Back off before polling again, doubling the delay up to kMaxBackoffMs. This applies to
    // every unsuccessful poll -- including the case where the local peer is not listed in
    // the committed config at all -- so the loop never spins without sleeping.
    SleepFor(MonoDelta::FromMilliseconds(backoff_ms));
    backoff_ms = min(backoff_ms << 1, kMaxBackoffMs);
  } while (MonoTime::Now().GetDeltaSince(start).LessThan(timeout));

  return STATUS(TimedOut,
                Substitute("Timed out waiting member type of peer $0 to change in the committed "
                           "config $1", permanent_uuid(),
                           committed_config.ShortDebugString()));
}
485
486
3.23k
void RemoteBootstrapClient::UpdateStatusMessage(const string& message) {
487
3.23k
  if (status_listener_ != nullptr) {
488
3.23k
    status_listener_->StatusMessage("RemoteBootstrap: " + message);
489
3.23k
  }
490
3.23k
}
491
492
1.44k
// Tells the source peer that this session is over, reporting succeeded_ both as the
// outcome and as the request to keep the session.
//
// Returns OK without any RPC when the client was never started, OK when the RPC succeeds
// or times out, and the unwound remote error otherwise. On success remove_required_ is set
// from the response's session_kept field.
Status RemoteBootstrapClient::EndRemoteSession() {
  if (!started_) {
    return Status::OK();
  }

  rpc::RpcController controller;
  controller.set_timeout(MonoDelta::FromSeconds(FLAGS_remote_bootstrap_end_session_timeout_sec));

  EndRemoteBootstrapSessionRequestPB request;
  request.set_session_id(session_id());
  request.set_is_success(succeeded_);
  request.set_keep_session(succeeded_);
  EndRemoteBootstrapSessionResponsePB response;

  LOG_WITH_PREFIX(INFO) << "Ending remote bootstrap session " << session_id();
  auto rpc_status = proxy_->EndRemoteBootstrapSession(request, &response, &controller);

  if (!rpc_status.ok()) {
    if (rpc_status.IsTimedOut()) {
      // Ignore timeout errors since the server could have sent the ChangeConfig request and
      // died before replying. We need to check the config to verify that this server's role
      // changed as expected, in which case, the remote bootstrap was completed successfully.
      LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id() << " timed out";
      return Status::OK();
    }

    rpc_status = UnwindRemoteError(rpc_status, controller);
    return rpc_status.CloneAndPrepend(
        Format("Failed to end remote bootstrap session $0", session_id()));
  }

  remove_required_ = response.session_kept();
  LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id()
                        << " ended successfully";
  return Status::OK();
}
527
528
941
// Asks the source peer to remove the session, if ending the session reported that the
// session was kept (remove_required_). Otherwise returns OK without any RPC.
//
// Returns the unwound remote error when the RemoveSession RPC fails.
Status RemoteBootstrapClient::Remove() {
  if (!remove_required_) {
    return Status::OK();
  }

  rpc::RpcController controller;
  controller.set_timeout(MonoDelta::FromSeconds(FLAGS_remote_bootstrap_end_session_timeout_sec));

  RemoveSessionRequestPB request;
  request.set_session_id(session_id());
  RemoveSessionResponsePB response;

  LOG_WITH_PREFIX(INFO) << "Removing remote bootstrap session " << session_id();
  const auto rpc_status = proxy_->RemoveSession(request, &response, &controller);
  if (!rpc_status.ok()) {
    return UnwindRemoteError(rpc_status, controller).CloneAndPrepend(
        Format("Failure removing remote bootstrap session $0", session_id()));
  }

  LOG_WITH_PREFIX(INFO) << "Remote bootstrap session " << session_id() << " removed successfully";
  return Status::OK();
}
550
551
999
// Recreates the local WAL directory and downloads the WAL segments of the session into it.
//
// When first_wal_seqno_ is non-zero, segments are fetched one by one starting at that
// sequence number until the source answers NotFound (which ends the download
// successfully). Otherwise every sequence number in wal_seqnos_ is fetched and any failure
// is returned. Sets downloaded_wal_ on success.
Status RemoteBootstrapClient::DownloadWALs() {
  CHECK(started_);

  // Delete and recreate WAL dir if it already exists, to ensure stray files are
  // not kept from previous bootstraps and runs.
  const string& wal_dir = meta_->wal_dir();
  if (env().FileExists(wal_dir)) {
    RETURN_NOT_OK(env().DeleteRecursively(wal_dir));
  }
  auto wal_table_top_dir = DirName(wal_dir);
  RETURN_NOT_OK_PREPEND(fs_manager().CreateDirIfMissing(wal_table_top_dir),
                        Substitute("Failed to create WAL table directory $0", wal_table_top_dir));

  // fsync() parent dir.
  RETURN_NOT_OK_PREPEND(env().SyncDir(DirName(wal_table_top_dir)),
                        Substitute("Failed to sync WAL root directory $0",
                                   DirName(wal_table_top_dir)));

  RETURN_NOT_OK_PREPEND(env().CreateDir(wal_dir),
                        Substitute("Failed to create WAL tablet directory $0", wal_dir));

  // fsync() parent dir.
  RETURN_NOT_OK_PREPEND(env().SyncDir(wal_table_top_dir),
                        Substitute("Failed to sync WAL table directory $0", wal_table_top_dir));

  // Download the WAL segments.
  uint64_t counter = 0;
  if (first_wal_seqno_) {
    LOG_WITH_PREFIX(INFO) << "Starting download of WAL segments starting from sequence number "
                          << first_wal_seqno_;
    for (;;) {
      uint64_t segment_seqno = first_wal_seqno_ + counter;
      UpdateStatusMessage(
          Format("Downloading WAL segment with seq. number $0 (#$1 in this session)",
                 segment_seqno, counter + 1));
      auto download_status = DownloadWAL(segment_seqno);
      if (!download_status.ok()) {
        std::string message_suffix;
        if (counter > 0) {
          message_suffix = Format(", downloaded segments in range: $0..$1",
                                      first_wal_seqno_, segment_seqno - 1);
        } else {
          message_suffix = ", no segments were downloaded";
        }
        // NotFound marks the end of the available segments, not a failure.
        if (download_status.IsNotFound()) {
          LOG_WITH_PREFIX(INFO) << "Stopped downloading WAL segments" << message_suffix;
          break;
        }
        LOG_WITH_PREFIX(WARNING) << "Downloading WAL segments failed: "
                                 << download_status << message_suffix;
        return download_status;
      }
      ++counter;
      // counter is at least 1 here, so the test flag alone decides whether to stop.
      if (PREDICT_FALSE(FLAGS_TEST_download_partial_wal_segments)) {
        LOG(INFO) << "Flag TEST_download_partial_wal_segments set to true. "
                  << "Stopping WAL files download after one file has been downloaded.";
        break;
      }
    }
  } else {
    auto num_segments = wal_seqnos_.size();
    LOG_WITH_PREFIX(INFO) << "Starting download of " << num_segments << " WAL segments...";
    for (uint64_t seg_seqno : wal_seqnos_) {
      UpdateStatusMessage(Substitute("Downloading WAL segment with seq. number $0 ($1/$2)",
                                     seg_seqno, counter + 1, num_segments));
      RETURN_NOT_OK(DownloadWAL(seg_seqno));
      ++counter;
    }
  }

  if (FLAGS_bytes_remote_bootstrap_durable_write_mb != 0) {
    // Persist the directory that received the segments so that the recently downloaded
    // files are accessible. The parent table directory was already synced right after
    // wal_dir was created in it.
    RETURN_NOT_OK_PREPEND(env().SyncDir(wal_dir),
                          Substitute("Failed to sync WAL tablet directory $0", wal_dir));
  }

  downloaded_wal_ = true;
  return Status::OK();
}
630
631
1.00k
// Makes sure the RocksDB directory layout for the tablet exists.
//
// db_dir: tablet RocksDB directory; its parent (the table directory) is created first.
// fs:     filesystem manager used to create the directories.
//
// Every registered component then creates its own directories under the same arguments.
Status RemoteBootstrapClient::CreateTabletDirectories(const string& db_dir, FsManager* fs) {
  // The table-level directory is the parent of the tablet directory.
  const auto table_dir = DirName(db_dir);
  RETURN_NOT_OK_PREPEND(fs->CreateDirIfMissing(table_dir),
                        Substitute("Failed to create RocksDB table directory $0",
                                   table_dir));

  RETURN_NOT_OK_PREPEND(fs->CreateDirIfMissing(db_dir),
                        Substitute("Failed to create RocksDB tablet directory $0",
                                   db_dir));

  for (const auto& extra_component : components_) {
    RETURN_NOT_OK(extra_component->CreateDirectories(db_dir, fs));
  }

  return Status::OK();
}
647
648
1.00k
// Downloads every RocksDB file listed in new_superblock_ into the tablet's RocksDB
// directory, then moves a downloaded intents subdirectory (if any) to its own location.
// Sets downloaded_rocksdb_files_ on success.
Status RemoteBootstrapClient::DownloadRocksDBFiles() {
  const auto& db_dir = meta_->rocksdb_dir();

  RETURN_NOT_OK(CreateTabletDirectories(db_dir, meta_->fs_manager()));

  DataIdPB file_id;
  file_id.set_type(DataIdPB::ROCKSDB_FILE);
  for (auto const& remote_file : new_superblock_.kv_store().rocksdb_files()) {
    auto download_started = MonoTime::Now();
    RETURN_NOT_OK(downloader_.DownloadFile(remote_file, db_dir, &file_id));
    auto duration = MonoTime::Now().GetDeltaSince(download_started);
    LOG_WITH_PREFIX(INFO)
        << "Downloaded file " << remote_file.name() << " of size " << remote_file.size_bytes()
        << " in " << duration.ToSeconds() << " seconds";
  }

  // Intents travel as a subdirectory of the regular DB so that remote bootstrap needs no
  // extra file type; move them to where the intents DB lives.
  auto staged_intents_dir = JoinPathSegments(db_dir, tablet::kIntentsSubdir);
  if (env().FileExists(staged_intents_dir)) {
    auto final_intents_dir = db_dir + tablet::kIntentsDBSuffix;
    LOG_WITH_PREFIX(INFO) << "Moving intents DB: " << staged_intents_dir << " => "
                          << final_intents_dir;
    RETURN_NOT_OK(env().RenameFile(staged_intents_dir, final_intents_dir));
  }

  if (FLAGS_bytes_remote_bootstrap_durable_write_mb != 0) {
    // Persist directory so that recently downloaded files are accessible.
    RETURN_NOT_OK(env().SyncDir(db_dir));
  }

  downloaded_rocksdb_files_ = true;
  return Status::OK();
}
678
679
2.24k
// Downloads one WAL segment into a ".tmp" file next to its final location and renames it
// into place once the download has finished.
//
// wal_segment_seqno: sequence number of the segment to fetch.
//
// On any failure the temporary file is deleted (a failed delete is only logged).
//
// NOTE(review): the rename happens while `writer` is still open and sync_on_close is set;
// presumably the final sync only runs when the writer goes out of scope, i.e. after the
// rename -- confirm that ordering is intended.
Status RemoteBootstrapClient::DownloadWAL(uint64_t wal_segment_seqno) {
  VLOG_WITH_PREFIX(1) << "Downloading WAL segment with seqno " << wal_segment_seqno;

  DataIdPB segment_id;
  segment_id.set_type(DataIdPB::LOG_SEGMENT);
  segment_id.set_wal_segment_seqno(wal_segment_seqno);

  const string final_path =
      fs_manager().GetWalSegmentFilePath(meta_->wal_dir(), wal_segment_seqno);
  const auto staging_path = final_path + ".tmp";

  // Removes the staging file on every exit path except the successful one.
  bool completed = false;
  auto cleanup = ScopeExit([this, &staging_path, &completed] {
    if (completed) {
      return;
    }
    WARN_NOT_OK(env().DeleteFile(staging_path),
                "Failed to delete temporary WAL segment");
  });

  WritableFileOptions file_options;
  file_options.sync_on_close = true;
  std::unique_ptr<WritableFile> writer;
  RETURN_NOT_OK_PREPEND(env().NewWritableFile(file_options, staging_path, &writer),
                        "Unable to open file for writing");

  auto download_started = MonoTime::Now();
  RETURN_NOT_OK_PREPEND(downloader_.DownloadFile(segment_id, writer.get()),
                        Substitute("Unable to download WAL segment with seq. number $0",
                                   wal_segment_seqno));
  RETURN_NOT_OK(env().RenameFile(staging_path, final_path));
  auto duration = MonoTime::Now().GetDeltaSince(download_started);
  LOG_WITH_PREFIX(INFO) << "Downloaded WAL segment with seq. number " << wal_segment_seqno
                        << " of size " << writer->Size() << " in " << duration.ToSeconds()
                        << " seconds";
  completed = true;

  return Status::OK();
}
713
714
995
// Persists consensus metadata that reflects the committed state sent by the source peer.
//
// Without previously loaded metadata (cmeta_ is null) a new file is created from the remote
// config and term. Otherwise the remote committed state is merged into cmeta_ and flushed,
// and a debugging copy is made when FLAGS_remote_bootstrap_save_downloaded_metadata is set.
Status RemoteBootstrapClient::WriteConsensusMetadata() {
  if (cmeta_) {
    // Bring the existing metadata up to the config and term sent by the bootstrap source.
    cmeta_->MergeCommittedConsensusStatePB(*remote_committed_cstate_);
    RETURN_NOT_OK(cmeta_->Flush());

    if (FLAGS_remote_bootstrap_save_downloaded_metadata) {
      string cmeta_file = fs_manager().GetConsensusMetadataPath(tablet_id_);
      string cmeta_copy = Substitute("$0.copy.$1.tmp", cmeta_file, start_time_micros_);
      RETURN_NOT_OK_PREPEND(CopyFile(Env::Default(), cmeta_file, cmeta_copy,
                                     WritableFileOptions()),
                            "Unable to make copy of consensus metadata");
    }

    return Status::OK();
  }

  // No previous consensus meta file was found, so create one.
  std::unique_ptr<ConsensusMetadata> created_cmeta;
  return ConsensusMetadata::Create(&fs_manager(), tablet_id_, fs_manager().uuid(),
                                   remote_committed_cstate_->config(),
                                   remote_committed_cstate_->current_term(),
                                   &created_cmeta);
}
739
740
11.5k
// Returns the Env owned by the filesystem manager this client works with.
Env& RemoteBootstrapClient::env() const {
  auto* fs_env = fs_manager().env();
  return *fs_env;
}
743
744
139M
const std::string& RemoteBootstrapClient::permanent_uuid() const {
745
139M
  return fs_manager().uuid();
746
139M
}
747
748
} // namespace tserver
749
} // namespace yb