YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/rocksdb/utilities/checkpoint/checkpoint.cc
Line
Count
Source (jump to first uncovered line)
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2012 Facebook.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file.
23
24
#ifndef ROCKSDB_LITE
25
26
#include "yb/rocksdb/utilities/checkpoint.h"
27
28
#ifndef __STDC_FORMAT_MACROS
29
#define __STDC_FORMAT_MACROS
30
#endif
31
32
#include <inttypes.h>
33
#include <algorithm>
34
#include <string>
35
#include "yb/rocksdb/db/filename.h"
36
#include "yb/rocksdb/db/wal_manager.h"
37
#include "yb/rocksdb/db.h"
38
#include "yb/rocksdb/env.h"
39
#include "yb/rocksdb/transaction_log.h"
40
#include "yb/rocksdb/util/file_util.h"
41
#include "yb/rocksdb/port/port.h"
42
#include "yb/util/random_util.h"
43
#include "yb/util/status_log.h"
44
#include "yb/util/string_util.h"
45
46
namespace rocksdb {
47
namespace checkpoint {
48
49
// Builds an openable snapshot of RocksDB on the same disk, which
50
// accepts an output directory on the same disk, and under the directory
51
// (1) hard-linked SST files pointing to existing live SST files
52
// SST files will be copied if output directory is on a different filesystem
53
// (2) a copied manifest files and other files
54
// The directory should not already exist and will be created by this API.
55
// The directory will be an absolute path
56
2.07k
Status CreateCheckpoint(DB* db, const std::string& checkpoint_dir) {
57
2.07k
  if (!db->GetCheckpointEnv()->IsPlainText()) {
58
0
    return STATUS(InvalidArgument, "db's checkpoint env is not plaintext.");
59
0
  }
60
2.07k
  std::vector<std::string> live_files;
61
2.07k
  uint64_t manifest_file_size = 0;
62
2.07k
  uint64_t sequence_number = db->GetLatestSequenceNumber();
63
2.07k
  bool same_fs = true;
64
2.07k
  VectorLogPtr live_wal_files;
65
2.07k
  bool delete_checkpoint_dir = false;
66
67
2.07k
  Status s = db->GetCheckpointEnv()->FileExists(checkpoint_dir);
68
2.07k
  if (s.ok()) {
69
1
    delete_checkpoint_dir = true;
70
2.07k
  } else if (!s.IsNotFound()) {
71
0
    assert(s.IsIOError());
72
0
    return s;
73
0
  }
74
75
2.07k
  s = db->DisableFileDeletions();
76
2.07k
  if (s.ok()) {
77
    // this will return live_files prefixed with "/"
78
2.07k
    s = db->GetLiveFiles(live_files, &manifest_file_size, true);
79
2.07k
  }
80
  // if we have more than one column family, we need to also get WAL files
81
2.07k
  if (s.ok()) {
82
2.07k
    s = db->GetSortedWalFiles(&live_wal_files);
83
2.07k
  }
84
2.07k
  if (!s.ok()) {
85
0
    WARN_NOT_OK(db->EnableFileDeletions(false), "Failed to disable file deletions");
86
0
    return s;
87
0
  }
88
89
2.07k
  size_t wal_size = live_wal_files.size();
90
2.07k
  RLOG(db->GetOptions().info_log,
91
2.07k
       "Started the snapshot process -- creating snapshot in directory %s",
92
2.07k
       checkpoint_dir.c_str());
93
94
2.07k
  const std::string full_private_path =
95
2.07k
      checkpoint_dir + ".tmp." + ToString(yb::RandomUniformInt<uint64_t>());
96
97
  // create snapshot directory
98
2.07k
  s = db->GetCheckpointEnv()->CreateDir(full_private_path);
99
100
  // copy/hard link live_files
101
11.4k
  for (size_t i = 0; s.ok() && 
i < live_files.size()11.4k
;
++i9.33k
) {
102
9.33k
    uint64_t number;
103
9.33k
    FileType type;
104
9.33k
    bool ok = ParseFileName(live_files[i], &number, &type);
105
9.33k
    if (!ok) {
106
0
      s = STATUS(Corruption, "Can't parse file name. This is very bad");
107
0
      break;
108
0
    }
109
    // we should only get sst, manifest and current files here
110
9.33k
    assert(type == kTableFile || type == kTableSBlockFile || type == kDescriptorFile ||
111
9.33k
           type == kCurrentFile);
112
0
    assert(live_files[i].size() > 0 && live_files[i][0] == '/');
113
0
    std::string src_fname = live_files[i];
114
115
    // rules:
116
    // * if it's kTableFile or kTableSBlockFile, then it's shared
117
    // * if it's kDescriptorFile, limit the size to manifest_file_size
118
    // * always copy if cross-device link
119
9.33k
    bool is_table_file = type == kTableFile || 
type == kTableSBlockFile6.73k
;
120
9.33k
    if (is_table_file && 
same_fs5.18k
) {
121
5.18k
      RLOG(db->GetOptions().info_log, "Hard Linking %s", src_fname.c_str());
122
5.18k
      s = db->GetCheckpointEnv()->LinkFile(db->GetName() + src_fname,
123
5.18k
                                 full_private_path + src_fname);
124
5.18k
      if (s.IsNotSupported()) {
125
0
        same_fs = false;
126
0
        s = Status::OK();
127
0
      }
128
5.18k
    }
129
9.33k
    if (!is_table_file || 
!same_fs5.18k
) {
130
4.14k
      RLOG(db->GetOptions().info_log, "Copying %s", src_fname.c_str());
131
4.14k
      std::string dest_name = full_private_path + src_fname;
132
4.14k
      s = CopyFile(db->GetCheckpointEnv(), db->GetName() + src_fname, dest_name,
133
4.14k
                   type == kDescriptorFile ? 
manifest_file_size2.07k
:
02.07k
);
134
4.14k
    }
135
9.33k
  }
136
2.07k
  RLOG(db->GetOptions().info_log, "Number of log files %" ROCKSDB_PRIszt,
137
2.07k
       live_wal_files.size());
138
139
  // Link WAL files. Copy exact size of last one because it is the only one
140
  // that has changes after the last flush.
141
2.07k
  for (size_t i = 0; s.ok() && 
i < wal_size2.07k
;
++i1
) {
142
3
    if ((live_wal_files[i]->Type() == kAliveLogFile) &&
143
3
        (live_wal_files[i]->StartSequence() >= sequence_number)) {
144
2
      if (i + 1 == wal_size) {
145
2
        RLOG(db->GetOptions().info_log, "Copying %s",
146
2
             live_wal_files[i]->PathName().c_str());
147
2
        s = CopyFile(db->GetCheckpointEnv(),
148
2
                     db->GetOptions().wal_dir + live_wal_files[i]->PathName(),
149
2
                     full_private_path + live_wal_files[i]->PathName(),
150
2
                     live_wal_files[i]->SizeFileBytes());
151
2
        break;
152
2
      }
153
0
      if (same_fs) {
154
        // we only care about live log files
155
0
        RLOG(db->GetOptions().info_log, "Hard Linking %s",
156
0
             live_wal_files[i]->PathName().c_str());
157
0
        s = db->GetCheckpointEnv()->LinkFile(
158
0
             db->GetOptions().wal_dir + live_wal_files[i]->PathName(),
159
0
             full_private_path + live_wal_files[i]->PathName());
160
0
        if (s.IsNotSupported()) {
161
0
          same_fs = false;
162
0
          s = Status::OK();
163
0
        }
164
0
      }
165
0
      if (!same_fs) {
166
0
        RLOG(db->GetOptions().info_log, "Copying %s",
167
0
             live_wal_files[i]->PathName().c_str());
168
0
        s = CopyFile(db->GetCheckpointEnv(),
169
0
                     db->GetOptions().wal_dir + live_wal_files[i]->PathName(),
170
0
                     full_private_path + live_wal_files[i]->PathName(), 0);
171
0
      }
172
0
    }
173
3
  }
174
175
  // we copied all the files, enable file deletions
176
2.07k
  RETURN_NOT_OK(db->EnableFileDeletions(false));
177
178
2.07k
  if (s.ok()) {
179
2.07k
    if (delete_checkpoint_dir) {
180
1
      const Status s_del = DeleteRecursively(db->GetCheckpointEnv(), checkpoint_dir);
181
1
      RLOG(
182
1
          db->GetOptions().info_log, "Deleted dir %s -- %s",
183
1
          checkpoint_dir.c_str(), s_del.ToString().c_str());
184
1
    }
185
186
    // move tmp private backup to real snapshot directory
187
2.07k
    s = db->GetCheckpointEnv()->RenameFile(full_private_path, checkpoint_dir);
188
2.07k
  }
189
2.07k
  if (s.ok()) {
190
2.07k
    unique_ptr<Directory> checkpoint_directory;
191
2.07k
    RETURN_NOT_OK(db->GetCheckpointEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory));
192
2.07k
    if (checkpoint_directory != nullptr) {
193
2.07k
      s = checkpoint_directory->Fsync();
194
2.07k
    }
195
2.07k
  }
196
197
2.07k
  if (!s.ok()) {
198
    // clean all the files we might have created
199
1
    RLOG(db->GetOptions().info_log, "Snapshot failed -- %s",
200
1
         s.ToString().c_str());
201
    // we have to delete the dir and all its children
202
1
    const Status s_del = DeleteRecursively(db->GetCheckpointEnv(), full_private_path);
203
1
    RLOG(
204
1
        db->GetOptions().info_log, "Deleted dir %s -- %s",
205
1
        full_private_path.c_str(), s_del.ToString().c_str());
206
1
    return s;
207
1
  }
208
209
  // here we know that we succeeded and installed the new snapshot
210
2.07k
  RLOG(db->GetOptions().info_log, "Checkpoint DONE. All is good");
211
2.07k
  RLOG(db->GetOptions().info_log, "Checkpoint sequence number: %" PRIu64,
212
2.07k
      sequence_number);
213
214
2.07k
  return s;
215
2.07k
}
216
217
}  // namespace checkpoint
218
}  // namespace rocksdb
219
220
#endif  // ROCKSDB_LITE