/Users/deen/code/yugabyte-db/src/yb/rocksdb/utilities/checkpoint/checkpoint.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2012 Facebook. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. |
23 | | |
24 | | #ifndef ROCKSDB_LITE |
25 | | |
26 | | #include "yb/rocksdb/utilities/checkpoint.h" |
27 | | |
28 | | #ifndef __STDC_FORMAT_MACROS |
29 | | #define __STDC_FORMAT_MACROS |
30 | | #endif |
31 | | |
32 | | #include <inttypes.h> |
33 | | #include <algorithm> |
34 | | #include <string> |
35 | | #include "yb/rocksdb/db/filename.h" |
36 | | #include "yb/rocksdb/db/wal_manager.h" |
37 | | #include "yb/rocksdb/db.h" |
38 | | #include "yb/rocksdb/env.h" |
39 | | #include "yb/rocksdb/transaction_log.h" |
40 | | #include "yb/rocksdb/util/file_util.h" |
41 | | #include "yb/rocksdb/port/port.h" |
42 | | #include "yb/util/random_util.h" |
43 | | #include "yb/util/status_log.h" |
44 | | #include "yb/util/string_util.h" |
45 | | |
46 | | namespace rocksdb { |
47 | | namespace checkpoint { |
48 | | |
49 | | // Creates a checkpoint of db in checkpoint_dir through db->GetCheckpointEnv(); returns InvalidArgument when that env is not plaintext. |
50 | | // The checkpoint is first assembled in a temporary directory named checkpoint_dir + ".tmp." + <random uint64> and then renamed to checkpoint_dir. |
51 | | // (1) Table files (kTableFile / kTableSBlockFile) are hard-linked from the existing live files; |
52 | | // they are copied instead once LinkFile reports NotSupported (e.g. the output directory is on a different filesystem). |
53 | | // (2) The manifest (limited to manifest_file_size bytes), the CURRENT file and the selected WAL files are copied or linked as described in the body. |
54 | | // If checkpoint_dir already exists it is deleted recursively right before the rename; otherwise it comes into existence through the rename. |
55 | | // When a step fails, the error is logged, the temporary directory is deleted (result only logged) and the error is returned. The original note says the directory will be an absolute path; that is not checked here. |
56 | 2.07k | Status CreateCheckpoint(DB* db, const std::string& checkpoint_dir) { |
57 | 2.07k | if (!db->GetCheckpointEnv()->IsPlainText()) { |
58 | 0 | return STATUS(InvalidArgument, "db's checkpoint env is not plaintext."); |
59 | 0 | } |
60 | 2.07k | std::vector<std::string> live_files; |
61 | 2.07k | uint64_t manifest_file_size = 0; |
62 | 2.07k | uint64_t sequence_number = db->GetLatestSequenceNumber(); |
63 | 2.07k | bool same_fs = true; |
64 | 2.07k | VectorLogPtr live_wal_files; |
65 | 2.07k | bool delete_checkpoint_dir = false; |
66 | | |
67 | 2.07k | Status s = db->GetCheckpointEnv()->FileExists(checkpoint_dir); |
68 | 2.07k | if (s.ok()) { |
69 | 1 | delete_checkpoint_dir = true; |
70 | 2.07k | } else if (!s.IsNotFound()) { |
71 | 0 | assert(s.IsIOError()); |
72 | 0 | return s; |
73 | 0 | } |
74 | | |
75 | 2.07k | s = db->DisableFileDeletions(); |
76 | 2.07k | if (s.ok()) { |
77 | | // GetLiveFiles fills live_files with names prefixed with "/" (asserted in the copy loop below) and sets manifest_file_size; the final true argument presumably requests a memtable flush first -- TODO confirm. |
78 | 2.07k | s = db->GetLiveFiles(live_files, &manifest_file_size, true); |
79 | 2.07k | } |
80 | | // WAL files are fetched whenever the steps above succeeded; no column-family count is checked here, although the original note tied this to having more than one column family. NOTE(review): the warning text on the failure path below says disable while the call is EnableFileDeletions -- looks inverted, confirm. |
81 | 2.07k | if (s.ok()) { |
82 | 2.07k | s = db->GetSortedWalFiles(&live_wal_files); |
83 | 2.07k | } |
84 | 2.07k | if (!s.ok()) { |
85 | 0 | WARN_NOT_OK(db->EnableFileDeletions(false), "Failed to disable file deletions"); |
86 | 0 | return s; |
87 | 0 | } |
88 | | |
89 | 2.07k | size_t wal_size = live_wal_files.size(); |
90 | 2.07k | RLOG(db->GetOptions().info_log, |
91 | 2.07k | "Started the snapshot process -- creating snapshot in directory %s", |
92 | 2.07k | checkpoint_dir.c_str()); |
93 | | |
94 | 2.07k | const std::string full_private_path = |
95 | 2.07k | checkpoint_dir + ".tmp." + ToString(yb::RandomUniformInt<uint64_t>()); |
96 | | |
97 | | // Create the temporary snapshot directory; the status stays in s, so both loops below are skipped if creation failed. |
98 | 2.07k | s = db->GetCheckpointEnv()->CreateDir(full_private_path); |
99 | | |
100 | | // Hard-link or copy every live file into the temporary directory, stopping at the first failure. |
101 | 11.4k | for (size_t i = 0; s.ok() && i < live_files.size()11.4k ; ++i9.33k ) { |
102 | 9.33k | uint64_t number; |
103 | 9.33k | FileType type; |
104 | 9.33k | bool ok = ParseFileName(live_files[i], &number, &type); |
105 | 9.33k | if (!ok) { |
106 | 0 | s = STATUS(Corruption, "Can't parse file name. This is very bad"); |
107 | 0 | break; |
108 | 0 | } |
109 | | // We should only get table (sst / sblock), manifest and current files here, and every name is expected to start with a slash. |
110 | 9.33k | assert(type == kTableFile || type == kTableSBlockFile || type == kDescriptorFile || |
111 | 9.33k | type == kCurrentFile); |
112 | 0 | assert(live_files[i].size() > 0 && live_files[i][0] == '/'); |
113 | 0 | std::string src_fname = live_files[i]; |
114 | | |
115 | | // rules: |
116 | | // * kTableFile / kTableSBlockFile: hard-link while same_fs is true; LinkFile returning NotSupported clears same_fs and the file falls through to the copy branch |
117 | | // * kDescriptorFile: copy limited to manifest_file_size; other non-table files are copied with size 0 (presumably meaning the whole file -- TODO confirm against CopyFile) |
118 | | // * once same_fs is false, every remaining table file is copied (cross-device link case) |
119 | 9.33k | bool is_table_file = type == kTableFile || type == kTableSBlockFile6.73k ; |
120 | 9.33k | if (is_table_file && same_fs5.18k ) { |
121 | 5.18k | RLOG(db->GetOptions().info_log, "Hard Linking %s", src_fname.c_str()); |
122 | 5.18k | s = db->GetCheckpointEnv()->LinkFile(db->GetName() + src_fname, |
123 | 5.18k | full_private_path + src_fname); |
124 | 5.18k | if (s.IsNotSupported()) { |
125 | 0 | same_fs = false; |
126 | 0 | s = Status::OK(); |
127 | 0 | } |
128 | 5.18k | } |
129 | 9.33k | if (!is_table_file || !same_fs5.18k ) { |
130 | 4.14k | RLOG(db->GetOptions().info_log, "Copying %s", src_fname.c_str()); |
131 | 4.14k | std::string dest_name = full_private_path + src_fname; |
132 | 4.14k | s = CopyFile(db->GetCheckpointEnv(), db->GetName() + src_fname, dest_name, |
133 | 4.14k | type == kDescriptorFile ? manifest_file_size2.07k : 02.07k ); |
134 | 4.14k | } |
135 | 9.33k | } |
136 | 2.07k | RLOG(db->GetOptions().info_log, "Number of log files %" ROCKSDB_PRIszt, |
137 | 2.07k | live_wal_files.size()); |
138 | | |
139 | | // Handle WAL files that are alive and whose StartSequence() is >= the sequence_number captured at the top. The last entry of the list is copied with its exact |
140 | | // SizeFileBytes() because it is the only one that has changes after the last flush, and the loop stops there; earlier qualifying files are hard-linked (or copied). |
141 | 2.07k | for (size_t i = 0; s.ok() && i < wal_size2.07k ; ++i1 ) { |
142 | 3 | if ((live_wal_files[i]->Type() == kAliveLogFile) && |
143 | 3 | (live_wal_files[i]->StartSequence() >= sequence_number)) { |
144 | 2 | if (i + 1 == wal_size) { |
145 | 2 | RLOG(db->GetOptions().info_log, "Copying %s", |
146 | 2 | live_wal_files[i]->PathName().c_str()); |
147 | 2 | s = CopyFile(db->GetCheckpointEnv(), |
148 | 2 | db->GetOptions().wal_dir + live_wal_files[i]->PathName(), |
149 | 2 | full_private_path + live_wal_files[i]->PathName(), |
150 | 2 | live_wal_files[i]->SizeFileBytes()); |
151 | 2 | break; |
152 | 2 | } |
153 | 0 | if (same_fs) { |
154 | | // we only care about live log files; NotSupported from LinkFile clears same_fs so the file is copied by the branch below, as for table files |
155 | 0 | RLOG(db->GetOptions().info_log, "Hard Linking %s", |
156 | 0 | live_wal_files[i]->PathName().c_str()); |
157 | 0 | s = db->GetCheckpointEnv()->LinkFile( |
158 | 0 | db->GetOptions().wal_dir + live_wal_files[i]->PathName(), |
159 | 0 | full_private_path + live_wal_files[i]->PathName()); |
160 | 0 | if (s.IsNotSupported()) { |
161 | 0 | same_fs = false; |
162 | 0 | s = Status::OK(); |
163 | 0 | } |
164 | 0 | } |
165 | 0 | if (!same_fs) { |
166 | 0 | RLOG(db->GetOptions().info_log, "Copying %s", |
167 | 0 | live_wal_files[i]->PathName().c_str()); |
168 | 0 | s = CopyFile(db->GetCheckpointEnv(), |
169 | 0 | db->GetOptions().wal_dir + live_wal_files[i]->PathName(), |
170 | 0 | full_private_path + live_wal_files[i]->PathName(), 0); |
171 | 0 | } |
172 | 0 | } |
173 | 3 | } |
174 | | |
175 | | // Copying/linking is finished (or has failed), so re-enable file deletions. NOTE(review): RETURN_NOT_OK presumably returns early on failure, which would skip the temporary-directory cleanup below and drop any earlier error held in s -- confirm. |
176 | 2.07k | RETURN_NOT_OK(db->EnableFileDeletions(false)); |
177 | | |
178 | 2.07k | if (s.ok()) { |
179 | 2.07k | if (delete_checkpoint_dir) { |
180 | 1 | const Status s_del = DeleteRecursively(db->GetCheckpointEnv(), checkpoint_dir); |
181 | 1 | RLOG( |
182 | 1 | db->GetOptions().info_log, "Deleted dir %s -- %s", |
183 | 1 | checkpoint_dir.c_str(), s_del.ToString().c_str()); |
184 | 1 | } |
185 | | |
186 | | // Move the temporary directory into place as the real snapshot directory. A pre-existing checkpoint_dir was deleted just above; the result of that delete is only logged. |
187 | 2.07k | s = db->GetCheckpointEnv()->RenameFile(full_private_path, checkpoint_dir); |
188 | 2.07k | } |
189 | 2.07k | if (s.ok()) { |
190 | 2.07k | unique_ptr<Directory> checkpoint_directory; |
191 | 2.07k | RETURN_NOT_OK(db->GetCheckpointEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory)); |
192 | 2.07k | if (checkpoint_directory != nullptr) { |
193 | 2.07k | s = checkpoint_directory->Fsync(); |
194 | 2.07k | } |
195 | 2.07k | } |
196 | | |
197 | 2.07k | if (!s.ok()) { |
198 | | // Failure path: log the error, then clean up all the files we might have created under the temporary directory. |
199 | 1 | RLOG(db->GetOptions().info_log, "Snapshot failed -- %s", |
200 | 1 | s.ToString().c_str()); |
201 | | // We have to delete the dir and all its children; the delete status is logged but not returned, the original error is. |
202 | 1 | const Status s_del = DeleteRecursively(db->GetCheckpointEnv(), full_private_path); |
203 | 1 | RLOG( |
204 | 1 | db->GetOptions().info_log, "Deleted dir %s -- %s", |
205 | 1 | full_private_path.c_str(), s_del.ToString().c_str()); |
206 | 1 | return s; |
207 | 1 | } |
208 | | |
209 | | // Here we know that we succeeded and installed the new snapshot; the sequence number logged below is the one read before any file was collected. |
210 | 2.07k | RLOG(db->GetOptions().info_log, "Checkpoint DONE. All is good"); |
211 | 2.07k | RLOG(db->GetOptions().info_log, "Checkpoint sequence number: %" PRIu64, |
212 | 2.07k | sequence_number); |
213 | | |
214 | 2.07k | return s; |
215 | 2.07k | } |
216 | | |
217 | | } // namespace checkpoint |
218 | | } // namespace rocksdb |
219 | | |
220 | | #endif // ROCKSDB_LITE |