/Users/deen/code/yugabyte-db/src/yb/master/sys_catalog_initialization.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/master/sys_catalog_initialization.h" |
15 | | |
16 | | #include "yb/common/wire_protocol.h" |
17 | | |
18 | | #include "yb/master/catalog_entity_info.h" |
19 | | #include "yb/master/sys_catalog.h" |
20 | | |
21 | | #include "yb/tablet/operations/change_metadata_operation.h" |
22 | | #include "yb/tablet/operations/snapshot_operation.h" |
23 | | #include "yb/tablet/tablet.h" |
24 | | #include "yb/tablet/tablet_metadata.h" |
25 | | #include "yb/tablet/tablet_peer.h" |
26 | | #include "yb/tablet/tablet_snapshots.h" |
27 | | |
28 | | #include "yb/util/countdown_latch.h" |
29 | | #include "yb/util/env_util.h" |
30 | | #include "yb/util/flag_tags.h" |
31 | | |
32 | | DEFINE_string(initial_sys_catalog_snapshot_path, "", |
33 | | "If this is specified, system catalog RocksDB is checkpointed at this location after initdb " |
34 | | "is done."); |
35 | | |
36 | | DEFINE_bool(use_initial_sys_catalog_snapshot, false, |
37 | | "DEPRECATED: use --enable_ysql instead. " |
38 | | "Initialize sys catalog tablet from a pre-existing snapshot instead of running initdb. " |
39 | | "Only takes effect if --initial_sys_catalog_snapshot_path is specified or can be " |
40 | | "auto-detected."); |
41 | | |
42 | | DEFINE_bool(enable_ysql, true, |
43 | | "Enable YSQL on cluster. This will initialize sys catalog tablet from a pre-existing snapshot " |
44 | | "and start YSQL proxy. " |
45 | | "Only takes effect if --initial_sys_catalog_snapshot_path is specified or can be auto-detected." |
46 | | ); |
47 | | |
48 | | DEFINE_bool(create_initial_sys_catalog_snapshot, false, |
49 | | "Run initdb and create an initial sys catalog data snapshot"); |
50 | | |
51 | | DEFINE_bool( |
52 | | // TODO: switch the default to true after updating all external callers (yb-ctl, YugaWare) |
53 | | // and unit tests. |
54 | | master_auto_run_initdb, false, |
55 | | "Automatically run initdb on master leader initialization"); |
56 | | |
57 | | TAG_FLAG(create_initial_sys_catalog_snapshot, advanced); |
58 | | TAG_FLAG(create_initial_sys_catalog_snapshot, hidden); |
59 | | |
60 | | using yb::CountDownLatch; |
61 | | using yb::tserver::TabletSnapshotOpRequestPB; |
62 | | using yb::tserver::TabletSnapshotOpResponsePB; |
63 | | using yb::tablet::SnapshotOperation; |
64 | | using yb::pb_util::ReadPBContainerFromPath; |
65 | | |
66 | | namespace yb { |
67 | | namespace master { |
68 | | |
namespace {

// File-local names used to lay out and locate the initial sys catalog snapshot. These are
// declared constexpr so that the pointers themselves (not only the pointed-to characters) are
// immutable; a plain `const char*` at namespace scope can be reassigned by accident.

// Directory name of the default initial sys catalog snapshot.
constexpr const char* kDefaultInitialSysCatalogSnapshotDir = "initial_sys_catalog_snapshot";

// Subdirectory of the snapshot directory that holds the RocksDB checkpoint.
constexpr const char* kSysCatalogSnapshotRocksDbSubDir = "rocksdb";

// File inside the snapshot directory that stores the exported tablet metadata changes.
constexpr const char* kSysCatalogSnapshotTabletMetadataChangesFile =
    "exported_tablet_metadata_changes";

// Environment variable that disables the use of the initial snapshot when it is set to "0".
constexpr const char* kUseInitialSysCatalogSnapshotEnvVar = "YB_USE_INITIAL_SYS_CATALOG_SNAPSHOT";

}  // anonymous namespace
77 | | |
78 | | // ------------------------------------------------------------------------------------------------ |
79 | | // InitialSysCatalogSnapshotWriter |
80 | | // ------------------------------------------------------------------------------------------------ |
81 | | |
82 | 0 | InitialSysCatalogSnapshotWriter::InitialSysCatalogSnapshotWriter() = default; |
83 | 0 | InitialSysCatalogSnapshotWriter::~InitialSysCatalogSnapshotWriter() = default; |
84 | | |
85 | | void InitialSysCatalogSnapshotWriter::AddMetadataChange( |
86 | 0 | tablet::ChangeMetadataRequestPB metadata_change) { |
87 | 0 | initdb_metadata_changes_.push_back(std::move(metadata_change)); |
88 | 0 | } |
89 | | |
90 | | Status InitialSysCatalogSnapshotWriter::WriteSnapshot( |
91 | | tablet::Tablet* sys_catalog_tablet, |
92 | 0 | const std::string& dest_path) { |
93 | 0 | RETURN_NOT_OK(sys_catalog_tablet->Flush(yb::tablet::FlushMode::kSync)); |
94 | 0 | RETURN_NOT_OK(Env::Default()->CreateDir(dest_path)); |
95 | 0 | RETURN_NOT_OK(sys_catalog_tablet->snapshots().CreateCheckpoint( |
96 | 0 | JoinPathSegments(dest_path, kSysCatalogSnapshotRocksDbSubDir))); |
97 | |
|
98 | 0 | tserver::ExportedTabletMetadataChanges exported_tablet_metadata_changes; |
99 | 0 | for (size_t i = 0; i < initdb_metadata_changes_.size(); ++i) { |
100 | 0 | *exported_tablet_metadata_changes.add_metadata_changes() = std::move( |
101 | 0 | initdb_metadata_changes_[i]); |
102 | 0 | } |
103 | |
|
104 | 0 | const string metadata_changes_file = JoinPathSegments( |
105 | 0 | dest_path, |
106 | 0 | kSysCatalogSnapshotTabletMetadataChangesFile); |
107 | 0 | RETURN_NOT_OK(WritePBContainerToPath( |
108 | 0 | Env::Default(), |
109 | 0 | metadata_changes_file, |
110 | 0 | exported_tablet_metadata_changes, |
111 | 0 | pb_util::CreateMode::NO_OVERWRITE, |
112 | 0 | pb_util::SyncMode::NO_SYNC)); |
113 | 0 | LOG(INFO) << "Wrote " << initdb_metadata_changes_.size() << " tablet metadata changes to file " |
114 | 0 | << metadata_changes_file; |
115 | |
|
116 | 0 | LOG(INFO) << "Created initial sys catalog snapshot at " << dest_path; |
117 | 0 | return Status::OK(); |
118 | 0 | } |
119 | | |
120 | | // ------------------------------------------------------------------------------------------------ |
121 | | // End of InitialSysCatalogSnapshotWriter |
122 | | // ------------------------------------------------------------------------------------------------ |
123 | | |
124 | | Status RestoreInitialSysCatalogSnapshot( |
125 | | const std::string& initial_snapshot_path, |
126 | | tablet::TabletPeer* sys_catalog_tablet_peer, |
127 | 361 | int64_t term) { |
128 | 361 | TabletSnapshotOpRequestPB tablet_snapshot_req; |
129 | 361 | tablet_snapshot_req.set_operation(yb::tserver::TabletSnapshotOpRequestPB::RESTORE_ON_TABLET); |
130 | 361 | tablet_snapshot_req.add_tablet_id(kSysCatalogTabletId); |
131 | 361 | tablet_snapshot_req.set_snapshot_dir_override( |
132 | 361 | JoinPathSegments(initial_snapshot_path, kSysCatalogSnapshotRocksDbSubDir)); |
133 | | |
134 | 361 | TabletSnapshotOpResponsePB tablet_snapshot_resp; |
135 | 361 | auto operation = std::make_unique<SnapshotOperation>( |
136 | 361 | sys_catalog_tablet_peer->tablet(), &tablet_snapshot_req); |
137 | | |
138 | 361 | CountDownLatch latch(1); |
139 | 361 | operation->set_completion_callback( |
140 | 361 | tablet::MakeLatchOperationCompletionCallback(&latch, &tablet_snapshot_resp)); |
141 | | |
142 | 361 | sys_catalog_tablet_peer->Submit(std::move(operation), term); |
143 | | |
144 | | // Now restore tablet metadata. |
145 | 361 | tserver::ExportedTabletMetadataChanges tablet_metadata_changes; |
146 | 361 | RETURN_NOT_OK(ReadPBContainerFromPath( |
147 | 361 | Env::Default(), |
148 | 361 | JoinPathSegments(initial_snapshot_path, kSysCatalogSnapshotTabletMetadataChangesFile), |
149 | 361 | &tablet_metadata_changes)); |
150 | 202k | for (const auto& change_metadata_req : tablet_metadata_changes.metadata_changes()) { |
151 | 202k | RETURN_NOT_OK(tablet::SyncReplicateChangeMetadataOperation( |
152 | 202k | &change_metadata_req, |
153 | 202k | sys_catalog_tablet_peer, |
154 | 202k | term)); |
155 | 202k | } |
156 | 361 | LOG(INFO) << "Imported " << tablet_metadata_changes.metadata_changes_size() |
157 | 361 | << " tablet metadata changes"; |
158 | | |
159 | 361 | latch.Wait(); |
160 | 361 | return Status::OK(); |
161 | 361 | } |
162 | | |
163 | 4.93k | void SetDefaultInitialSysCatalogSnapshotFlags() { |
164 | | // Allowing to turn off the use of initial catalog snapshot with an env variable -- useful in |
165 | | // tests. |
166 | 4.93k | const char* env_var_value = getenv(kUseInitialSysCatalogSnapshotEnvVar); |
167 | 4.93k | if (env_var_value && strcmp(env_var_value, "0") == 0) { |
168 | 0 | LOG(INFO) << "Disabling the use of initial sys catalog snapshot: env var " |
169 | 0 | << kUseInitialSysCatalogSnapshotEnvVar << " is set to 0"; |
170 | 0 | FLAGS_use_initial_sys_catalog_snapshot = 0; |
171 | 0 | FLAGS_enable_ysql = 0; |
172 | 0 | } |
173 | | |
174 | 4.93k | if (FLAGS_initial_sys_catalog_snapshot_path.empty() && |
175 | 4.93k | !FLAGS_create_initial_sys_catalog_snapshot && |
176 | 4.93k | (FLAGS_use_initial_sys_catalog_snapshot || FLAGS_enable_ysql)) { |
177 | 872 | const char* kStaticDataParentDir = "share"; |
178 | 872 | const std::string search_for_dir = JoinPathSegments( |
179 | 872 | kStaticDataParentDir, kDefaultInitialSysCatalogSnapshotDir, |
180 | 872 | kSysCatalogSnapshotRocksDbSubDir); |
181 | 0 | VLOG(1) << "Searching for directory containing subdirectory " << search_for_dir; |
182 | 872 | const string candidate_dir = |
183 | 872 | JoinPathSegments( |
184 | 872 | env_util::GetRootDir(search_for_dir), |
185 | 872 | kStaticDataParentDir, |
186 | 872 | kDefaultInitialSysCatalogSnapshotDir); |
187 | 0 | VLOG(1) << "candidate_dir=" << candidate_dir; |
188 | | |
189 | | // The metadata changes file is written last, so its presence indicates that the snapshot |
190 | | // was successful. |
191 | 872 | const string candidate_metadata_changes_path = |
192 | 872 | JoinPathSegments(candidate_dir, kSysCatalogSnapshotTabletMetadataChangesFile); |
193 | 0 | VLOG(1) << "candidate_metadata_changes_path=" << candidate_metadata_changes_path; |
194 | | |
195 | 872 | if (Env::Default()->FileExists(candidate_metadata_changes_path)) { |
196 | 0 | VLOG(1) << "Found initial sys catalog snapshot directory: " << candidate_dir; |
197 | 872 | FLAGS_initial_sys_catalog_snapshot_path = candidate_dir; |
198 | 872 | return; |
199 | 0 | } else { |
200 | 0 | VLOG(1) << "File " << candidate_metadata_changes_path << " does not exist"; |
201 | 0 | } |
202 | 4.06k | } else { |
203 | 0 | VLOG(1) |
204 | 0 | << "Not attempting initial sys catalog snapshot auto-detection: " |
205 | 0 | << "FLAGS_initial_sys_catalog_snapshot_path=" |
206 | 0 | << FLAGS_initial_sys_catalog_snapshot_path << ", " |
207 | 0 | << "FLAGS_create_initial_sys_catalog_snapshot=" |
208 | 0 | << FLAGS_create_initial_sys_catalog_snapshot << ", " |
209 | 0 | << "FLAGS_use_initial_sys_catalog_snapshot=" |
210 | 0 | << FLAGS_use_initial_sys_catalog_snapshot << ", " |
211 | 0 | << "FLAGS_enable_ysql=" |
212 | 0 | << FLAGS_enable_ysql; |
213 | 4.06k | } |
214 | 4.93k | } |
215 | | |
216 | 2.00k | bool ShouldAutoRunInitDb(SysConfigInfo* ysql_catalog_config, bool pg_proc_exists) { |
217 | 2.00k | if (pg_proc_exists) { |
218 | 363 | LOG(INFO) << "Table pg_proc exists, assuming initdb has already been run"; |
219 | 363 | return false; |
220 | 363 | } |
221 | | |
222 | 1.64k | if (!FLAGS_master_auto_run_initdb) { |
223 | 1.64k | LOG(INFO) << "--master_auto_run_initdb is set to false, not running initdb"; |
224 | 1.64k | return false; |
225 | 1.64k | } |
226 | | |
227 | 0 | { |
228 | 0 | auto l = ysql_catalog_config->LockForRead(); |
229 | 0 | if (l->pb.ysql_catalog_config().initdb_done()) { |
230 | 0 | LOG(INFO) << "Cluster configuration indicates that initdb has already completed"; |
231 | 0 | return false; |
232 | 0 | } |
233 | 0 | } |
234 | | |
235 | 0 | LOG(INFO) << "initdb has never been run on this cluster, running it"; |
236 | 0 | return true; |
237 | 0 | } |
238 | | |
239 | | Status MakeYsqlSysCatalogTablesTransactional( |
240 | | TableInfoMap* table_ids_map, |
241 | | SysCatalogTable* sys_catalog, |
242 | | SysConfigInfo* ysql_catalog_config, |
243 | 2.00k | int64_t term) { |
244 | 2.00k | { |
245 | 2.00k | auto ysql_catalog_config_lock = ysql_catalog_config->LockForRead(); |
246 | 2.00k | const auto& ysql_catalog_config_pb = ysql_catalog_config_lock->pb.ysql_catalog_config(); |
247 | 2.00k | if (ysql_catalog_config_pb.transactional_sys_catalog_enabled()) { |
248 | 60 | LOG(INFO) << "YSQL catalog tables are already transactional"; |
249 | 60 | return Status::OK(); |
250 | 60 | } |
251 | 1.94k | } |
252 | | |
253 | 1.94k | int num_updated_tables = 0; |
254 | 235k | for (const auto& iter : *table_ids_map) { |
255 | 235k | const auto& table_id = iter.first; |
256 | 235k | auto& table_info = *iter.second; |
257 | | |
258 | 235k | if (!IsPgsqlId(table_id)) { |
259 | 33.0k | continue; |
260 | 33.0k | } |
261 | | |
262 | 202k | { |
263 | 202k | TabletInfos tablet_infos = table_info.GetTablets(); |
264 | 202k | if (tablet_infos.size() != 1 || tablet_infos.front()->tablet_id() != kSysCatalogTabletId) { |
265 | 0 | continue; |
266 | 0 | } |
267 | 202k | } |
268 | | |
269 | 202k | auto table_lock = table_info.LockForWrite(); |
270 | 202k | auto& schema = *table_lock.mutable_data()->mutable_schema(); |
271 | 202k | auto& table_properties = *schema.mutable_table_properties(); |
272 | | |
273 | 202k | bool should_modify = false; |
274 | 202k | if (!table_properties.is_ysql_catalog_table()) { |
275 | 0 | table_properties.set_is_ysql_catalog_table(true); |
276 | 0 | should_modify = true; |
277 | 0 | } |
278 | 202k | if (!table_properties.is_transactional()) { |
279 | 0 | table_properties.set_is_transactional(true); |
280 | 0 | should_modify = true; |
281 | 0 | } |
282 | 202k | if (!should_modify) { |
283 | 202k | continue; |
284 | 202k | } |
285 | | |
286 | 0 | num_updated_tables++; |
287 | 0 | LOG(INFO) << "Making YSQL system catalog table transactional: " << table_info.ToString(); |
288 | | |
289 | | // Change table properties in tablet metadata. |
290 | 0 | tablet::ChangeMetadataRequestPB change_req; |
291 | 0 | change_req.set_tablet_id(kSysCatalogTabletId); |
292 | 0 | auto& add_table = *change_req.mutable_add_table(); |
293 | 0 | VERIFY_RESULT(sys_catalog->tablet_peer()->tablet_metadata()->GetTableInfo(table_id))->ToPB( |
294 | 0 | &add_table); |
295 | 0 | auto& metadata_table_properties = *add_table.mutable_schema()->mutable_table_properties(); |
296 | 0 | metadata_table_properties.set_is_ysql_catalog_table(true); |
297 | 0 | metadata_table_properties.set_is_transactional(true); |
298 | |
|
299 | 0 | RETURN_NOT_OK(tablet::SyncReplicateChangeMetadataOperation( |
300 | 0 | &change_req, sys_catalog->tablet_peer().get(), term)); |
301 | | |
302 | | // Change table properties in the sys catalog. We do this after updating tablet metadata, so |
303 | | // that if a restart happens before this step succeeds, we'll retry updating both next time. |
304 | 0 | RETURN_NOT_OK(sys_catalog->Upsert(term, &table_info)); |
305 | 0 | table_lock.Commit(); |
306 | 0 | } |
307 | | |
308 | 1.94k | if (num_updated_tables > 0) { |
309 | 0 | LOG(INFO) << "Made " << num_updated_tables << " YSQL sys catalog tables transactional"; |
310 | 0 | } |
311 | | |
312 | 1.94k | LOG(INFO) << "Marking YSQL system catalog as transactional in YSQL catalog config"; |
313 | 1.94k | { |
314 | 1.94k | auto ysql_catalog_lock = ysql_catalog_config->LockForWrite(); |
315 | 1.94k | auto* ysql_catalog_config_pb = |
316 | 1.94k | ysql_catalog_lock.mutable_data()->pb.mutable_ysql_catalog_config(); |
317 | 1.94k | ysql_catalog_config_pb->set_transactional_sys_catalog_enabled(true); |
318 | 1.94k | RETURN_NOT_OK(sys_catalog->Upsert(term, ysql_catalog_config)); |
319 | 1.94k | ysql_catalog_lock.Commit(); |
320 | 1.94k | } |
321 | | |
322 | 1.94k | return Status::OK(); |
323 | 1.94k | } |
324 | | |
325 | | } // namespace master |
326 | | } // namespace yb |