/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/meta_blocks.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | |
21 | | #include "yb/rocksdb/table/meta_blocks.h" |
22 | | |
23 | | #include <map> |
24 | | #include <string> |
25 | | |
26 | | #include "yb/rocksdb/db/table_properties_collector.h" |
27 | | #include "yb/rocksdb/table.h" |
28 | | #include "yb/rocksdb/table/block.h" |
29 | | #include "yb/rocksdb/table/block_builder.h" |
30 | | #include "yb/rocksdb/table/format.h" |
31 | | #include "yb/rocksdb/table/internal_iterator.h" |
32 | | #include "yb/rocksdb/table/table_properties_internal.h" |
33 | | #include "yb/rocksdb/util/coding.h" |
34 | | #include "yb/rocksdb/util/file_reader_writer.h" |
35 | | |
36 | | DEFINE_bool(verify_encrypted_meta_block_checksums, true, |
37 | | "Whether to verify checksums for meta blocks of encrypted SSTables."); |
38 | | |
39 | | namespace rocksdb { |
40 | | |
41 | | namespace { |
42 | | |
43 | | constexpr auto kMetaIndexBlockRestartInterval = 1; |
44 | | |
45 | | // We use kKeyDeltaEncodingSharedPrefix format for property blocks, but since |
46 | | // kPropertyBlockRestartInterval == 1 every key in these blocks will still have zero shared prefix |
47 | | // length and will be stored fully. |
48 | | constexpr auto kPropertyBlockKeyValueEncodingFormat = |
49 | | KeyValueEncodingFormat::kKeyDeltaEncodingSharedPrefix; |
50 | | constexpr auto kPropertyBlockRestartInterval = 1; |
51 | | |
52 | 133k | ReadOptions CreateMetaBlockReadOptions(RandomAccessFileReader* file) { |
53 | 133k | ReadOptions read_options; |
54 | | |
55 | | // We need to verify checksums for meta blocks in order to recover from the encryption format |
56 | | // issue described at https://github.com/yugabyte/yugabyte-db/issues/3707. |
57 | | // However, we only do that for encrypted files in order to prevent lots of RocksDB unit tests |
58 | | // from failing as described at https://github.com/yugabyte/yugabyte-db/issues/3974. |
59 | 133k | read_options.verify_checksums = file->file()->IsEncrypted() && |
60 | 11 | FLAGS_verify_encrypted_meta_block_checksums; |
61 | 133k | return read_options; |
62 | 133k | } |
63 | | |
64 | | } // namespace |
65 | | |
66 | | MetaIndexBuilder::MetaIndexBuilder() |
67 | | : meta_index_block_(new BlockBuilder( |
68 | 63.4k | kMetaIndexBlockRestartInterval, kMetaIndexBlockKeyValueEncodingFormat)) {} |
69 | | |
70 | | void MetaIndexBuilder::Add(const std::string& key, |
71 | 74.0k | const BlockHandle& handle) { |
72 | 74.0k | std::string handle_encoding; |
73 | 74.0k | handle.AppendEncodedTo(&handle_encoding); |
74 | 74.0k | meta_block_handles_.insert({key, handle_encoding}); |
75 | 74.0k | } |
76 | | |
77 | 63.4k | Slice MetaIndexBuilder::Finish() { |
78 | 74.0k | for (const auto& metablock : meta_block_handles_) { |
79 | 74.0k | meta_index_block_->Add(metablock.first, metablock.second); |
80 | 74.0k | } |
81 | 63.4k | return meta_index_block_->Finish(); |
82 | 63.4k | } |
83 | | |
84 | | PropertyBlockBuilder::PropertyBlockBuilder() |
85 | | : properties_block_( |
86 | 63.4k | new BlockBuilder(kPropertyBlockRestartInterval, kPropertyBlockKeyValueEncodingFormat)) {} |
87 | | |
88 | | void PropertyBlockBuilder::Add(const std::string& name, |
89 | 1.14M | const std::string& val) { |
90 | 1.14M | props_.insert({name, val}); |
91 | 1.14M | } |
92 | | |
93 | 761k | void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) { |
94 | 761k | assert(props_.find(name) == props_.end()); |
95 | | |
96 | 761k | std::string dst; |
97 | 761k | PutVarint64(&dst, val); |
98 | | |
99 | 761k | Add(name, dst); |
100 | 761k | } |
101 | | |
102 | | void PropertyBlockBuilder::Add( |
103 | 123k | const UserCollectedProperties& user_collected_properties) { |
104 | 370k | for (const auto& prop : user_collected_properties) { |
105 | 370k | Add(prop.first, prop.second); |
106 | 370k | } |
107 | 123k | } |
108 | | |
109 | 63.4k | void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) { |
110 | 63.4k | Add(TablePropertiesNames::kRawKeySize, props.raw_key_size); |
111 | 63.4k | Add(TablePropertiesNames::kRawValueSize, props.raw_value_size); |
112 | 63.4k | Add(TablePropertiesNames::kDataSize, props.data_size); |
113 | 63.4k | Add(TablePropertiesNames::kDataIndexSize, props.data_index_size); |
114 | 63.4k | Add(TablePropertiesNames::kFilterIndexSize, props.filter_index_size); |
115 | 63.4k | Add(TablePropertiesNames::kNumEntries, props.num_entries); |
116 | 63.4k | Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks); |
117 | 63.4k | Add(TablePropertiesNames::kNumFilterBlocks, props.num_filter_blocks); |
118 | 63.4k | Add(TablePropertiesNames::kNumDataIndexBlocks, props.num_data_index_blocks); |
119 | 63.4k | Add(TablePropertiesNames::kFilterSize, props.filter_size); |
120 | 63.4k | Add(TablePropertiesNames::kFormatVersion, props.format_version); |
121 | 63.4k | Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len); |
122 | | |
123 | 63.4k | if (!props.filter_policy_name.empty()) { |
124 | 7.70k | Add(TablePropertiesNames::kFilterPolicy, |
125 | 7.70k | props.filter_policy_name); |
126 | 7.70k | } |
127 | 63.4k | } |
128 | | |
129 | 63.4k | Slice PropertyBlockBuilder::Finish() { |
130 | 1.14M | for (const auto& prop : props_) { |
131 | 1.14M | properties_block_->Add(prop.first, prop.second); |
132 | 1.14M | } |
133 | | |
134 | 63.4k | return properties_block_->Finish(); |
135 | 63.4k | } |
136 | | |
137 | | void LogPropertiesCollectionError( |
138 | 0 | Logger* info_log, const std::string& method, const std::string& name) { |
139 | 0 | assert(method == "Add" || method == "Finish"); |
140 | |
|
141 | 0 | std::string msg = |
142 | 0 | "Encountered error when calling TablePropertiesCollector::" + |
143 | 0 | method + "() with collector name: " + name; |
144 | 0 | RLOG(InfoLogLevel::ERROR_LEVEL, info_log, "%s", msg.c_str()); |
145 | 0 | } |
146 | | |
147 | | bool NotifyCollectTableCollectorsOnAdd( |
148 | | const Slice& key, const Slice& value, uint64_t file_size, |
149 | | const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors, |
150 | 92.1M | Logger* info_log) { |
151 | 92.1M | bool all_succeeded = true; |
152 | 163M | for (auto& collector : collectors) { |
153 | 163M | Status s = collector->InternalAdd(key, value, file_size); |
154 | 163M | all_succeeded = all_succeeded && s.ok(); |
155 | 163M | if (!s.ok()) { |
156 | 0 | LogPropertiesCollectionError(info_log, "Add" /* method */, |
157 | 0 | collector->Name()); |
158 | 0 | } |
159 | 163M | } |
160 | 92.1M | return all_succeeded; |
161 | 92.1M | } |
162 | | |
163 | | bool NotifyCollectTableCollectorsOnFinish( |
164 | | const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors, |
165 | 63.4k | Logger* info_log, PropertyBlockBuilder* builder) { |
166 | 63.4k | bool all_succeeded = true; |
167 | 121k | for (auto& collector : collectors) { |
168 | 121k | UserCollectedProperties user_collected_properties; |
169 | 121k | Status s = collector->Finish(&user_collected_properties); |
170 | | |
171 | 121k | all_succeeded = all_succeeded && s.ok(); |
172 | 121k | if (!s.ok()) { |
173 | 0 | LogPropertiesCollectionError(info_log, "Finish" /* method */, |
174 | 0 | collector->Name()); |
175 | 121k | } else { |
176 | 121k | builder->Add(user_collected_properties); |
177 | 121k | } |
178 | 121k | } |
179 | | |
180 | 63.4k | return all_succeeded; |
181 | 63.4k | } |
182 | | |
183 | | Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file, |
184 | | const Footer& footer, Env* env, Logger* logger, |
185 | 99.8k | TableProperties** table_properties) { |
186 | 99.8k | assert(table_properties); |
187 | | |
188 | 99.8k | Slice v = handle_value; |
189 | 99.8k | BlockHandle handle; |
190 | 99.8k | if (!handle.DecodeFrom(&v).ok()) { |
191 | 0 | return STATUS(InvalidArgument, "Failed to decode properties block handle"); |
192 | 0 | } |
193 | | |
194 | 99.8k | BlockContents block_contents; |
195 | 99.8k | ReadOptions read_options = CreateMetaBlockReadOptions(file); |
196 | 99.8k | Status s = ReadBlockContents(file, footer, read_options, handle, &block_contents, |
197 | 99.8k | env, nullptr /* mem_tracker */, false); |
198 | | |
199 | 99.8k | if (!s.ok()) { |
200 | 0 | return s; |
201 | 0 | } |
202 | | |
203 | 99.8k | Block properties_block(std::move(block_contents)); |
204 | 99.8k | std::unique_ptr<InternalIterator> iter(properties_block.NewIterator( |
205 | 99.8k | BytewiseComparator(), kPropertyBlockKeyValueEncodingFormat)); |
206 | | |
207 | 99.8k | auto new_table_properties = new TableProperties(); |
208 | | // All pre-defined properties of type uint64_t |
209 | 99.8k | std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = { |
210 | 99.8k | {TablePropertiesNames::kDataSize, &new_table_properties->data_size}, |
211 | 99.8k | {TablePropertiesNames::kDataIndexSize, &new_table_properties->data_index_size}, |
212 | 99.8k | {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size}, |
213 | 99.8k | {TablePropertiesNames::kFilterIndexSize, &new_table_properties->filter_index_size}, |
214 | 99.8k | {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size}, |
215 | 99.8k | {TablePropertiesNames::kRawValueSize, &new_table_properties->raw_value_size}, |
216 | 99.8k | {TablePropertiesNames::kNumDataBlocks, &new_table_properties->num_data_blocks}, |
217 | 99.8k | {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries}, |
218 | 99.8k | {TablePropertiesNames::kNumFilterBlocks, &new_table_properties->num_filter_blocks}, |
219 | 99.8k | {TablePropertiesNames::kNumDataIndexBlocks, &new_table_properties->num_data_index_blocks}, |
220 | 99.8k | {TablePropertiesNames::kFormatVersion, &new_table_properties->format_version}, |
221 | 99.8k | {TablePropertiesNames::kFixedKeyLen, &new_table_properties->fixed_key_len}, }; |
222 | | |
223 | 99.8k | std::string last_key; |
224 | 1.89M | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { |
225 | 1.79M | s = iter->status(); |
226 | 1.79M | if (!s.ok()) { |
227 | 0 | break; |
228 | 0 | } |
229 | | |
230 | 1.79M | auto key = iter->key().ToString(); |
231 | | // properties block is strictly sorted with no duplicate key. |
232 | 1.79M | assert(last_key.empty() || |
233 | 1.79M | BytewiseComparator()->Compare(key, last_key) > 0); |
234 | 1.79M | last_key = key; |
235 | | |
236 | 1.79M | auto raw_val = iter->value(); |
237 | 1.79M | auto pos = predefined_uint64_properties.find(key); |
238 | | |
239 | 1.79M | if (pos != predefined_uint64_properties.end()) { |
240 | | // handle predefined rocksdb properties |
241 | 1.19M | uint64_t val; |
242 | 1.19M | if (!GetVarint64(&raw_val, &val)) { |
243 | | // skip malformed value |
244 | 0 | auto error_msg = |
245 | 0 | "Detect malformed value in properties meta-block:" |
246 | 0 | "\tkey: " + key + "\tval: " + raw_val.ToString(); |
247 | 0 | RLOG(InfoLogLevel::ERROR_LEVEL, logger, "%s", error_msg.c_str()); |
248 | 0 | continue; |
249 | 0 | } |
250 | 1.19M | *(pos->second) = val; |
251 | 593k | } else if (key == TablePropertiesNames::kFilterPolicy) { |
252 | 11.8k | new_table_properties->filter_policy_name = raw_val.ToString(); |
253 | 582k | } else { |
254 | | // handle user-collected properties |
255 | 582k | new_table_properties->user_collected_properties.insert( |
256 | 582k | {key, raw_val.ToString()}); |
257 | 582k | } |
258 | 1.79M | } |
259 | 99.8k | if (s.ok()) { |
260 | 99.8k | *table_properties = new_table_properties; |
261 | 18.4E | } else { |
262 | 18.4E | delete new_table_properties; |
263 | 18.4E | } |
264 | | |
265 | 99.8k | return s; |
266 | 99.8k | } |
267 | | |
268 | | Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, |
269 | | uint64_t table_magic_number, Env* env, |
270 | 27.2k | Logger* info_log, TableProperties** properties) { |
271 | | // -- Read metaindex block |
272 | 27.2k | Footer footer; |
273 | 27.2k | auto s = ReadFooterFromFile(file, file_size, &footer, table_magic_number); |
274 | 27.2k | if (!s.ok()) { |
275 | 47 | return s; |
276 | 47 | } |
277 | | |
278 | 27.1k | auto metaindex_handle = footer.metaindex_handle(); |
279 | 27.1k | BlockContents metaindex_contents; |
280 | 27.1k | ReadOptions read_options = CreateMetaBlockReadOptions(file); |
281 | 27.1k | s = ReadBlockContents(file, footer, read_options, metaindex_handle, |
282 | 27.1k | &metaindex_contents, env, nullptr /* mem_tracker */, false); |
283 | 27.1k | if (!s.ok()) { |
284 | 0 | return s; |
285 | 0 | } |
286 | 27.1k | Block metaindex_block(std::move(metaindex_contents)); |
287 | 27.1k | std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewIterator( |
288 | 27.1k | BytewiseComparator(), kMetaIndexBlockKeyValueEncodingFormat)); |
289 | | |
290 | | // -- Read property block |
291 | 27.1k | bool found_properties_block = true; |
292 | 27.1k | s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block); |
293 | 27.1k | if (!s.ok()) { |
294 | 0 | return s; |
295 | 0 | } |
296 | | |
297 | 27.1k | TableProperties table_properties; |
298 | 27.1k | if (found_properties_block == true) { |
299 | 27.1k | s = ReadProperties(meta_iter->value(), file, footer, env, info_log, |
300 | 27.1k | properties); |
301 | 6 | } else { |
302 | 6 | s = STATUS(NotFound, ""); |
303 | 6 | } |
304 | | |
305 | 27.1k | return s; |
306 | 27.1k | } |
307 | | |
308 | | Status FindMetaBlock(InternalIterator* meta_index_iter, |
309 | | const std::string& meta_block_name, |
310 | 27.5k | BlockHandle* block_handle) { |
311 | 27.5k | meta_index_iter->Seek(meta_block_name); |
312 | 27.5k | if (meta_index_iter->status().ok() && meta_index_iter->Valid() && |
313 | 27.5k | meta_index_iter->key() == meta_block_name) { |
314 | 12.3k | Slice v = meta_index_iter->value(); |
315 | 12.3k | return block_handle->DecodeFrom(&v); |
316 | 15.2k | } else { |
317 | 15.2k | return STATUS(Corruption, "Cannot find the meta block", meta_block_name); |
318 | 15.2k | } |
319 | 27.5k | } |
320 | | |
321 | | Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size, |
322 | | uint64_t table_magic_number, Env* env, |
323 | | const std::string& meta_block_name, |
324 | | const std::shared_ptr<yb::MemTracker>& mem_tracker, |
325 | 96 | BlockHandle* block_handle) { |
326 | 96 | Footer footer; |
327 | 96 | auto s = ReadFooterFromFile(file, file_size, &footer, table_magic_number); |
328 | 96 | if (!s.ok()) { |
329 | 0 | return s; |
330 | 0 | } |
331 | | |
332 | 96 | auto metaindex_handle = footer.metaindex_handle(); |
333 | 96 | BlockContents metaindex_contents; |
334 | 96 | ReadOptions read_options = CreateMetaBlockReadOptions(file); |
335 | 96 | s = ReadBlockContents(file, footer, read_options, metaindex_handle, |
336 | 96 | &metaindex_contents, env, mem_tracker, false); |
337 | 96 | if (!s.ok()) { |
338 | 0 | return s; |
339 | 0 | } |
340 | 96 | Block metaindex_block(std::move(metaindex_contents)); |
341 | | |
342 | 96 | std::unique_ptr<InternalIterator> meta_iter; |
343 | 96 | meta_iter.reset( |
344 | 96 | metaindex_block.NewIterator(BytewiseComparator(), kMetaIndexBlockKeyValueEncodingFormat)); |
345 | | |
346 | 96 | return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); |
347 | 96 | } |
348 | | |
349 | | Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size, |
350 | | uint64_t table_magic_number, Env* env, |
351 | | const std::string& meta_block_name, |
352 | | const std::shared_ptr<yb::MemTracker>& mem_tracker, |
353 | 6.22k | BlockContents* contents) { |
354 | 6.22k | Status status; |
355 | 6.22k | Footer footer; |
356 | 6.22k | status = ReadFooterFromFile(file, file_size, &footer, table_magic_number); |
357 | 6.22k | if (!status.ok()) { |
358 | 0 | return status; |
359 | 0 | } |
360 | | |
361 | | // Reading metaindex block |
362 | 6.22k | auto metaindex_handle = footer.metaindex_handle(); |
363 | 6.22k | BlockContents metaindex_contents; |
364 | 6.22k | ReadOptions read_options = CreateMetaBlockReadOptions(file); |
365 | 6.22k | status = ReadBlockContents(file, footer, read_options, metaindex_handle, |
366 | 6.22k | &metaindex_contents, env, mem_tracker, false); |
367 | 6.22k | if (!status.ok()) { |
368 | 0 | return status; |
369 | 0 | } |
370 | | |
371 | | // Finding metablock |
372 | 6.22k | Block metaindex_block(std::move(metaindex_contents)); |
373 | | |
374 | 6.22k | std::unique_ptr<InternalIterator> meta_iter; |
375 | 6.22k | meta_iter.reset( |
376 | 6.22k | metaindex_block.NewIterator(BytewiseComparator(), kMetaIndexBlockKeyValueEncodingFormat)); |
377 | | |
378 | 6.22k | BlockHandle block_handle; |
379 | 6.22k | status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle); |
380 | | |
381 | 6.22k | if (!status.ok()) { |
382 | 6.09k | return status; |
383 | 6.09k | } |
384 | | |
385 | | // Reading metablock |
386 | 128 | return ReadBlockContents( |
387 | 128 | file, footer, read_options, block_handle, contents, env, mem_tracker, false); |
388 | 128 | } |
389 | | |
390 | | } // namespace rocksdb |