/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/plain_table_builder.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | |
21 | | #ifndef ROCKSDB_LITE |
22 | | |
23 | | #include "yb/rocksdb/table/plain_table_builder.h" |
24 | | |
25 | | #include <assert.h> |
26 | | |
27 | | #include <limits> |
28 | | #include <map> |
29 | | |
30 | | #include "yb/rocksdb/comparator.h" |
31 | | #include "yb/rocksdb/env.h" |
32 | | #include "yb/rocksdb/options.h" |
33 | | #include "yb/rocksdb/table.h" |
34 | | #include "yb/rocksdb/table/block_builder.h" |
35 | | #include "yb/rocksdb/table/bloom_block.h" |
36 | | #include "yb/rocksdb/table/format.h" |
37 | | #include "yb/rocksdb/table/meta_blocks.h" |
38 | | #include "yb/rocksdb/util/coding.h" |
39 | | #include "yb/rocksdb/util/file_reader_writer.h" |
40 | | |
41 | | #include "yb/util/status_log.h" |
42 | | |
43 | | namespace rocksdb { |
44 | | |
45 | | namespace { |
46 | | |
47 | | // a utility that helps writing block content to the file |
48 | | // @offset will advance if @block_contents was successfully written. |
49 | | // @block_handle the block handle this particular block. |
50 | | Status WriteBlock(const Slice& block_contents, WritableFileWriter* file, |
51 | 4.25k | uint64_t* offset, BlockHandle* block_handle) { |
52 | 4.25k | block_handle->set_offset(*offset); |
53 | 4.25k | block_handle->set_size(block_contents.size()); |
54 | 4.25k | Status s = file->Append(block_contents); |
55 | | |
56 | 4.25k | if (s.ok()) { |
57 | 4.25k | *offset += block_contents.size(); |
58 | 4.25k | } |
59 | 4.25k | return s; |
60 | 4.25k | } |
61 | | |
62 | | } // namespace |
63 | | |
// kPlainTableMagicNumber was picked by running
//    echo rocksdb.table.plain | sha1sum
// and taking the leading 64 bits.
extern const uint64_t kPlainTableMagicNumber = 0x8242229663BF9564ULL;
// Legacy magic number; PlainTableBuilder::Finish() in this file writes this
// value into the footer.
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4F3418EB7A8F13B8ULL;
69 | | |
70 | | PlainTableBuilder::PlainTableBuilder( |
71 | | const ImmutableCFOptions& ioptions, |
72 | | const IntTblPropCollectorFactories& int_tbl_prop_collector_factories, |
73 | | uint32_t column_family_id, |
74 | | WritableFileWriter* file, |
75 | | uint32_t user_key_len, |
76 | | EncodingType encoding_type, |
77 | | size_t index_sparseness, |
78 | | uint32_t bloom_bits_per_key, |
79 | | uint32_t num_probes, size_t |
80 | | huge_page_tlb_size, |
81 | | double hash_table_ratio, |
82 | | bool store_index_in_file) |
83 | | : ioptions_(ioptions), |
84 | | bloom_block_(num_probes), |
85 | | file_(file), |
86 | | bloom_bits_per_key_(bloom_bits_per_key), |
87 | | huge_page_tlb_size_(huge_page_tlb_size), |
88 | | encoder_(encoding_type, user_key_len, ioptions.prefix_extractor, |
89 | | index_sparseness), |
90 | | store_index_in_file_(store_index_in_file), |
91 | 2.06k | prefix_extractor_(ioptions.prefix_extractor) { |
92 | | // Build index block and save it in the file if hash_table_ratio > 0 |
93 | 2.06k | if (store_index_in_file_) { |
94 | 64 | assert(hash_table_ratio > 0 || IsTotalOrderMode()); |
95 | 64 | index_builder_.reset( |
96 | 64 | new PlainTableIndexBuilder(&arena_, ioptions, index_sparseness, |
97 | 64 | hash_table_ratio, huge_page_tlb_size_)); |
98 | 64 | assert(bloom_bits_per_key_ > 0); |
99 | 64 | properties_.user_collected_properties |
100 | 64 | [PlainTablePropertyNames::kBloomVersion] = "1"; // For future use |
101 | 64 | } |
102 | | |
103 | 2.06k | properties_.fixed_key_len = user_key_len; |
104 | | |
105 | | // for plain table, we put all the data in a big chuck. |
106 | 2.06k | properties_.num_data_blocks = 1; |
107 | | // Fill it later if store_index_in_file_ == true |
108 | 2.06k | properties_.data_index_size = 0; |
109 | 2.06k | properties_.num_data_index_blocks = 0; |
110 | 2.06k | properties_.filter_index_size = 0; |
111 | 2.06k | properties_.num_filter_blocks = 0; |
112 | 2.06k | properties_.filter_size = 0; |
113 | | // To support roll-back to previous version, now still use version 0 for |
114 | | // plain encoding. |
115 | 1.95k | properties_.format_version = (encoding_type == kPlain) ? 0 : 1; |
116 | | |
117 | 2.06k | if (ioptions_.prefix_extractor) { |
118 | 1.70k | properties_.user_collected_properties |
119 | 1.70k | [PlainTablePropertyNames::kPrefixExtractorName] = |
120 | 1.70k | ioptions_.prefix_extractor->Name(); |
121 | 1.70k | } |
122 | | |
123 | 2.06k | std::string val; |
124 | 2.06k | PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType())); |
125 | 2.06k | properties_.user_collected_properties |
126 | 2.06k | [PlainTablePropertyNames::kEncodingType] = val; |
127 | | |
128 | 1.30k | for (auto& collector_factories : int_tbl_prop_collector_factories) { |
129 | 1.30k | table_properties_collectors_.emplace_back( |
130 | 1.30k | collector_factories->CreateIntTblPropCollector(column_family_id)); |
131 | 1.30k | } |
132 | 2.06k | } |
133 | | |
134 | 2.06k | PlainTableBuilder::~PlainTableBuilder() { |
135 | 2.06k | } |
136 | | |
137 | 355k | void PlainTableBuilder::Add(const Slice& key, const Slice& value) { |
138 | | // temp buffer for metadata bytes between key and value. |
139 | 355k | char meta_bytes_buf[6]; |
140 | 355k | size_t meta_bytes_buf_size = 0; |
141 | | |
142 | 355k | ParsedInternalKey internal_key; |
143 | 355k | ParseInternalKey(key, &internal_key); |
144 | | |
145 | | // Store key hash |
146 | 355k | if (store_index_in_file_) { |
147 | 104 | if (ioptions_.prefix_extractor == nullptr) { |
148 | 16 | keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key)); |
149 | 88 | } else { |
150 | 88 | Slice prefix = |
151 | 88 | ioptions_.prefix_extractor->Transform(internal_key.user_key); |
152 | 88 | keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix)); |
153 | 88 | } |
154 | 104 | } |
155 | | |
156 | | // Write value |
157 | 355k | assert(offset_ <= std::numeric_limits<uint32_t>::max()); |
158 | 355k | auto prev_offset = static_cast<uint32_t>(offset_); |
159 | | // Write out the key |
160 | 355k | CHECK_OK(encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf, &meta_bytes_buf_size)); |
161 | 355k | if (SaveIndexInFile()) { |
162 | 104 | index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset); |
163 | 104 | } |
164 | | |
165 | | // Write value length |
166 | 355k | uint32_t value_size = static_cast<uint32_t>(value.size()); |
167 | 355k | char* end_ptr = |
168 | 355k | EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); |
169 | 355k | assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); |
170 | 355k | meta_bytes_buf_size = end_ptr - meta_bytes_buf; |
171 | 355k | CHECK_OK(file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size))); |
172 | | |
173 | | // Write value |
174 | 355k | CHECK_OK(file_->Append(value)); |
175 | 355k | offset_ += value_size + meta_bytes_buf_size; |
176 | | |
177 | 355k | properties_.num_entries++; |
178 | 355k | properties_.raw_key_size += key.size(); |
179 | 355k | properties_.raw_value_size += value.size(); |
180 | | |
181 | | // notify property collectors |
182 | 355k | NotifyCollectTableCollectorsOnAdd( |
183 | 355k | key, value, offset_, table_properties_collectors_, ioptions_.info_log); |
184 | 355k | } |
185 | | |
186 | 53.9k | Status PlainTableBuilder::status() const { return status_; } |
187 | | |
188 | 2.06k | Status PlainTableBuilder::Finish() { |
189 | 2.06k | assert(!closed_); |
190 | 2.06k | closed_ = true; |
191 | | |
192 | 2.06k | properties_.data_size = offset_; |
193 | | |
194 | | // Write the following blocks |
195 | | // 1. [meta block: bloom] - optional |
196 | | // 2. [meta block: index] - optional |
197 | | // 3. [meta block: properties] |
198 | | // 4. [metaindex block] |
199 | | // 5. [footer] |
200 | | |
201 | 2.06k | MetaIndexBuilder meta_index_builer; |
202 | | |
203 | 2.06k | if (store_index_in_file_ && (properties_.num_entries > 0)) { |
204 | 64 | assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max()); |
205 | 64 | bloom_block_.SetTotalBits( |
206 | 64 | &arena_, |
207 | 64 | static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_, |
208 | 64 | ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log); |
209 | | |
210 | 64 | PutVarint32(&properties_.user_collected_properties |
211 | 64 | [PlainTablePropertyNames::kNumBloomBlocks], |
212 | 64 | bloom_block_.GetNumBlocks()); |
213 | | |
214 | 64 | bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_); |
215 | 64 | BlockHandle bloom_block_handle; |
216 | 64 | auto finish_result = bloom_block_.Finish(); |
217 | | |
218 | 64 | properties_.filter_size = finish_result.size(); |
219 | 64 | properties_.num_filter_blocks = 1; |
220 | 64 | auto s = WriteBlock(finish_result, file_, &offset_, &bloom_block_handle); |
221 | | |
222 | 64 | if (!s.ok()) { |
223 | 0 | return s; |
224 | 0 | } |
225 | | |
226 | 64 | BlockHandle index_block_handle; |
227 | 64 | finish_result = index_builder_->Finish(); |
228 | | |
229 | 64 | properties_.data_index_size = finish_result.size(); |
230 | 64 | properties_.num_data_index_blocks = 1; |
231 | 64 | s = WriteBlock(finish_result, file_, &offset_, &index_block_handle); |
232 | | |
233 | 64 | if (!s.ok()) { |
234 | 0 | return s; |
235 | 0 | } |
236 | | |
237 | 64 | meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle); |
238 | 64 | meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock, |
239 | 64 | index_block_handle); |
240 | 64 | } |
241 | | |
242 | | // Calculate bloom block size and index block size |
243 | 2.06k | PropertyBlockBuilder property_block_builder; |
244 | | // -- Add basic properties |
245 | 2.06k | property_block_builder.AddTableProperty(properties_); |
246 | | |
247 | 2.06k | property_block_builder.Add(properties_.user_collected_properties); |
248 | | |
249 | | // -- Add user collected properties |
250 | 2.06k | NotifyCollectTableCollectorsOnFinish(table_properties_collectors_, |
251 | 2.06k | ioptions_.info_log, |
252 | 2.06k | &property_block_builder); |
253 | | |
254 | | // -- Write property block |
255 | 2.06k | BlockHandle property_block_handle; |
256 | 2.06k | auto s = WriteBlock( |
257 | 2.06k | property_block_builder.Finish(), |
258 | 2.06k | file_, |
259 | 2.06k | &offset_, |
260 | 2.06k | &property_block_handle |
261 | 2.06k | ); |
262 | 2.06k | if (!s.ok()) { |
263 | 0 | return s; |
264 | 0 | } |
265 | 2.06k | meta_index_builer.Add(kPropertiesBlock, property_block_handle); |
266 | | |
267 | | // -- write metaindex block |
268 | 2.06k | BlockHandle metaindex_block_handle; |
269 | 2.06k | s = WriteBlock( |
270 | 2.06k | meta_index_builer.Finish(), |
271 | 2.06k | file_, |
272 | 2.06k | &offset_, |
273 | 2.06k | &metaindex_block_handle |
274 | 2.06k | ); |
275 | 2.06k | if (!s.ok()) { |
276 | 0 | return s; |
277 | 0 | } |
278 | | |
279 | | // Write Footer |
280 | | // no need to write out new footer if we're using default checksum |
281 | 2.06k | Footer footer(kLegacyPlainTableMagicNumber, 0); |
282 | 2.06k | footer.set_metaindex_handle(metaindex_block_handle); |
283 | 2.06k | footer.set_index_handle(BlockHandle::NullBlockHandle()); |
284 | 2.06k | std::string footer_encoding; |
285 | 2.06k | footer.AppendEncodedTo(&footer_encoding); |
286 | 2.06k | s = file_->Append(footer_encoding); |
287 | 2.06k | if (s.ok()) { |
288 | 2.06k | offset_ += footer_encoding.size(); |
289 | 2.06k | } |
290 | | |
291 | 2.06k | return s; |
292 | 2.06k | } |
293 | | |
294 | 4 | void PlainTableBuilder::Abandon() { |
295 | 4 | closed_ = true; |
296 | 4 | } |
297 | | |
298 | 1.28k | uint64_t PlainTableBuilder::NumEntries() const { |
299 | 1.28k | return properties_.num_entries; |
300 | 1.28k | } |
301 | | |
302 | 152k | uint64_t PlainTableBuilder::TotalFileSize() const { |
303 | 152k | return offset_; |
304 | 152k | } |
305 | | |
306 | 1.27k | uint64_t PlainTableBuilder::BaseFileSize() const { |
307 | 1.27k | return TotalFileSize(); |
308 | 1.27k | } |
309 | | |
310 | | } // namespace rocksdb |
311 | | #endif // ROCKSDB_LITE |