/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/dbformat.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
21 | | // Use of this source code is governed by a BSD-style license that can be |
22 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
23 | | |
24 | | #ifndef YB_ROCKSDB_DB_DBFORMAT_H |
25 | | #define YB_ROCKSDB_DB_DBFORMAT_H |
26 | | |
27 | | #pragma once |
28 | | |
29 | | #include <stddef.h> |
30 | | #include <stdint.h> |
31 | | #include <stdio.h> |
32 | | |
33 | | #include <memory> |
34 | | #include <string> |
35 | | #include <unordered_map> |
36 | | #include <vector> |
37 | | |
38 | | #include "yb/rocksdb/comparator.h" |
39 | | #include "yb/rocksdb/metadata.h" |
40 | | #include "yb/rocksdb/slice_transform.h" |
41 | | #include "yb/rocksdb/status.h" |
42 | | #include "yb/rocksdb/types.h" |
43 | | #include "yb/rocksdb/util/coding.h" |
44 | | |
45 | | #include "yb/util/slice.h" |
46 | | |
47 | | namespace rocksdb { |
48 | | |
49 | | class InternalKey; |
50 | | |
51 | | // Value types encoded as the last component of internal keys. |
52 | | // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk |
53 | | // data structures. |
54 | | // The highest bit of the value type needs to be reserved to SST tables |
55 | | // for them to do more flexible encoding. |
enum ValueType : unsigned char {
  kTypeDeletion = 0x0,                    // Tombstone deleting all versions of a user key.
  kTypeValue = 0x1,                       // Regular key/value record.
  kTypeMerge = 0x2,                       // Merge-operator operand.
  kTypeLogData = 0x3,                     // WAL only.
  kTypeColumnFamilyDeletion = 0x4,        // WAL only.
  kTypeColumnFamilyValue = 0x5,           // WAL only.
  kTypeColumnFamilyMerge = 0x6,           // WAL only.
  kTypeSingleDeletion = 0x7,              // Tombstone deleting at most one version of a key.
  kTypeColumnFamilySingleDeletion = 0x8,  // WAL only.
  kMaxValue = 0x7F                        // Not used for storing records.
};
68 | | |
69 | | // kValueTypeForSeek defines the ValueType that should be passed when |
70 | | // constructing a ParsedInternalKey object for seeking to a particular |
71 | | // sequence number (since we sort sequence numbers in decreasing order |
72 | | // and the value type is embedded as the low 8 bits in the sequence |
73 | | // number in internal keys, we need to use the highest-numbered |
74 | | // ValueType, not the lowest). |
75 | | static const ValueType kValueTypeForSeek = kTypeSingleDeletion; |
76 | | |
77 | | // Checks whether a type is a value type (i.e. a type used in memtables and sst |
78 | | // files). |
79 | 1.24G | inline bool IsValueType(ValueType t) { |
80 | 1.24G | return t <= kTypeMerge || t == kTypeSingleDeletion; |
81 | 1.24G | } |
82 | | |
83 | | // We leave eight bits empty at the bottom so a type and sequence# |
84 | | // can be packed together into 64-bits. |
85 | | static const SequenceNumber kMaxSequenceNumber = |
86 | | ((0x1ull << 56) - 1); |
87 | | |
88 | | // Size of the last component of the internal key. Appended by RocksDB, consists of encoded |
89 | | // ValueType and SequenceNumber. |
90 | | constexpr auto kLastInternalComponentSize = 8; |
91 | | |
// Decoded form of an internal key: the user-key bytes plus the
// (sequence, type) pair that is normally packed into the 8-byte trailer.
struct ParsedInternalKey {
  Slice user_key;           // Points into externally owned memory.
  SequenceNumber sequence;  // 56-bit sequence number.
  ValueType type;           // Record type (kTypeValue, kTypeDeletion, ...).

  ParsedInternalKey() { }  // Intentionally left uninitialized (for speed)
  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
      : user_key(u), sequence(seq), type(t) { }
  // Human-readable representation; hex controls how key bytes are printed.
  std::string DebugString(bool hex = false) const;
};
102 | | |
103 | | // Return the length of the encoding of "key". |
104 | 0 | inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { |
105 | 0 | return key.user_key.size() + kLastInternalComponentSize; |
106 | 0 | } |
107 | | |
108 | | // Pack a sequence number and a ValueType into a uint64_t |
109 | | extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t); |
110 | | |
// Decoded contents of the 8-byte internal-key trailer: a sequence number
// together with the record's ValueType.
struct SequenceAndType {
  SequenceNumber sequence;
  ValueType type;
};
115 | | |
116 | | SequenceAndType UnPackSequenceAndTypeFromEnd(const void* end); |
117 | | |
118 | | // Append the serialization of "key" to *result. |
119 | | extern void AppendInternalKey(std::string* result, |
120 | | const ParsedInternalKey& key); |
121 | | |
122 | | // Attempt to parse an internal key from "internal_key". On success, |
123 | | // stores the parsed data in "*result", and returns true. |
124 | | // |
125 | | // On error, returns false, leaves "*result" in an undefined state. |
126 | | extern bool ParseInternalKey(const Slice& internal_key, |
127 | | ParsedInternalKey* result); |
128 | | |
129 | | // Returns the user key portion of an internal key. |
130 | 21.1G | inline Slice ExtractUserKey(const Slice& internal_key) { |
131 | 21.1G | assert(internal_key.size() >= kLastInternalComponentSize); |
132 | 21.1G | return Slice(internal_key.data(), internal_key.size() - kLastInternalComponentSize); |
133 | 21.1G | } |
134 | | |
135 | 0 | inline ValueType ExtractValueType(const Slice& internal_key) { |
136 | 0 | assert(internal_key.size() >= kLastInternalComponentSize); |
137 | 0 | const size_t n = internal_key.size(); |
138 | 0 | uint64_t num = DecodeFixed64(internal_key.data() + n - kLastInternalComponentSize); |
139 | 0 | unsigned char c = num & 0xff; |
140 | 0 | return static_cast<ValueType>(c); |
141 | 0 | } |
142 | | |
// Rounds "size" up to the nearest multiple of 16. Used when sizing key
// buffers so that repeated small growths reuse the same allocation size.
inline size_t RoundUpTo16(const size_t size) {
  return (size + 15) / 16 * 16;
}
146 | | |
147 | | // A comparator for internal keys that uses a specified comparator for |
148 | | // the user key portion and breaks ties by decreasing sequence number. |
class InternalKeyComparator : public Comparator {
 private:
  const Comparator* user_comparator_;  // Not owned.
  std::string name_;                   // "rocksdb.InternalKeyComparator:<user comparator name>".
 public:
  explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c),
    name_("rocksdb.InternalKeyComparator:" +
          std::string(user_comparator_->Name())) {
  }
  virtual ~InternalKeyComparator() {}

  virtual const char* Name() const override;
  // Orders encoded internal keys: by user key per user_comparator_, with
  // ties broken by decreasing sequence number (implemented out of line).
  virtual int Compare(const Slice& a, const Slice& b) const override;
  virtual void FindShortestSeparator(std::string* start,
                                     const Slice& limit) const override;
  virtual void FindShortSuccessor(std::string* key) const override;

  // Underlying user-key comparator (not owned by this object).
  const Comparator* user_comparator() const { return user_comparator_; }

  int Compare(const InternalKey& a, const InternalKey& b) const;
  int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
};
171 | | |
172 | | typedef std::shared_ptr<const InternalKeyComparator> InternalKeyComparatorPtr; |
173 | | |
174 | | // Modules in this directory should keep internal keys wrapped inside |
175 | | // the following class instead of plain strings so that we do not |
176 | | // incorrectly use string comparisons instead of an InternalKeyComparator. |
class InternalKey {
 private:
  std::string rep_;  // Owned encoded bytes: user key + 8-byte (seqno, type) trailer.
 public:
  InternalKey() { }  // Leave rep_ as empty to indicate it is invalid
  InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
    AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
  }

  // Returns the internal key that is bigger or equal to all internal keys with this user key.
  static InternalKey MaxPossibleForUserKey(const Slice& _user_key) {
    return InternalKey(_user_key, kMaxSequenceNumber, kValueTypeForSeek);
  }

  // Returns the internal key that is smaller or equal to all internal keys with this user key.
  static InternalKey MinPossibleForUserKey(const Slice& _user_key) {
    return InternalKey(_user_key, 0, static_cast<ValueType>(0));
  }

  // True iff rep_ holds a well-formed internal key (non-empty, long enough,
  // and carrying a valid record type).
  bool Valid() const {
    ParsedInternalKey parsed;
    return ParseInternalKey(Slice(rep_), &parsed);
  }

  // Copies an already-encoded internal key into a new InternalKey.
  static InternalKey __attribute__ ((warn_unused_result)) DecodeFrom(const Slice& s) {
    return InternalKey(s);
  }

  // Returns the encoded representation. Must not be called on an
  // empty/invalid key.
  Slice Encode() const {
    assert(!rep_.empty());
    return rep_;
  }

  // User-key portion (everything except the 8-byte trailer).
  Slice user_key() const { return ExtractUserKey(rep_); }
  size_t size() const { return rep_.size(); }
  bool empty() const { return rep_.empty(); }

  // Replaces the current contents with the encoding of "p".
  void SetFrom(const ParsedInternalKey& p) {
    rep_.clear();
    AppendInternalKey(&rep_, p);
  }

  // Resets to the invalid (empty) state.
  void Clear() { rep_.clear(); }

  std::string DebugString(bool hex = false) const {
    return DebugString(rep_, hex);
  }

  static std::string DebugString(const std::string& rep, bool hex = false);
 private:
  // Private: used only by DecodeFrom to adopt pre-encoded bytes.
  explicit InternalKey(const Slice& slice)
      : rep_(slice.ToBuffer()) {
  }
};
231 | | |
// Interface for decoding and extracting user-defined boundary values
// (UserBoundaryValues) associated with keys/values, and for creating user
// frontiers. Implemented outside of RocksDB by the embedding application.
class BoundaryValuesExtractor {
 public:
  // Decodes a single boundary value identified by "tag" from "data".
  virtual Status Decode(UserBoundaryTag tag, Slice data, UserBoundaryValuePtr* value) = 0;
  // Extracts boundary values for the given key/value pair into "values".
  virtual Status Extract(Slice user_key, Slice value, UserBoundaryValues* values) = 0;
  virtual UserFrontierPtr CreateFrontier() = 0;
 protected:
  // Protected non-virtual destructor: instances are not meant to be deleted
  // through this interface pointer.
  ~BoundaryValuesExtractor() {}
};
240 | | |
241 | | yb::Result<FileBoundaryValues<InternalKey>> MakeFileBoundaryValues( |
242 | | BoundaryValuesExtractor* extractor, |
243 | | const Slice& key, |
244 | | const Slice& value); |
245 | | |
// Create FileBoundaryValues from specified user_key, seqno, value_type.
// Only the key and seqno fields are filled in; other members of
// FileBoundaryValues keep their default values.
inline FileBoundaryValues<InternalKey> MakeFileBoundaryValues(
    const std::string& user_key,
    SequenceNumber seqno,
    ValueType value_type) {
  FileBoundaryValues<InternalKey> result;
  result.key = InternalKey(user_key, seqno, value_type);
  result.seqno = seqno;
  return result;
}
256 | | |
// Compares two InternalKey wrappers by delegating to the Slice overload on
// their encoded representations (both keys must be valid, see Encode()).
inline int InternalKeyComparator::Compare(
    const InternalKey& a, const InternalKey& b) const {
  return Compare(a.Encode(), b.Encode());
}
261 | | |
262 | | inline bool ParseInternalKey(const Slice& internal_key, |
263 | 991M | ParsedInternalKey* result) { |
264 | 991M | const size_t n = internal_key.size(); |
265 | 991M | if (n < kLastInternalComponentSize) return false; |
266 | 991M | uint64_t num = DecodeFixed64(internal_key.data() + n - kLastInternalComponentSize); |
267 | 991M | unsigned char c = num & 0xff; |
268 | 991M | result->sequence = num >> 8; |
269 | 991M | result->type = static_cast<ValueType>(c); |
270 | 991M | assert(result->type <= ValueType::kMaxValue); |
271 | 991M | result->user_key = Slice(internal_key.data(), n - kLastInternalComponentSize); |
272 | 991M | return IsValueType(result->type); |
273 | 991M | } |
274 | | |
275 | | // Update the sequence number in the internal key. |
276 | | // Guarantees not to invalidate ikey.data(). |
277 | 40.1k | inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) { |
278 | 40.1k | size_t ikey_sz = ikey->size(); |
279 | 40.1k | assert(ikey_sz >= kLastInternalComponentSize); |
280 | 40.1k | uint64_t newval = (seq << 8) | t; |
281 | | |
282 | | // Note: Since C++11, strings are guaranteed to be stored contiguously and |
283 | | // string::operator[]() is guaranteed not to change ikey.data(). |
284 | 40.1k | EncodeFixed64(&(*ikey)[ikey_sz - kLastInternalComponentSize], newval); |
285 | 40.1k | } |
286 | | |
287 | | // Get the sequence number from the internal key |
288 | 7 | inline uint64_t GetInternalKeySeqno(const Slice& internal_key) { |
289 | 7 | const size_t n = internal_key.size(); |
290 | 7 | assert(n >= kLastInternalComponentSize); |
291 | 7 | uint64_t num = DecodeFixed64(internal_key.data() + n - kLastInternalComponentSize); |
292 | 7 | return num >> 8; |
293 | 7 | } |
294 | | |
295 | | |
296 | | // A helper class useful for DBImpl::Get() |
class LookupKey {
 public:
  // Initialize *this for looking up user_key at a snapshot with
  // the specified sequence number.
  LookupKey(const Slice& _user_key, SequenceNumber sequence);

  ~LookupKey();

  // Return a key suitable for lookup in a MemTable.
  Slice memtable_key() const {
    return Slice(start_, static_cast<size_t>(end_ - start_));
  }

  // Return an internal key (suitable for passing to an internal iterator)
  Slice internal_key() const {
    return Slice(kstart_, static_cast<size_t>(end_ - kstart_));
  }

  // Return the user key (internal key minus the 8-byte trailer).
  Slice user_key() const {
    return Slice(kstart_, static_cast<size_t>(end_ - kstart_ - kLastInternalComponentSize));
  }

 private:
  // We construct a char array of the form:
  //    klength  varint32               <-- start_
  //    userkey  char[klength]          <-- kstart_
  //    tag      uint64
  //                                    <-- end_
  // The array is a suitable MemTable key.
  // The suffix starting with "userkey" can be used as an InternalKey.
  const char* start_;
  const char* kstart_;
  const char* end_;
  // Inline storage used when the key fits; otherwise start_ points at a
  // heap buffer, which the destructor frees.
  char space_[200];  // Avoid allocation for short keys

  // No copying allowed
  LookupKey(const LookupKey&);
  void operator=(const LookupKey&);
};
337 | | |
// Frees the heap buffer iff construction spilled past the inline space_
// array (start_ == space_ means the inline buffer was sufficient).
inline LookupKey::~LookupKey() {
  if (start_ != space_) delete[] start_;
}
341 | | |
// Reusable key buffer used by iterators to build, copy and patch internal
// keys without reallocating for every key. Keys up to sizeof(space_) bytes
// live in the inline array; longer keys use a heap buffer that only grows.
// Alternatively, key_ may point at externally owned ("pinned") memory when
// set via SetKey(key, /* copy = */ false); see IsKeyPinned().
class IterKey {
 public:
  IterKey()
      : buf_(space_), buf_size_(sizeof(space_)), key_(buf_), key_size_(0) {}

  ~IterKey() { ResetBuffer(); }

  // Returns the current key contents (no copy).
  Slice GetKey() const { return Slice(key_, key_size_); }

  // Returns the current key without its 8-byte (seqno, type) trailer.
  Slice GetUserKey() const {
    assert(key_size_ >= kLastInternalComponentSize);
    return Slice(key_, key_size_ - kLastInternalComponentSize);
  }

  inline size_t Size() const { return key_size_; }

  // Makes the key empty; keeps any allocated buffer for reuse.
  void Clear() { key_size_ = 0; }

  // Append "non_shared_data" to its back, from "shared_len"
  // This function is used in Block::Iter::ParseNextKey
  // shared_len: bytes in [0, shared_len-1] would be retained
  // non_shared_data: data to be appended, its length must be >= non_shared_len
  void TrimAppend(const size_t shared_len, const char* non_shared_data,
                  const size_t non_shared_len) {
    assert(shared_len <= key_size_);
    size_t total_size = shared_len + non_shared_len;

    if (IsKeyPinned() /* key is not in buf_ */) {
      // Copy the key from external memory to buf_ (copy shared_len bytes)
      EnlargeBufferIfNeeded(total_size);
      memcpy(buf_, key_, shared_len);
    } else if (total_size > buf_size_) {
      // Need to allocate space, delete previous space
      char* p = new char[total_size];
      memcpy(p, key_, shared_len);

      if (buf_ != space_) {
        delete[] buf_;
      }

      buf_ = p;
      buf_size_ = total_size;
    }

    memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
    key_ = buf_;
    key_size_ = total_size;
  }

  // Updates key_ based on passed information about sizes of components to reuse and
  // non_shared_data.
  // key_ contents will be updated to:
  // ----------------------------------------------------------------------
  // |shared_prefix|non_shared_1|shared_middle|non_shared_2|last_component|
  // ----------------------------------------------------------------------
  // where (we use a Python slice like syntax below for byte array slices, end offset is exclusive):
  // shared_prefix is: key_[0:shared_prefix_size]
  // non_shared_1 is: non_shared_data[0:non_shared_1_size]
  // shared_middle is:
  //   key_[source_shared_middle_start:source_shared_middle_start + shared_middle_size]
  // non_shared_2 is:
  //   non_shared_data[non_shared_1_size:non_shared_1_size + non_shared_2_size]
  // last_component is: key_[key_.size() - shared_last_component_size:key_.size()]
  //   increased by shared_last_component_increase (as uint64_t for performance reasons).
  // shared_last_component_size should be either 0 or kLastInternalComponentSize (8).
  //
  // This function might reallocate buf_ that is used to store key_ data if its size is not enough.
  void Update(
      const char* non_shared_data, const uint32_t shared_prefix_size,
      const uint32_t non_shared_1_size, const uint32_t source_shared_middle_start,
      const uint32_t shared_middle_size, const uint32_t non_shared_2_size,
      const uint32_t shared_last_component_size, const uint64_t shared_last_component_increase) {
    DCHECK_LE(
        shared_prefix_size + shared_middle_size + shared_last_component_size, key_size_);

    // Following constants contain offsets of respective updated key components according to the
    // diagram above.
    const auto new_shared_middle_start = shared_prefix_size + non_shared_1_size;
    const auto new_non_shared_2_start = new_shared_middle_start + shared_middle_size;
    const auto new_shared_last_component_start = new_non_shared_2_start + non_shared_2_size;

    const auto new_key_size = new_shared_last_component_start + shared_last_component_size;

    // Read the last component before buf_ is touched, since the source bytes
    // may be overwritten by the copies below.
    uint64_t last_component FASTDEBUG_FAKE_INIT(0);
    if (shared_last_component_size > 0) {
      // Get shared last component from key_ and increase it by shared_last_component_increase
      // (could be 0 if we just reuse last component as is).
      DCHECK_EQ(shared_last_component_size, kLastInternalComponentSize);
      last_component = DecodeFixed64(key_ + key_size_ - kLastInternalComponentSize) +
                       shared_last_component_increase;
    }

    if (IsKeyPinned() /* key_ pointer was set externally and not owned by us */ ) {
      // Copy shared_prefix and shared_middle to new positions. Since key_ was set externally by
      // IterKey::SetKey caller, it doesn't overlap with buf_ owned by IterKey and we can just use
      // memcpy.
      EnlargeBufferIfNeeded(new_key_size);
      memcpy(buf_, key_, shared_prefix_size);
      memcpy(buf_ + new_shared_middle_start, key_ + source_shared_middle_start, shared_middle_size);
    } else if (new_key_size > buf_size_) {
      // Enlarge buf_ to be enough to store updated key and copy shared_prefix and shared_middle to
      // new positions.
      const auto new_buf_size = RoundUpTo16(new_key_size);
      char* p = new char[new_buf_size];

      memcpy(p, key_, shared_prefix_size);
      memcpy(p + new_shared_middle_start, key_ + source_shared_middle_start, shared_middle_size);

      if (buf_ != space_) {
        delete[] buf_;
      }

      buf_ = p;
      buf_size_ = new_buf_size;
    } else {
      // buf_ has enough size to store updated key, no need to reallocate, just move
      // shared_middle to the new position (shared_prefix always starts at the beginning, so it is
      // already there). memmove is required because source and destination may overlap in buf_.
      DCHECK_EQ(buf_, key_);
      if (new_shared_middle_start != source_shared_middle_start && shared_middle_size > 0) {
        memmove(
            buf_ + new_shared_middle_start, key_ + source_shared_middle_start, shared_middle_size);
      }
    }

    // At this point buf_ contains shared_prefix and shared_middle at new positions for the updated
    // key.
    if (shared_last_component_size > 0) {
      // Put last_component to its new place.
      EncodeFixed64(buf_ + new_shared_last_component_start, last_component);
    }

    // Copy non-shared components.
    memcpy(buf_ + shared_prefix_size, non_shared_data, non_shared_1_size);
    if (non_shared_2_size > 0) {
      memcpy(buf_ + new_non_shared_2_start, non_shared_data + non_shared_1_size, non_shared_2_size);
    }
    key_ = buf_;
    key_size_ = new_key_size;
  }

  // Sets the key. With copy == true the bytes are copied into the owned
  // buffer; with copy == false key_ is "pinned" to the caller's memory,
  // which must outlive the next use of this IterKey.
  Slice SetKey(const Slice& key, bool copy = true) {
    size_t size = key.size();
    if (copy) {
      // Copy key to buf_
      EnlargeBufferIfNeeded(size);
      memcpy(buf_, key.data(), size);
      key_ = buf_;
    } else {
      // Update key_ to point to external memory
      key_ = key.cdata();
    }
    key_size_ = size;
    return Slice(key_, key_size_);
  }

  // Copies the content of key, updates the reference to the user key in ikey
  // and returns a Slice referencing the new copy.
  Slice SetKey(const Slice& key, ParsedInternalKey* ikey) {
    size_t key_n = key.size();
    assert(key_n >= kLastInternalComponentSize);
    SetKey(key);
    ikey->user_key = Slice(key_, key_n - kLastInternalComponentSize);
    return Slice(key_, key_n);
  }

  // Update the sequence number in the internal key. Guarantees not to
  // invalidate slices to the key (and the user key). Requires the key to be
  // stored in the owned buffer (i.e. not pinned).
  void UpdateInternalKey(uint64_t seq, ValueType t) {
    assert(!IsKeyPinned());
    assert(key_size_ >= kLastInternalComponentSize);
    uint64_t newval = (seq << 8) | t;
    EncodeFixed64(&buf_[key_size_ - kLastInternalComponentSize], newval);
  }

  // Returns true iff key_ is not owned by IterKey, but instead was set externally
  // using IterKey::SetKey(key, /* copy = */ false).
  bool IsKeyPinned() const { return (key_ != buf_); }

  // Builds an internal key in the owned buffer:
  // key_prefix + user_key + packed (s, value_type) trailer.
  void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
                      SequenceNumber s,
                      ValueType value_type = kValueTypeForSeek) {
    size_t psize = key_prefix.size();
    size_t usize = user_key.size();
    EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t));
    if (psize > 0) {
      memcpy(buf_, key_prefix.data(), psize);
    }
    memcpy(buf_ + psize, user_key.data(), usize);
    EncodeFixed64(buf_ + usize + psize, PackSequenceAndType(s, value_type));

    key_ = buf_;
    key_size_ = psize + usize + sizeof(uint64_t);
  }

  void SetInternalKey(const Slice& user_key, SequenceNumber s,
                      ValueType value_type = kValueTypeForSeek) {
    SetInternalKey(Slice(), user_key, s, value_type);
  }

  // Ensures capacity for "size" bytes and marks the key as that long.
  // The key contents are left uninitialized.
  void Reserve(size_t size) {
    EnlargeBufferIfNeeded(size);
    key_size_ = size;
  }

  void SetInternalKey(const ParsedInternalKey& parsed_key) {
    SetInternalKey(Slice(), parsed_key);
  }

  void SetInternalKey(const Slice& key_prefix,
                      const ParsedInternalKey& parsed_key_suffix) {
    SetInternalKey(key_prefix, parsed_key_suffix.user_key,
                   parsed_key_suffix.sequence, parsed_key_suffix.type);
  }

  // Writes varint32(key.size()) followed by the key bytes into the buffer.
  // NOTE(review): key_size_ is not updated here, so GetKey()/Size() do not
  // reflect the encoded result — callers appear to read buf_ directly;
  // confirm against call sites.
  void EncodeLengthPrefixedKey(const Slice& key) {
    auto size = key.size();
    EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
    char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
    memcpy(ptr, key.data(), size);
    key_ = buf_;
  }

 private:
  char* buf_;          // Current storage (space_ or heap-allocated).
  size_t buf_size_;    // Capacity of buf_.
  const char* key_;    // Current key start; == buf_ unless pinned externally.
  size_t key_size_;    // Length of the current key.
  char space_[32];     // Avoid allocation for short keys

  // Frees any heap buffer and reverts to the inline space_ array.
  void ResetBuffer() {
    if (buf_ != space_) {
      delete[] buf_;
      buf_ = space_;
    }
    buf_size_ = sizeof(space_);
    key_size_ = 0;
  }

  // Enlarge the buffer size if needed based on key_size.
  // By default, static allocated buffer is used. Once there is a key
  // larger than the static allocated buffer, another buffer is dynamically
  // allocated, until a larger key buffer is requested. In that case, we
  // reallocate buffer and delete the old one. Note: existing contents are
  // NOT preserved across the reallocation.
  void EnlargeBufferIfNeeded(const size_t key_size) {
    // If size is smaller than buffer size, continue using current buffer,
    // or the static allocated one, as default
    if (key_size > buf_size_) {
      // Need to enlarge the buffer.
      ResetBuffer();
      const auto new_buf_size = RoundUpTo16(key_size);
      buf_ = new char[new_buf_size];
      buf_size_ = new_buf_size;
    }
  }

  // No copying allowed
  IterKey(const IterKey&) = delete;
  void operator=(const IterKey&) = delete;
};
602 | | |
// Adapts a user-key SliceTransform so it can be applied to internal keys:
// each operation strips the 8-byte (seqno, type) trailer via ExtractUserKey
// before delegating to the wrapped transform.
class InternalKeySliceTransform : public SliceTransform {
 public:
  explicit InternalKeySliceTransform(const SliceTransform* transform)
      : transform_(transform) {}

  virtual const char* Name() const override { return transform_->Name(); }

  virtual Slice Transform(const Slice& src) const override {
    auto user_key = ExtractUserKey(src);
    return transform_->Transform(user_key);
  }

  virtual bool InDomain(const Slice& src) const override {
    auto user_key = ExtractUserKey(src);
    return transform_->InDomain(user_key);
  }

  virtual bool InRange(const Slice& dst) const override {
    auto user_key = ExtractUserKey(dst);
    return transform_->InRange(user_key);
  }

  // The wrapped user-key transform (not owned).
  const SliceTransform* user_prefix_extractor() const { return transform_; }

 private:
  // Like comparator, InternalKeySliceTransform will not take care of the
  // deletion of transform_
  const SliceTransform* const transform_;
};
632 | | |
633 | | // Read record from a write batch piece from input. |
634 | | // tag, column_family, key, value and blob are return values. Callers own the |
635 | | // Slice they point to. |
636 | | // Tag is defined as ValueType. |
637 | | // input will be advanced to after the record. |
638 | | extern Status ReadRecordFromWriteBatch(Slice* input, char* tag, |
639 | | uint32_t* column_family, Slice* key, |
640 | | Slice* value, Slice* blob); |
641 | | } // namespace rocksdb |
642 | | |
643 | | #endif // YB_ROCKSDB_DB_DBFORMAT_H |