/Users/deen/code/yugabyte-db/src/yb/rocksdb/memtable/hash_skiplist_rep.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | |
21 | | #ifndef ROCKSDB_LITE |
22 | | #include "yb/rocksdb/memtable/hash_skiplist_rep.h" |
23 | | |
24 | | #include <atomic> |
25 | | |
26 | | #include "yb/rocksdb/memtablerep.h" |
27 | | #include "yb/rocksdb/util/arena.h" |
28 | | #include "yb/util/slice.h" |
29 | | #include "yb/rocksdb/slice_transform.h" |
30 | | #include "yb/rocksdb/port/port.h" |
31 | | #include "yb/rocksdb/util/murmurhash.h" |
32 | | #include "yb/rocksdb/db/memtable.h" |
33 | | #include "yb/rocksdb/db/skiplist.h" |
34 | | |
35 | | namespace rocksdb { |
36 | | namespace { |
37 | | |
// A MemTableRep that hashes user keys (via a prefix transform) into a fixed
// array of buckets, each bucket being an independent skip list. Lookups that
// share a prefix only search one bucket; a total-order view requires merging
// all buckets (see GetIterator).
class HashSkipListRep : public MemTableRep {
 public:
  HashSkipListRep(const MemTableRep::KeyComparator& compare,
                  MemTableAllocator* allocator, const SliceTransform* transform,
                  size_t bucket_size, int32_t skiplist_height,
                  int32_t skiplist_branching_factor);

  // Inserts an encoded key (allocated via Allocate) into its prefix bucket.
  void Insert(KeyHandle handle) override;

  // Returns true iff an equal key exists in the bucket for key's prefix.
  bool Contains(const char* key) const override;

  size_t ApproximateMemoryUsage() override;

  virtual void Get(const LookupKey& k, void* callback_args,
                   bool (*callback_func)(void* arg,
                                         const char* entry)) override;

  virtual ~HashSkipListRep();

  // Returns a total-order iterator built by copying every bucket's entries
  // into one merged skip list (expensive; allocates a fresh Arena).
  MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;

  // Returns a prefix-scoped iterator that binds to a bucket at Seek() time.
  virtual MemTableRep::Iterator* GetDynamicPrefixIterator(
      Arena* arena = nullptr) override;

 private:
  friend class DynamicIterator;
  typedef SkipList<const char*, const MemTableRep::KeyComparator&> Bucket;

  // Number of hash buckets; fixed at construction.
  size_t bucket_size_;

  // Skip-list tuning parameters forwarded to every bucket.
  const int32_t skiplist_height_;
  const int32_t skiplist_branching_factor_;

  // Maps slices (which are transformed user keys) to buckets of keys sharing
  // the same transform. Entries start nullptr and are published lazily with
  // release stores (see GetInitializedBucket); readers use acquire loads.
  std::atomic<Bucket*>* buckets_;

  // The user-supplied transform whose domain is the user keys.
  const SliceTransform* transform_;

  const MemTableRep::KeyComparator& compare_;
  // immutable after construction
  MemTableAllocator* const allocator_;

  // Maps a transformed key to a bucket index via MurmurHash.
  inline size_t GetHash(const Slice& slice) const {
    return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0) %
           bucket_size_;
  }
  // Acquire-load pairs with the release-store in GetInitializedBucket so a
  // non-null bucket is always fully constructed when observed.
  inline Bucket* GetBucket(size_t i) const {
    return buckets_[i].load(std::memory_order_acquire);
  }
  inline Bucket* GetBucket(const Slice& slice) const {
    return GetBucket(GetHash(slice));
  }
  // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
  // initialize it before returning.
  Bucket* GetInitializedBucket(const Slice& transformed);

  // Iterates over a single Bucket. May own the bucket (and an Arena backing
  // it) when iterating a merged list produced by GetIterator.
  class Iterator : public MemTableRep::Iterator {
   public:
    // If own_list is true, the iterator deletes list (and the optional arena,
    // via unique_ptr) on destruction.
    explicit Iterator(Bucket* list, bool own_list = true,
                      Arena* arena = nullptr)
        : list_(list), iter_(list), own_list_(own_list), arena_(arena) {}

    virtual ~Iterator() {
      // if we own the list, we should also delete it
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
    }

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const override {
      return list_ != nullptr && iter_.Valid();
    }

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const char* key() const override {
      assert(Valid());
      return iter_.key();
    }

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next() override {
      assert(Valid());
      iter_.Next();
    }

    // Advances to the previous position.
    // REQUIRES: Valid()
    void Prev() override {
      assert(Valid());
      iter_.Prev();
    }

    // Advance to the first entry with a key >= target
    virtual void Seek(const Slice& internal_key,
                      const char* memtable_key) override {
      if (list_ != nullptr) {
        // Prefer the pre-encoded memtable key when the caller supplies one;
        // otherwise encode the internal key into tmp_.
        const char* encoded_key =
            (memtable_key != nullptr) ?
                memtable_key : EncodeKey(&tmp_, internal_key);
        iter_.Seek(encoded_key);
      }
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      if (list_ != nullptr) {
        iter_.SeekToFirst();
      }
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      if (list_ != nullptr) {
        iter_.SeekToLast();
      }
    }
   protected:
    // Rebinds the iterator to a different bucket (possibly nullptr, which
    // makes the iterator permanently !Valid() until the next Reset).
    // Ownership of the new list is NOT taken; any previously owned list is
    // freed first.
    void Reset(Bucket* list) {
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
      list_ = list;
      iter_.SetList(list);
      own_list_ = false;
    }
   private:
    // if list_ is nullptr, we should NEVER call any methods on iter_
    // if list_ is nullptr, this Iterator is not Valid()
    Bucket* list_;
    Bucket::Iterator iter_;
    // here we track if we own list_. If we own it, we are also
    // responsible for it's cleaning. This is a poor man's shared_ptr
    bool own_list_;
    // Owns the arena backing a merged list handed over by GetIterator.
    std::unique_ptr<Arena> arena_;
    std::string tmp_;  // For passing to EncodeKey
  };

  // Prefix iterator: picks the bucket matching the sought key's prefix on
  // every Seek. Does not support total-order traversal.
  class DynamicIterator : public HashSkipListRep::Iterator {
   public:
    explicit DynamicIterator(const HashSkipListRep& memtable_rep)
        : HashSkipListRep::Iterator(nullptr, false),
          memtable_rep_(memtable_rep) {}

    // Advance to the first entry with a key >= target
    void Seek(const Slice& k, const char* memtable_key) override {
      auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k));
      Reset(memtable_rep_.GetBucket(transformed));
      HashSkipListRep::Iterator::Seek(k, memtable_key);
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }
   private:
    // the underlying memtable
    const HashSkipListRep& memtable_rep_;
  };

  class EmptyIterator : public MemTableRep::Iterator {
    // This is used when there wasn't a bucket. It is cheaper than
    // instantiating an empty bucket over which to iterate.
   public:
    EmptyIterator() { }
    bool Valid() const override { return false; }
    const char* key() const override {
      assert(false);
      return nullptr;
    }
    void Next() override {}
    void Prev() override {}
    virtual void Seek(const Slice& internal_key,
                      const char* memtable_key) override {}
    void SeekToFirst() override {}
    void SeekToLast() override {}

   private:
  };
};
237 | | |
238 | | HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare, |
239 | | MemTableAllocator* allocator, |
240 | | const SliceTransform* transform, |
241 | | size_t bucket_size, int32_t skiplist_height, |
242 | | int32_t skiplist_branching_factor) |
243 | | : MemTableRep(allocator), |
244 | | bucket_size_(bucket_size), |
245 | | skiplist_height_(skiplist_height), |
246 | | skiplist_branching_factor_(skiplist_branching_factor), |
247 | | transform_(transform), |
248 | | compare_(compare), |
249 | 361 | allocator_(allocator) { |
250 | 361 | auto mem = allocator->AllocateAligned( |
251 | 361 | sizeof(std::atomic<void*>) * bucket_size); |
252 | 361 | buckets_ = new (mem) std::atomic<Bucket*>[bucket_size]; |
253 | | |
254 | 106k | for (size_t i = 0; i < bucket_size_; ++i105k ) { |
255 | 105k | buckets_[i].store(nullptr, std::memory_order_relaxed); |
256 | 105k | } |
257 | 361 | } |
258 | | |
HashSkipListRep::~HashSkipListRep() {
  // Intentionally empty: buckets_ and every Bucket skip list were carved out
  // of the MemTableAllocator, which reclaims them when the memtable dies.
}
261 | | |
262 | | HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( |
263 | 247k | const Slice& transformed) { |
264 | 247k | size_t hash = GetHash(transformed); |
265 | 247k | auto bucket = GetBucket(hash); |
266 | 247k | if (bucket == nullptr) { |
267 | 63.3k | auto addr = allocator_->AllocateAligned(sizeof(Bucket)); |
268 | 63.3k | bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_, |
269 | 63.3k | skiplist_branching_factor_); |
270 | 63.3k | buckets_[hash].store(bucket, std::memory_order_release); |
271 | 63.3k | } |
272 | 247k | return bucket; |
273 | 247k | } |
274 | | |
275 | 247k | void HashSkipListRep::Insert(KeyHandle handle) { |
276 | 247k | auto* key = static_cast<char*>(handle); |
277 | 247k | assert(!Contains(key)); |
278 | 0 | auto transformed = transform_->Transform(UserKey(key)); |
279 | 247k | auto bucket = GetInitializedBucket(transformed); |
280 | 247k | bucket->Insert(key); |
281 | 247k | } |
282 | | |
283 | 247k | bool HashSkipListRep::Contains(const char* key) const { |
284 | 247k | auto transformed = transform_->Transform(UserKey(key)); |
285 | 247k | auto bucket = GetBucket(transformed); |
286 | 247k | if (bucket == nullptr) { |
287 | 63.3k | return false; |
288 | 63.3k | } |
289 | 183k | return bucket->Contains(key); |
290 | 247k | } |
291 | | |
292 | 247k | size_t HashSkipListRep::ApproximateMemoryUsage() { |
293 | 247k | return 0; |
294 | 247k | } |
295 | | |
296 | | void HashSkipListRep::Get(const LookupKey& k, void* callback_args, |
297 | 164k | bool (*callback_func)(void* arg, const char* entry)) { |
298 | 164k | auto transformed = transform_->Transform(k.user_key()); |
299 | 164k | auto bucket = GetBucket(transformed); |
300 | 164k | if (bucket != nullptr) { |
301 | 163k | Bucket::Iterator iter(bucket); |
302 | 163k | for (iter.Seek(k.memtable_key().cdata()); |
303 | 163k | iter.Valid() && callback_func(callback_args, iter.key())161k ; |
304 | 163k | iter.Next()0 ) { |
305 | 0 | } |
306 | 163k | } |
307 | 164k | } |
308 | | |
309 | 549 | MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) { |
310 | | // allocate a new arena of similar size to the one currently in use |
311 | 549 | Arena* new_arena = new Arena(allocator_->BlockSize()); |
312 | 549 | auto list = new Bucket(compare_, new_arena); |
313 | 9.27k | for (size_t i = 0; i < bucket_size_; ++i8.72k ) { |
314 | 8.72k | auto bucket = GetBucket(i); |
315 | 8.72k | if (bucket != nullptr) { |
316 | 544 | Bucket::Iterator itr(bucket); |
317 | 49.9k | for (itr.SeekToFirst(); itr.Valid(); itr.Next()49.4k ) { |
318 | 49.4k | list->Insert(itr.key()); |
319 | 49.4k | } |
320 | 544 | } |
321 | 8.72k | } |
322 | 549 | if (arena == nullptr) { |
323 | 0 | return new Iterator(list, true, new_arena); |
324 | 549 | } else { |
325 | 549 | auto mem = arena->AllocateAligned(sizeof(Iterator)); |
326 | 549 | return new (mem) Iterator(list, true, new_arena); |
327 | 549 | } |
328 | 549 | } |
329 | | |
330 | 33 | MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) { |
331 | 33 | if (arena == nullptr) { |
332 | 0 | return new DynamicIterator(*this); |
333 | 33 | } else { |
334 | 33 | auto mem = arena->AllocateAligned(sizeof(DynamicIterator)); |
335 | 33 | return new (mem) DynamicIterator(*this); |
336 | 33 | } |
337 | 33 | } |
338 | | |
339 | | } // anon namespace |
340 | | |
// Factory entry point: builds a HashSkipListRep with the bucket count and
// skip-list parameters captured at factory construction. `logger` is unused.
MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
    const MemTableRep::KeyComparator& compare, MemTableAllocator* allocator,
    const SliceTransform* transform, Logger* logger) {
  return new HashSkipListRep(compare, allocator, transform, bucket_count_,
                             skiplist_height_, skiplist_branching_factor_);
}
347 | | |
// Public constructor for the hash-skiplist memtable factory. Ownership of the
// returned factory passes to the caller.
MemTableRepFactory* NewHashSkipListRepFactory(
    size_t bucket_count, int32_t skiplist_height,
    int32_t skiplist_branching_factor) {
  return new HashSkipListRepFactory(bucket_count, skiplist_height,
                                    skiplist_branching_factor);
}
354 | | |
355 | | } // namespace rocksdb |
356 | | #endif // ROCKSDB_LITE |