YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/util/cache.cc
Line
Count
Source
1
//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2
//  This source code is licensed under the BSD-style license found in the
3
//  LICENSE file in the root directory of this source tree. An additional grant
4
//  of patent rights can be found in the PATENTS file in the same directory.
5
//
6
// The following only applies to changes made to this file as part of YugaByte development.
7
//
8
// Portions Copyright (c) YugaByte, Inc.
9
//
10
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
11
// in compliance with the License.  You may obtain a copy of the License at
12
//
13
// http://www.apache.org/licenses/LICENSE-2.0
14
//
15
// Unless required by applicable law or agreed to in writing, software distributed under the License
16
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
17
// or implied.  See the License for the specific language governing permissions and limitations
18
// under the License.
19
//
20
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
21
// Use of this source code is governed by a BSD-style license that can be
22
// found in the LICENSE file. See the AUTHORS file for names of contributors.
23
24
#include <assert.h>
25
#include <stdio.h>
26
27
#include "yb/rocksdb/cache.h"
28
#include "yb/rocksdb/port/port.h"
29
#include "yb/rocksdb/statistics.h"
30
#include "yb/rocksdb/util/autovector.h"
31
#include "yb/rocksdb/util/hash.h"
32
#include "yb/rocksdb/util/mutexlock.h"
33
#include "yb/rocksdb/util/statistics.h"
34
35
#include "yb/util/cache_metrics.h"
36
#include "yb/util/enums.h"
37
#include "yb/util/metrics.h"
38
#include "yb/util/random_util.h"
39
40
// A value of 0 means that there is no single-touch cache, and
41
// a value of 1 means that the entire cache is treated as a single-touch cache.
42
DEFINE_double(cache_single_touch_ratio, 0.2,
43
              "Fraction of the cache dedicated to single-touch items");
44
45
DEFINE_bool(cache_overflow_single_touch, true,
46
            "Whether to enable overflow of single touch cache into the multi touch cache "
47
            "allocation");
48
49
namespace rocksdb {
50
51
5.17M
Cache::~Cache() {
52
5.17M
}
53
54
namespace {
55
56
// LRU cache implementation
57
58
// An entry is a variable length heap-allocated structure.
59
// Entries are referenced by cache and/or by any external entity.
60
// The cache keeps all its entries in a hash table. Some elements
61
// are also stored on an LRU list.
62
//
63
// LRUHandle can be in these states:
64
// 1. Referenced externally AND in hash table.
65
//  In that case the entry is *not* in the LRU. (refs > 1 && in_cache == true)
66
// 2. Not referenced externally and in hash table. In that case the entry is
67
// in the LRU and can be freed. (refs == 1 && in_cache == true)
68
// 3. Referenced externally and not in hash table. In that case the entry is
69
// not on the LRU list and not in the table. (refs >= 1 && in_cache == false)
70
//
71
// All newly created LRUHandles are in state 1. If you call LRUCache::Release
72
// on entry in state 1, it will go into state 2. To move from state 1 to
73
// state 3, either call LRUCache::Erase or LRUCache::Insert with the same key.
74
// To move from state 2 to state 1, use LRUCache::Lookup.
75
// Before destruction, make sure that no handles are in state 1. This means
76
// that any successful LRUCache::Lookup/LRUCache::Insert have a matching
77
// LRUCache::Release (to move into state 2) or LRUCache::Erase (for state 3).
78
//
79
// The LRU also supports scan-resistant access by splitting the cache into two
80
// sub-caches. query_id is used to detect repeated touches from the same query, so that they
81
// do not upgrade a cache element from the single-touch LRU into the multi-touch LRU.
82
// query_id == kInMultiTouchId means that the handle is in the multi
83
// touch cache, which means that this item will be evicted only for new values
84
// that are accessed multiple times by different queries.
85
// query_id == kNoCacheQueryId means that this Handle is not going to be added
86
// into the cache.
87
88
struct LRUHandle {
89
  void* value;
90
  void (*deleter)(const Slice&, void* value);
91
  LRUHandle* next_hash;
92
  LRUHandle* next;
93
  LRUHandle* prev;
94
  size_t charge;      // TODO(opt): Only allow uint32_t?
95
  size_t key_length;
96
  uint32_t refs;      // number of refs to this entry;
97
                      // the cache itself is counted as 1
98
  bool in_cache;      // true if this entry is referenced by the hash table
99
  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
100
  QueryId query_id;  // Query id that added the value to the cache.
101
  char key_data[1];   // Beginning of key
102
103
49.4M
  Slice key() const {
104
    // For cheaper lookups, we allow a temporary Handle object
105
    // to store a pointer to a key in "value".
106
49.4M
    if (next == this) {
107
0
      return *(reinterpret_cast<Slice*>(value));
108
49.4M
    } else {
109
49.4M
      return Slice(key_data, key_length);
110
49.4M
    }
111
49.4M
  }
112
113
2.42M
  void Free(yb::CacheMetrics* metrics) {
114
2.42M
    assert((refs == 1 && in_cache) || (refs == 0 && !in_cache));
115
2.42M
    (*deleter)(key(), value);
116
2.42M
    if (metrics != nullptr) {
117
231
      if (GetSubCacheType() == MULTI_TOUCH) {
118
15
        metrics->multi_touch_cache_usage->DecrementBy(charge);
119
216
      } else if (GetSubCacheType() == SINGLE_TOUCH) {
120
216
        metrics->single_touch_cache_usage->DecrementBy(charge);
121
216
      }
122
231
      metrics->cache_usage->DecrementBy(charge);
123
231
    }
124
2.42M
    delete[] reinterpret_cast<char*>(this);
125
2.42M
  }
126
127
212M
  SubCacheType GetSubCacheType() const {
128
146M
    return (query_id == kInMultiTouchId) ? MULTI_TOUCH : SINGLE_TOUCH;
129
212M
  }
130
};
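// A minimal lifecycle sketch of the states above, written against the public
// rocksdb::Cache API (the key, value, charge, and query id below are
// illustrative assumptions, not part of this file):
//
//   void DeleteString(const rocksdb::Slice& /*key*/, void* value) {
//     delete static_cast<std::string*>(value);
//   }
//
//   std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1024 * 1024);
//   const QueryId query_id = 1;  // any non-negative query id is valid
//   rocksdb::Cache::Handle* h = nullptr;
//   // State 1 after Insert: refs == 2 (cache + returned handle), in the hash
//   // table, not on the LRU list.
//   rocksdb::Status s = cache->Insert("k", query_id, new std::string("v"),
//                                     /*charge=*/8, &DeleteString, &h,
//                                     /*statistics=*/nullptr);
//   // Release drops the external ref: state 2 (refs == 1, appended to the LRU).
//   if (s.ok()) cache->Release(h);
//   // Lookup pins the entry again (back to state 1) until the matching Release.
//   h = cache->Lookup("k", query_id, /*statistics=*/nullptr);
//   if (h != nullptr) cache->Release(h);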
131
132
// We provide our own simple hash table since it removes a whole bunch
133
// of porting hacks and is also faster than some of the built-in hash
134
// table implementations in some of the compiler/runtime combinations
135
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
136
// 4.4.3's builtin hashtable.
137
class HandleTable {
138
 public:
139
  HandleTable() :
140
105M
      length_(0), elems_(0), list_(nullptr), metrics_(nullptr) { Resize(); }
141
142
  template <typename T>
143
104M
  void ApplyToAllCacheEntries(T func) {
144
1.77G
    for (uint32_t i = 0; i < length_; i++) {
145
1.67G
      LRUHandle* h = list_[i];
146
1.67G
      while (h != nullptr) {
147
523k
        auto n = h->next_hash;
148
523k
        assert(h->in_cache);
149
523k
        func(h);
150
523k
        h = n;
151
523k
      }
152
1.67G
    }
153
104M
  }
154
155
104M
  ~HandleTable() {
156
523k
    ApplyToAllCacheEntries([this](LRUHandle* h) {
157
523k
      if (h->refs == 1) {
158
523k
        h->Free(metrics_.get());
159
523k
      }
160
523k
    });
161
104M
    delete[] list_;
162
104M
  }
163
164
44.4M
  LRUHandle* Lookup(const Slice& key, uint32_t hash) const {
165
44.4M
    return *FindPointer(key, hash);
166
44.4M
  }
167
168
183k
  void SetMetrics(shared_ptr<yb::CacheMetrics> metrics) { metrics_ = metrics; }
169
170
  // Checks if the newly created handle is a candidate to be inserted into the multi touch cache.
171
  // It checks whether the same key is already in the multi-touch cache, or, if it is in the
172
  // single-touch cache, whether it was added by a different query.
173
2.57M
  SubCacheType GetSubCacheTypeCandidate(LRUHandle* h) {
174
2.57M
    if (h->GetSubCacheType() == MULTI_TOUCH) {
175
118k
      return MULTI_TOUCH;
176
118k
    }
177
178
2.45M
    LRUHandle* val = Lookup(h->key(), h->hash);
179
2.45M
    if (val != nullptr && (val->GetSubCacheType() == MULTI_TOUCH || val->query_id != h->query_id)) {
180
116
      h->query_id = kInMultiTouchId;
181
116
      return MULTI_TOUCH;
182
116
    }
183
2.45M
    return SINGLE_TOUCH;
184
2.45M
  }
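  // Example of the candidate logic above (query ids are illustrative): the
  // first insert of key K by query 5 lands in the single-touch pool. If query 7
  // later touches K while it is still cached, the query ids differ, so the
  // handle is tagged kInMultiTouchId and moves to the multi-touch pool.
  // Repeated touches of K by query 5 alone stay single-touch, which is what
  // keeps a large scan from flooding the multi-touch pool.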
185
186
2.57M
  LRUHandle* Insert(LRUHandle* h) {
187
2.57M
    LRUHandle** ptr = FindPointer(h->key(), h->hash);
188
2.57M
    LRUHandle* old = *ptr;
189
2.56M
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
190
2.57M
    *ptr = h;
191
2.57M
    if (old == nullptr) {
192
2.56M
      ++elems_;
193
2.56M
      if (elems_ > length_) {
194
        // Since each cache entry is fairly large, we aim for a small
195
        // average linked list length (<= 1).
196
14.5k
        Resize();
197
14.5k
      }
198
2.56M
    }
199
2.57M
    return old;
200
2.57M
  }
201
202
1.89M
  LRUHandle* Remove(const Slice& key, uint32_t hash) {
203
1.89M
    LRUHandle** ptr = FindPointer(key, hash);
204
1.89M
    LRUHandle* result = *ptr;
205
1.89M
    if (result != nullptr) {
206
1.89M
      *ptr = result->next_hash;
207
1.89M
      --elems_;
208
1.89M
    }
209
1.89M
    return result;
210
1.89M
  }
211
212
 private:
213
  // The table consists of an array of buckets where each bucket is
214
  // a linked list of cache entries that hash into the bucket.
215
  uint32_t length_;
216
  uint32_t elems_;
217
  LRUHandle** list_;
218
  shared_ptr<yb::CacheMetrics> metrics_;
219
220
  // Return a pointer to slot that points to a cache entry that
221
  // matches key/hash.  If there is no such cache entry, return a
222
  // pointer to the trailing slot in the corresponding linked list.
223
48.8M
  LRUHandle** FindPointer(const Slice& key, uint32_t hash) const {
224
48.8M
    LRUHandle** ptr = &list_[hash & (length_ - 1)];
225
58.1M
    while (*ptr != nullptr &&
226
49.4M
           ((*ptr)->hash != hash || key != (*ptr)->key())) {
227
9.29M
      ptr = &(*ptr)->next_hash;
228
9.29M
    }
229
48.8M
    return ptr;
230
48.8M
  }
231
232
105M
  void Resize() {
233
105M
    uint32_t new_length = 16;
234
105M
    while (new_length < elems_ * 1.5) {
235
28.3k
      new_length *= 2;
236
28.3k
    }
237
105M
    LRUHandle** new_list = new LRUHandle*[new_length];
238
105M
    memset(new_list, 0, sizeof(new_list[0]) * new_length);
239
105M
    uint32_t count = 0;
240
105M
    LRUHandle* h;
241
105M
    LRUHandle* next;
242
105M
    LRUHandle** ptr;
243
105M
    uint32_t hash;
244
106M
    for (uint32_t i = 0; i < length_; i++) {
245
804k
      h = list_[i];
246
1.62M
      while (h != nullptr) {
247
819k
        next = h->next_hash;
248
819k
        hash = h->hash;
249
819k
        ptr = &new_list[hash & (new_length - 1)];
250
819k
        h->next_hash = *ptr;
251
819k
        *ptr = h;
252
819k
        h = next;
253
819k
        count++;
254
819k
      }
255
804k
    }
256
105M
    assert(elems_ == count);
257
105M
    delete[] list_;
258
105M
    list_ = new_list;
259
105M
    length_ = new_length;
260
105M
  }
261
};
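// The pointer-to-slot idiom in FindPointer() above generalizes to any chained
// hash table: returning the address of the link that points at the matching
// node (or at the chain's trailing nullptr slot) lets callers splice nodes in
// and out without re-walking the chain. A standalone sketch of the same idiom
// (simplified types, illustrative only):
//
//   struct Node { int key; Node* next; };
//
//   Node** FindSlot(Node** bucket_head, int key) {
//     Node** ptr = bucket_head;
//     while (*ptr != nullptr && (*ptr)->key != key) {
//       ptr = &(*ptr)->next;
//     }
//     return ptr;  // slot holding the match, or the trailing nullptr slot
//   }
//
//   // O(1) removal once the slot is known, mirroring HandleTable::Remove():
//   //   Node** slot = FindSlot(&bucket, k);
//   //   if (*slot != nullptr) { *slot = (*slot)->next; }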
262
263
// Sub-cache of the LRUCache that is used to track different LRU pointers, capacity and usage.
264
class LRUSubCache {
265
 public:
266
  LRUSubCache();
267
  ~LRUSubCache();
268
269
  // Accessors.
270
580M
  size_t Usage() const {
271
580M
    return usage_;
272
580M
  }
273
274
204
  size_t LRU_Usage() const {
275
204
    return lru_usage_;
276
204
  }
277
278
496k
  LRUHandle& LRU_Head() {
279
496k
    return lru_;
280
496k
  }
281
282
  // Checks if the head of the LRU linked list is pointing to itself,
283
  // meaning that LRU list is empty.
284
3.20M
  bool IsLRUEmpty() const {
285
3.20M
    return lru_.next == &lru_;
286
3.20M
  }
287
288
93.1k
  size_t GetPinnedUsage() const {
289
93.1k
    assert(usage_ >= lru_usage_);
290
93.1k
    return usage_ - lru_usage_;
291
93.1k
  }
292
293
1.97M
  void DecrementUsage(const size_t charge) {
294
1.97M
    assert(usage_ >= charge);
295
1.97M
    usage_ -= charge;
296
1.97M
  }
297
298
2.64M
  void IncrementUsage(const size_t charge) {
299
2.64M
    assert(usage_ + charge > 0);
300
2.64M
    usage_ += charge;
301
2.64M
  }
302
303
  void LRU_Remove(LRUHandle* e);
304
  void LRU_Append(LRUHandle *e);
305
306
 private:
307
  // Dummy head of this sub-cache's LRU list.
308
  // lru.prev is the newest entry, lru.next is the oldest entry.
309
  // The LRU list contains items which can be evicted, i.e. referenced only by the cache.
310
  LRUHandle lru_;
311
312
  // Memory size for entries residing in the cache.
313
  // Includes entries in the LRU list and entries referenced by callers (not eligible for cleanup).
314
  size_t usage_;
315
316
  // Memory size for entries residing only in the LRU list
317
  size_t lru_usage_;
318
};
319
320
210M
LRUSubCache::LRUSubCache() : usage_(0), lru_usage_(0) {
321
  // Make empty circular linked list
322
210M
  lru_.next = &lru_;
323
210M
  lru_.prev = &lru_;
324
210M
}
325
326
209M
LRUSubCache::~LRUSubCache() {}
327
328
// Remove the handle from the LRU list of the sub cache.
329
34.4M
void LRUSubCache::LRU_Remove(LRUHandle* e) {
330
34.4M
  assert(e->next != nullptr);
331
34.4M
  assert(e->prev != nullptr);
332
34.4M
  e->next->prev = e->prev;
333
34.4M
  e->prev->next = e->next;
334
34.4M
  e->prev = e->next = nullptr;
335
34.4M
  lru_usage_ -= e->charge;
336
34.4M
}
337
338
// Append to the LRU header of the sub cache.
339
35.1M
void LRUSubCache::LRU_Append(LRUHandle *e) {
340
35.1M
  assert(e->next == nullptr);
341
35.1M
  assert(e->prev == nullptr);
342
35.1M
  e->next = &lru_;
343
35.1M
  e->prev = lru_.prev;
344
35.1M
  e->prev->next = e;
345
35.1M
  e->next->prev = e;
346
35.1M
  lru_usage_ += e->charge;
347
35.1M
}
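// Because lru_ is a dummy head of a circular doubly-linked list, LRU_Append()
// and LRU_Remove() above run in O(1) with no null checks on neighbours. A
// standalone sketch of the same structure (illustrative, not part of this file):
//
//   struct Link { Link* prev; Link* next; };
//
//   void Init(Link* head) { head->prev = head->next = head; }
//   bool Empty(const Link* head) { return head->next == head; }  // cf. IsLRUEmpty()
//
//   void AppendBefore(Link* pos, Link* e) {  // LRU_Append(e) uses pos == &lru_
//     e->next = pos;
//     e->prev = pos->prev;
//     e->prev->next = e;
//     e->next->prev = e;
//   }
//
//   void Remove(Link* e) {
//     e->next->prev = e->prev;
//     e->prev->next = e->next;
//     e->prev = e->next = nullptr;
//   }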
348
349
class LRUHandleDeleter {
350
 public:
351
108M
  explicit LRUHandleDeleter(yb::CacheMetrics* metrics) : metrics_(metrics) {}
352
353
498k
  void Add(LRUHandle* handle) {
354
498k
    handles_.push_back(handle);
355
498k
  }
356
357
0
  size_t TotalCharge() const {
358
0
    size_t result = 0;
359
0
    for (LRUHandle* handle : handles_) {
360
0
      result += handle->charge;
361
0
    }
362
0
    return result;
363
0
  }
364
365
108M
  ~LRUHandleDeleter() {
366
498k
    for (LRUHandle* handle : handles_) {
367
498k
      handle->Free(metrics_);
368
498k
    }
369
108M
  }
370
371
 private:
372
  yb::CacheMetrics* metrics_;
373
  autovector<LRUHandle*> handles_;
374
};
375
376
// A single shard of sharded cache.
377
class LRUCache {
378
 public:
379
  LRUCache();
380
  ~LRUCache();
381
382
  // Separate from constructor so caller can easily make an array of LRUCache.
383
  // If current usage is more than the new capacity, the function will attempt to
384
  // free the needed space.
385
  void SetCapacity(size_t capacity);
386
387
183k
  void SetMetrics(shared_ptr<yb::CacheMetrics> metrics) {
388
183k
    metrics_ = metrics;
389
183k
    table_.SetMetrics(metrics);
390
183k
  }
391
392
  // Set the flag to reject insertion if the cache is full.
393
  void SetStrictCapacityLimit(bool strict_capacity_limit);
394
395
  // Like Cache methods, but with an extra "hash" parameter.
396
  Status Insert(const Slice& key, uint32_t hash, const QueryId query_id,
397
                void* value, size_t charge, void (*deleter)(const Slice& key, void* value),
398
                Cache::Handle** handle, Statistics* statistics);
399
  Cache::Handle* Lookup(const Slice& key, uint32_t hash, const QueryId query_id,
400
                        Statistics* statistics = nullptr);
401
  void Release(Cache::Handle* handle);
402
  void Erase(const Slice& key, uint32_t hash);
403
  size_t Evict(size_t required);
404
405
  // Although on some platforms updates of size_t are atomic, to make sure
406
  // GetUsage() and GetPinnedUsage() work correctly on any platform, we
407
  // protect them with mutex_.
408
409
26.0k
  size_t GetUsage() const {
410
26.0k
    MutexLock l(&mutex_);
411
26.0k
    return single_touch_sub_cache_.Usage() + multi_touch_sub_cache_.Usage();
412
26.0k
  }
413
414
46.5k
  size_t GetPinnedUsage() const {
415
46.5k
    MutexLock l(&mutex_);
416
46.5k
    return single_touch_sub_cache_.GetPinnedUsage() + multi_touch_sub_cache_.GetPinnedUsage();
417
46.5k
  }
418
419
  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
420
                              bool thread_safe);
421
422
7
  std::pair<size_t, size_t> TEST_GetIndividualUsages() {
423
7
    return std::pair<size_t, size_t>(
424
7
        single_touch_sub_cache_.Usage(), multi_touch_sub_cache_.Usage());
425
7
  }
426
427
 private:
428
  void LRU_Remove(LRUHandle* e);
429
  void LRU_Append(LRUHandle* e);
430
431
  // Returns the correct SubCache based on the input argument.
432
  LRUSubCache* GetSubCache(const SubCacheType subcache_type);
433
  LRUSubCache single_touch_sub_cache_;
434
  LRUSubCache multi_touch_sub_cache_;
435
436
  size_t total_capacity_;
437
  size_t multi_touch_capacity_;
438
439
  // Just reduce the reference count by 1.
440
  // Return true if last reference
441
  bool Unref(LRUHandle* e);
442
443
  // Returns the capacity of the subcache.
444
  // For multi touch cache it is the same as its initial allocation.
445
  // For single touch cache it is the amount of space left in the entire cache.
446
  size_t GetSubCacheCapacity(const SubCacheType subcache_type);
447
448
  // Free some space following strict LRU policy until enough space
449
  // to hold (usage_ + charge) is freed or the lru list is empty.
450
  // This function is not thread safe - it needs to be executed while
451
  // holding the mutex_
452
  void EvictFromLRU(size_t charge, LRUHandleDeleter* deleted, SubCacheType subcache_type);
453
454
  void DecrementUsage(const SubCacheType subcache_type, const size_t charge);
455
456
  // Checks if the corresponding subcache contains space.
457
  bool HasFreeSpace(const SubCacheType subcache_type);
458
459
118M
  size_t TotalUsage() const {
460
118M
    return single_touch_sub_cache_.Usage() + multi_touch_sub_cache_.Usage();
461
118M
  }
462
463
  // Whether to reject insertion if the cache reaches its full capacity.
464
  bool strict_capacity_limit_ = false;
465
466
  // mutex_ protects the following state.
467
  // We don't count mutex_ as the cache's internal state so semantically we
468
  // don't mind mutex_ invoking the non-const actions.
469
  mutable port::Mutex mutex_;
470
471
  HandleTable table_;
472
473
  shared_ptr<yb::CacheMetrics> metrics_;
474
};
475
476
105M
LRUCache::LRUCache() {}
477
478
104M
LRUCache::~LRUCache() {}
479
480
42.3M
bool LRUCache::Unref(LRUHandle* e) {
481
42.3M
  assert(e->refs > 0);
482
42.3M
  e->refs--;
483
42.3M
  return e->refs == 0;
484
42.3M
}
485
486
325M
LRUSubCache* LRUCache::GetSubCache(const SubCacheType subcache_type) {
487
325M
  if (FLAGS_cache_single_touch_ratio == 0) {
488
902
    return &multi_touch_sub_cache_;
489
325M
  } else if (FLAGS_cache_single_touch_ratio == 1) {
490
902
    return &single_touch_sub_cache_;
491
902
  }
492
325M
  return (subcache_type == SubCacheType::MULTI_TOUCH) ? &multi_touch_sub_cache_ :
493
146M
                                                        &single_touch_sub_cache_;
494
325M
}
495
496
47.6k
void LRUCache::DecrementUsage(const SubCacheType subcache_type, const size_t charge) {
497
47.6k
  GetSubCache(subcache_type)->DecrementUsage(charge);
498
47.6k
}
499
500
// Call deleter and free
501
502
void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
503
16
                                      bool thread_safe) {
504
16
  if (thread_safe) {
505
16
    mutex_.Lock();
506
16
  }
507
10
  table_.ApplyToAllCacheEntries([callback](LRUHandle* h) {
508
10
    callback(h->value, h->charge);
509
10
  });
510
16
  if (thread_safe) {
511
16
    mutex_.Unlock();
512
16
  }
513
16
}
514
515
33.9M
void LRUCache::LRU_Remove(LRUHandle* e) {
516
33.9M
  GetSubCache(e->GetSubCacheType())->LRU_Remove(e);
517
33.9M
}
518
519
35.1M
void LRUCache::LRU_Append(LRUHandle* e) {
520
  // Make "e" newest entry by inserting just before lru_
521
35.1M
  GetSubCache(e->GetSubCacheType())->LRU_Append(e);
522
35.1M
}
523
524
213M
size_t LRUCache::GetSubCacheCapacity(const SubCacheType subcache_type) {
525
213M
  switch (subcache_type) {
526
108M
    case SINGLE_TOUCH :
527
108M
      if (strict_capacity_limit_ || !FLAGS_cache_overflow_single_touch) {
528
447
        return total_capacity_ - multi_touch_capacity_;
529
447
      }
530
108M
      return total_capacity_ - multi_touch_sub_cache_.Usage();
531
105M
    case MULTI_TOUCH :
532
105M
      return multi_touch_capacity_;
533
0
  }
534
0
  FATAL_INVALID_ENUM_VALUE(SubCacheType, subcache_type);
535
0
}
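// Worked example (illustrative numbers): with total_capacity_ == 100 and
// multi_touch_capacity_ == 80, MULTI_TOUCH is always capped at 80. The
// SINGLE_TOUCH cap depends on overflow: with cache_overflow_single_touch
// enabled and multi-touch usage currently at 50, single-touch may grow up to
// 100 - 50 = 50; under a strict capacity limit or with overflow disabled it is
// fixed at 100 - 80 = 20.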
536
537
void LRUCache::EvictFromLRU(const size_t charge,
538
                            LRUHandleDeleter* deleted,
539
213M
                            const SubCacheType subcache_type) {
540
213M
  LRUSubCache* sub_cache = GetSubCache(subcache_type);
541
213M
  const size_t capacity = GetSubCacheCapacity(subcache_type);
542
214M
  while (sub_cache->Usage() + charge > capacity && !sub_cache->IsLRUEmpty())  {
543
496k
    LRUHandle* old = sub_cache->LRU_Head().next;
544
496k
    assert(old->in_cache);
545
496k
    assert(old->refs == 1);  // LRU list contains elements which may be evicted
546
496k
    sub_cache->LRU_Remove(old);
547
496k
    table_.Remove(old->key(), old->hash);
548
496k
    old->in_cache = false;
549
496k
    Unref(old);
550
496k
    sub_cache->DecrementUsage(old->charge);
551
496k
    deleted->Add(old);
552
496k
  }
553
213M
}
554
555
105M
void LRUCache::SetCapacity(size_t capacity) {
556
105M
  LRUHandleDeleter last_reference_list(metrics_.get());
557
558
105M
  {
559
105M
    MutexLock l(&mutex_);
560
105M
    multi_touch_capacity_ = round((1 - FLAGS_cache_single_touch_ratio) * capacity);
561
105M
    total_capacity_ = capacity;
562
105M
    EvictFromLRU(0, &last_reference_list, MULTI_TOUCH);
563
105M
    EvictFromLRU(0, &last_reference_list, SINGLE_TOUCH);
564
105M
  }
565
105M
}
566
567
105M
void LRUCache::SetStrictCapacityLimit(bool strict_capacity_limit) {
568
105M
  MutexLock l(&mutex_);
569
  // Allow setting strict capacity limit only when there are no elements in the cache.
570
  // This is because we disable overflowing single touch cache when strict_capacity_limit_ is true.
571
  // We cannot ensure that the single touch cache has not already overflowed when the cache
572
  // already has elements in it.
573
105M
  assert(TotalUsage() == 0 || !FLAGS_cache_overflow_single_touch);
574
105M
  strict_capacity_limit_ = strict_capacity_limit;
575
105M
}
576
577
Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash, const QueryId query_id,
578
41.9M
                                Statistics* statistics)  {
579
41.9M
  MutexLock l(&mutex_);
580
41.9M
  LRUHandle* e = table_.Lookup(key, hash);
581
41.9M
  if (e != nullptr) {
582
38.2M
    assert(e->in_cache);
583
    // Since the entry is now referenced externally, cannot be evicted, so remove from LRU.
584
38.2M
    if (e->refs == 1) {
585
33.9M
      LRU_Remove(e);
586
33.9M
    }
587
    // Increase the number of references and move to state 1. (in cache and not in LRU)
588
38.2M
    e->refs++;
589
590
    // Promote the handle to the multi-touch pool, but only if that pool exists.
591
38.2M
    if (FLAGS_cache_single_touch_ratio < 1 && e->GetSubCacheType() != MULTI_TOUCH &&
592
11.7M
        e->query_id != query_id) {
593
70.8k
      {
594
70.8k
        LRUHandleDeleter multi_touch_eviction_list(metrics_.get());
595
70.8k
        EvictFromLRU(e->charge, &multi_touch_eviction_list, MULTI_TOUCH);
596
70.8k
      }
597
      // Cannot have any single touch elements in this case.
598
70.8k
      assert(FLAGS_cache_single_touch_ratio != 0);
599
70.8k
      if (!strict_capacity_limit_ ||
600
0
          multi_touch_sub_cache_.Usage() - multi_touch_sub_cache_.LRU_Usage() + e->charge <=
601
70.8k
          multi_touch_capacity_) {
602
70.8k
        e->query_id = kInMultiTouchId;
603
70.8k
        single_touch_sub_cache_.DecrementUsage(e->charge);
604
70.8k
        multi_touch_sub_cache_.IncrementUsage(e->charge);
605
70.8k
        if (metrics_) {
606
66.6k
          metrics_->multi_touch_cache_usage->IncrementBy(e->charge);
607
66.6k
          metrics_->single_touch_cache_usage->DecrementBy(e->charge);
608
66.6k
        }
609
70.8k
      }
610
70.8k
    }
611
38.2M
    if (statistics != nullptr) {
612
      // overall cache hit
613
14.7M
      RecordTick(statistics, BLOCK_CACHE_HIT);
614
      // total bytes read from cache
615
14.7M
      RecordTick(statistics, BLOCK_CACHE_BYTES_READ, e->charge);
616
14.7M
      if (e->GetSubCacheType() == SubCacheType::SINGLE_TOUCH) {
617
3.20M
        RecordTick(statistics, BLOCK_CACHE_SINGLE_TOUCH_HIT);
618
3.20M
        RecordTick(statistics, BLOCK_CACHE_SINGLE_TOUCH_BYTES_READ, e->charge);
619
11.5M
      } else if (e->GetSubCacheType() == SubCacheType::MULTI_TOUCH) {
620
11.5M
        RecordTick(statistics, BLOCK_CACHE_MULTI_TOUCH_HIT);
621
11.5M
        RecordTick(statistics, BLOCK_CACHE_MULTI_TOUCH_BYTES_READ, e->charge);
622
11.5M
      }
623
14.7M
    }
624
3.67M
  } else {
625
3.67M
    if (statistics != nullptr) {
626
244k
      RecordTick(statistics, BLOCK_CACHE_MISS);
627
244k
    }
628
3.67M
  }
629
630
41.9M
  if (metrics_ != nullptr) {
631
13.1M
    metrics_->lookups->Increment();
632
13.1M
    bool was_hit = (e != nullptr);
633
13.1M
    if (was_hit) {
634
12.9M
      metrics_->cache_hits->Increment();
635
233k
    } else {
636
233k
      metrics_->cache_misses->Increment();
637
233k
    }
638
13.1M
  }
639
41.9M
  return reinterpret_cast<Cache::Handle*>(e);
640
41.9M
}
641
642
36.1M
bool LRUCache::HasFreeSpace(const SubCacheType subcache_type) {
643
36.1M
  switch (subcache_type) {
644
12.8M
    case SINGLE_TOUCH :
645
12.8M
      if (strict_capacity_limit_ || !FLAGS_cache_overflow_single_touch) {
646
216
        return single_touch_sub_cache_.Usage() <= (total_capacity_ - multi_touch_capacity_);
647
216
      }
648
12.8M
      return TotalUsage() <= total_capacity_;
649
23.3M
    case MULTI_TOUCH : return multi_touch_sub_cache_.Usage() <= multi_touch_capacity_;
650
0
  }
651
0
  FATAL_INVALID_ENUM_VALUE(SubCacheType, subcache_type);
652
0
}
653
654
40.5M
void LRUCache::Release(Cache::Handle* handle) {
655
40.5M
  if (handle == nullptr) {
656
0
    return;
657
0
  }
658
40.5M
  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
659
40.5M
  bool last_reference = false;
660
40.5M
  {
661
40.5M
    MutexLock l(&mutex_);
662
40.5M
    LRUSubCache* sub_cache = GetSubCache(e->GetSubCacheType());
663
40.5M
    last_reference = Unref(e);
664
40.5M
    if (last_reference) {
665
333
      sub_cache->DecrementUsage(e->charge);
666
333
    }
667
40.5M
    if (e->refs == 1 && e->in_cache) {
668
      // The item is still in cache, and nobody else holds a reference to it
669
36.1M
      if (!HasFreeSpace(e->GetSubCacheType())) {
670
        // The LRU list must be empty since the cache is full.
671
1.35M
        assert(sub_cache->IsLRUEmpty());
672
        // take this opportunity and remove the item
673
1.35M
        table_.Remove(e->key(), e->hash);
674
1.35M
        e->in_cache = false;
675
1.35M
        Unref(e);
676
1.35M
        sub_cache->DecrementUsage(e->charge);
677
1.35M
        last_reference = true;
678
34.7M
      } else {
679
        // put the item on the list to be potentially freed.
680
34.7M
        LRU_Append(e);
681
34.7M
      }
682
36.1M
    }
683
40.5M
  }
684
685
  // free outside of mutex
686
40.5M
  if (last_reference) {
687
1.35M
    e->Free(metrics_.get());
688
1.35M
  }
689
40.5M
}
690
691
0
size_t LRUCache::Evict(size_t required) {
692
0
  LRUHandleDeleter evicted(metrics_.get());
693
0
  {
694
0
    MutexLock l(&mutex_);
695
0
    EvictFromLRU(required, &evicted, SINGLE_TOUCH);
696
0
    if (required > evicted.TotalCharge()) {
697
0
      EvictFromLRU(required, &evicted, MULTI_TOUCH);
698
0
    }
699
0
  }
700
0
  return evicted.TotalCharge();
701
0
}
702
703
Status LRUCache::Insert(const Slice& key, uint32_t hash, const QueryId query_id,
704
                        void* value, size_t charge, void (*deleter)(const Slice& key, void* value),
705
2.57M
                        Cache::Handle** handle, Statistics* statistics) {
706
  // Don't use the cache if disabled by the caller using the special query id.
707
2.57M
  if (query_id == kNoCacheQueryId) {
708
0
    return Status::OK();
709
0
  }
710
  // Allocate the memory here outside of the mutex
711
  // If the cache is full, we'll have to release it
712
  // It shouldn't happen very often though.
713
2.57M
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
714
2.57M
                    new char[sizeof(LRUHandle) - 1 + key.size()]);
715
2.57M
  Status s;
716
2.57M
  LRUHandleDeleter last_reference_list(metrics_.get());
717
718
2.57M
  e->value = value;
719
2.57M
  e->deleter = deleter;
720
2.57M
  e->charge = charge;
721
2.57M
  e->key_length = key.size();
722
2.57M
  e->hash = hash;
723
2.57M
  e->refs = (handle == nullptr
724
364k
                 ? 1
725
2.20M
                 : 2);  // One from LRUCache, one for the returned handle
726
2.57M
  e->next = e->prev = nullptr;
727
2.57M
  e->in_cache = true;
728
  // Adding query id to the handle.
729
2.57M
  e->query_id = query_id;
730
2.57M
  memcpy(e->key_data, key.data(), key.size());
731
732
2.57M
  {
733
2.57M
    MutexLock l(&mutex_);
734
    // Free the space following strict LRU policy until enough space
735
    // is freed or the lru list is empty.
736
    // Check if there is a single touch cache.
737
2.57M
    SubCacheType subcache_type;
738
2.57M
    if (FLAGS_cache_single_touch_ratio == 0) {
739
100
      e->query_id = kInMultiTouchId;
740
100
      subcache_type = MULTI_TOUCH;
741
2.57M
    } else if (FLAGS_cache_single_touch_ratio == 1) {
742
      // If there is no multi touch cache, default to single cache.
743
100
      subcache_type = SINGLE_TOUCH;
744
2.57M
    } else {
745
2.57M
      subcache_type = table_.GetSubCacheTypeCandidate(e);
746
2.57M
    }
747
2.57M
    EvictFromLRU(charge, &last_reference_list, subcache_type);
748
2.57M
    LRUSubCache* sub_cache = GetSubCache(subcache_type);
749
    // If the cache no longer has space in the given pool.
750
2.57M
    if (strict_capacity_limit_ &&
751
204
        sub_cache->Usage() - sub_cache->LRU_Usage() + charge > GetSubCacheCapacity(subcache_type)) {
752
2
      if (handle == nullptr) {
753
1
        last_reference_list.Add(e);
754
1
      } else {
755
1
        delete[] reinterpret_cast<char*>(e);
756
1
        *handle = nullptr;
757
1
      }
758
2
      s = STATUS(Incomplete, "Insert failed due to LRU cache being full.");
759
2.57M
    } else {
760
      // insert into the cache
761
      // note that the cache might get larger than its capacity if not enough
762
      // space was freed
763
2.57M
      LRUHandle* old = table_.Insert(e);
764
2.57M
      sub_cache->IncrementUsage(e->charge);
765
2.57M
      if (old != nullptr) {
766
2.75k
        old->in_cache = false;
767
2.75k
        if (Unref(old)) {
768
2.42k
          DecrementUsage(old->GetSubCacheType(), old->charge);
769
          // old is on LRU because it's in cache and its reference count
770
          // was just 1 (Unref returned 0)
771
2.42k
          LRU_Remove(old);
772
2.42k
          last_reference_list.Add(old);
773
2.42k
        }
774
2.75k
      }
775
      // No external reference, so put it in LRU to be potentially evicted.
776
2.57M
      if (handle == nullptr) {
777
364k
        LRU_Append(e);
778
2.20M
      } else {
779
2.20M
        *handle = reinterpret_cast<Cache::Handle*>(e);
780
2.20M
      }
781
2.57M
      if (subcache_type == MULTI_TOUCH && FLAGS_cache_single_touch_ratio != 0) {
782
        // Evict entries from single touch cache if the total size increases. This can happen if
783
        // single touch entries have overflowed and we insert entries directly into the multi touch
784
        // cache without it going through the single touch cache.
785
118k
        EvictFromLRU(0, &last_reference_list, SINGLE_TOUCH);
786
118k
      }
787
2.57M
      s = Status::OK();
788
2.57M
    }
789
2.57M
    if (statistics != nullptr) {
790
147k
      if (s.ok()) {
791
147k
        RecordTick(statistics, BLOCK_CACHE_ADD);
792
147k
        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge);
793
147k
        if (subcache_type == SubCacheType::SINGLE_TOUCH) {
794
146k
          RecordTick(statistics, BLOCK_CACHE_SINGLE_TOUCH_ADD);
795
146k
          RecordTick(statistics, BLOCK_CACHE_SINGLE_TOUCH_BYTES_WRITE, charge);
796
724
        } else if (subcache_type == SubCacheType::MULTI_TOUCH) {
797
720
          RecordTick(statistics, BLOCK_CACHE_MULTI_TOUCH_ADD);
798
720
          RecordTick(statistics, BLOCK_CACHE_MULTI_TOUCH_BYTES_WRITE, charge);
799
720
        }
800
0
      } else {
801
0
        RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
802
0
      }
803
147k
    }
804
2.57M
    if (metrics_ != nullptr) {
805
144k
      if (subcache_type == MULTI_TOUCH) {
806
104
        metrics_->multi_touch_cache_usage->IncrementBy(charge);
807
143k
      } else {
808
143k
        metrics_->single_touch_cache_usage->IncrementBy(charge);
809
143k
      }
810
144k
      metrics_->cache_usage->IncrementBy(charge);
811
144k
    }
812
2.57M
  }
813
814
2.57M
  return s;
815
2.57M
}
816
817
46.9k
void LRUCache::Erase(const Slice& key, uint32_t hash) {
818
46.9k
  LRUHandle* e;
819
46.9k
  bool last_reference = false;
820
46.9k
  {
821
46.9k
    MutexLock l(&mutex_);
822
46.9k
    e = table_.Remove(key, hash);
823
46.9k
    if (e != nullptr) {
824
45.2k
      last_reference = Unref(e);
825
45.2k
      if (last_reference) {
826
45.2k
        DecrementUsage(e->GetSubCacheType(), e->charge);
827
45.2k
      }
828
45.2k
      if (last_reference && e->in_cache) {
829
45.2k
        LRU_Remove(e);
830
45.2k
      }
831
45.2k
      e->in_cache = false;
832
45.2k
    }
833
46.9k
  }
834
  // mutex not held here
835
  // last_reference will only be true if e != nullptr
836
46.9k
  if (last_reference) {
837
45.2k
    e->Free(metrics_.get());
838
45.2k
  }
839
46.9k
}
840
841
static int kNumShardBits = 4;          // default value, can be overridden
842
843
class ShardedLRUCache : public Cache {
844
 private:
845
  LRUCache* shards_;
846
  port::Mutex id_mutex_;
847
  port::Mutex capacity_mutex_;
848
  uint64_t last_id_;
849
  size_t num_shard_bits_;
850
  size_t capacity_;
851
  bool strict_capacity_limit_;
852
  shared_ptr<yb::CacheMetrics> metrics_;
853
854
44.5M
  static inline uint32_t HashSlice(const Slice& s) {
855
44.5M
    return Hash(s.data(), s.size(), 0);
856
44.5M
  }
857
858
85.0M
  uint32_t Shard(uint32_t hash) {
859
    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
860
85.0M
    return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
861
85.0M
  }
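  // Example: with the default kNumShardBits == 4 there are 16 shards, and
  // Shard() picks one from the top 4 bits of the hash, i.e. hash >> 28.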
862
863
44.5M
  bool IsValidQueryId(const QueryId query_id) {
864
44.5M
    return query_id >= 0 || query_id == kInMultiTouchId || query_id == kNoCacheQueryId;
865
44.5M
  }
866
867
 public:
868
  ShardedLRUCache(size_t capacity, int num_shard_bits,
869
                  bool strict_capacity_limit)
870
      : last_id_(0),
871
        num_shard_bits_(num_shard_bits),
872
        capacity_(capacity),
873
        strict_capacity_limit_(strict_capacity_limit),
874
5.21M
        metrics_(nullptr) {
875
5.21M
    int num_shards = 1 << num_shard_bits_;
876
5.21M
    shards_ = new LRUCache[num_shards];
877
5.21M
    const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
878
110M
    for (int s = 0; s < num_shards; s++) {
879
105M
      shards_[s].SetStrictCapacityLimit(strict_capacity_limit);
880
105M
      shards_[s].SetCapacity(per_shard);
881
105M
    }
882
5.21M
  }
883
884
5.16M
  virtual ~ShardedLRUCache() {
885
5.16M
    delete[] shards_;
886
5.16M
  }
887
888
3
  void SetCapacity(size_t capacity) override {
889
3
    int num_shards = 1 << num_shard_bits_;
890
3
    const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
891
3
    MutexLock l(&capacity_mutex_);
892
6
    for (int s = 0; s < num_shards; s++) {
893
3
      shards_[s].SetCapacity(per_shard);
894
3
    }
895
3
    capacity_ = capacity;
896
3
  }
897
898
  virtual Status Insert(const Slice& key, const QueryId query_id, void* value, size_t charge,
899
                        void (*deleter)(const Slice& key, void* value),
900
2.57M
                        Handle** handle, Statistics* statistics) override {
901
2.57M
    DCHECK(IsValidQueryId(query_id));
902
    // Queries with no cache query ids are not cached.
903
2.57M
    if (query_id == kNoCacheQueryId) {
904
0
      return Status::OK();
905
0
    }
906
2.57M
    const uint32_t hash = HashSlice(key);
907
2.57M
    return shards_[Shard(hash)].Insert(key, hash, query_id, value, charge, deleter,
908
2.57M
                                       handle, statistics);
909
2.57M
  }
910
911
0
  size_t Evict(size_t bytes_to_evict) override {
912
0
    auto num_shards = 1ULL << num_shard_bits_;
913
0
    size_t total_evicted = 0;
914
    // Start at a random shard.
915
0
    auto index = Shard(yb::RandomUniformInt<uint32_t>());
916
0
    for (size_t i = 0; bytes_to_evict > total_evicted && i != num_shards; ++i) {
917
0
      total_evicted += shards_[index].Evict(bytes_to_evict - total_evicted);
918
0
      index = (index + 1) & (num_shards - 1);
919
0
    }
920
0
    return total_evicted;
921
0
  }
922
923
41.9M
  Handle* Lookup(const Slice& key, const QueryId query_id, Statistics* statistics) override {
924
41.9M
    DCHECK(IsValidQueryId(query_id));
925
41.9M
    if (query_id == kNoCacheQueryId) {
926
0
      return nullptr;
927
0
    }
928
41.9M
    const uint32_t hash = HashSlice(key);
929
41.9M
    return shards_[Shard(hash)].Lookup(key, hash, query_id, statistics);
930
41.9M
  }
931
932
40.5M
  void Release(Handle* handle) override {
933
40.5M
    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
934
40.5M
    shards_[Shard(h->hash)].Release(handle);
935
40.5M
  }
936
937
46.9k
  void Erase(const Slice& key) override {
938
46.9k
    const uint32_t hash = HashSlice(key);
939
46.9k
    shards_[Shard(hash)].Erase(key, hash);
940
46.9k
  }
941
942
38.3M
  void* Value(Handle* handle) override {
943
38.3M
    return reinterpret_cast<LRUHandle*>(handle)->value;
944
38.3M
  }
945
946
140k
  uint64_t NewId() override {
947
140k
    MutexLock l(&id_mutex_);
948
140k
    return ++(last_id_);
949
140k
  }
950
951
16.6k
  size_t GetCapacity() const override { return capacity_; }
952
953
0
  bool HasStrictCapacityLimit() const override {
954
0
    return strict_capacity_limit_;
955
0
  }
956
957
124
  size_t GetUsage() const override {
958
    // We will not lock the cache when getting the usage from shards.
959
124
    int num_shards = 1 << num_shard_bits_;
960
124
    size_t usage = 0;
961
26.1k
    for (int s = 0; s < num_shards; s++) {
962
26.0k
      usage += shards_[s].GetUsage();
963
26.0k
    }
964
124
    return usage;
965
124
  }
966
967
0
  size_t GetUsage(Handle* handle) const override {
968
0
    return reinterpret_cast<LRUHandle*>(handle)->charge;
969
0
  }
970
971
186
  size_t GetPinnedUsage() const override {
972
    // We will not lock the cache when getting the usage from shards.
973
186
    int num_shards = 1 << num_shard_bits_;
974
186
    size_t usage = 0;
975
46.7k
    for (int s = 0; s < num_shards; s++) {
976
46.5k
      usage += shards_[s].GetPinnedUsage();
977
46.5k
    }
978
186
    return usage;
979
186
  }
980
981
205
  SubCacheType GetSubCacheType(Handle* e) const override {
982
205
    LRUHandle* h = reinterpret_cast<LRUHandle*>(e);
983
205
    return h->GetSubCacheType();
984
205
  }
985
986
0
  void DisownData() override {
987
0
    shards_ = nullptr;
988
0
  }
989
990
  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
991
1
                                      bool thread_safe) override {
992
1
    int num_shards = 1 << num_shard_bits_;
993
17
    for (int s = 0; s < num_shards; s++) {
994
16
      shards_[s].ApplyToAllCacheEntries(callback, thread_safe);
995
16
    }
996
1
  }
997
998
11.4k
  virtual void SetMetrics(const scoped_refptr<yb::MetricEntity>& entity) override {
999
11.4k
    int num_shards = 1 << num_shard_bits_;
1000
11.4k
    metrics_ = std::make_shared<yb::CacheMetrics>(entity);
1001
194k
    for (int s = 0; s < num_shards; s++) {
1002
183k
      shards_[s].SetMetrics(metrics_);
1003
183k
    }
1004
11.4k
  }
1005
1006
7
  virtual std::vector<std::pair<size_t, size_t>> TEST_GetIndividualUsages() override {
1007
7
    std::vector<std::pair<size_t, size_t>> cache_sizes;
1008
7
    cache_sizes.reserve(1 << num_shard_bits_);
1009
1010
14
    for (int i = 0; i < 1 << num_shard_bits_; ++i) {
1011
7
      cache_sizes.emplace_back(shards_[i].TEST_GetIndividualUsages());
1012
7
    }
1013
7
    return cache_sizes;
1014
7
  }
1015
};
1016
1017
}  // end anonymous namespace
1018
1019
4.86M
shared_ptr<Cache> NewLRUCache(size_t capacity) {
1020
4.86M
  return NewLRUCache(capacity, kNumShardBits, false);
1021
4.86M
}
1022
1023
353k
shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits) {
1024
353k
  return NewLRUCache(capacity, num_shard_bits, false);
1025
353k
}
1026
1027
shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
1028
5.21M
                              bool strict_capacity_limit) {
1029
5.21M
  if (num_shard_bits >= 20) {
1030
0
    return nullptr;  // the cache cannot be sharded into too many fine pieces
1031
0
  }
1032
5.21M
  return std::make_shared<ShardedLRUCache>(capacity, num_shard_bits,
1033
5.21M
                                           strict_capacity_limit);
1034
5.21M
}
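// A minimal construction sketch (parameter values are illustrative): a 512 MB
// cache split into 2^6 = 64 shards that rejects inserts once a shard is full.
// Each shard receives (capacity + 63) / 64 = 8 MB of the total capacity.
//
//   std::shared_ptr<rocksdb::Cache> cache =
//       rocksdb::NewLRUCache(512 * 1024 * 1024,
//                            /*num_shard_bits=*/6,
//                            /*strict_capacity_limit=*/true);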
1035
1036
}  // namespace rocksdb