/Users/deen/code/yugabyte-db/src/yb/rocksdb/db/memtable_list.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | | // This source code is licensed under the BSD-style license found in the |
3 | | // LICENSE file in the root directory of this source tree. An additional grant |
4 | | // of patent rights can be found in the PATENTS file in the same directory. |
5 | | // |
6 | | // The following only applies to changes made to this file as part of YugaByte development. |
7 | | // |
8 | | // Portions Copyright (c) YugaByte, Inc. |
9 | | // |
10 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
11 | | // in compliance with the License. You may obtain a copy of the License at |
12 | | // |
13 | | // http://www.apache.org/licenses/LICENSE-2.0 |
14 | | // |
15 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
16 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
17 | | // or implied. See the License for the specific language governing permissions and limitations |
18 | | // under the License. |
19 | | // |
20 | | #ifndef YB_ROCKSDB_DB_MEMTABLE_LIST_H |
21 | | #define YB_ROCKSDB_DB_MEMTABLE_LIST_H |
22 | | |
23 | | #pragma once |
24 | | |
25 | | #include <deque> |
26 | | #include <list> |
27 | | #include <set> |
28 | | #include <string> |
29 | | #include <vector> |
30 | | |
31 | | #include "yb/rocksdb/db.h" |
32 | | #include "yb/rocksdb/db/dbformat.h" |
33 | | #include "yb/rocksdb/db/memtable.h" |
34 | | #include "yb/rocksdb/iterator.h" |
35 | | #include "yb/rocksdb/options.h" |
36 | | #include "yb/rocksdb/types.h" |
37 | | #include "yb/rocksdb/util/autovector.h" |
38 | | #include "yb/rocksdb/util/instrumented_mutex.h" |
39 | | #include "yb/rocksdb/util/log_buffer.h" |
40 | | |
41 | | namespace rocksdb { |
42 | | |
43 | | class ColumnFamilyData; |
44 | | class InternalKeyComparator; |
45 | | class InstrumentedMutex; |
46 | | class MergeIteratorBuilder; |
47 | | |
48 | | // Keeps a list of immutable memtables (stored in a std::list). The list is |
49 | | // immutable if the refcount is bigger than one. It is used as state for the |
50 | | // Get() and Iterator code paths. |
51 | | // |
52 | | // This class is not thread-safe. External synchronization is required |
53 | | // (such as holding the db mutex or being on the write thread). |
54 | | class MemTableListVersion { |
55 | | public: |
56 | | explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
57 | | MemTableListVersion* old = nullptr); |
58 | | explicit MemTableListVersion(size_t* parent_memtable_list_memory_usage, |
59 | | int max_write_buffer_number_to_maintain); |
60 | | |
61 | | void Ref(); |
62 | | void Unref(autovector<MemTable*>* to_delete = nullptr); |
63 | | |
64 | | // Searches all the memtables, starting from the most recent one, and |
65 | | // returns the most recent value found, if any. |
66 | | // |
67 | | // If any operation was found for this key, its most recent sequence number |
68 | | // will be stored in *seq on success (regardless of whether true/false is |
69 | | // returned). Otherwise, *seq will be set to kMaxSequenceNumber. |
70 | | bool Get(const LookupKey& key, std::string* value, Status* s, |
71 | | MergeContext* merge_context, SequenceNumber* seq); |
72 | | |
73 | | bool Get(const LookupKey& key, std::string* value, Status* s, |
74 | 7.63M | MergeContext* merge_context) { |
75 | 7.63M | SequenceNumber seq; |
76 | 7.63M | return Get(key, value, s, merge_context, &seq); |
77 | 7.63M | } |
78 | | |
79 | | // Similar to Get(), but searches the history of memtables that have |
80 | | // already been flushed. Should only be used for in-memory-only queries |
81 | | // (such as Transaction validation), as the history may contain writes |
82 | | // that are also present in the SST files. |
83 | | bool GetFromHistory(const LookupKey& key, std::string* value, Status* s, |
84 | | MergeContext* merge_context, SequenceNumber* seq); |
85 | | bool GetFromHistory(const LookupKey& key, std::string* value, Status* s, |
86 | 0 | MergeContext* merge_context) { |
87 | 0 | SequenceNumber seq; |
88 | 0 | return GetFromHistory(key, value, s, merge_context, &seq); |
89 | 0 | } |
90 | | |
91 | | void AddIterators(const ReadOptions& options, |
92 | | std::vector<InternalIterator*>* iterator_list, |
93 | | Arena* arena); |
94 | | |
95 | | void AddIterators(const ReadOptions& options, |
96 | | MergeIteratorBuilder* merge_iter_builder); |
97 | | |
98 | | uint64_t GetTotalNumEntries() const; |
99 | | |
100 | | uint64_t GetTotalNumDeletes() const; |
101 | | |
102 | | uint64_t ApproximateSize(const Slice& start_ikey, const Slice& end_ikey); |
103 | | |
104 | | // Returns the value of MemTable::GetEarliestSequenceNumber() on the most |
105 | | // recent MemTable in this list, or kMaxSequenceNumber if the list is empty. |
106 | | // If include_history=true, will also search the MemTables in the |
107 | | // MemTableList history. |
108 | | SequenceNumber GetEarliestSequenceNumber(bool include_history = false) const; |
109 | | |
110 | | std::string ToString() const; |
111 | | |
112 | | private: |
113 | | // REQUIRES: m is an immutable memtable. |
114 | | void Add(MemTable* m, autovector<MemTable*>* to_delete); |
115 | | // REQUIRES: m is an immutable memtable. |
116 | | void Remove(MemTable* m, autovector<MemTable*>* to_delete); |
117 | | |
118 | | void TrimHistory(autovector<MemTable*>* to_delete); |
119 | | |
120 | | bool GetFromList(std::list<MemTable*>* list, const LookupKey& key, |
121 | | std::string* value, Status* s, MergeContext* merge_context, |
122 | | SequenceNumber* seq); |
123 | | |
124 | | void AddMemTable(MemTable* m); |
125 | | |
126 | | void UnrefMemTable(autovector<MemTable*>* to_delete, MemTable* m); |
127 | | |
128 | | friend class MemTableList; |
129 | | |
130 | | // Immutable MemTables that have not yet been flushed. |
131 | | std::list<MemTable*> memlist_; |
132 | | |
133 | | // MemTables that have already been flushed |
134 | | // (used during Transaction validation). |
135 | | std::list<MemTable*> memlist_history_; |
136 | | |
137 | | // Maximum number of MemTables to keep in memory (including both flushed |
138 | | // and not-yet-flushed tables). |
139 | | const int max_write_buffer_number_to_maintain_; |
140 | | |
141 | | int refs_ = 0; |
142 | | |
143 | | size_t* parent_memtable_list_memory_usage_; |
144 | | }; |
145 | | |
146 | | // This class stores references to all the immutable memtables. |
147 | | // The memtables are flushed to L0 as soon as possible and in |
148 | | // any order. If there is more than one immutable memtable, their |
149 | | // flushes can occur concurrently. However, they are 'committed' |
150 | | // to the manifest in FIFO order to maintain correctness and |
151 | | // recoverability from a crash. |
152 | | // |
153 | | // |
154 | | // Other than imm_flush_needed, this class is not thread-safe and requires |
155 | | // external synchronization (such as holding the db mutex or being on the |
156 | | // write thread.) |
157 | | class MemTableList { |
158 | | public: |
159 | | // A list of memtables. |
160 | | explicit MemTableList(int min_write_buffer_number_to_merge, |
161 | | int max_write_buffer_number_to_maintain) |
162 | | : imm_flush_needed(false), |
163 | | min_write_buffer_number_to_merge_(min_write_buffer_number_to_merge), |
164 | | current_(new MemTableListVersion(&current_memory_usage_, |
165 | | max_write_buffer_number_to_maintain)), |
166 | | num_flush_not_started_(0), |
167 | | commit_in_progress_(false), |
168 | 690k | flush_requested_(false) { |
169 | 690k | current_->Ref(); |
170 | 690k | current_memory_usage_ = 0; |
171 | 690k | } |
172 | | |
173 | | // Should not delete MemTableList without making sure MemTableList::current() |
174 | | // is Unref()'d. |
175 | 653k | ~MemTableList() {} |
176 | | |
177 | 1.65M | MemTableListVersion* current() { return current_; } |
178 | | |
179 | | // Atomic flag that background threads can check to determine whether |
180 | | // there is anything more to start flushing. |
181 | | std::atomic<bool> imm_flush_needed; |
182 | | |
183 | | // Returns the total number of memtables in the list that haven't yet |
184 | | // been flushed and logged. |
185 | | int NumNotFlushed() const; |
186 | | |
187 | | // Returns total number of memtables in the list that have been |
188 | | // completely flushed and logged. |
189 | | int NumFlushed() const; |
190 | | |
191 | | // Returns true if there is at least one memtable on which flush has |
192 | | // not yet started. |
193 | | bool IsFlushPending() const; |
194 | | |
195 | | // Returns the earliest memtables that need to be flushed. The returned |
196 | | // memtables are guaranteed to be in ascending order of creation time. |
197 | | void PickMemtablesToFlush(autovector<MemTable*>* mems, |
198 | | const MemTableFilter& filter = MemTableFilter()); |
199 | | |
200 | | // Reset status of the given memtable list back to pending state so that |
201 | | // they can get picked up again on the next round of flush. |
202 | | void RollbackMemtableFlush(const autovector<MemTable*>& mems, |
203 | | uint64_t file_number); |
204 | | |
205 | | // Commit a successful flush in the manifest file |
206 | | Status InstallMemtableFlushResults( |
207 | | ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, |
208 | | const autovector<MemTable*>& m, VersionSet* vset, InstrumentedMutex* mu, |
209 | | uint64_t file_number, autovector<MemTable*>* to_delete, |
210 | | Directory* db_directory, LogBuffer* log_buffer, const FileNumbersHolder& file_number_holder); |
211 | | |
212 | | // New memtables are inserted at the front of the list. |
213 | | // Takes ownership of the reference held on *m by the caller of Add(). |
214 | | void Add(MemTable* m, autovector<MemTable*>* to_delete); |
215 | | |
216 | | // Returns an estimate of the number of bytes of data in use. |
217 | | size_t ApproximateMemoryUsage(); |
218 | | |
219 | | // Returns an estimate of the number of bytes of data used by |
220 | | // the unflushed mem-tables. |
221 | | size_t ApproximateUnflushedMemTablesMemoryUsage(); |
222 | | |
223 | | // Request a flush of all existing memtables to storage. This will |
224 | | // cause future calls to IsFlushPending() to return true if this list is |
225 | | // non-empty (regardless of the min_write_buffer_number_to_merge |
226 | | // parameter). This flush request will persist until the next time |
227 | | // PickMemtablesToFlush() is called. |
228 | 13.8k | void FlushRequested() { flush_requested_ = true; } |
229 | | |
230 | | // Copying allowed |
231 | | // MemTableList(const MemTableList&); |
232 | | // void operator=(const MemTableList&); |
233 | | |
234 | 155 | size_t* current_memory_usage() { return &current_memory_usage_; } |
235 | | |
236 | | std::string ToString(); |
237 | | |
238 | | private: |
239 | | // DB mutex held |
240 | | void InstallNewVersion(); |
241 | | |
242 | | const int min_write_buffer_number_to_merge_; |
243 | | |
244 | | MemTableListVersion* current_; |
245 | | |
246 | | // the number of elements that still need flushing |
247 | | int num_flush_not_started_; |
248 | | |
249 | | // committing in progress |
250 | | bool commit_in_progress_; |
251 | | |
252 | | // Requested a flush of all memtables to storage |
253 | | bool flush_requested_; |
254 | | |
255 | | // The current memory usage. |
256 | | size_t current_memory_usage_; |
257 | | }; |
258 | | |
259 | | } // namespace rocksdb |
260 | | |
261 | | #endif // YB_ROCKSDB_DB_MEMTABLE_LIST_H |