/Users/deen/code/yugabyte-db/src/yb/rocksdb/utilities/redis/redis_list_iterator.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2013 Facebook |
2 | | // |
3 | | // The following only applies to changes made to this file as part of YugaByte development. |
4 | | // |
5 | | // Portions Copyright (c) YugaByte, Inc. |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
8 | | // in compliance with the License. You may obtain a copy of the License at |
9 | | // |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // |
12 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
13 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
14 | | // or implied. See the License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // |
17 | | /** |
18 | | * RedisListIterator: |
19 | | * An abstraction over the "list" concept (e.g.: for redis lists). |
20 | | * Provides functionality to read, traverse, edit, and write these lists. |
21 | | * |
22 | | * Upon construction, the RedisListIterator is given a block of list data. |
23 | | * Internally, it stores a pointer to the data and a pointer to current item. |
24 | | * It also stores a "result" list that will be mutated over time. |
25 | | * |
26 | | * Traversal and mutation are done by "forward iteration". |
27 | | * The Push() and Skip() methods will advance the iterator to the next item. |
28 | | * However, Push() will also "write the current item to the result". |
29 | | * Skip() will simply move to next item, causing current item to be dropped. |
30 | | * |
31 | | * Upon completion, the result (accessible by WriteResult()) will be saved. |
32 | | * All "skipped" items will be gone; all "pushed" items will remain. |
33 | | * |
34 | | * @throws Any of the operations may throw a RedisListException if an invalid |
35 | | * operation is performed or if the data is found to be corrupt. |
36 | | * |
37 | | * @notes By default, if WriteResult() is called part-way through iteration, |
38 | | * it will automatically advance the iterator to the end, and Keep() |
39 | | * all items that haven't been traversed yet. This may be subject |
40 | | * to review. |
41 | | * |
42 | | * @notes Can access the "current" item via GetCurrent(), and other |
43 | | * list-specific information such as Length(). |
44 | | * |
45 | | * @notes The internal representation is due to change at any time. Presently, |
46 | | * the list is represented as follows: |
47 | | * - 32-bit integer header: the number of items in the list |
48 | | * - For each item: |
49 | | * - 32-bit int (n): the number of bytes representing this item |
50 | | * - n bytes of data: the actual data. |
51 | | * |
52 | | * @author Deon Nicholas (dnicholas@fb.com) |
53 | | */ |
54 | | |
55 | | #pragma once |
56 | | #ifndef ROCKSDB_LITE |
57 | | |
58 | | #include <string> |
59 | | |
60 | | #include "redis_list_exception.h" |
61 | | #include "yb/util/slice.h" |
62 | | #include "yb/rocksdb/util/coding.h" |
63 | | |
64 | | namespace rocksdb { |
65 | | |
66 | | /// An abstraction over the "list" concept. |
67 | | /// All operations may throw a RedisListException |
68 | | class RedisListIterator { |
69 | | public: |
70 | | /// Construct a redis-list-iterator based on data. |
71 | | /// If the data is non-empty, it must formatted according to @notes above. |
72 | | /// |
73 | | /// If the data is valid, we can assume the following invariant(s): |
74 | | /// a) length_, num_bytes_ are set correctly. |
75 | | /// b) cur_byte_ always refers to the start of the current element, |
76 | | /// just before the bytes that specify element length. |
77 | | /// c) cur_elem_ is always the index of the current element. |
78 | | /// d) cur_elem_length_ is always the number of bytes in current element, |
79 | | /// excluding the 4-byte header itself. |
80 | | /// e) result_ will always contain data_[0..cur_byte_) and a header |
81 | | /// f) Whenever corrupt data is encountered or an invalid operation is |
82 | | /// attempted, a RedisListException will immediately be thrown. |
83 | | explicit RedisListIterator(const std::string& list_data) |
84 | | : data_(list_data.data()), |
85 | | num_bytes_(static_cast<uint32_t>(list_data.size())), |
86 | | cur_byte_(0), |
87 | | cur_elem_(0), |
88 | | cur_elem_length_(0), |
89 | | length_(0), |
90 | 352 | result_() { |
91 | | // Initialize the result_ (reserve enough space for header) |
92 | 352 | InitializeResult(); |
93 | | |
94 | | // Parse the data only if it is not empty. |
95 | 352 | if (num_bytes_ == 0) { |
96 | 24 | return; |
97 | 24 | } |
98 | | |
99 | | // If non-empty, but less than 4 bytes, data must be corrupt |
100 | 328 | if (num_bytes_ < sizeof(length_)) { |
101 | 0 | ThrowError("Corrupt header."); // Will break control flow |
102 | 0 | } |
103 | | |
104 | | // Good. The first bytes specify the number of elements |
105 | 328 | length_ = DecodeFixed32(data_); |
106 | 328 | cur_byte_ = sizeof(length_); |
107 | | |
108 | | // If we have at least one element, point to that element. |
109 | | // Also, read the first integer of the element (specifying the size), |
110 | | // if possible. |
111 | 328 | if (length_ > 0) { |
112 | 321 | if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) { |
113 | 321 | cur_elem_length_ = DecodeFixed32(data_+cur_byte_); |
114 | 321 | } else { |
115 | 0 | ThrowError("Corrupt data for first element."); |
116 | 0 | } |
117 | 321 | } |
118 | | |
119 | | // At this point, we are fully set-up. |
120 | | // The invariants described in the header should now be true. |
121 | 328 | } |
122 | | |
123 | | /// Reserve some space for the result_. |
124 | | /// Equivalent to result_.reserve(bytes). |
125 | 140 | void Reserve(int bytes) { |
126 | 140 | result_.reserve(bytes); |
127 | 140 | } |
128 | | |
129 | | /// Go to next element in data file. |
130 | | /// Also writes the current element to result_. |
131 | 325 | RedisListIterator& Push() { |
132 | 325 | WriteCurrentElement(); |
133 | 325 | MoveNext(); |
134 | 325 | return *this; |
135 | 325 | } |
136 | | |
137 | | /// Go to next element in data file. |
138 | | /// Drops/skips the current element. It will not be written to result_. |
139 | 478 | RedisListIterator& Skip() { |
140 | 478 | MoveNext(); |
141 | 478 | --length_; // One less item |
142 | 478 | --cur_elem_; // We moved one forward, but index did not change |
143 | 478 | return *this; |
144 | 478 | } |
145 | | |
146 | | /// Insert elem into the result_ (just BEFORE the current element / byte) |
147 | | /// Note: if Done() (i.e.: iterator points to end), this will append elem. |
148 | 103 | void InsertElement(const Slice& elem) { |
149 | | // Ensure we are in a valid state |
150 | 103 | CheckErrors(); |
151 | | |
152 | 103 | const int kOrigSize = static_cast<int>(result_.size()); |
153 | 103 | result_.resize(kOrigSize + SizeOf(elem)); |
154 | 103 | EncodeFixed32(result_.data() + kOrigSize, |
155 | 103 | static_cast<uint32_t>(elem.size())); |
156 | 103 | memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(), |
157 | 103 | elem.size()); |
158 | 103 | ++length_; |
159 | 103 | ++cur_elem_; |
160 | 103 | } |
161 | | |
162 | | /// Access the current element, and save the result into *curElem |
163 | 337 | void GetCurrent(Slice* curElem) { |
164 | | // Ensure we are in a valid state |
165 | 337 | CheckErrors(); |
166 | | |
167 | | // Ensure that we are not past the last element. |
168 | 337 | if (Done()) { |
169 | 0 | ThrowError("Invalid dereferencing."); |
170 | 0 | } |
171 | | |
172 | | // Dereference the element |
173 | 337 | *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_), |
174 | 337 | cur_elem_length_); |
175 | 337 | } |
176 | | |
177 | | // Number of elements |
178 | 184 | int Length() const { |
179 | 184 | return length_; |
180 | 184 | } |
181 | | |
182 | | // Number of bytes in the final representation (i.e: WriteResult().size()) |
183 | 140 | int Size() const { |
184 | | // result_ holds the currently written data |
185 | | // data_[cur_byte..num_bytes-1] is the remainder of the data |
186 | 140 | return static_cast<int>(result_.size() + (num_bytes_ - cur_byte_)); |
187 | 140 | } |
188 | | |
189 | | // Reached the end? |
190 | 3.28k | bool Done() const { |
191 | 3.28k | return cur_byte_ >= num_bytes_ || cur_elem_ >= length_3.14k ; |
192 | 3.28k | } |
193 | | |
194 | | /// Returns a string representing the final, edited, data. |
195 | | /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read |
196 | | /// and that result_ contains this data. |
197 | | /// The rest of the data must still be written. |
198 | | /// So, this method ADVANCES THE ITERATOR TO THE END before writing. |
199 | 122 | Slice WriteResult() { |
200 | 122 | CheckErrors(); |
201 | | |
202 | | // The header should currently be filled with dummy data (0's) |
203 | | // Correctly update the header. |
204 | | // Note, this is safe since result_ is a vector (guaranteed contiguous) |
205 | 122 | EncodeFixed32(&result_[0],length_); |
206 | | |
207 | | // Append the remainder of the data to the result. |
208 | 122 | result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_); |
209 | | |
210 | | // Seek to end of file |
211 | 122 | cur_byte_ = num_bytes_; |
212 | 122 | cur_elem_ = length_; |
213 | 122 | cur_elem_length_ = 0; |
214 | | |
215 | | // Return the result |
216 | 122 | return Slice(result_.data(),result_.size()); |
217 | 122 | } |
218 | | |
219 | | public: // Static public functions |
220 | | |
221 | | /// An upper-bound on the amount of bytes needed to store this element. |
222 | | /// This is used to hide representation information from the client. |
223 | | /// E.G. This can be used to compute the bytes we want to Reserve(). |
224 | 228 | static uint32_t SizeOf(const Slice& elem) { |
225 | | // [Integer Length . Data] |
226 | 228 | return static_cast<uint32_t>(sizeof(uint32_t) + elem.size()); |
227 | 228 | } |
228 | | |
229 | | private: // Private functions |
230 | | |
231 | | /// Initializes the result_ string. |
232 | | /// It will fill the first few bytes with 0's so that there is |
233 | | /// enough space for header information when we need to write later. |
234 | | /// Currently, "header information" means: the length (number of elements) |
235 | | /// Assumes that result_ is empty to begin with |
236 | 352 | void InitializeResult() { |
237 | 352 | assert(result_.empty()); // Should always be true. |
238 | 0 | result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header |
239 | 352 | } |
240 | | |
241 | | /// Go to the next element (used in Push() and Skip()) |
242 | 803 | void MoveNext() { |
243 | 803 | CheckErrors(); |
244 | | |
245 | | // Check to make sure we are not already in a finished state |
246 | 803 | if (Done()) { |
247 | 0 | ThrowError("Attempting to iterate past end of list."); |
248 | 0 | } |
249 | | |
250 | | // Move forward one element. |
251 | 803 | cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_; |
252 | 803 | ++cur_elem_; |
253 | | |
254 | | // If we are at the end, finish |
255 | 803 | if (Done()) { |
256 | 62 | cur_elem_length_ = 0; |
257 | 62 | return; |
258 | 62 | } |
259 | | |
260 | | // Otherwise, we should be able to read the new element's length |
261 | 741 | if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) { |
262 | 0 | ThrowError("Corrupt element data."); |
263 | 0 | } |
264 | | |
265 | | // Set the new element's length |
266 | 741 | cur_elem_length_ = DecodeFixed32(data_+cur_byte_); |
267 | | |
268 | 741 | return; |
269 | 803 | } |
270 | | |
271 | | /// Append the current element (pointed to by cur_byte_) to result_ |
272 | | /// Assumes result_ has already been reserved appropriately. |
273 | 325 | void WriteCurrentElement() { |
274 | | // First verify that the iterator is still valid. |
275 | 325 | CheckErrors(); |
276 | 325 | if (Done()) { |
277 | 0 | ThrowError("Attempting to write invalid element."); |
278 | 0 | } |
279 | | |
280 | | // Append the cur element. |
281 | 325 | result_.insert(result_.end(), |
282 | 325 | data_+cur_byte_, |
283 | 325 | data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_); |
284 | 325 | } |
285 | | |
286 | | /// Will ThrowError() if necessary. |
287 | | /// Checks for common/ubiquitous errors that can arise after most operations. |
288 | | /// This method should be called before any reading operation. |
289 | | /// If this function succeeds, then we are guaranteed to be in a valid state. |
290 | | /// Other member functions should check for errors and ThrowError() also |
291 | | /// if an error occurs that is specific to it even while in a valid state. |
292 | 1.69k | void CheckErrors() { |
293 | | // Check if any crazy thing has happened recently |
294 | 1.69k | if ((cur_elem_ > length_) || // Bad index |
295 | 1.69k | (cur_byte_ > num_bytes_) || // No more bytes |
296 | 1.69k | (cur_byte_ + cur_elem_length_ > num_bytes_) || // Item too large |
297 | 1.69k | (cur_byte_ == num_bytes_ && cur_elem_ != length_88 ) || // Too many items |
298 | 1.69k | (cur_elem_ == length_ && cur_byte_ != num_bytes_88 )) { // Too many bytes |
299 | 0 | ThrowError("Corrupt data."); |
300 | 0 | } |
301 | 1.69k | } |
302 | | |
303 | | /// Will throw an exception based on the passed-in message. |
304 | | /// This function is guaranteed to STOP THE CONTROL-FLOW. |
305 | | /// (i.e.: you do not have to call "return" after calling ThrowError) |
306 | 0 | void ThrowError(const char* const msg = NULL) { |
307 | | // TODO: For now we ignore the msg parameter. This can be expanded later. |
308 | 0 | throw RedisListException(); |
309 | 0 | } |
310 | | |
311 | | private: |
312 | | const char* const data_; // A pointer to the data (the first byte) |
313 | | const uint32_t num_bytes_; // The number of bytes in this list |
314 | | |
315 | | uint32_t cur_byte_; // The current byte being read |
316 | | uint32_t cur_elem_; // The current element being read |
317 | | uint32_t cur_elem_length_; // The number of bytes in current element |
318 | | |
319 | | uint32_t length_; // The number of elements in this list |
320 | | std::vector<char> result_; // The output data |
321 | | }; |
322 | | |
323 | | } // namespace rocksdb |
324 | | #endif // ROCKSDB_LITE |