/Users/deen/code/yugabyte-db/src/yb/util/faststring.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | #ifndef YB_UTIL_FASTSTRING_H_ |
33 | | #define YB_UTIL_FASTSTRING_H_ |
34 | | |
35 | | #include <string> |
36 | | |
37 | | #include "yb/gutil/dynamic_annotations.h" |
38 | | #include "yb/gutil/macros.h" |
39 | | #include "yb/gutil/strings/fastmem.h" |
40 | | |
41 | | namespace yb { |
42 | | |
43 | | // A faststring is similar to a std::string, except that it is faster for many |
44 | | // common use cases (in particular, resize() will fill with uninitialized data |
45 | | // instead of memsetting to \0) |
46 | | class faststring { |
47 | | public: |
48 | | // Tell PVS Studio it is OK that initial_data_ is uninitialized. |
49 | | //-V:initial_data_:730 |
50 | | faststring() |
51 | | : data_(initial_data_), |
52 | | len_(0), |
53 | 66.8M | capacity_(kInitialCapacity) { |
54 | 66.8M | } |
55 | | |
56 | | // Construct a string with the given capacity, in bytes. |
57 | | // Tell PVS Studio it is OK that initial_data_ is uninitialized. |
58 | | //-V:initial_data_:730 |
59 | | explicit faststring(size_t capacity) |
60 | | : data_(initial_data_), |
61 | | len_(0), |
62 | 0 | capacity_(kInitialCapacity) { |
63 | 0 | if (capacity > capacity_) { |
64 | 0 | data_ = new uint8_t[capacity]; |
65 | 0 | capacity_ = capacity; |
66 | 0 | } |
67 | 0 | ASAN_POISON_MEMORY_REGION(data_, capacity_); |
68 | 0 | } |
69 | | |
70 | 66.8M | ~faststring() { |
71 | 66.8M | ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_)); |
72 | 66.8M | if (data_ != initial_data_) { |
73 | 35.9M | delete[] data_; |
74 | 35.9M | } |
75 | 66.8M | } |
76 | | |
77 | | // Reset the valid length of the string to 0. |
78 | | // |
79 | | // This does not free up any memory. The capacity of the string remains unchanged. |
80 | 29.3M | void clear() { |
81 | 29.3M | resize(0); |
82 | 29.3M | ASAN_POISON_MEMORY_REGION(data_, capacity_); |
83 | 29.3M | } |
84 | | |
85 | | // Resize the string to the given length. |
86 | | // If the new length is larger than the old length, the capacity is expanded as necessary. |
87 | | // |
88 | | // NOTE: in contrast to std::string's implementation, Any newly "exposed" bytes of data are |
89 | | // not cleared. |
90 | 64.9M | void resize(size_t newsize) { |
91 | 64.9M | if (newsize > capacity_) { |
92 | 4.81M | reserve(newsize); |
93 | 4.81M | } |
94 | 64.9M | len_ = newsize; |
95 | 64.9M | ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_); |
96 | 64.9M | ASAN_UNPOISON_MEMORY_REGION(data_, len_); |
97 | 64.9M | } |
98 | | |
99 | | // Releases the underlying array; after this, the buffer is left empty. |
100 | | // |
101 | | // NOTE: the data pointer returned by release() is not necessarily the pointer |
102 | 0 | uint8_t *release() WARN_UNUSED_RESULT { |
103 | 0 | uint8_t *ret = data_; |
104 | 0 | if (ret == initial_data_) { |
105 | 0 | ret = new uint8_t[len_]; |
106 | 0 | memcpy(ret, data_, len_); |
107 | 0 | } |
108 | 0 | len_ = 0; |
109 | 0 | capacity_ = kInitialCapacity; |
110 | 0 | data_ = initial_data_; |
111 | 0 | ASAN_POISON_MEMORY_REGION(data_, capacity_); |
112 | 0 | return ret; |
113 | 0 | } |
114 | | |
115 | | // Reserve space for the given total amount of data. If the current capacity is already |
116 | | // larger than the newly requested capacity, this is a no-op (i.e. it does not ever free memory). |
117 | | // |
118 | | // NOTE: even though the new capacity is reserved, it is illegal to begin writing into that memory |
119 | | // directly using pointers. If ASAN is enabled, this is ensured using manual memory poisoning. |
120 | 29.8M | void reserve(size_t newcapacity) { |
121 | 29.8M | if (PREDICT_TRUE(newcapacity <= capacity_)) return11.1M ; |
122 | 18.6M | GrowArray(newcapacity); |
123 | 18.6M | } |
124 | | |
125 | | // Append the given data to the string, resizing capacity as necessary. |
126 | 1.49G | void append(const void *src_v, size_t count) { |
127 | 1.49G | const uint8_t *src = reinterpret_cast<const uint8_t *>(src_v); |
128 | 1.49G | EnsureRoomForAppend(count); |
129 | 1.49G | ASAN_UNPOISON_MEMORY_REGION(data_ + len_, count); |
130 | | |
131 | | // appending short values is common enough that this |
132 | | // actually helps, according to benchmarks. In theory |
133 | | // memcpy_inlined should already be just as good, but this |
134 | | // was ~20% faster for reading a large prefix-coded string file |
135 | | // where each string was only a few chars different |
136 | 1.49G | if (count <= 4) { |
137 | 1.25G | uint8_t *p = &data_[len_]; |
138 | 3.69G | for (size_t i = 0; i < count; i++2.44G ) { |
139 | 2.44G | *p++ = *src++; |
140 | 2.44G | } |
141 | 1.25G | } else { |
142 | 244M | strings::memcpy_inlined(&data_[len_], src, count); |
143 | 244M | } |
144 | 1.49G | len_ += count; |
145 | 1.49G | } |
146 | | |
147 | | // Append the given string to this string. |
148 | 91.0M | void append(const std::string &str) { |
149 | 91.0M | append(str.data(), str.size()); |
150 | 91.0M | } |
151 | | |
152 | | // Append the given character to this string. |
153 | 68 | void push_back(const char byte) { |
154 | 68 | EnsureRoomForAppend(1); |
155 | 68 | ASAN_UNPOISON_MEMORY_REGION(data_ + len_, 1); |
156 | 68 | data_[len_] = byte; |
157 | 68 | len_++; |
158 | 68 | } |
159 | | |
160 | | // Return true if the string is empty. |
161 | 32.7M | bool empty() const { |
162 | 32.7M | return len_ == 0; |
163 | 32.7M | } |
164 | | |
165 | | // Return the valid length of this string. |
166 | 29.7k | size_t length() const { |
167 | 29.7k | return len_; |
168 | 29.7k | } |
169 | | |
170 | | // Return the valid length of this string (identical to length()) |
171 | 115M | size_t size() const { |
172 | 115M | return len_; |
173 | 115M | } |
174 | | |
175 | | // Return the allocated capacity of this string. |
176 | 0 | size_t capacity() const { |
177 | 0 | return capacity_; |
178 | 0 | } |
179 | | |
180 | | // Return a pointer to the data in this string. Note that this pointer |
181 | | // may be invalidated by any later non-const operation. |
182 | 57.1M | const uint8_t *data() const { |
183 | 57.1M | return &data_[0]; |
184 | 57.1M | } |
185 | | |
186 | | // Return a pointer to the data in this string. Note that this pointer |
187 | | // may be invalidated by any later non-const operation. |
188 | 73 | const char *c_str() const { |
189 | 73 | return reinterpret_cast<const char *>(data()); |
190 | 73 | } |
191 | | |
192 | | // Return a pointer to the data in this string. Note that this pointer |
193 | | // may be invalidated by any later non-const operation. |
194 | 130M | uint8_t *data() { |
195 | 130M | return &data_[0]; |
196 | 130M | } |
197 | | |
198 | | // Return the given element of this string. Note that this does not perform |
199 | | // any bounds checking. |
200 | 0 | const uint8_t &at(size_t i) const { |
201 | 0 | return data_[i]; |
202 | 0 | } |
203 | | |
204 | | // Return the given element of this string. Note that this does not perform |
205 | | // any bounds checking. |
206 | 0 | const uint8_t &operator[](size_t i) const { |
207 | 0 | return data_[i]; |
208 | 0 | } |
209 | | |
210 | | // Return the given element of this string. Note that this does not perform |
211 | | // any bounds checking. |
212 | 22.4M | uint8_t &operator[](size_t i) { |
213 | 22.4M | return data_[i]; |
214 | 22.4M | } |
215 | | |
216 | | // Reset the contents of this string by copying 'len' bytes from 'src'. |
217 | 0 | void assign_copy(const uint8_t *src, size_t len) { |
218 | 0 | // Reset length so that the first resize doesn't need to copy the current |
219 | 0 | // contents of the array. |
220 | 0 | len_ = 0; |
221 | 0 | resize(len); |
222 | 0 | memcpy(data(), src, len); |
223 | 0 | } |
224 | | |
225 | | // Reset the contents of this string by copying from the given std::string. |
226 | 0 | void assign_copy(const std::string &str) { |
227 | 0 | assign_copy(reinterpret_cast<const uint8_t *>(str.c_str()), |
228 | 0 | str.size()); |
229 | 0 | } |
230 | | |
231 | 0 | void CopyFrom(const faststring& other) { |
232 | 0 | clear(); |
233 | 0 | append(other.data(), other.size()); |
234 | 0 | } |
235 | | |
236 | | // Return a copy of this string as a std::string. |
237 | 1.14k | std::string ToString() const { |
238 | 1.14k | return std::string(reinterpret_cast<const char *>(data()), |
239 | 1.14k | len_); |
240 | 1.14k | } |
241 | | |
242 | | private: |
243 | | |
244 | | // If necessary, expand the buffer to fit at least 'count' more bytes. |
245 | | // If the array has to be grown, it is grown by at least 50%. |
246 | 1.49G | void EnsureRoomForAppend(size_t count) { |
247 | 1.49G | if (PREDICT_TRUE(len_ + count <= capacity_)) { |
248 | 1.46G | return; |
249 | 1.46G | } |
250 | | |
251 | | // Call the non-inline slow path - this reduces the number of instructions |
252 | | // on the hot path. |
253 | 37.9M | GrowByAtLeast(count); |
254 | 37.9M | } |
255 | | |
256 | | // The slow path of MakeRoomFor. Grows the buffer by either |
257 | | // 'count' bytes, or 50%, whichever is more. |
258 | | void GrowByAtLeast(size_t count); |
259 | | |
260 | | // Grow the array to the given capacity, which must be more than |
261 | | // the current capacity. |
262 | | void GrowArray(size_t newcapacity); |
263 | | |
264 | | enum { |
265 | | kInitialCapacity = 32 |
266 | | }; |
267 | | |
268 | | uint8_t* data_; |
269 | | uint8_t initial_data_[kInitialCapacity]; |
270 | | size_t len_; |
271 | | size_t capacity_; |
272 | | |
273 | | DISALLOW_COPY_AND_ASSIGN(faststring); |
274 | | }; |
275 | | |
276 | | } // namespace yb |
277 | | |
278 | | #endif // YB_UTIL_FASTSTRING_H_ |