/Users/deen/code/yugabyte-db/src/yb/gutil/strings/stringpiece.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2001, Google Inc. All rights reserved. |
2 | | // Maintainer: mec@google.com (Michael Chastain) |
3 | | // |
4 | | // The following only applies to changes made to this file as part of YugaByte development. |
5 | | // |
6 | | // Portions Copyright (c) YugaByte, Inc. |
7 | | // |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
9 | | // in compliance with the License. You may obtain a copy of the License at |
10 | | // |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
14 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
15 | | // or implied. See the License for the specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // A GStringPiece points to part or all of a string, Cord, double-quoted string |
19 | | // literal, or other string-like object. A GStringPiece does *not* own the |
20 | | // string to which it points. A GStringPiece is not null-terminated. |
21 | | // |
22 | | // You can use GStringPiece as a function or method parameter. A GStringPiece |
23 | | // parameter can receive a double-quoted string literal argument, a "const |
24 | | // char*" argument, a string argument, or a GStringPiece argument with no data |
25 | | // copying. Systematic use of GStringPiece for arguments reduces data |
26 | | // copies and strlen() calls. |
27 | | // |
28 | | // You may pass a GStringPiece argument by value or const reference. |
29 | | // Passing by value generates slightly smaller code. |
30 | | // void MyFunction(const GStringPiece& arg); |
31 | | // // Slightly better, but same lifetime requirements as const-ref parameter: |
32 | | // void MyFunction(GStringPiece arg); |
33 | | // |
34 | | // GStringPiece is also suitable for local variables if you know that |
35 | | // the lifetime of the underlying object is longer than the lifetime |
36 | | // of your GStringPiece variable. |
37 | | // |
38 | | // Beware of binding a GStringPiece to a temporary: |
39 | | // GStringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem |
40 | | // |
41 | | // This code is okay: |
42 | | // string str = obj.MethodReturningString(); // str owns its contents |
43 | | // GStringPiece sp(str); // GOOD, although you may not need sp at all |
44 | | // |
45 | | // GStringPiece is sometimes a poor choice for a return value and usually a poor |
46 | | // choice for a data member. If you do use a GStringPiece this way, it is your |
47 | | // responsibility to ensure that the object pointed to by the GStringPiece |
48 | | // outlives the GStringPiece. |
49 | | // |
50 | | // A GStringPiece may represent just part of a string; thus the name "Piece". |
51 | | // For example, when splitting a string, vector<GStringPiece> is a natural data |
52 | | // type for the output. For another example, a Cord is a non-contiguous, |
53 | | // potentially very long string-like object. The Cord class has an interface |
54 | | // that iteratively provides GStringPiece objects that point to the |
55 | | // successive pieces of a Cord object. |
56 | | // |
57 | | // A GStringPiece is not null-terminated. If you write code that scans a |
58 | | // GStringPiece, you must check its length before reading any characters. |
59 | | // Common idioms that work on null-terminated strings do not work on |
60 | | // GStringPiece objects. |
61 | | // |
62 | | // There are several ways to create a null GStringPiece: |
63 | | // GStringPiece() |
64 | | // GStringPiece(NULL) |
65 | | // GStringPiece(NULL, 0) |
66 | | // For all of the above, sp.data() == NULL, sp.length() == 0, |
67 | | // and sp.empty() == true. Also, if you create a GStringPiece with |
68 | | // a non-NULL pointer then sp.data() != NULL. Once created, |
69 | | // sp.data() will stay either NULL or not-NULL, except if you call |
70 | | // sp.clear() or sp.set(). |
71 | | // |
72 | | // Thus, you can use GStringPiece(NULL) to signal an out-of-band value |
73 | | // that is different from other GStringPiece values. This is similar |
74 | | // to the way that const char* p1 = NULL; is different from |
75 | | // const char* p2 = "";. |
76 | | // |
77 | | // There are many ways to create an empty GStringPiece: |
78 | | // GStringPiece() |
79 | | // GStringPiece(NULL) |
80 | | // GStringPiece(NULL, 0) |
81 | | // GStringPiece("") |
82 | | // GStringPiece("", 0) |
83 | | // GStringPiece("abcdef", 0) |
84 | | // GStringPiece("abcdef"+6, 0) |
85 | | // For all of the above, sp.length() will be 0 and sp.empty() will be true. |
86 | | // For some empty GStringPiece values, sp.data() will be NULL. |
87 | | // For some empty GStringPiece values, sp.data() will not be NULL. |
88 | | // |
89 | | // Be careful not to confuse: null GStringPiece and empty GStringPiece. |
90 | | // The set of empty GStringPieces properly includes the set of null GStringPieces. |
91 | | // That is, every null GStringPiece is an empty GStringPiece, |
92 | | // but some non-null GStringPieces are empty GStringPieces too. |
93 | | // |
94 | | // All empty GStringPiece values compare equal to each other. |
95 | | // Even a null GStringPiece compares equal to a non-null empty GStringPiece: |
96 | | // GStringPiece() == GStringPiece("", 0) |
97 | | // GStringPiece(NULL) == GStringPiece("abc", 0) |
98 | | // GStringPiece(NULL, 0) == GStringPiece("abcdef"+6, 0) |
99 | | // |
100 | | // Look carefully at this example: |
101 | | // GStringPiece("") == NULL |
102 | | // True or false? TRUE, because GStringPiece::operator== converts |
103 | | // the right-hand side from NULL to GStringPiece(NULL), |
104 | | // and then compares two zero-length spans of characters. |
105 | | // However, we are working to make this example produce a compile error. |
106 | | // |
107 | | // Suppose you want to write: |
108 | | // bool TestWhat?(GStringPiece sp) { return sp == NULL; } // BAD |
109 | | // Do not do that. Write one of these instead: |
110 | | // bool TestNull(GStringPiece sp) { return sp.data() == NULL; } |
111 | | // bool TestEmpty(GStringPiece sp) { return sp.empty(); } |
112 | | // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty? |
113 | | // Right now, TestWhat? behaves like TestEmpty. |
114 | | // We are working to make TestWhat? produce a compile error. |
115 | | // TestNull is good to test for an out-of-band signal. |
116 | | // TestEmpty is good to test for an empty GStringPiece. |
117 | | // |
118 | | // Caveats (again): |
119 | | // (1) The lifetime of the pointed-to string (or piece of a string) |
120 | | // must be longer than the lifetime of the GStringPiece. |
121 | | // (2) There may or may not be a '\0' character after the end of |
122 | | // GStringPiece data. |
123 | | // (3) A null GStringPiece is empty. |
124 | | // An empty GStringPiece may or may not be a null GStringPiece. |
125 | | |
126 | | #ifndef YB_GUTIL_STRINGS_STRINGPIECE_H |
127 | | #define YB_GUTIL_STRINGS_STRINGPIECE_H |
128 | | |
129 | | #include <assert.h> |
130 | | |
131 | | #include <iosfwd> |
132 | | #include <limits> |
133 | | #include <string> |
134 | | |
135 | | #include "yb/gutil/strings/fastmem.h" |
136 | | |
// A non-owning (pointer, length) view over a run of chars. The viewed data is
// not copied and is not required to be NUL-terminated. A default-constructed
// piece is "null": data() == nullptr and size() == 0.
class GStringPiece {
 private:
  const char* ptr_;  // First viewed byte; nullptr for a null piece.
  size_t length_;    // Number of viewed bytes.

 public:
  // standard STL container boilerplate
  typedef char value_type;
  typedef const char* pointer;
  typedef const char& reference;
  typedef const char& const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  static const size_type npos;
  typedef const char* const_iterator;
  typedef const char* iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;

  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "GStringPiece" is
  // expected.
  //
  // Style guide exception granted:
  // http://goto/style-guide-exception-20978288

  // Creates a null piece.
  GStringPiece() : ptr_(nullptr), length_(0) {}

  // Views the NUL-terminated string `str` (length taken with strlen). A null
  // `str` produces a null piece.
  GStringPiece(const char* str)  // NOLINT(runtime/explicit)
      : ptr_(str), length_(0) {
    if (str != nullptr) {
      const size_t length = strlen(str);
      assert(length <= static_cast<size_t>(std::numeric_limits<int>::max()));
      // Store the size_t value as is. Narrowing it through int would corrupt
      // lengths above INT_MAX in builds where the assert is compiled out.
      length_ = length;
    }
  }

  // Views the contents of `str`; `str` must outlive the piece.
  GStringPiece(const std::string& str)  // NOLINT(runtime/explicit)
      : ptr_(str.data()), length_(str.length()) {
  }

  // Views `len` bytes starting at `offset`.
  GStringPiece(const char* offset, size_type len) : ptr_(offset), length_(len) {
  }

  // Substring of another GStringPiece.
  // pos must be <= x.length().
  GStringPiece(GStringPiece x, size_type pos);
  // Substring of another GStringPiece.
  // pos must be <= x.length().
  // len will be pinned to at most x.length() - pos.
  GStringPiece(GStringPiece x, size_type pos, size_type len);

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  size_type size() const { return length_; }
  size_type length() const { return length_; }
  bool empty() const { return length_ == 0; }

  // Resets this piece to the null state.
  void clear() {
    ptr_ = nullptr;
    length_ = 0;
  }

  // Re-points this piece at `len` bytes starting at `data`.
  void set(const char* data, size_type len) {
    ptr_ = data;
    length_ = len;
  }

  // Re-points this piece at the NUL-terminated string `str`; a null `str`
  // makes this a null piece.
  void set(const char* str) {
    ptr_ = str;
    length_ = (str != nullptr) ? strlen(str) : 0;
  }

  // Re-points this piece at `len` bytes of untyped memory starting at `data`.
  void set(const void* data, size_type len) {
    ptr_ = static_cast<const char*>(data);
    length_ = len;
  }

  // Returns the byte at index `i`; `i` must be < size() (checked by assert).
  char operator[](size_type i) const {
    assert(i < length_);
    return ptr_[i];
  }

  // Drops the first `n` bytes from the view; `n` must be <= size().
  void remove_prefix(size_type n) {
    assert(length_ >= n);
    ptr_ += n;
    length_ -= n;
  }

  // Drops the last `n` bytes from the view; `n` must be <= size().
  void remove_suffix(size_type n) {
    assert(length_ >= n);
    length_ -= n;
  }

  // returns {-1, 0, 1}
  // Bytes are compared with memcmp over the common prefix; on a tie the
  // shorter piece orders first.
  int compare(GStringPiece x) const {
    const size_type min_size = length_ < x.length_ ? length_ : x.length_;
    // memcmp requires valid pointers even for a zero count, and a null piece
    // holds nullptr, so skip the call when there is nothing to compare.
    const int r = (min_size == 0) ? 0 : memcmp(ptr_, x.ptr_, min_size);
    if (r < 0) return -1;
    if (r > 0) return 1;
    if (length_ < x.length_) return -1;
    if (length_ > x.length_) return 1;
    return 0;
  }

  std::string as_string() const {
    return ToString();
  }
  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a GStringPiece be called "as_string()".  We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    if (ptr_ == nullptr) return std::string();
    return std::string(data(), size());
  }

  void CopyToString(std::string* target) const;
  void AppendToString(std::string* target) const;

  // True if this piece begins with the bytes of `x`. An empty `x` is a prefix
  // of every piece, including a null one.
  bool starts_with(GStringPiece x) const {
    if (length_ < x.length_) return false;
    // Zero-length check first so memcmp never sees a null pointer.
    return x.length_ == 0 || memcmp(ptr_, x.ptr_, x.length_) == 0;
  }

  // True if this piece ends with the bytes of `x`. An empty `x` is a suffix
  // of every piece, including a null one.
  bool ends_with(GStringPiece x) const {
    if (length_ < x.length_) return false;
    // Zero-length check first so memcmp never sees a null pointer.
    return x.length_ == 0 ||
           memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0;
  }

  iterator begin() const { return ptr_; }
  iterator end() const { return ptr_ + length_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(ptr_ + length_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(ptr_);
  }

  // Both simply report the current length.
  size_type max_size() const { return length_; }
  size_type capacity() const { return length_; }

  // cpplint.py emits a false positive [build/include_what_you_use]
  size_type copy(char* buf, size_type n, size_type pos = 0) const;  // NOLINT

  bool contains(GStringPiece s) const;

  size_type find(GStringPiece s, size_type pos = 0) const;
  size_type find(char c, size_type pos = 0) const;
  size_type rfind(GStringPiece s, size_type pos = npos) const;
  size_type rfind(char c, size_type pos = npos) const;

  size_type find_first_of(GStringPiece s, size_type pos = 0) const;
  // Single-character form: forwards to find(c, pos).
  size_type find_first_of(char c, size_type pos = 0) const { return find(c, pos); }
  size_type find_first_not_of(GStringPiece s, size_type pos = 0) const;
  size_type find_first_not_of(char c, size_type pos = 0) const;
  size_type find_last_of(GStringPiece s, size_type pos = npos) const;
  // Single-character form: forwards to rfind(c, pos).
  size_type find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); }
  size_type find_last_not_of(GStringPiece s, size_type pos = npos) const;
  size_type find_last_not_of(char c, size_type pos = npos) const;

  GStringPiece substr(size_type pos, size_type n = npos) const;

  size_t hash() const;
};
305 | | |
306 | | // This large function is defined inline so that in a fairly common case where |
307 | | // one of the arguments is a literal, the compiler can elide a lot of the |
308 | | // following comparisons. |
309 | 58.5M | inline bool operator==(GStringPiece x, GStringPiece y) { |
310 | 58.5M | auto len = x.size(); |
311 | 58.5M | if (len != y.size()) { |
312 | 31.0M | return false; |
313 | 31.0M | } |
314 | | |
315 | 27.5M | return x.data() == y.data() || len <= 027.5M || strings::memeq(x.data(), y.data(), len)27.5M ; |
316 | 58.5M | } |
317 | | |
318 | 0 | inline bool operator!=(GStringPiece x, GStringPiece y) { |
319 | 0 | return !(x == y); |
320 | 0 | } |
321 | | |
322 | 0 | inline bool operator<(GStringPiece x, GStringPiece y) { |
323 | 0 | const auto min_size = x.size() < y.size() ? x.size() : y.size(); |
324 | 0 | const int r = memcmp(x.data(), y.data(), min_size); |
325 | 0 | return (r < 0) || (r == 0 && x.size() < y.size()); |
326 | 0 | } |
327 | | |
328 | 0 | inline bool operator>(GStringPiece x, GStringPiece y) { |
329 | 0 | return y < x; |
330 | 0 | } |
331 | | |
332 | 0 | inline bool operator<=(GStringPiece x, GStringPiece y) { |
333 | 0 | return !(x > y); |
334 | 0 | } |
335 | | |
336 | 0 | inline bool operator>=(GStringPiece x, GStringPiece y) { |
337 | 0 | return !(x < y); |
338 | 0 | } |
339 | | class GStringPiece; |
340 | | template <class X> struct GoodFastHash; |
341 | | |
342 | | // ------------------------------------------------------------------ |
343 | | // Functions used to create STL containers that use GStringPiece |
344 | | // Remember that a GStringPiece's lifetime had better be less than |
345 | | // that of the underlying string or char*. If it is not, then you |
346 | | // cannot safely store a GStringPiece into an STL container |
347 | | // ------------------------------------------------------------------ |
348 | | |
349 | | // SWIG doesn't know how to parse this stuff properly. Omit it. |
350 | | #ifndef SWIG |
351 | | |
352 | | namespace std { |
353 | | template<> struct hash<GStringPiece> { |
354 | | size_t operator()(GStringPiece s) const; |
355 | | }; |
356 | | } // namespace std |
357 | | |
358 | | |
359 | | // An implementation of GoodFastHash for GStringPiece. See |
360 | | // GoodFastHash values. |
361 | | template<> struct GoodFastHash<GStringPiece> { |
362 | 0 | size_t operator()(GStringPiece s) const { |
363 | 0 | return s.hash(); |
364 | 0 | } |
365 | | |
366 | | // Less than operator, for MSVC. |
367 | 0 | bool operator()(const GStringPiece& s1, const GStringPiece& s2) const { |
368 | 0 | return s1 < s2; |
369 | 0 | } |
370 | | static const size_t bucket_size = 4; // These are required by MSVC |
371 | | static const size_t min_buckets = 8; // 4 and 8 are defaults. |
372 | | }; |
373 | | #endif |
374 | | |
375 | | // allow GStringPiece to be logged |
376 | | extern std::ostream& operator<<(std::ostream& o, GStringPiece piece); |
377 | | |
378 | | #endif // YB_GUTIL_STRINGS_STRINGPIECE_H |