YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/split.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 and onwards Google, Inc.
2
//
3
// #status: RECOMMENDED
4
// #category: operations on strings
5
// #summary: Functions for splitting strings into substrings.
6
//
7
// This file contains functions for splitting strings. The new and recommended
8
// API for string splitting is the strings::Split() function. The old API is a
9
// large collection of standalone functions declared at the bottom of this file
10
// in the global scope.
11
//
12
// TODO(user): Rough migration plan from old API to new API
13
// (1) Add comments to old Split*() functions showing how to do the same things
14
//     with the new API.
15
// (2) Reimplement some of the old Split*() functions in terms of the new
16
//     Split() API. This will allow deletion of code in split.cc.
17
// (3) (Optional) Replace old Split*() API calls at call sites with calls to new
18
//     Split() API.
19
//
20
// The following only applies to changes made to this file as part of YugaByte development.
21
//
22
// Portions Copyright (c) YugaByte, Inc.
23
//
24
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
25
// in compliance with the License.  You may obtain a copy of the License at
26
//
27
// http://www.apache.org/licenses/LICENSE-2.0
28
//
29
// Unless required by applicable law or agreed to in writing, software distributed under the License
30
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
31
// or implied.  See the License for the specific language governing permissions and limitations
32
// under the License.
33
//
34
#ifndef YB_GUTIL_STRINGS_SPLIT_H
35
#define YB_GUTIL_STRINGS_SPLIT_H
36
37
#include <stddef.h>
38
#include <algorithm>
39
using std::copy;
40
using std::max;
41
using std::min;
42
using std::reverse;
43
using std::sort;
44
using std::swap;
45
#include <iterator>
46
using std::back_insert_iterator;
47
using std::iterator_traits;
48
#include <map>
49
using std::map;
50
using std::multimap;
51
#include <set>
52
using std::multiset;
53
using std::set;
54
#include <string>
55
using std::string;
56
#include <utility>
57
using std::make_pair;
58
using std::pair;
59
#include <vector>
60
using std::vector;
61
62
#include <glog/logging.h>
63
64
#include "yb/gutil/integral_types.h"
65
#include "yb/gutil/logging-inl.h"
66
#include "yb/gutil/strings/charset.h"
67
#include "yb/gutil/strings/split_internal.h"
68
#include "yb/gutil/strings/stringpiece.h"
69
#include "yb/gutil/strings/strip.h"
70
71
namespace strings {
72
73
//                              The new Split API
74
//                                  aka Split2
75
//                              aka strings::Split()
76
//
77
// This string splitting API consists of a Split() function in the ::strings
78
// namespace and a handful of delimiter objects in the ::strings::delimiter
79
// namespace (more on delimiter objects below). The Split() function always
80
// takes two arguments: the text to be split and the delimiter on which to split
81
// the text. An optional third argument may also be given, which is a Predicate
82
// functor that will be used to filter the results, e.g., to skip empty strings
83
// (more on predicates below). The Split() function adapts the returned
84
// collection to the type specified by the caller.
85
//
86
// Example 1:
87
//   // Splits the given string on commas. Returns the results in a
88
//   // vector of strings.
89
//   vector<string> v = strings::Split("a,b,c", ",");
90
//   assert(v.size() == 3);
91
//
92
// Example 2:
93
//   // By default, empty strings are *included* in the output. See the
94
//   // strings::SkipEmpty predicate below to omit them.
95
//   vector<string> v = strings::Split("a,b,,c", ",");
96
//   assert(v.size() == 4);  // "a", "b", "", "c"
97
//   v = strings::Split("", ",");
98
//   assert(v.size() == 1);  // v contains a single ""
99
//
100
// Example 3:
101
//   // Splits the string as in the previous example, except that the results
102
//   // are returned as GStringPiece objects. Note that because we are storing
103
//   // the results within GStringPiece objects, we have to ensure that the input
104
//   // string outlives any results.
105
//   vector<GStringPiece> v = strings::Split("a,b,c", ",");
106
//   assert(v.size() == 3);
107
//
108
// Example 4:
109
//   // Stores results in a set<string>.
110
//   set<string> a = strings::Split("a,b,c,a,b,c", ",");
111
//   assert(a.size() == 3);
112
//
113
// Example 5:
114
//   // Stores results in a map. The map implementation assumes that the input
115
//   // is provided as a series of key/value pairs. For example, the 0th element
116
//   // resulting from the split will be stored as a key to the 1st element. If
117
//   // an odd number of elements are resolved, the last element is paired with
118
//   // a default-constructed value (e.g., empty string).
119
//   map<string, string> m = strings::Split("a,b,c", ",");
120
//   assert(m.size() == 2);
121
//   assert(m["a"] == "b");
122
//   assert(m["c"] == "");  // last component value equals ""
123
//
124
// Example 6:
125
//   // Splits on the empty string, which results in each character of the input
126
//   // string becoming one element in the output collection.
127
//   vector<string> v = strings::Split("abc", "");
128
//   assert(v.size() == 3);
129
//
130
// Example 7:
131
//   // Stores first two split strings as the members in an std::pair.
132
//   std::pair<string, string> p = strings::Split("a,b,c", ",");
133
//   EXPECT_EQ("a", p.first);
134
//   EXPECT_EQ("b", p.second);
135
//   // "c" is omitted because std::pair can hold only two elements.
136
//
137
// As illustrated above, the Split() function adapts the returned collection to
138
// the type specified by the caller. The returned collections may contain
139
// string, GStringPiece, Cord, or any object that has a constructor (explicit or
140
// not) that takes a single GStringPiece argument. This pattern works for all
141
// standard STL containers including vector, list, deque, set, multiset, map,
142
// and multimap, non-standard containers including hash_set and hash_map, and
143
// even std::pair which is not actually a container.
144
//
145
// Splitting to std::pair is an interesting case because it can hold only two
146
// elements and is not a collection type. When splitting to an std::pair the
147
// first two split strings become the std::pair's .first and .second members
148
// respectively. The remaining split substrings are discarded. If there are fewer
149
// than two split substrings, the empty string is used for the corresponding
150
// std::pair member.
151
//
152
// The strings::Split() function can be used multiple times to perform more
153
// complicated splitting logic, such as intelligently parsing key-value pairs.
154
// For example
155
//
156
//   // The input string "a=b=c,d=e,f=,g" becomes
157
//   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
158
//   map<string, string> m;
159
//   for (GStringPiece sp : strings::Split("a=b=c,d=e,f=,g", ",")) {
160
//     m.insert(strings::Split(sp, strings::delimiter::Limit("=", 1)));
161
//   }
162
//   EXPECT_EQ("b=c", m.find("a")->second);
163
//   EXPECT_EQ("e", m.find("d")->second);
164
//   EXPECT_EQ("", m.find("f")->second);
165
//   EXPECT_EQ("", m.find("g")->second);
166
//
167
// The above example stores the results in an std::map. But depending on your
168
// data requirements, you can just as easily store the results in an
169
// std::multimap or even a vector<std::pair<>>.
170
//
171
//
172
//                                  Delimiters
173
//
174
// The Split() function also takes a second argument that is a delimiter. This
175
// delimiter is actually an object that defines the boundaries between elements
176
// in the provided input. If a string (const char*, ::string, or GStringPiece) is
177
// passed in place of an explicit Delimiter object, the argument is implicitly
178
// converted to a ::strings::delimiter::Literal.
179
//
180
// With this split API comes the formal concept of a Delimiter (big D). A
181
// Delimiter is an object with a Find() function that knows how to find the first
182
// occurrence of itself in a given GStringPiece. Models of the Delimiter concept
183
// represent specific kinds of delimiters, such as single characters,
184
// substrings, or even regular expressions.
185
//
186
// The following Delimiter objects are provided as part of the Split() API:
187
//
188
//   - Literal (default)
189
//   - AnyOf
190
//   - Limit
191
//
192
// The following are examples of using some provided Delimiter objects:
193
//
194
// Example 1:
195
//   // Because a string literal is converted to a strings::delimiter::Literal,
196
//   // the following two splits are equivalent.
197
//   vector<string> v1 = strings::Split("a,b,c", ",");           // (1)
198
//   using ::strings::delimiter::Literal;
199
//   vector<string> v2 = strings::Split("a,b,c", Literal(","));  // (2)
200
//
201
// Example 2:
202
//   // Splits on any of the characters specified in the delimiter string.
203
//   using ::strings::delimiter::AnyOf;
204
//   vector<string> v = strings::Split("a,b;c-d", AnyOf(",;-"));
205
//   assert(v.size() == 4);
206
//
207
// Example 3:
208
//   // Uses the Limit meta-delimiter to limit the number of matches a delimiter
209
//   // can have. In this case, the delimiter of a Literal comma is limited to
210
//   // matching at most one time. The last element in the returned
211
//   // collection will contain all unsplit pieces, which may contain instances
212
//   // of the delimiter.
213
//   using ::strings::delimiter::Limit;
214
//   vector<string> v = strings::Split("a,b,c", Limit(",", 1));
215
//   assert(v.size() == 2);  // Limited to 1 delimiter; so two elements found
216
//   assert(v[0] == "a");
217
//   assert(v[1] == "b,c");
218
//
219
//
220
//                                  Predicates
221
//
222
// Predicates can filter the results of a Split() operation by determining
223
// whether or not a resultant element is included in the result set. A predicate
224
// may be passed as an *optional* third argument to the Split() function.
225
//
226
// Predicates are unary functions (or functors) that take a single GStringPiece
227
// argument and return bool indicating whether the argument should be included
228
// (true) or excluded (false).
229
//
230
// One example where this is useful is when filtering out empty substrings. By
231
// default, empty substrings may be returned by strings::Split(), which is
232
// similar to the way split functions work in other programming languages. For
233
// example:
234
//
235
//   // Empty strings *are* included in the returned collection.
236
//   vector<string> v = strings::Split(",a,,b,", ",");
237
//   assert(v.size() ==  5);  // v[0] == "", v[1] == "a", v[2] == "", ...
238
//
239
// These empty strings can be filtered out of the results by simply passing the
240
// provided SkipEmpty predicate as the third argument to the Split() function.
241
// SkipEmpty does not consider a string containing all whitespace to be empty.
242
// For that behavior use the SkipWhitespace predicate. For example:
243
//
244
// Example 1:
245
//   // Uses SkipEmpty to omit empty strings. Strings containing whitespace are
246
//   // not empty and are therefore not skipped.
247
//   using strings::SkipEmpty;
248
//   vector<string> v = strings::Split(",a, ,b,", ",", SkipEmpty());
249
//   assert(v.size() == 3);
250
//   assert(v[0] == "a");
251
//   assert(v[1] == " ");  // <-- The whitespace makes the string not empty.
252
//   assert(v[2] == "b");
253
//
254
// Example 2:
255
//   // Uses SkipWhitespace to skip all strings that are either empty or contain
256
//   // only whitespace.
257
//   using strings::SkipWhitespace;
258
//   vector<string> v = strings::Split(",a, ,b,", ",",  SkipWhitespace());
259
//   assert(v.size() == 2);
260
//   assert(v[0] == "a");
261
//   assert(v[1] == "b");
262
//
263
//
264
//                     Differences between Split1 and Split2
265
//
266
// Split2 is the strings::Split() API described above. Split1 is a name for the
267
// collection of legacy Split*() functions declared later in this file. Most of
268
// the Split1 functions follow a set of conventions that don't necessarily match
269
// the conventions used in Split2. The following are some of the important
270
// differences between Split1 and Split2:
271
//
272
// Split1 -> Split2
273
// ----------------
274
// Append -> Assign:
275
//   The Split1 functions all returned their output collections via a pointer to
276
//   an out parameter as is typical in Google code. In some cases the comments
277
//   explicitly stated that results would be *appended* to the output
278
//   collection. In some cases it was ambiguous whether results were appended.
279
//   This ambiguity is gone in the Split2 API as results are always assigned to
280
//   the output collection, never appended.
281
//
282
// AnyOf -> Literal:
283
//   Most Split1 functions treated their delimiter argument as a string of
284
//   individual byte delimiters. For example, a delimiter of ",;" would split on
285
//   "," and ";", not the substring ",;". This behavior is equivalent to the
286
//   Split2 delimiter strings::delimiter::AnyOf, which is *not* the default. By
287
//   default, strings::Split() splits using strings::delimiter::Literal() which
288
//   would treat the whole string ",;" as a single delimiter string.
289
//
290
// SkipEmpty -> allow empty:
291
//   Most Split1 functions omitted empty substrings in the results. To keep
292
//   empty substrings one would have to use an explicitly named
293
//   Split*AllowEmpty() function. This behavior is reversed in Split2. By
294
//   default, strings::Split() *allows* empty substrings in the output. To skip
295
//   them, use the strings::SkipEmpty predicate.
296
//
297
// string -> user's choice:
298
//   Most Split1 functions return collections of string objects. Some return
299
//   char*, but the type returned is dictated by each Split1 function. With
300
//   Split2 the caller can choose which string-like object to return. (Note:
301
//   char* C-strings are not supported in Split2--use GStringPiece instead).
302
//
303
304
// Definitions of the main Split() function.
305
template <typename Delimiter>  // Delimiter: any type providing Find(GStringPiece).
306
0
inline internal::Splitter<Delimiter> Split(GStringPiece text, Delimiter d) {
307
0
  return internal::Splitter<Delimiter>(text, d);  // No predicate argument is passed.
308
0
}
Unexecuted instantiation: strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter> strings::Split<strings::delimiter::AnyOf>(GStringPiece, strings::delimiter::AnyOf)
Unexecuted instantiation: strings::internal::Splitter<strings::delimiter::LimitImpl<strings::delimiter::AnyOf>, strings::internal::NoFilter> strings::Split<strings::delimiter::LimitImpl<strings::delimiter::AnyOf> >(GStringPiece, strings::delimiter::LimitImpl<strings::delimiter::AnyOf>)
309
310
template <typename Delimiter, typename Predicate>  // Predicate: GStringPiece -> bool.
311
inline internal::Splitter<Delimiter, Predicate> Split(
312
0
    GStringPiece text, Delimiter d, Predicate p) {
313
0
  return internal::Splitter<Delimiter, Predicate>(text, d, p);  // p is handed to the Splitter.
314
0
}
315
316
namespace delimiter {
317
// A Delimiter object represents a single separator, such as a character,
318
// literal string, or regular expression. A Delimiter object must have the
319
// following member:
320
//
321
//   GStringPiece Find(GStringPiece text);
322
//
323
// This Find() member function should return a GStringPiece referring to the next
324
// occurrence of the represented delimiter within the given string text. If no
325
// delimiter is found in the given text, a zero-length GStringPiece referring to
326
// text.end() should be returned (e.g., GStringPiece(text.end(), 0)). It is
327
// important that the returned GStringPiece always be within the bounds of the
328
// GStringPiece given as an argument--it must not refer to a string that is
329
// physically located outside of the given string. The following example is a
330
// simple Delimiter object that is created with a single char and will look for
331
// that char in the text given to the Find() function:
332
//
333
//   struct SimpleDelimiter {
334
//     const char c_;
335
//     explicit SimpleDelimiter(char c) : c_(c) {}
336
//     GStringPiece Find(GStringPiece text) {
337
//       int pos = text.find(c_);
338
//       if (pos == GStringPiece::npos) return GStringPiece(text.end(), 0);
339
//       return GStringPiece(text, pos, 1);
340
//     }
341
//   };
342
343
// Represents a literal string delimiter. Examples:
344
//
345
//   using ::strings::delimiter::Literal;
346
//   vector<string> v = strings::Split("a=>b=>c", Literal("=>"));
347
//   assert(v.size() == 3);
348
//   assert(v[0] == "a");
349
//   assert(v[1] == "b");
350
//   assert(v[2] == "c");
351
//
352
// The next example uses the empty string as a delimiter.
353
//
354
//   using ::strings::delimiter::Literal;
355
//   vector<string> v = strings::Split("abc", Literal(""));
356
//   assert(v.size() == 3);
357
//   assert(v[0] == "a");
358
//   assert(v[1] == "b");
359
//   assert(v[2] == "c");
360
//
361
class Literal {
362
 public:
363
  explicit Literal(GStringPiece sp);  // sp: the delimiter text; declared only, no body here.
364
  GStringPiece Find(GStringPiece text) const;  // Delimiter-concept member; declared only.
365
366
 private:
367
  const string delimiter_;  // A string object, not a GStringPiece view.
368
};
369
370
// Represents a delimiter that will match any of the given byte-sized
371
// characters. AnyOf is similar to Literal, except that AnyOf uses
372
// GStringPiece::find_first_of() and Literal uses GStringPiece::find(). AnyOf
373
// examples:
374
//
375
//   using ::strings::delimiter::AnyOf;
376
//   vector<string> v = strings::Split("a,b=c", AnyOf(",="));
377
//
378
//   assert(v.size() == 3);
379
//   assert(v[0] == "a");
380
//   assert(v[1] == "b");
381
//   assert(v[2] == "c");
382
//
383
// If AnyOf is given the empty string, it behaves exactly like Literal and
384
// matches each individual character in the input string.
385
//
386
// Note: The string passed to AnyOf is assumed to be a string of single-byte
387
// ASCII characters. AnyOf does not work with multi-byte characters.
388
class AnyOf {
389
 public:
390
  explicit AnyOf(GStringPiece sp);  // sp: the set of delimiter bytes; declared only, no body here.
391
  GStringPiece Find(GStringPiece text) const;  // Delimiter-concept member; declared only.
392
393
 private:
394
  const string delimiters_;  // A string object, not a GStringPiece view.
395
};
396
397
// Wraps another delimiter and sets a max number of matches for that delimiter.
398
// Create LimitImpls using the Limit() function. Example:
399
//
400
//   using ::strings::delimiter::Limit;
401
//   vector<string> v = strings::Split("a,b,c,d", Limit(",", 2));
402
//
403
//   assert(v.size() == 3);  // Split on 2 commas, giving a vector with 3 items
404
//   assert(v[0] == "a");
405
//   assert(v[1] == "b");
406
//   assert(v[2] == "c,d");
407
//
408
template <typename Delimiter>  // Delimiter: the wrapped delimiter type.
409
class LimitImpl {
410
 public:
411
  LimitImpl(Delimiter delimiter, size_t limit)
412
0
      : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {}
413
0
  GStringPiece Find(GStringPiece text) {  // Non-const: every call advances count_.
414
0
    if (count_++ == limit_) {  // Post-increment: the call that hits the limit is counted too.
415
0
      return GStringPiece(text.end(), 0);  // No more matches.
416
0
    }
417
0
    return delimiter_.Find(text);  // Otherwise defer to the wrapped delimiter.
418
0
  }
419
420
 private:
421
  Delimiter delimiter_;  // The wrapped delimiter, stored by value.
422
  const size_t limit_;  // Value of count_ at which Find() reports "no match".
423
  size_t count_;  // Number of Find() calls made so far; starts at 0.
424
};
425
426
// Overloaded Limit() function to create LimitImpl<> objects. Uses the Delimiter
427
// Literal as the default if string-like objects are passed as the delimiter
428
// parameter. This is similar to the overloads for Split() below.
429
template <typename Delimiter>  // Generic form: wraps an existing Delimiter object.
430
0
inline LimitImpl<Delimiter> Limit(Delimiter delim, size_t limit) {
431
0
  return LimitImpl<Delimiter>(delim, limit);  // delim is copied into the constructor argument.
432
0
}
433
434
0
inline LimitImpl<Literal> Limit(const char* s, size_t limit) {  // C-string delimiter.
435
0
  return LimitImpl<Literal>(Literal(s), limit);  // Literal's constructor is explicit.
436
0
}
437
438
0
inline LimitImpl<Literal> Limit(const string& s, size_t limit) {  // string delimiter.
439
0
  return LimitImpl<Literal>(Literal(s), limit);  // Literal's constructor is explicit.
440
0
}
441
442
0
inline LimitImpl<Literal> Limit(GStringPiece s, size_t limit) {  // GStringPiece delimiter.
443
0
  return LimitImpl<Literal>(Literal(s), limit);  // Literal's constructor is explicit.
444
0
}
445
446
}  // namespace delimiter
447
448
//
449
// Predicates are functors that return bool indicating whether the given
450
// GStringPiece should be included in the split output. If the predicate returns
451
// false then the string will be excluded from the output from strings::Split().
452
//
453
454
// Always returns true, indicating that all strings--including empty
455
// strings--should be included in the split output. This predicate is not
456
// strictly needed because this is the default behavior of the strings::Split()
457
// function. But it might be useful at some call sites to make the intent
458
// explicit.
459
//
460
// vector<string> v = Split(" a , ,,b,", ",", AllowEmpty());
461
// EXPECT_THAT(v, ElementsAre(" a ", " ", "", "b", ""));
462
struct AllowEmpty {
463
212
  bool operator()(GStringPiece sp) const {  // sp is not inspected.
464
212
    return true;  // Keep every piece.
465
212
  }
466
};
467
468
// Returns false if the given GStringPiece is empty, indicating that the
469
// strings::Split() API should omit the empty string.
470
//
471
// vector<string> v = Split(" a , ,,b,", ",", SkipEmpty());
472
// EXPECT_THAT(v, ElementsAre(" a ", " ", "b"));
473
struct SkipEmpty {
474
412k
  bool operator()(GStringPiece sp) const {
475
412k
    return !sp.empty();  // Keep the piece only when it has at least one byte.
476
412k
  }
477
};
478
479
// Returns false if the given GStringPiece is empty or contains only whitespace,
480
// indicating that the strings::Split() API should omit the string.
481
//
482
// vector<string> v = Split(" a , ,,b,", ",", SkipWhitespace());
483
// EXPECT_THAT(v, ElementsAre(" a ", "b"));
484
struct SkipWhitespace {
485
0
  bool operator()(GStringPiece sp) const {  // sp is taken by value.
486
0
    StripWhiteSpace(&sp);  // Operates on the local copy of sp only.
487
0
    return !sp.empty();  // Keep the piece only if something remains after stripping.
488
0
  }
489
};
490
491
// Split() function overloads to effectively give Split() a default Delimiter
492
// type of Literal. If Split() is called and a string is passed as the delimiter
493
// instead of an actual Delimiter object, then one of these overloads will be
494
// invoked and will create a Splitter<Literal> with the delimiter string.
495
//
496
// Since Split() is a function template above, these overload signatures need to
497
// be explicit about the string type so they match better than the templated
498
// version. These functions are overloaded for:
499
//
500
//   - const char*
501
//   - const string&
502
//   - GStringPiece
503
504
inline internal::Splitter<delimiter::Literal> Split(
505
222k
    GStringPiece text, const char* delimiter) {  // C-string delimiter overload.
506
222k
  return internal::Splitter<delimiter::Literal>(
507
222k
      text, delimiter::Literal(delimiter));  // Wrap the C string in a Literal delimiter.
508
222k
}
509
510
inline internal::Splitter<delimiter::Literal> Split(
511
0
    GStringPiece text, const string& delimiter) {  // string delimiter overload.
512
0
  return internal::Splitter<delimiter::Literal>(
513
0
      text, delimiter::Literal(delimiter));  // Wrap the string in a Literal delimiter.
514
0
}
515
516
inline internal::Splitter<delimiter::Literal> Split(
517
0
    GStringPiece text, GStringPiece delimiter) {  // GStringPiece delimiter overload.
518
0
  return internal::Splitter<delimiter::Literal>(
519
0
      text, delimiter::Literal(delimiter));  // Wrap the piece in a Literal delimiter.
520
0
}
521
522
// Same overloads as above, but also including a Predicate argument.
523
template <typename Predicate>  // Predicate: GStringPiece -> bool.
524
inline internal::Splitter<delimiter::Literal, Predicate> Split(
525
332k
    GStringPiece text, const char* delimiter, Predicate p) {  // C-string delimiter overload.
526
332k
  return internal::Splitter<delimiter::Literal, Predicate>(
527
332k
      text, delimiter::Literal(delimiter), p);  // Literal delimiter plus the caller's predicate.
528
332k
}
strings::internal::Splitter<strings::delimiter::Literal, strings::SkipEmpty> strings::Split<strings::SkipEmpty>(GStringPiece, char const*, strings::SkipEmpty)
Line
Count
Source
525
332k
    GStringPiece text, const char* delimiter, Predicate p) {
526
332k
  return internal::Splitter<delimiter::Literal, Predicate>(
527
332k
      text, delimiter::Literal(delimiter), p);
528
332k
}
strings::internal::Splitter<strings::delimiter::Literal, strings::AllowEmpty> strings::Split<strings::AllowEmpty>(GStringPiece, char const*, strings::AllowEmpty)
Line
Count
Source
525
101
    GStringPiece text, const char* delimiter, Predicate p) {
526
101
  return internal::Splitter<delimiter::Literal, Predicate>(
527
101
      text, delimiter::Literal(delimiter), p);
528
101
}
529
530
template <typename Predicate>  // Predicate: GStringPiece -> bool.
531
inline internal::Splitter<delimiter::Literal, Predicate> Split(
532
    GStringPiece text, const string& delimiter, Predicate p) {  // string delimiter overload.
533
  return internal::Splitter<delimiter::Literal, Predicate>(
534
      text, delimiter::Literal(delimiter), p);  // Literal delimiter plus the caller's predicate.
535
}
536
537
template <typename Predicate>  // Predicate: GStringPiece -> bool.
538
inline internal::Splitter<delimiter::Literal, Predicate> Split(
539
    GStringPiece text, GStringPiece delimiter, Predicate p) {  // GStringPiece delimiter overload.
540
  return internal::Splitter<delimiter::Literal, Predicate>(
541
      text, delimiter::Literal(delimiter), p);  // Literal delimiter plus the caller's predicate.
542
}
543
544
}  // namespace strings
545
546
//
547
// ==================== LEGACY SPLIT FUNCTIONS ====================
548
//
549
550
// NOTE: The instruction below creates a Module titled
551
// GlobalSplitFunctions within the auto-generated Doxygen documentation.
552
// This instruction is needed to expose global functions that are not
553
// within a namespace.
554
//
555
// START DOXYGEN SplitFunctions grouping
556
/* @defgroup SplitFunctions
557
 * @{ */
558
559
// ----------------------------------------------------------------------
560
// ClipString
561
//    Clip a string to a max length. We try to clip on a word boundary
562
//    if this is possible. If the string is clipped, we append an
563
//    ellipsis.
564
//
565
//    ***NOTE***
566
//    ClipString counts length with strlen.  If you have non-ASCII
567
//    strings like UTF-8, this is wrong.  If you are displaying the
568
//    clipped strings to users in a frontend, consider using
569
//    ClipStringOnWordBoundary in
570
//    webserver/util/snippets/rewriteboldtags, which considers the width
571
//    of the string, not just the number of bytes.
572
//
573
// TODO(user) Move ClipString back to strutil.  The problem with this is
574
//    that ClipStringHelper is used behind the scenes by SplitStringToLines, but
575
//    probably shouldn't be exposed in the .h files.
576
// ----------------------------------------------------------------------
577
void ClipString(char* str, size_t max_len);
578
579
// ----------------------------------------------------------------------
580
// ClipString
581
//    Version of ClipString() that uses string instead of char*.
582
//    NOTE: See comment above.
583
// ----------------------------------------------------------------------
584
void ClipString(string* full_str, size_t max_len);
585
586
// ----------------------------------------------------------------------
587
// SplitStringToLines() Split a string into lines of maximum length
588
// 'max_len'. Append the resulting lines to 'result'. Will attempt
589
// to split on word boundaries.  If 'num_lines'
590
// is zero it splits up the whole string regardless of length. If
591
// 'num_lines' is positive, it returns at most num_lines lines, and
592
// appends a "..." to the end of the last line if the string is too
593
// long to fit completely into 'num_lines' lines.
594
// ----------------------------------------------------------------------
595
void SplitStringToLines(const char* full,
596
                        size_t max_len,
597
                        size_t num_lines,
598
                        vector<string>* result);
599
600
// ----------------------------------------------------------------------
601
// SplitOneStringToken()
602
//   Returns the first "delim" delimited string from "*source" and modifies
603
//   *source to point after the delimiter that was found. If no delimiter is
604
//   found, *source is set to NULL.
605
//
606
//   If the start of *source is a delimiter, an empty string is returned.
607
//   If *source is NULL, an empty string is returned.
608
//
609
//   "delim" is treated as a sequence of 1 or more character delimiters. Any one
610
//   of the characters present in "delim" is considered to be a single
611
//   delimiter; The delimiter is not "delim" as a whole. For example:
612
//
613
//     const char* s = "abc=;de";
614
//     string r = SplitOneStringToken(&s, ";=");
615
//     // r = "abc"
616
//     // s points to ";de"
617
// ----------------------------------------------------------------------
618
string SplitOneStringToken(const char** source, const char* delim);
619
620
// ----------------------------------------------------------------------
621
// SplitUsing()
622
//    Split a string into substrings based on the nul-terminated list
623
//    of bytes at delimiters (uses strsep) and return a vector of
624
//    those strings. Modifies 'full'. We allocate the return vector,
625
//    and you should free it.  Note that empty fields are ignored.
626
//    Use SplitToVector with last argument 'false' if you want the
627
//    empty fields.
628
//    ----------------------------------------------------------------------
629
vector<char*>* SplitUsing(char* full, const char* delimiters);
630
631
// ----------------------------------------------------------------------
632
// SplitToVector()
633
//    Split a string into substrings based on the nul-terminated list
634
//    of bytes at delim (uses strsep) and appends the split
635
//    strings to 'vec'.  Modifies "full".  If 'omit_empty_strings' is
636
//    true, empty strings are omitted from the resulting vector.
637
// ----------------------------------------------------------------------
638
void SplitToVector(char* full, const char* delimiters,
639
                   vector<char*>* vec,
640
                   bool omit_empty_strings);
641
void SplitToVector(char* full, const char* delimiters,
642
                   vector<const char*>* vec,
643
                   bool omit_empty_strings);
644
645
// ----------------------------------------------------------------------
646
// SplitGStringPieceToVector
647
//    Split a GStringPiece into sub-GStringPieces based on the
648
//    nul-terminated list of bytes at delim and appends the
649
//    pieces to 'vec'.  If 'omit_empty_strings' is true, empty strings
650
//    are omitted from the resulting vector.
651
//    Expects the original string (from which 'full' is derived) to exist
652
//    for the full lifespan of 'vec'.
653
// ----------------------------------------------------------------------
654
void SplitGStringPieceToVector(const GStringPiece& full,
655
                              const char* delim,
656
                              vector<GStringPiece>* vec,
657
                              bool omit_empty_strings);
658
659
// ----------------------------------------------------------------------
660
// SplitStringUsing()
661
// SplitStringToHashsetUsing()
662
// SplitStringToSetUsing()
663
// SplitStringToMapUsing()
664
// SplitStringToHashmapUsing()
665
666
// Splits a string using one or more byte delimiters, presented as a
667
// nul-terminated c string. Append the components to 'result'. If there are
668
// consecutive delimiters, this function skips over all of them: in other words,
669
// empty components are dropped. If you want to keep empty components, try
670
// SplitStringAllowEmpty().
671
//
672
// NOTE: Do not use this for multi-byte delimiters such as UTF-8 strings. Use
673
// strings::Split() with strings::delimiter::Literal as the delimiter.
674
//
675
// ==> NEW API: Consider using the new Split API defined above. <==
676
// Example:
677
//
678
//   using strings::SkipEmpty;
679
//   using strings::Split;
680
//   using strings::delimiter::AnyOf;
681
//
682
//   vector<string> v = Split(full, AnyOf(delimiter), SkipEmpty());
683
//
684
// For even better performance, store the result in a vector<GStringPiece>
685
// to avoid string copies.
686
// ----------------------------------------------------------------------
687
void SplitStringUsing(const string& full, const char* delimiters,
688
                      vector<string>* result);
689
void SplitStringToSetUsing(const string& full, const char* delimiters,
690
                           set<string>* result);
691
// The even-positioned (0-based) components become the keys for the
692
// odd-positioned components that follow them. When there is an odd
693
// number of components, the value for the last key will be unchanged
694
// if the key was already present in the hash table, or will be the
695
// empty string if the key is a newly inserted key.
696
void SplitStringToMapUsing(const string& full, const char* delim,
697
                           map<string, string>* result);
698
699
// ----------------------------------------------------------------------
700
// SplitStringAllowEmpty()
701
//
702
// Split a string using one or more byte delimiters, presented as a
703
// nul-terminated c string. Append the components to 'result'. If there are
704
// consecutive delimiters, this function will return corresponding empty
705
// strings.  If you want to drop the empty strings, try SplitStringUsing().
706
//
707
// If "full" is the empty string, yields an empty string as the only value.
708
//
709
// ==> NEW API: Consider using the new Split API defined above. <==
710
//
711
//   using strings::Split;
712
//   using strings::delimiter::AnyOf;
713
//
714
//   vector<string> v = Split(full, AnyOf(delimiter));
715
//
716
// For even better performance, store the result in a vector<GStringPiece> to
717
// avoid string copies.
718
// ----------------------------------------------------------------------
719
void SplitStringAllowEmpty(const string& full, const char* delim,
720
                           vector<string>* result);
721
722
// ----------------------------------------------------------------------
723
// SplitStringWithEscaping()
724
// SplitStringWithEscapingAllowEmpty()
725
// SplitStringWithEscapingToSet()
726
// SplitStringWithEscapingToHashset()
727
728
//   Split the string using the specified delimiters, taking escaping into
729
//   account. '\' is not allowed as a delimiter.
730
//
731
//   Within the string, preserve a delimiter preceded by a backslash as a
732
//   literal delimiter. In addition, preserve two consecutive backslashes as
733
//   a single literal backslash. Do not unescape any other backslash-character
734
//   sequence.
735
//
736
//   Eg. 'foo\=bar=baz\\qu\ux' split on '=' becomes ('foo=bar', 'baz\qu\ux')
737
//
738
//   All versions other than "AllowEmpty" discard any empty substrings.
739
// ----------------------------------------------------------------------
740
void SplitStringWithEscaping(const string& full,
741
                             const strings::CharSet& delimiters,
742
                             vector<string>* result);
743
void SplitStringWithEscapingAllowEmpty(const string& full,
744
                                       const strings::CharSet& delimiters,
745
                                       vector<string>* result);
746
void SplitStringWithEscapingToSet(const string& full,
747
                                  const strings::CharSet& delimiters,
748
                                  set<string>* result);
749
750
// ----------------------------------------------------------------------
751
// SplitStringIntoNPiecesAllowEmpty()
752
753
//    Split a string using a nul-terminated list of byte
754
//    delimiters. Append the components to 'result'.  If there are
755
//    consecutive delimiters, this function will return corresponding
756
//    empty strings. The string is split into at most the specified
757
//    number of pieces greedily. This means that the last piece may
758
//    possibly be split further. To split into as many pieces as
759
//    possible, specify 0 as the number of pieces.
760
//
761
//    If "full" is the empty string, yields an empty string as the only value.
762
// ----------------------------------------------------------------------
763
void SplitStringIntoNPiecesAllowEmpty(const string& full,
764
                                      const char* delimiters,
765
                                      size_t pieces,
766
                                      vector<string>* result);
767
768
// ----------------------------------------------------------------------
769
// SplitStringAndParse()
770
// SplitStringAndParseToContainer()
771
// SplitStringAndParseToList()
772
//    Split a string using a nul-terminated list of character
773
//    delimiters.  For each component, parse using the provided
774
//    parsing function and if successful, append it to 'result'.
775
//    Return true if and only if all components parse successfully.
776
//    If there are consecutive delimiters, this function skips over
777
//    all of them.  This function will correctly handle parsing
778
//    strings that have embedded \0s.
779
//
780
// SplitStringAndParse fills into a vector.
781
// SplitStringAndParseToContainer fills into any container that implements
782
//    a single-argument insert function. (i.e. insert(const value_type& x) ).
783
// SplitStringAndParseToList fills into any container that implements a single-
784
//    argument push_back function (i.e. push_back(const value_type& x) ), plus
785
//    value_type& back() and pop_back().
786
//    NOTE: This implementation parses each component into a temporary value
787
//    and then appends it with push_back(), so each parsed value is copied once.
788
//
789
// Example Usage:
790
//  vector<double> values;
791
//  CHECK(SplitStringAndParse("1.0,2.0,3.0", ",", &safe_strtod, &values));
792
//  CHECK_EQ(3, values.size());
793
//
794
//  vector<int64> values;
795
//  CHECK(SplitStringAndParse("1M,2M,3M", ",",
796
//        &HumanReadableNumBytes::ToInt64, &values));
797
//  CHECK_EQ(3, values.size());
798
//
799
//  set<int64> values;
800
//  CHECK(SplitStringAndParseToContainer("3,1,1,2", ",",
801
//        &safe_strto64, &values));
802
//  CHECK_EQ(3, values.size());  // the set keeps only one copy of the duplicate "1"
803
//
804
//  deque<int64> values;
805
//  CHECK(SplitStringAndParseToList("3,1,1,2", ",", &safe_strto64, &values));
806
//  CHECK_EQ(4, values.size());
807
// ----------------------------------------------------------------------
808
template <class T>
809
bool SplitStringAndParse(GStringPiece source, GStringPiece delim,
810
                         bool (*parse)(const string& str, T* value),
811
                         vector<T>* result);
812
template <class Container>
813
bool SplitStringAndParseToContainer(
814
    GStringPiece source, GStringPiece delim,
815
    bool (*parse)(const string& str, typename Container::value_type* value),
816
    Container* result);
817
818
template <class List>
819
bool SplitStringAndParseToList(
820
    GStringPiece source, GStringPiece delim,
821
    bool (*parse)(const string& str, typename List::value_type* value),
822
    List* result);
823
// ----------------------------------------------------------------------
824
// SplitRange()
825
//    Splits a string of the form "<from>-<to>".  Either or both can be
826
//    missing.  A raw number (<to>) is interpreted as "<to>-".  Modifies
827
//    parameters insofar as they're specified by the string.  RETURNS
828
//    true iff the input is a well-formed range.  If it RETURNS false,
829
//    from and to remain unchanged.  The range in rangestr should be
830
//    terminated either by "\0" or by whitespace.
831
// ----------------------------------------------------------------------
832
bool SplitRange(const char* rangestr, int* from, int* to);
833
834
// ----------------------------------------------------------------------
835
// SplitCSVLineWithDelimiter()
836
//    CSV lines come in many guises.  There's the Comma Separated Values
837
//    variety, in which fields are separated by (surprise!) commas.  There's
838
//    also the tab-separated values variant, in which tabs separate the
839
//    fields.  This routine handles both, which makes it almost like
840
//    SplitUsing(line, delimiter), but for some special processing.  For both
841
//    delimiters, whitespace is trimmed from either side of the field value.
842
//    If the delimiter is ',', we play additional games with quotes.  A
843
//    field value surrounded by double quotes is allowed to contain commas,
844
//    which are not treated as field separators.  Within a double-quoted
845
//    string, a series of two double quotes signals an escaped single double
846
//    quote.  It'll be clearer in the examples.
847
//    Example:
848
//     Google , x , "Buchheit, Paul", "string with "" quote in it"
849
//     -->  [Google], [x], [Buchheit, Paul], [string with " quote in it]
850
//
851
// SplitCSVLine()
852
//    A convenience wrapper around SplitCSVLineWithDelimiter which uses
853
//    ',' as the delimiter.
854
//
855
// The following variants of SplitCSVLine() are not recommended for new code.
856
// Please consider the CSV parser in //util/csv as an alternative.  Examples:
857
// To parse a single line:
858
//     #include "yb/util/csv/parser.h"
859
//     vector<string> fields = util::csv::ParseLine(line).fields();
860
//
861
// To parse an entire file:
862
//     #include "yb/util/csv/parser.h"
863
//     for (Record rec : Parser(source)) {
864
//       vector<string> fields = rec.fields();
865
//     }
866
//
867
// See //util/csv/parser.h for more complete documentation.
868
//
869
// ----------------------------------------------------------------------
870
void SplitCSVLine(char* line, vector<char*>* cols);
871
void SplitCSVLineWithDelimiter(char* line, char delimiter,
872
                               vector<char*>* cols);
873
// SplitCSVLine string wrapper that internally makes a copy of string line.
874
void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter,
875
                                         vector<string>* cols);
876
877
// ----------------------------------------------------------------------
878
// SplitStructuredLine()
879
//    Splits a line using the given delimiter, and places the columns
880
//    into 'cols'. This is unlike 'SplitUsing(line, ",")' because you can
881
//    define pairs of opening closing symbols inside which the delimiter should
882
//    be ignored. If the symbol_pair string has an odd number of characters,
883
//    the last character (which cannot be paired) will be assumed to be both an
884
//    opening and closing symbol.
885
//    WARNING : The input string 'line' is destroyed in the process.
886
//    The function returns 0 if the line was parsed correctly (i.e all the
887
//    opened braces had their closing braces) otherwise, it returns the position
888
//    of the error.
889
//    Example:
890
//     SplitStructuredLine("item1,item2,{subitem1,subitem2},item4,[5,{6,7}]",
891
//                         ',',
892
//                         "{}[]", &output)
893
//     --> output = { "item1", "item2", "{subitem1,subitem2}", "item4",
894
//                    "[5,{6,7}]" }
895
//    Example2: trying to split "item1,[item2,{4,5],5}" will fail and the
896
//              function will return the position of the problem : ]
897
//
898
// ----------------------------------------------------------------------
899
char* SplitStructuredLine(char* line,
900
                          char delimiter,
901
                          const char* symbol_pairs,
902
                          vector<char*>* cols);
903
904
// Similar to the function with the same name above, but splits a GStringPiece
905
// into GStringPiece parts. Returns true if successful.
906
bool SplitStructuredLine(GStringPiece line,
907
                         char delimiter,
908
                         const char* symbol_pairs,
909
                         vector<GStringPiece>* cols);
910
911
// ----------------------------------------------------------------------
912
// SplitStructuredLineWithEscapes()
913
//    Like SplitStructuredLine but also allows characters to be escaped.
914
//
915
//    WARNING: the escape characters will be replicated in the output
916
//    columns rather than being consumed, i.e. if {} were the opening and
917
//    closing symbols, using \{ to quote a curly brace in the middle of
918
//    an option would pass this unchanged.
919
//
920
//    Example:
921
//     SplitStructuredLineWithEscapes(
922
//       "\{item1\},it\\em2,{\{subitem1\},sub\\item2},item4\,item5,[5,{6,7}]",
923
//                     ',',
924
//                     "{}[]",
925
//                     &output)
926
//     --> output = { "\{item1\}", "it\\em2", "{\{subitem1\},sub\\item2}",
927
//                    "item4\,item5", "[5,{6,7}]" }
928
//
929
// ----------------------------------------------------------------------
930
char* SplitStructuredLineWithEscapes(char* line,
931
                                     char delimiter,
932
                                     const char* symbol_pairs,
933
                                     vector<char*>* cols);
934
935
// Similar to the function with the same name above, but splits a GStringPiece
936
// into GStringPiece parts. Returns true if successful.
937
bool SplitStructuredLineWithEscapes(GStringPiece line,
938
                                    char delimiter,
939
                                    const char* symbol_pairs,
940
                                    vector<GStringPiece>* cols);
941
942
// ----------------------------------------------------------------------
943
// DEPRECATED(jgm): See the "NEW API" comment about this function below for
944
// example code showing an alternative.
945
//
946
// SplitStringIntoKeyValues()
947
// Split a line into a key string and a vector of value strings. The line has
948
// the following format:
949
//
950
// <key><kvsep>+<vvsep>*<value1><vvsep>+<value2><vvsep>+<value3>...<vvsep>*
951
//
952
// where key and value are strings; */+ means zero/one or more; <kvsep> is
953
// a delimiter character to separate key and value; and <vvsep> is a delimiter
954
// character to separate between values. The user can specify a bunch of
955
// delimiter characters using a string. For example, if the user specifies
956
// the separator string as "\t ", then either ' ' or '\t' or any combination
957
// of them will be treated as a separator. For <vvsep>, the user can specify an
958
// empty string to indicate there is only one value.
959
//
960
// Note: this function assumes the input string begins exactly with a
961
// key. Therefore, if you use whitespaces to separate key and value, you
962
// should not let whitespace precede the key in the input. Otherwise, you
963
// will get an empty string as the key.
964
//
965
// A line with no <kvsep> will return an empty string as the key, even if
966
// <key> is non-empty!
967
//
968
// The syntax makes it impossible for a value to be the empty string.
969
// It is possible for the number of values to be zero.
970
//
971
// Returns false if the line has no <kvsep> or if the number of values is
972
// zero.
973
//
974
// ==> NEW API: Consider using the new Split API defined above. <==
975
//
976
// The SplitStringIntoKeyValues() function has some subtle and surprising
977
// semantics in various corner cases. To avoid this the strings::Split API is
978
// recommended. The following example shows how to split a string of delimited
979
// key-value pairs into a vector of pairs using the strings::Split API.
980
//
981
//   using strings::Split;
982
//   using strings::delimiter::AnyOf;
983
//   using strings::delimiter::Limit;
984
//
985
//   pair<string, GStringPiece> key_values =
986
//       Split(line, Limit(AnyOf(kv_delim), 1));
987
//   string key = key_values.first;
988
//   vector<string> values = Split(key_values.second, AnyOf(vv_delim));
989
//
990
// ----------------------------------------------------------------------
991
bool SplitStringIntoKeyValues(const string& line,
992
                              const string& key_value_delimiters,
993
                              const string& value_value_delimiters,
994
                              string* key, vector<string>* values);
995
996
// ----------------------------------------------------------------------
997
// SplitStringIntoKeyValuePairs()
998
// Split a line into a vector of <key, value> pairs. The line has
999
// the following format:
1000
//
1001
// <kvpsep>*<key1><kvsep>+<value1><kvpsep>+<key2><kvsep>+<value2>...<kvpsep>*
1002
//
1003
// Where key and value are strings; */+ means zero/one or more. <kvsep> is
1004
// a delimiter character to separate key and value and <kvpsep> is a delimiter
1005
// character to separate key value pairs. The user can specify a bunch of
1006
// delimiter characters using a string.
1007
//
1008
// Note: this function assumes each key-value pair begins exactly with a
1009
// key. Therefore, if you use whitespaces to separate key and value, you
1010
// should not let whitespace precede the key in the pair. Otherwise, you
1011
// will get an empty string as the key.
1012
//
1013
// A pair with no <kvsep> will return empty strings as the key and value,
1014
// even if <key> is non-empty!
1015
//
1016
// Returns false for pairs with no <kvsep> specified and for pairs with
1017
// empty strings as values.
1018
//
1019
// ==> NEW API: Consider using the new Split API defined above. <==
1020
//
1021
// The SplitStringIntoKeyValuePairs() function has some subtle and surprising
1022
// semantics in various corner cases. To avoid this the strings::Split API is
1023
// recommended. The following example shows how to split a string of delimited
1024
// key-value pairs into a vector of pairs using the strings::Split API.
1025
//
1026
//   using strings::SkipEmpty;
1027
//   using strings::Split;
1028
//   using strings::delimiter::AnyOf;
1029
//   using strings::delimiter::Limit;
1030
//
1031
//   vector<pair<string, string>> pairs;  // or even map<string, string>
1032
//   for (GStringPiece sp : Split(line, AnyOf(pair_delim), SkipEmpty())) {
1033
//     pairs.push_back(Split(sp, Limit(AnyOf(kv_delim), 1), SkipEmpty()));
1034
//   }
1035
//
1036
// ----------------------------------------------------------------------
1037
bool SplitStringIntoKeyValuePairs(const string& line,
1038
                                  const string& key_value_delimiters,
1039
                                  const string& key_value_pair_delimiters,
1040
                                  vector<pair<string, string> >* kv_pairs);
1041
1042
1043
// ----------------------------------------------------------------------
1044
// SplitLeadingDec32Values()
1045
// SplitLeadingDec64Values()
1046
//    A simple parser for space-separated decimal int32/int64 values.
1047
//    Appends parsed integers to the end of the result vector, stopping
1048
//    at the first unparsable spot.  Skips past leading and repeated
1049
//    whitespace (does not consume trailing whitespace), and returns
1050
//    a pointer beyond the last character parsed.
1051
// --------------------------------------------------------------------
1052
const char* SplitLeadingDec32Values(const char* next, vector<int32>* result);
1053
const char* SplitLeadingDec64Values(const char* next, vector<int64>* result);
1054
1055
// ----------------------------------------------------------------------
1056
// SplitOneIntToken()
1057
// SplitOneInt32Token()
1058
// SplitOneUint32Token()
1059
// SplitOneInt64Token()
1060
// SplitOneUint64Token()
1061
// SplitOneDoubleToken()
1062
// SplitOneFloatToken()
1063
//   Parse a single "delim" delimited number from "*source" into "*value".
1064
//   Modify *source to point after the delimiter.
1065
//   If no delimiter is present after the number, set *source to NULL.
1066
//
1067
//   If the start of *source is not a number, return false.
1068
//   If the int is followed by the null character, return true.
1069
//   If the int is not followed by a character from delim, return false.
1070
//   If *source is NULL, return false.
1071
//
1072
//   They cannot handle decimal numbers with leading 0s, since they will be
1073
//   treated as octal.
1074
// ----------------------------------------------------------------------
1075
bool SplitOneIntToken(const char** source, const char* delim,
1076
                      int* value);
1077
bool SplitOneInt32Token(const char** source, const char* delim,
1078
                        int32* value);
1079
bool SplitOneUint32Token(const char** source, const char* delim,
1080
                         uint32* value);
1081
bool SplitOneInt64Token(const char** source, const char* delim,
1082
                        int64* value);
1083
bool SplitOneUint64Token(const char** source, const char* delim,
1084
                         uint64* value);
1085
bool SplitOneDoubleToken(const char** source, const char* delim,
1086
                         double* value);
1087
bool SplitOneFloatToken(const char** source, const char* delim,
1088
                        float* value);
1089
1090
// Some aliases, so that the function names are standardized against the names
1091
// of the reflection setters/getters in proto2. This makes it easier to use
1092
// certain macros with reflection when creating custom text formats for protos.
1093
1094
// Alias for SplitOneUint32Token() spelled with a capital 'I' ("UInt32").
// Forwards 'source', 'delim' and 'value' unchanged and returns the callee's
// result, so the parsing rules are exactly those of SplitOneUint32Token().
inline bool SplitOneUInt32Token(const char** source, const char* delim,
1095
0
                         uint32* value) {
1096
0
  return SplitOneUint32Token(source, delim, value);
1097
0
}
1098
1099
// Alias for SplitOneUint64Token() spelled with a capital 'I' ("UInt64").
// Forwards 'source', 'delim' and 'value' unchanged and returns the callee's
// result, so the parsing rules are exactly those of SplitOneUint64Token().
inline bool SplitOneUInt64Token(const char** source, const char* delim,
1100
0
                         uint64* value) {
1101
0
  return SplitOneUint64Token(source, delim, value);
1102
0
}
1103
1104
// ----------------------------------------------------------------------
1105
// SplitOneDecimalIntToken()
1106
// SplitOneDecimalInt32Token()
1107
// SplitOneDecimalUint32Token()
1108
// SplitOneDecimalInt64Token()
1109
// SplitOneDecimalUint64Token()
1110
// Parse a single "delim"-delimited number from "*source" into "*value".
1111
// Unlike SplitOneIntToken, etc., this function always interprets
1112
// the numbers as decimal.
1113
bool SplitOneDecimalIntToken(const char** source, const char* delim,
1114
                             int* value);
1115
bool SplitOneDecimalInt32Token(const char** source, const char* delim,
1116
                               int32* value);
1117
bool SplitOneDecimalUint32Token(const char** source, const char* delim,
1118
                                uint32* value);
1119
bool SplitOneDecimalInt64Token(const char** source, const char* delim,
1120
                               int64* value);
1121
bool SplitOneDecimalUint64Token(const char** source, const char* delim,
1122
                                uint64* value);
1123
1124
// ----------------------------------------------------------------------
1125
// SplitOneHexUint32Token()
1126
// SplitOneHexUint64Token()
1127
// Once more, for hexadecimal numbers (unsigned only).
1128
bool SplitOneHexUint32Token(const char** source, const char* delim,
1129
                            uint32* value);
1130
bool SplitOneHexUint64Token(const char** source, const char* delim,
1131
                            uint64* value);
1132
1133
1134
// ###################### TEMPLATE INSTANTIATIONS BELOW #######################
1135
1136
// SplitStringAndParse() -- see description above
// Vector-specific entry point: simply delegates to SplitStringAndParseToList()
// with the same arguments and returns its result.
1137
template <class T>
1138
bool SplitStringAndParse(GStringPiece source, GStringPiece delim,
1139
                         bool (*parse)(const string& str, T* value),
1140
                         vector<T>* result) {
1141
  return SplitStringAndParseToList(source, delim, parse, result);
1142
}
1143
1144
namespace strings {
1145
namespace internal {
1146
1147
// Shared implementation behind SplitStringAndParseToContainer() and
// SplitStringAndParseToList().
//
// Splits 'source' with strings::Split() using strings::delimiter::AnyOf(delim)
// and strings::SkipEmpty(), calls 'parse' on each resulting piece, and passes
// every successfully parsed value to insert_policy(result, value).
//
// Returns true only if 'parse' succeeded for every piece. A failed parse does
// not stop the loop: the remaining pieces are still parsed and inserted.
//
// CHECK-fails if 'parse', 'result' or delim.data() is NULL, or if 'delim' is
// empty.
template <class Container, class InsertPolicy>
1148
bool SplitStringAndParseToInserter(
1149
    GStringPiece source, GStringPiece delim,
1150
    bool (*parse)(const string& str, typename Container::value_type* value),
1151
    Container* result, InsertPolicy insert_policy) {
1152
  CHECK(NULL != parse);
1153
  CHECK(NULL != result);
1154
  CHECK(NULL != delim.data());
1155
  CHECK_GT(delim.size(), 0);
1156
  bool retval = true;
1157
  vector<GStringPiece> pieces = strings::Split(source,
1158
                                              strings::delimiter::AnyOf(delim),
1159
                                              strings::SkipEmpty());
1160
  for (const auto& piece : pieces) {
1161
    // Parse into a local value; it is handed to the policy only on success.
    typename Container::value_type t;
1162
    // 'parse' takes a const string&, so the piece is converted with as_string().
    if (parse(piece.as_string(), &t)) {
1163
      insert_policy(result, t);
1164
    } else {
1165
      // Remember the failure but keep going through the remaining pieces.
      retval = false;
1166
    }
1167
  }
1168
  return retval;
1169
}
1170
1171
// Cannot use output iterator here (e.g. std::inserter, std::back_inserter)
1172
// because some callers use non-standard containers that don't have iterators,
1173
// only an insert() or push_back() method.
1174
// Insertion policy that adds a value by calling the container's
// single-argument insert(): policy(c, v) performs c->insert(v).
struct BasicInsertPolicy {
1175
  template <class C, class V>
1176
  void operator()(C* c, const V& v) const { c->insert(v); }
1177
};
1178
1179
// Insertion policy that appends a value by calling the container's
// push_back(): policy(c, v) performs c->push_back(v).
struct BackInsertPolicy {
1180
  template <class C, class V>
1181
  void operator()(C* c, const V& v) const { c->push_back(v); }
1182
};
1183
1184
}  // namespace internal
1185
}  // namespace strings
1186
1187
// SplitStringAndParseToContainer() -- see description above
// Delegates to strings::internal::SplitStringAndParseToInserter() with
// BasicInsertPolicy, so each successfully parsed value is added to 'result'
// through result->insert(value). Returns the helper's result.
1188
template <class Container>
1189
bool SplitStringAndParseToContainer(
1190
    GStringPiece source, GStringPiece delim,
1191
    bool (*parse)(const string& str, typename Container::value_type* value),
1192
    Container* result) {
1193
  return strings::internal::SplitStringAndParseToInserter(
1194
      source, delim, parse, result, strings::internal::BasicInsertPolicy());
1195
}
1196
1197
// SplitStringAndParseToList() -- see description above
// Delegates to strings::internal::SplitStringAndParseToInserter() with
// BackInsertPolicy, so each successfully parsed value is appended to 'result'
// through result->push_back(value). Returns the helper's result.
// Note: values are parsed into a local and then pushed back; this code path
// does not call back() or pop_back() on 'result'.
1198
template <class List>
1199
bool SplitStringAndParseToList(
1200
    GStringPiece source, GStringPiece delim,
1201
    bool (*parse)(const string& str, typename List::value_type* value),
1202
    List* result) {
1203
  return strings::internal::SplitStringAndParseToInserter(
1204
      source, delim, parse, result, strings::internal::BackInsertPolicy());
1205
}
1206
1207
// END DOXYGEN SplitFunctions grouping
1208
/* @} */
1209
1210
#endif  // YB_GUTIL_STRINGS_SPLIT_H