YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/split.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 and onwards Google Inc.  All rights reserved.
2
//
3
// The following only applies to changes made to this file as part of YugaByte development.
4
//
5
// Portions Copyright (c) YugaByte, Inc.
6
//
7
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
8
// in compliance with the License.  You may obtain a copy of the License at
9
//
10
// http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software distributed under the License
13
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14
// or implied.  See the License for the specific language governing permissions and limitations
15
// under the License.
16
//
17
// Maintainer: Greg Miller <jgm@google.com>
18
19
#include "yb/gutil/strings/split.h"
20
21
#include <assert.h>
22
#include <stdlib.h>
23
#include <string.h>
24
#include <iterator>
25
using std::back_insert_iterator;
26
using std::iterator_traits;
27
#include <limits>
28
using std::numeric_limits;
29
30
#include "yb/gutil/integral_types.h"
31
#include <glog/logging.h>
32
#include "yb/gutil/logging-inl.h"
33
#include "yb/gutil/macros.h"
34
#include "yb/gutil/strtoint.h"
35
#include "yb/gutil/strings/ascii_ctype.h"
36
#include "yb/gutil/strings/util.h"
37
#include "yb/gutil/hash/hash.h"
38
39
// Implementations for some of the Split2 API. Much of the Split2 API is
40
// templated so it exists in header files, either strings/split.h or
41
// strings/split_internal.h.
42
namespace strings {
43
namespace delimiter {
44
45
namespace {
46
47
// This GenericFind() template function encapsulates the finding algorithm
48
// shared between the Literal and AnyOf delimiters. The FindPolicy template
49
// parameter allows each delimiter to customize the actual find function to use
50
// and the length of the found delimiter. For example, the Literal delimiter
51
// will ultimately use GStringPiece::find(), and the AnyOf delimiter will use
52
// GStringPiece::find_first_of().
53
template <typename FindPolicy>
54
GStringPiece GenericFind(
55
    GStringPiece text,
56
    GStringPiece delimiter,
57
1.21M
    FindPolicy find_policy) {
58
1.21M
  if (delimiter.empty() && 
text.length() > 00
) {
59
    // Special case for empty string delimiters: always return a zero-length
60
    // GStringPiece referring to the item at position 1.
61
0
    return GStringPiece(text.begin() + 1, 0);
62
0
  }
63
1.21M
  auto found_pos = GStringPiece::npos;
64
1.21M
  GStringPiece found(text.end(), 0);  // By default, not found
65
1.21M
  found_pos = find_policy.Find(text, delimiter);
66
1.21M
  if (found_pos != GStringPiece::npos) {
67
677k
    found.set(text.data() + found_pos, find_policy.Length(delimiter));
68
677k
  }
69
1.21M
  return found;
70
1.21M
}
split.cc:GStringPiece strings::delimiter::(anonymous namespace)::GenericFind<strings::delimiter::(anonymous namespace)::LiteralPolicy>(GStringPiece, GStringPiece, strings::delimiter::(anonymous namespace)::LiteralPolicy)
Line
Count
Source
57
1.21M
    FindPolicy find_policy) {
58
1.21M
  if (delimiter.empty() && 
text.length() > 00
) {
59
    // Special case for empty string delimiters: always return a zero-length
60
    // GStringPiece referring to the item at position 1.
61
0
    return GStringPiece(text.begin() + 1, 0);
62
0
  }
63
1.21M
  auto found_pos = GStringPiece::npos;
64
1.21M
  GStringPiece found(text.end(), 0);  // By default, not found
65
1.21M
  found_pos = find_policy.Find(text, delimiter);
66
1.21M
  if (found_pos != GStringPiece::npos) {
67
677k
    found.set(text.data() + found_pos, find_policy.Length(delimiter));
68
677k
  }
69
1.21M
  return found;
70
1.21M
}
Unexecuted instantiation: split.cc:GStringPiece strings::delimiter::(anonymous namespace)::GenericFind<strings::delimiter::(anonymous namespace)::AnyOfPolicy>(GStringPiece, GStringPiece, strings::delimiter::(anonymous namespace)::AnyOfPolicy)
71
72
// Finds using GStringPiece::find(), therefore the length of the found delimiter
73
// is delimiter.length().
74
struct LiteralPolicy {
75
1.21M
  size_t Find(GStringPiece text, GStringPiece delimiter) {
76
1.21M
    return text.find(delimiter);
77
1.21M
  }
78
677k
  size_t Length(GStringPiece delimiter) {
79
677k
    return delimiter.length();
80
677k
  }
81
};
82
83
// Finds using GStringPiece::find_first_of(), therefore the length of the found
84
// delimiter is 1.
85
struct AnyOfPolicy {
86
0
  size_t Find(GStringPiece text, GStringPiece delimiter) {
87
0
    return text.find_first_of(delimiter);
88
0
  }
89
0
  size_t Length(GStringPiece delimiter) {
90
0
    return 1;
91
0
  }
92
};
93
94
}  // namespace
95
96
//
97
// Literal
98
//
99
100
554k
// Initializes delimiter_ with the result of sp.ToString().
Literal::Literal(GStringPiece sp)
    : delimiter_(sp.ToString()) {}
102
103
1.21M
// Searches `text` for the stored delimiter as a whole string by delegating to
// GenericFind() with a LiteralPolicy.
GStringPiece Literal::Find(GStringPiece text) const {
  LiteralPolicy policy;
  return GenericFind(text, delimiter_, policy);
}
106
107
//
108
// AnyOf
109
//
110
111
0
// Initializes delimiters_ with the result of sp.ToString().
AnyOf::AnyOf(GStringPiece sp)
    : delimiters_(sp.ToString()) {}
113
114
0
// Searches `text` for the first character contained in the stored delimiter
// set by delegating to GenericFind() with an AnyOfPolicy.
GStringPiece AnyOf::Find(GStringPiece text) const {
  AnyOfPolicy policy;
  return GenericFind(text, delimiters_, policy);
}
117
118
}  // namespace delimiter
119
}  // namespace strings
120
121
//
122
// ==================== LEGACY SPLIT FUNCTIONS ====================
123
//
124
125
using ::strings::SkipEmpty;
126
using ::strings::delimiter::AnyOf;
127
using ::strings::delimiter::Limit;
128
129
namespace {
130
131
// Appends the results of a split to the specified container. This function has
132
// the following overloads:
133
// - vector<string>           - for better performance
134
// - map<string, string>      - to change append semantics
135
// - hash_map<string, string> - to change append semantics
136
// Generic append: converts `splitter` to a temporary Container and inserts
// every element of that temporary into *container, using container->end() as
// the insertion hint.
template <typename Container, typename Splitter>
void AppendToImpl(Container* container, Splitter splitter) {
  Container pieces = splitter;  // Calls implicit conversion operator.
  auto sink = std::inserter(*container, container->end());
  for (auto it = pieces.begin(); it != pieces.end(); ++it) {
    *sink = *it;
    ++sink;
  }
}
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendToImpl<std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty> >(std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendToImpl<std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty> >(std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendToImpl<std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter> >(std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter>)
141
142
// Overload of AppendToImpl() that is optimized for appending to vector<string>.
143
// This version eliminates a couple string copies by using a vector<GStringPiece>
144
// as the intermediate container.
145
template <typename Splitter>
146
0
void AppendToImpl(vector<string>* container, Splitter splitter) {
147
0
  vector<GStringPiece> vsp = splitter;  // Calls implicit conversion operator.
148
0
  size_t container_size = container->size();
149
0
  container->resize(container_size + vsp.size());
150
0
  for (const auto& sp : vsp) {
151
0
    sp.CopyToString(&(*container)[container_size++]);
152
0
  }
153
0
}
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendToImpl<strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter> >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendToImpl<strings::internal::Splitter<strings::delimiter::LimitImpl<strings::delimiter::AnyOf>, strings::internal::NoFilter> >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::LimitImpl<strings::delimiter::AnyOf>, strings::internal::NoFilter>)
154
155
// Here we define two AppendToImpl() overloads for map<> and hash_map<>. Both of
156
// these overloads call through to this AppendToMap() function. This is needed
157
// because inserting a duplicate key into a map does NOT overwrite the previous
158
// value, which was not the behavior of the split1 Split*() functions. Consider
159
// this example:
160
//
161
//   map<string, string> m;
162
//   m.insert(std::make_pair("a", "1"));
163
//   m.insert(std::make_pair("a", "2"));  // <-- doesn't actually insert.
164
//   ASSERT_EQ(m["a"], "1");  // <-- "a" has value "1" not "2".
165
//
166
// Due to this behavior of map::insert, we can't rely on a normal std::inserter
167
// for maps. Instead, maps and hash_maps need to be special cased to implement
168
// the desired append semantic of inserting an existing value overwrites the
169
// previous value.
170
//
171
// This same issue is true with sets as well. However, since sets don't have a
172
// separate key and value, failing to overwrite an existing value in a set is
173
// fine because the value already exists in the set.
174
//
175
// Converts `splitter` to a temporary Map and assigns each of its entries into
// *m through operator[], so a key already present in *m has its value
// overwritten.
template <typename Map, typename Splitter>
void AppendToMap(Map* m, Splitter splitter) {
  Map parsed = splitter;  // Calls implicit conversion operator.
  for (const auto& entry : parsed) {
    (*m)[entry.first] = entry.second;
  }
}
182
183
// map<string, string> overload: forwards to AppendToMap() so that existing
// keys are overwritten rather than kept.
template <typename Splitter>
void AppendToImpl(map<string, string>* map_container, Splitter splitter) {
  map<string, string>* const target = map_container;
  AppendToMap(target, splitter);
}
187
188
// Appends the results of a call to strings::Split() to the specified container.
189
// This function is used with the new strings::Split() API to implement the
190
// append semantics of the legacy Split*() functions.
191
//
192
// The "Splitter" template parameter is intended to be a
193
// ::strings::internal::Splitter<>, which is the return value of a call to
194
// strings::Split(). Sample usage:
195
//
196
//   vector<string> v;
197
//   ... add stuff to "v" ...
198
//   AppendTo(&v, strings::Split("a,b,c", ","));
199
//
200
// Appends the result of `splitter` to *container.
//
// A non-empty container goes through the matching AppendToImpl() overload.
// An empty container is assigned from the splitter directly.
template <typename Container, typename Splitter>
void AppendTo(Container* container, Splitter splitter) {
  if (!container->empty()) {
    AppendToImpl(container, splitter);
    return;
  }
  // "Appending" to an empty container is the common case; plain assignment is
  // cheaper than an explicit append.
  *container = splitter;  // Calls implicit conversion operator.
}
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter> >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, strings::internal::Splitter<strings::delimiter::LimitImpl<strings::delimiter::AnyOf>, strings::internal::NoFilter> >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::LimitImpl<strings::delimiter::AnyOf>, strings::internal::NoFilter>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty> >(std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty> >(std::__1::map<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::pair<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty> >(std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::SkipEmpty>)
Unexecuted instantiation: split.cc:void (anonymous namespace)::AppendTo<std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter> >(std::__1::vector<GStringPiece, std::__1::allocator<GStringPiece> >*, strings::internal::Splitter<strings::delimiter::AnyOf, strings::internal::NoFilter>)
211
212
}  // anonymous namespace
213
214
// Constants for ClipString()
215
static const int kMaxOverCut = 12;
216
// The ellipsis to add to strings that are too long
217
static const char kCutStr[] = "...";
218
static const size_t kCutStrSize = sizeof(kCutStr) - 1;
219
220
// ----------------------------------------------------------------------
221
// Return the place to clip the string at, or numeric_limits<size_t>::max()
222
// if the string doesn't need to be clipped.
223
// ----------------------------------------------------------------------
224
0
static size_t ClipStringHelper(const char* str, size_t max_len, bool use_ellipsis) {
225
0
  if (strlen(str) <= max_len)
226
0
    return std::numeric_limits<size_t>::max();
227
228
0
  auto max_substr_len = max_len;
229
230
0
  if (use_ellipsis && max_len > kCutStrSize) {
231
0
    max_substr_len -= kCutStrSize;
232
0
  }
233
234
0
  const char* cut_by =
235
0
      (max_substr_len < kMaxOverCut ? str : str + max_len - kMaxOverCut);
236
0
  const char* cut_at = str + max_substr_len;
237
0
  while (!ascii_isspace(*cut_at) && cut_at > cut_by)
238
0
    cut_at--;
239
240
0
  if (cut_at == cut_by) {
241
    // No space was found
242
0
    return max_substr_len;
243
0
  } else {
244
0
    return cut_at-str;
245
0
  }
246
0
}
247
248
// ----------------------------------------------------------------------
249
// ClipString
250
//    Clip a string to a max length. We try to clip on a word boundary
251
//    if this is possible. If the string is clipped, we append an
252
//    ellipsis.
253
// ----------------------------------------------------------------------
254
255
0
void ClipString(char* str, size_t max_len) {
256
0
  auto cut_at = ClipStringHelper(str, max_len, true);
257
0
  if (cut_at != std::numeric_limits<size_t>::max()) {
258
0
    if (max_len > kCutStrSize) {
259
0
      strcpy(str+cut_at, kCutStr); // NOLINT
260
0
    } else {
261
0
      strcpy(str+cut_at, ""); // NOLINT
262
0
    }
263
0
  }
264
0
}
265
266
// ----------------------------------------------------------------------
267
// ClipString
268
//    Version of ClipString() that uses string instead of char*.
269
// ----------------------------------------------------------------------
270
0
void ClipString(string* full_str, size_t max_len) {
271
0
  auto cut_at = ClipStringHelper(full_str->c_str(), max_len, true);
272
0
  if (cut_at != std::numeric_limits<size_t>::max()) {
273
0
    full_str->erase(cut_at);
274
0
    if (max_len > kCutStrSize) {
275
0
      full_str->append(kCutStr);
276
0
    }
277
0
  }
278
0
}
279
280
// ----------------------------------------------------------------------
281
// SplitStringToIteratorAllowEmpty()
282
//    Split a string using a character delimiter. Append the components
283
//    to 'result'.  If there are consecutive delimiters, this function
284
//    will return corresponding empty strings. The string is split into
285
//    at most the specified number of pieces greedily. This means that the
286
//    last piece may possibly be split further. To split into as many pieces
287
//    as possible, specify 0 as the number of pieces.
288
//
289
//    If "full" is the empty string, yields an empty string as the only value.
290
//
291
//    If "pieces" is negative for some reason, it returns the whole string
292
// ----------------------------------------------------------------------
293
// Splits `full` on any character of `delim`, writing every piece (including
// empty ones) through `result`. At most `pieces` values are produced; the last
// one receives the unsplit remainder. pieces == 0 means "no limit".
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorAllowEmpty(const StringType& full,
                                     const char* delim,
                                     size_t pieces,
                                     ITR& result) { // NOLINT
  string::size_type piece_start = 0;
  size_t emitted = 0;

  while (pieces == 0 || emitted < pieces - 1) {
    const string::size_type delim_pos = full.find_first_of(delim, piece_start);
    if (delim_pos == string::npos) {
      break;  // No more delimiters: the remainder is emitted below.
    }
    *result++ = full.substr(piece_start, delim_pos - piece_start);
    piece_start = delim_pos + 1;
    ++emitted;
  }

  // Remainder of the input, whether or not it still contains delimiters.
  *result++ = full.substr(piece_start);
}
313
314
void SplitStringIntoNPiecesAllowEmpty(const string& full,
315
                                      const char* delim,
316
                                      size_t pieces,
317
0
                                      vector<string>* result) {
318
0
  if (pieces == 0) {
319
    // No limit when pieces is 0.
320
0
    AppendTo(result, strings::Split(full, AnyOf(delim)));
321
0
  } else {
322
    // The input argument "pieces" specifies the max size that *result should
323
    // be. However, the argument to the Limit() delimiter is the max number of
324
    // delimiters, which should be one less than "pieces". Example: "a,b,c" has
325
    // 3 pieces and two comma delimiters.
326
0
    auto limit = std::max<size_t>(pieces - 1, 0);
327
0
    AppendTo(result, strings::Split(full, Limit(AnyOf(delim), limit)));
328
0
  }
329
0
}
330
331
// ----------------------------------------------------------------------
332
// SplitStringAllowEmpty
333
//    Split a string using a character delimiter. Append the components
334
//    to 'result'.  If there are consecutive delimiters, this function
335
//    will return corresponding empty strings.
336
// ----------------------------------------------------------------------
337
void SplitStringAllowEmpty(const string& full, const char* delim,
338
0
                           vector<string>* result) {
339
0
  AppendTo(result, strings::Split(full, AnyOf(delim)));
340
0
}
341
342
// If we know how much to allocate for a vector of strings, we can
343
// allocate the vector<string> only once and directly to the right size.
344
// This saves in between 33-66 % of memory space needed for the result,
345
// and runs faster in the microbenchmarks.
346
//
347
// The reserve is only implemented for the single character delim.
348
//
349
// The implementation for counting is cut-and-pasted from
350
// SplitStringToIteratorUsing. I could have written my own counting iterator,
351
// and use the existing template function, but probably this is more clear
352
// and more sure to get optimized to reasonable code.
353
576
// Counts the non-empty tokens that splitting `full` on a single-character
// delimiter would produce, so the caller can reserve() once. Returns 0 when
// `delim` is empty or longer than one character (no estimate is attempted).
static size_t CalculateReserveForVector(const string& full, const char* delim) {
  const bool single_char_delim = delim[0] != '\0' && delim[1] == '\0';
  if (!single_char_delim) {
    return 0;
  }

  const char separator = delim[0];
  size_t token_count = 0;
  bool inside_token = false;
  for (const char ch : full) {
    if (ch == separator) {
      inside_token = false;
    } else if (!inside_token) {
      // First character of a new run of non-delimiter characters.
      inside_token = true;
      ++token_count;
    }
  }
  return token_count;
}
373
374
// ----------------------------------------------------------------------
375
// SplitStringUsing()
376
// SplitStringToHashsetUsing()
377
// SplitStringToSetUsing()
378
// SplitStringToMapUsing()
379
// SplitStringToHashmapUsing()
380
//    Split a string using a character delimiter. Append the components
381
//    to 'result'.
382
//
383
// Note: For multi-character delimiters, this routine will split on *ANY* of
384
// the characters in the string, not the entire string as a single delimiter.
385
// ----------------------------------------------------------------------
386
// Splits `full` on any character of `delim` and writes each non-empty token
// through `result`. Runs of consecutive delimiters and leading/trailing
// delimiters produce no output.
template <typename StringType, typename ITR>
static inline
void SplitStringToIteratorUsing(const StringType& full,
                                const char* delim,
                                ITR& result) { // NOLINT
  const bool single_char_delim = delim[0] != '\0' && delim[1] == '\0';
  if (single_char_delim) {
    // Fast path: scan the raw bytes for one separator character.
    const char separator = delim[0];
    const char* cursor = full.data();
    const char* const limit = cursor + full.size();
    while (cursor != limit) {
      if (*cursor == separator) {
        ++cursor;
        continue;
      }
      const char* const token_begin = cursor;
      do {
        ++cursor;  // Skip to the next occurrence of the delimiter.
      } while (cursor != limit && *cursor != separator);
      *result++ = StringType(token_begin, cursor - token_begin);
    }
    return;
  }

  // General path: any character in `delim` separates tokens.
  string::size_type token_begin = full.find_first_not_of(delim);
  while (token_begin != string::npos) {
    const string::size_type token_end = full.find_first_of(delim, token_begin);
    if (token_end == string::npos) {
      *result++ = full.substr(token_begin);
      return;
    }
    *result++ = full.substr(token_begin, token_end - token_begin);
    token_begin = full.find_first_not_of(delim, token_end);
  }
}
422
423
void SplitStringUsing(const string& full,
424
                      const char* delim,
425
576
                      vector<string>* result) {
426
576
  result->reserve(result->size() + CalculateReserveForVector(full, delim));
427
576
  std::back_insert_iterator< vector<string> > it(*result);
428
576
  SplitStringToIteratorUsing(full, delim, it);
429
576
}
430
431
void SplitStringToSetUsing(const string& full, const char* delim,
432
0
                           set<string>* result) {
433
0
  AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
434
0
}
435
436
void SplitStringToMapUsing(const string& full, const char* delim,
437
0
                           map<string, string>* result) {
438
0
  AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
439
0
}
440
441
// ----------------------------------------------------------------------
442
// SplitGStringPieceToVector()
443
//    Split a GStringPiece into sub-GStringPieces based on delim
444
//    and appends the pieces to 'vec'.
445
//    If omit empty strings is true, empty strings are omitted
446
//    from the resulting vector.
447
// ----------------------------------------------------------------------
448
void SplitGStringPieceToVector(const GStringPiece& full,
449
                              const char* delim,
450
                              vector<GStringPiece>* vec,
451
0
                              bool omit_empty_strings) {
452
0
  if (omit_empty_strings) {
453
0
    AppendTo(vec, strings::Split(full, AnyOf(delim), SkipEmpty()));
454
0
  } else {
455
0
    AppendTo(vec, strings::Split(full, AnyOf(delim)));
456
0
  }
457
0
}
458
459
// ----------------------------------------------------------------------
460
// SplitUsing()
461
//    Split a string using a string of delimiters, returning vector
462
//    of strings. The original string is modified to insert nulls.
463
// ----------------------------------------------------------------------
464
465
0
vector<char*>* SplitUsing(char* full, const char* delim) {
466
0
  auto vec = new vector<char*>;
467
0
  SplitToVector(full, delim, vec, true);        // Omit empty strings
468
0
  return vec;
469
0
}
470
471
void SplitToVector(char* full, const char* delim, vector<char*>* vec,
472
0
                   bool omit_empty_strings) {
473
0
  char* next  = full;
474
0
  while ((next = gstrsep(&full, delim)) != nullptr) {
475
0
    if (omit_empty_strings && next[0] == '\0') continue;
476
0
    vec->push_back(next);
477
0
  }
478
  // Add last element (or full string if no delimeter found):
479
0
  if (full != nullptr) {
480
0
    vec->push_back(full);
481
0
  }
482
0
}
483
484
void SplitToVector(char* full, const char* delim, vector<const char*>* vec,
485
0
                   bool omit_empty_strings) {
486
0
  char* next  = full;
487
0
  while ((next = gstrsep(&full, delim)) != nullptr) {
488
0
    if (omit_empty_strings && next[0] == '\0') continue;
489
0
    vec->push_back(next);
490
0
  }
491
  // Add last element (or full string if no delimeter found):
492
0
  if (full != nullptr) {
493
0
    vec->push_back(full);
494
0
  }
495
0
}
496
497
// ----------------------------------------------------------------------
498
// SplitOneStringToken()
499
//   Mainly a stringified wrapper around strpbrk()
500
// ----------------------------------------------------------------------
501
0
// Extracts the next token from *source.
//
// The token runs from *source up to (not including) the first character that
// appears in `delim`. On return *source points just past that delimiter, or is
// null when no delimiter was found (the token is then the whole remainder).
// A null *source yields an empty string and is left unchanged.
string SplitOneStringToken(const char** source, const char* delim) {
  assert(source);
  assert(delim);

  const char* const token_begin = *source;
  if (token_begin == nullptr) {
    return string();
  }

  // Optimize the common case where delim is a single character.
  const bool single_char_delim = delim[0] != '\0' && delim[1] == '\0';
  const char* const delim_pos = single_char_delim
      ? strchr(token_begin, delim[0])
      : strpbrk(token_begin, delim);

  if (delim_pos == nullptr) {
    *source = nullptr;
    return string(token_begin);
  }
  *source = delim_pos + 1;
  return string(token_begin, delim_pos);
}
520
521
// ----------------------------------------------------------------------
522
// SplitStringWithEscaping()
523
// SplitStringWithEscapingAllowEmpty()
524
// SplitStringWithEscapingToSet()
525
// SplitStringWithWithEscapingToHashset()
526
//   Split the string using the specified delimiters, taking escaping into
527
//   account. '\' is not allowed as a delimiter.
528
// ----------------------------------------------------------------------
529
template <typename ITR>
530
static inline
531
void SplitStringWithEscapingToIterator(const string& src,
532
                                       const strings::CharSet& delimiters,
533
                                       const bool allow_empty,
534
0
                                       ITR* result) {
535
0
  CHECK(!delimiters.Test('\\')) << "\\ is not allowed as a delimiter.";
536
0
  CHECK(result);
537
0
  string part;
538
539
0
  for (uint32 i = 0; i < src.size(); ++i) {
540
0
    char current_char = src[i];
541
0
    if (delimiters.Test(current_char)) {
542
      // Push substrings when we encounter delimiters.
543
0
      if (allow_empty || !part.empty()) {
544
0
        *(*result)++ = part;
545
0
        part.clear();
546
0
      }
547
0
    } else if (current_char == '\\' && ++i < src.size()) {
548
      // If we see a backslash, the next delimiter or backslash is literal.
549
0
      current_char = src[i];
550
0
      if (current_char != '\\' && !delimiters.Test(current_char)) {
551
        // Don't honour unknown escape sequences: emit \f for \f.
552
0
        part.push_back('\\');
553
0
      }
554
0
      part.push_back(current_char);
555
0
    } else {
556
      // Otherwise, we have a normal character or trailing backslash.
557
0
      part.push_back(current_char);
558
0
    }
559
0
  }
560
561
  // Push the trailing part.
562
0
  if (allow_empty || !part.empty()) {
563
0
    *(*result)++ = part;
564
0
  }
565
0
}
Unexecuted instantiation: split.cc:void SplitStringWithEscapingToIterator<std::__1::back_insert_iterator<std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, strings::CharSet const&, bool, std::__1::back_insert_iterator<std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >*)
Unexecuted instantiation: split.cc:void SplitStringWithEscapingToIterator<std::__1::insert_iterator<std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > > >(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, strings::CharSet const&, bool, std::__1::insert_iterator<std::__1::set<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::less<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > > >*)
566
567
void SplitStringWithEscaping(const string &full,
568
                             const strings::CharSet& delimiters,
569
0
                             vector<string> *result) {
570
0
  std::back_insert_iterator< vector<string> > it(*result);
571
0
  SplitStringWithEscapingToIterator(full, delimiters, false, &it);
572
0
}
573
574
void SplitStringWithEscapingAllowEmpty(const string &full,
575
                                       const strings::CharSet& delimiters,
576
0
                                       vector<string> *result) {
577
0
  std::back_insert_iterator< vector<string> > it(*result);
578
0
  SplitStringWithEscapingToIterator(full, delimiters, true, &it);
579
0
}
580
581
void SplitStringWithEscapingToSet(const string &full,
582
                                  const strings::CharSet& delimiters,
583
0
                                  set<string> *result) {
584
0
  std::insert_iterator< set<string> > it(*result, result->end());
585
0
  SplitStringWithEscapingToIterator(full, delimiters, false, &it);
586
0
}
587
588
// ----------------------------------------------------------------------
589
// SplitOneIntToken()
590
// SplitOneInt32Token()
591
// SplitOneUint32Token()
592
// SplitOneInt64Token()
593
// SplitOneUint64Token()
594
// SplitOneDoubleToken()
595
// SplitOneFloatToken()
596
// SplitOneDecimalIntToken()
597
// SplitOneDecimalInt32Token()
598
// SplitOneDecimalUint32Token()
599
// SplitOneDecimalInt64Token()
600
// SplitOneDecimalUint64Token()
601
// SplitOneHexUint32Token()
602
// SplitOneHexUint64Token()
603
//   Mainly a stringified wrapper around strtol/strtoul/strtod
604
// ----------------------------------------------------------------------
605
// Curried functions for the macro below
606
0
static inline int32_t strto32_0(const char* source, char** end) {
607
0
  return strto32(source, end, 0); }
608
0
static inline uint32_t strtou32_0(const char* source, char** end) {
609
0
  return strtou32(source, end, 0); }
610
0
static inline int64 strto64_0(const char* source, char** end) {
611
0
  return strto64(source, end, 0); }
612
0
static inline uint64 strtou64_0(const char* source, char** end) {
613
0
  return strtou64(source, end, 0); }
614
0
static inline int32_t strto32_10(const char* source, char** end) {
615
0
  return strto32(source, end, 10); }
616
0
static inline uint32_t strtou32_10(const char* source, char** end) {
617
0
  return strtou32(source, end, 10); }
618
0
static inline int64 strto64_10(const char* source, char** end) {
619
0
  return strto64(source, end, 10); }
620
0
static inline uint64 strtou64_10(const char* source, char** end) {
621
0
  return strtou64(source, end, 10); }
622
0
static inline uint32 strtou32_16(const char* source, char** end) {
623
0
  return strtou32(source, end, 16); }
624
0
static inline uint64 strtou64_16(const char* source, char** end) {
625
0
  return strtou64(source, end, 16); }
626
627
// Defines `bool SplitOne<name>Token(const char** source, const char* delim,
// type* value)`.
//
// The generated function parses one number from the start of `*source` with
// `function(const char*, char**)` and stores it in `*value`. The number must
// be followed by the end of the string or by one of the characters in
// `delim`. On success `*source` is moved just past that delimiter, or set to
// null when the string ended, and true is returned. It returns false when
// `*source` is null, when no number is found, or when the number is followed
// by any other character; `*source` is left untouched on failure, but
// `*value` has already been overwritten if parsing was attempted.
#define DEFINE_SPLIT_ONE_NUMBER_TOKEN(name, type, function)              \
bool SplitOne##name##Token(const char ** source, const char * delim,    \
                           type * value) {                              \
  assert(source);                                                       \
  assert(delim);                                                        \
  assert(value);                                                        \
  if (*source == nullptr) {                                             \
    return false; /* nothing left to parse */                           \
  }                                                                     \
  char * token_end;                                                     \
  *value = function(*source, &token_end);                               \
  if (token_end == *source) {                                           \
    return false; /* number not present at start of string */           \
  }                                                                     \
  const char next = *token_end;                                         \
  if (next != '\0' && strchr(delim, next) == nullptr) {                 \
    return false; /* garbage characters after the number */             \
  }                                                                     \
  /* Advance past the delimiter, or mark the input as exhausted. */     \
  *source = (next != '\0') ? token_end + 1 : nullptr;                   \
  return true;                                                          \
}
Unexecuted instantiation: SplitOneIntToken(char const**, char const*, int*)
Unexecuted instantiation: SplitOneInt32Token(char const**, char const*, int*)
Unexecuted instantiation: SplitOneUint32Token(char const**, char const*, unsigned int*)
Unexecuted instantiation: SplitOneInt64Token(char const**, char const*, long long*)
Unexecuted instantiation: SplitOneUint64Token(char const**, char const*, unsigned long long*)
Unexecuted instantiation: SplitOneDoubleToken(char const**, char const*, double*)
Unexecuted instantiation: SplitOneFloatToken(char const**, char const*, float*)
Unexecuted instantiation: SplitOneDecimalIntToken(char const**, char const*, int*)
Unexecuted instantiation: SplitOneDecimalInt32Token(char const**, char const*, int*)
Unexecuted instantiation: SplitOneDecimalUint32Token(char const**, char const*, unsigned int*)
Unexecuted instantiation: SplitOneDecimalInt64Token(char const**, char const*, long long*)
Unexecuted instantiation: SplitOneDecimalUint64Token(char const**, char const*, unsigned long long*)
Unexecuted instantiation: SplitOneHexUint32Token(char const**, char const*, unsigned int*)
Unexecuted instantiation: SplitOneHexUint64Token(char const**, char const*, unsigned long long*)
651
652
// Integer parsers that pass base 0 to the underlying conversion.
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int, int, strto32_0)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int32, int32, strto32_0)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint32, uint32, strtou32_0)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int64, int64, strto64_0)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint64, uint64, strtou64_0)

// Floating-point parsers.
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Double, double, strtod)
#ifndef _MSC_VER
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Float, float, strtof)
#else
// MSVC has no strtof(): parse with strtod and rely on the implicit
// conversion to float.
DEFINE_SPLIT_ONE_NUMBER_TOKEN(Float, float, strtod)
#endif

// Integer parsers fixed to base 10.
DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt, int, strto32_10)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt32, int32, strto32_10)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalUint32, uint32, strtou32_10)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt64, int64, strto64_10)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalUint64, uint64, strtou64_10)

// Unsigned integer parsers fixed to base 16.
DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint32, uint32, strtou32_16)
DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint64, uint64, strtou64_16)
671
672
673
// ----------------------------------------------------------------------
674
// SplitRange()
675
//    Splits a string of the form "<from>-<to>".  Either or both can be
676
//    missing.  A raw number (<from>) is interpreted as "<from>-".  Modifies
677
//    parameters insofar as they're specified by the string.  RETURNS
678
//    true iff the input is a well-formed range.  If it RETURNS false,
679
//    from and to remain unchanged.  The range in rangestr should be
680
//    terminated either by "\0" or by whitespace.
681
// ----------------------------------------------------------------------
682
683
0
#define EOS(ch)  ( (ch) == '\0' || ascii_isspace(ch) )
684
0
bool SplitRange(const char* rangestr, int* from, int* to) {
685
  // We need to do the const-cast because strol takes a char**, not const char**
686
0
  char* val = const_cast<char*>(rangestr);
687
0
  if (val == nullptr || EOS(*val))  return true;  // we'll say nothingness is ok
688
689
0
  if ( val[0] == '-' && EOS(val[1]) )    // CASE 1: -
690
0
    return true;                         // nothing changes
691
692
0
  if ( val[0] == '-' ) {                 // CASE 2: -<i2>
693
0
    const int int2 = strto32(val+1, &val, 10);
694
0
    if ( !EOS(*val) )  return false;     // not a valid integer
695
0
    *to = int2;                          // only "to" changes
696
0
    return true;
697
698
0
  } else {
699
0
    const int int1 = strto32(val, &val, 10);
700
0
    if ( EOS(*val) || (*val == '-' && EOS(*(val+1))) ) {
701
0
      *from = int1;                      // CASE 3: <i1>, same as <i1>-
702
0
      return true;                       // only "from" changes
703
0
    } else if (*val != '-') {            // not a valid range
704
0
      return false;
705
0
    }
706
0
    const int int2 = strto32(val+1, &val, 10);
707
0
    if ( !EOS(*val) )  return false;     // not a valid integer
708
0
    *from = int1;                        // CASE 4: <i1>-<i2>
709
0
    *to = int2;
710
0
    return true;
711
0
  }
712
0
}
713
714
void SplitCSVLineWithDelimiter(char* line, char delimiter,
715
0
                               vector<char*>* cols) {
716
0
  char* end_of_line = line + strlen(line);
717
0
  char* end;
718
0
  char* start;
719
720
0
  for (; line < end_of_line; line++) {
721
    // Skip leading whitespace, unless said whitespace is the delimiter.
722
0
    while (ascii_isspace(*line) && *line != delimiter)
723
0
      ++line;
724
725
0
    if (*line == '"' && delimiter == ',') {     // Quoted value...
726
0
      start = ++line;
727
0
      end = start;
728
0
      for (; *line; line++) {
729
0
        if (*line == '"') {
730
0
          line++;
731
0
          if (*line != '"')  // [""] is an escaped ["]
732
0
            break;           // but just ["] is end of value
733
0
        }
734
0
        *end++ = *line;
735
0
      }
736
      // All characters after the closing quote and before the comma
737
      // are ignored.
738
0
      line = strchr(line, delimiter);
739
0
      if (!line) line = end_of_line;
740
0
    } else {
741
0
      start = line;
742
0
      line = strchr(line, delimiter);
743
0
      if (!line) line = end_of_line;
744
      // Skip all trailing whitespace, unless said whitespace is the delimiter.
745
0
      for (end = line; end > start; --end) {
746
0
        if (!ascii_isspace(end[-1]) || end[-1] == delimiter)
747
0
          break;
748
0
      }
749
0
    }
750
0
    const bool need_another_column =
751
0
      (*line == delimiter) && (line == end_of_line - 1);
752
0
    *end = '\0';
753
0
    cols->push_back(start);
754
    // If line was something like [paul,] (comma is the last character
755
    // and is not proceeded by whitespace or quote) then we are about
756
    // to eliminate the last column (which is empty). This would be
757
    // incorrect.
758
0
    if (need_another_column)
759
0
      cols->push_back(end);
760
761
0
    assert(*line == '\0' || *line == delimiter);
762
0
  }
763
0
}
764
765
0
void SplitCSVLine(char* line, vector<char*>* cols) {
766
0
  SplitCSVLineWithDelimiter(line, ',', cols);
767
0
}
768
769
void SplitCSVLineWithDelimiterForStrings(const string &line,
770
                                         char delimiter,
771
0
                                         vector<string> *cols) {
772
  // Unfortunately, the interface requires char* instead of const char*
773
  // which requires copying the string.
774
0
  char *cline = strndup_with_new(line.c_str(), line.size());
775
0
  vector<char *> v;
776
0
  SplitCSVLineWithDelimiter(cline, delimiter, &v);
777
0
  for (vector<char*>::const_iterator ci = v.begin(); ci != v.end(); ++ci) {
778
0
    cols->push_back(*ci);
779
0
  }
780
0
  delete[] cline;
781
0
}
782
783
// ----------------------------------------------------------------------
784
namespace {
785
786
// Helper class used by SplitStructuredLineInternal.
787
// Helper class used by SplitStructuredLineInternal.
//
// Built from a string of character pairs "<open><close><open><close>...".
// It answers two questions in constant time: which character closes a given
// opening character, and whether a character is a closing character at all.
class ClosingSymbolLookup {
 public:
  explicit ClosingSymbolLookup(const char* symbol_pairs)
      : closing_(),
        valid_closing_() {
    // Walk the string two characters at a time, filling both tables.
    const char* cursor = symbol_pairs;
    while (*cursor != 0) {
      const unsigned char open_ch = *cursor;
      ++cursor;
      // If the string ends before the closing character has been found,
      // the opening character closes itself.
      const bool has_partner = (*cursor != 0);
      const unsigned char close_ch = has_partner ? *cursor : open_ch;
      closing_[open_ch] = close_ch;
      valid_closing_[close_ch] = true;
      if (!has_partner) {
        break;
      }
      ++cursor;
    }
  }

  // Not copyable or assignable.
  ClosingSymbolLookup(const ClosingSymbolLookup&) = delete;
  void operator=(const ClosingSymbolLookup&) = delete;

  // Returns the closing character corresponding to an opening one,
  // or 0 if the argument is not an opening character.
  char GetClosingChar(char opening) const {
    const unsigned char index = static_cast<unsigned char>(opening);
    return closing_[index];
  }

  // Returns true if the argument is a closing character.
  bool IsClosing(char c) const {
    const unsigned char index = static_cast<unsigned char>(c);
    return valid_closing_[index];
  }

 private:
  // Indexed by opening character; holds its closing character, or 0 when the
  // index is not an opening character.
  char closing_[256];
  // Indexed by character; true for every valid closing character.
  bool valid_closing_[256];
};
825
826
char* SplitStructuredLineInternal(char* line,
827
                                  char delimiter,
828
                                  const char* symbol_pairs,
829
                                  vector<char*>* cols,
830
0
                                  bool with_escapes) {
831
0
  ClosingSymbolLookup lookup(symbol_pairs);
832
833
  // Stack of symbols expected to close the current opened expressions.
834
0
  vector<char> expected_to_close;
835
0
  bool in_escape = false;
836
837
0
  CHECK(cols);
838
0
  cols->push_back(line);
839
0
  char* current;
840
0
  for (current = line; *current; ++current) {
841
0
    char c = *current;
842
0
    if (in_escape) {
843
0
      in_escape = false;
844
0
    } else if (with_escapes && c == '\\') {
845
      // We are escaping the next character. Note the escape still appears
846
      // in the output.
847
0
      in_escape = true;
848
0
    } else if (expected_to_close.empty() && c == delimiter) {
849
      // We don't have any open expression, this is a valid separator.
850
0
      *current = 0;
851
0
      cols->push_back(current + 1);
852
0
    } else if (!expected_to_close.empty() && c == expected_to_close.back()) {
853
      // Can we close the currently open expression?
854
0
      expected_to_close.pop_back();
855
0
    } else if (lookup.GetClosingChar(c)) {
856
      // If this is an opening symbol, we open a new expression and push
857
      // the expected closing symbol on the stack.
858
0
      expected_to_close.push_back(lookup.GetClosingChar(c));
859
0
    } else if (lookup.IsClosing(c)) {
860
      // Error: mismatched closing symbol.
861
0
      return current;
862
0
    }
863
0
  }
864
0
  if (!expected_to_close.empty()) {
865
0
    return current;  // Missing closing symbol(s)
866
0
  }
867
0
  return nullptr;  // Success
868
0
}
869
870
bool SplitStructuredLineInternal(GStringPiece line,
871
                                 char delimiter,
872
                                 const char* symbol_pairs,
873
                                 vector<GStringPiece>* cols,
874
0
                                 bool with_escapes) {
875
0
  ClosingSymbolLookup lookup(symbol_pairs);
876
877
  // Stack of symbols expected to close the current opened expressions.
878
0
  vector<char> expected_to_close;
879
0
  bool in_escape = false;
880
881
0
  CHECK_NOTNULL(cols);
882
0
  cols->push_back(line);
883
0
  for (size_t i = 0; i < line.size(); ++i) {
884
0
    char c = line[i];
885
0
    if (in_escape) {
886
0
      in_escape = false;
887
0
    } else if (with_escapes && c == '\\') {
888
      // We are escaping the next character. Note the escape still appears
889
      // in the output.
890
0
      in_escape = true;
891
0
    } else if (expected_to_close.empty() && c == delimiter) {
892
      // We don't have any open expression, this is a valid separator.
893
0
      cols->back().remove_suffix(line.size() - i);
894
0
      cols->push_back(GStringPiece(line, i + 1));
895
0
    } else if (!expected_to_close.empty() && c == expected_to_close.back()) {
896
      // Can we close the currently open expression?
897
0
      expected_to_close.pop_back();
898
0
    } else if (lookup.GetClosingChar(c)) {
899
      // If this is an opening symbol, we open a new expression and push
900
      // the expected closing symbol on the stack.
901
0
      expected_to_close.push_back(lookup.GetClosingChar(c));
902
0
    } else if (lookup.IsClosing(c)) {
903
      // Error: mismatched closing symbol.
904
0
      return false;
905
0
    }
906
0
  }
907
0
  if (!expected_to_close.empty()) {
908
0
    return false;  // Missing closing symbol(s)
909
0
  }
910
0
  return true;  // Success
911
0
}
912
913
}  // anonymous namespace
914
915
char* SplitStructuredLine(char* line,
916
                          char delimiter,
917
                          const char *symbol_pairs,
918
0
                          vector<char*>* cols) {
919
0
  return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
920
0
                                     false);
921
0
}
922
923
bool SplitStructuredLine(GStringPiece line,
924
                         char delimiter,
925
                         const char* symbol_pairs,
926
0
                         vector<GStringPiece>* cols) {
927
0
  return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
928
0
                                     false);
929
0
}
930
931
char* SplitStructuredLineWithEscapes(char* line,
932
                                     char delimiter,
933
                                     const char *symbol_pairs,
934
0
                                     vector<char*>* cols) {
935
0
  return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
936
0
                                     true);
937
0
}
938
939
bool SplitStructuredLineWithEscapes(GStringPiece line,
940
                                     char delimiter,
941
                                     const char* symbol_pairs,
942
0
                                     vector<GStringPiece>* cols) {
943
0
  return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
944
0
                                     true);
945
0
}
946
947
948
// ----------------------------------------------------------------------
949
// SplitStringIntoKeyValues()
950
// ----------------------------------------------------------------------
951
bool SplitStringIntoKeyValues(const string& line,
952
                              const string& key_value_delimiters,
953
                              const string& value_value_delimiters,
954
0
                              string *key, vector<string> *values) {
955
0
  key->clear();
956
0
  values->clear();
957
958
  // find the key string
959
0
  size_t end_key_pos = line.find_first_of(key_value_delimiters);
960
0
  if (end_key_pos == string::npos) {
961
0
    VLOG(1) << "cannot parse key from line: " << line;
962
0
    return false;    // no key
963
0
  }
964
0
  key->assign(line, 0, end_key_pos);
965
966
  // find the values string
967
0
  string remains(line, end_key_pos, line.size() - end_key_pos);
968
0
  size_t begin_values_pos = remains.find_first_not_of(key_value_delimiters);
969
0
  if (begin_values_pos == string::npos) {
970
0
    VLOG(1) << "cannot parse value from line: " << line;
971
0
    return false;   // no value
972
0
  }
973
0
  string values_string(remains,
974
0
                       begin_values_pos,
975
0
                       remains.size() - begin_values_pos);
976
977
  // construct the values vector
978
0
  if (value_value_delimiters.empty()) {  // one value
979
0
    values->push_back(values_string);
980
0
  } else {                               // multiple values
981
0
    SplitStringUsing(values_string, value_value_delimiters.c_str(), values);
982
0
    if (values->size() < 1) {
983
0
      VLOG(1) << "cannot parse value from line: " << line;
984
0
      return false;  // no value
985
0
    }
986
0
  }
987
0
  return true;
988
0
}
989
990
bool SplitStringIntoKeyValuePairs(const string& line,
991
                                  const string& key_value_delimiters,
992
                                  const string& key_value_pair_delimiters,
993
0
                                  vector<pair<string, string> >* kv_pairs) {
994
0
  kv_pairs->clear();
995
996
0
  vector<string> pairs;
997
0
  SplitStringUsing(line, key_value_pair_delimiters.c_str(), &pairs);
998
999
0
  bool success = true;
1000
0
  for (const auto& pair : pairs) {
1001
0
    string key;
1002
0
    vector<string> value;
1003
0
    if (!SplitStringIntoKeyValues(pair,
1004
0
                                  key_value_delimiters,
1005
0
                                  "", &key, &value)) {
1006
      // Don't return here, to allow for keys without associated
1007
      // values; just record that our split failed.
1008
0
      success = false;
1009
0
    }
1010
    // we expect atmost one value because we passed in an empty vsep to
1011
    // SplitStringIntoKeyValues
1012
0
    DCHECK_LE(value.size(), 1);
1013
0
    kv_pairs->push_back(make_pair(key, value.empty()? "" : value[0]));
1014
0
  }
1015
0
  return success;
1016
0
}
1017
1018
// ----------------------------------------------------------------------
1019
// SplitLeadingDec32Values()
1020
// SplitLeadingDec64Values()
1021
//    A simple parser for space-separated decimal int32/int64 values.
1022
//    Appends parsed integers to the end of the result vector, stopping
1023
//    at the first unparsable spot.  Skips past leading and repeated
1024
//    whitespace (does not consume trailing whitespace), and returns
1025
//    a pointer beyond the last character parsed.
1026
// --------------------------------------------------------------------
1027
0
const char* SplitLeadingDec32Values(const char *str, vector<int32> *result) {
1028
0
  for (;;) {
1029
0
    char *end = nullptr;
1030
0
    int64_t value = strtol(str, &end, 10);
1031
0
    if (end == str)
1032
0
      break;
1033
    // Limit long values to int32 min/max.  Needed for lp64.
1034
0
    if (value > numeric_limits<int32>::max()) {
1035
0
      value = numeric_limits<int32>::max();
1036
0
    } else if (value < numeric_limits<int32>::min()) {
1037
0
      value = numeric_limits<int32>::min();
1038
0
    }
1039
0
    result->push_back(narrow_cast<int32>(value));
1040
0
    str = end;
1041
0
    if (!ascii_isspace(*end))
1042
0
      break;
1043
0
  }
1044
0
  return str;
1045
0
}
1046
1047
0
const char* SplitLeadingDec64Values(const char *str, vector<int64> *result) {
1048
0
  for (;;) {
1049
0
    char *end = nullptr;
1050
0
    const int64 value = strtoll(str, &end, 10);
1051
0
    if (end == str)
1052
0
      break;
1053
0
    result->push_back(value);
1054
0
    str = end;
1055
0
    if (!ascii_isspace(*end))
1056
0
      break;
1057
0
  }
1058
0
  return str;
1059
0
}
1060
1061
void SplitStringToLines(const char* full,
1062
                        size_t max_len,
1063
                        size_t num_lines,
1064
0
                        vector<string>* result) {
1065
0
  if (max_len <= 0) {
1066
0
    return;
1067
0
  }
1068
0
  size_t pos = 0;
1069
0
  for (size_t i = 0; (i < num_lines || num_lines <= 0); i++) {
1070
0
    auto cut_at = ClipStringHelper(full+pos, max_len, (i == num_lines - 1));
1071
0
    if (cut_at == std::numeric_limits<size_t>::max()) {
1072
0
      result->push_back(string(full+pos));
1073
0
      return;
1074
0
    }
1075
0
    result->push_back(string(full+pos, cut_at));
1076
0
    if (i == num_lines - 1 && max_len > kCutStrSize) {
1077
0
      result->at(i).append(kCutStr);
1078
0
    }
1079
0
    pos += cut_at;
1080
0
  }
1081
0
}