YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/util.cc
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (C) 1999-2005 Google, Inc.
3
//
4
// The following only applies to changes made to this file as part of YugaByte development.
5
//
6
// Portions Copyright (c) YugaByte, Inc.
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
9
// in compliance with the License.  You may obtain a copy of the License at
10
//
11
// http://www.apache.org/licenses/LICENSE-2.0
12
//
13
// Unless required by applicable law or agreed to in writing, software distributed under the License
14
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15
// or implied.  See the License for the specific language governing permissions and limitations
16
// under the License.
17
//
18
19
// TODO(user): visit each const_cast.  Some of them are no longer necessary
20
// because last Single Unix Spec and grte v2 are more const-y.
21
22
#include "yb/gutil/strings/util.h"
23
24
#include <assert.h>
25
#include <stdarg.h>
26
#include <stdio.h>
27
#include <string.h>
28
#include <time.h>           // for FastTimeToBuffer()
29
30
#include <algorithm>
31
32
#include <glog/logging.h>
33
34
#include "yb/gutil/casts.h"
35
#include "yb/gutil/stl_util.h"  // for string_as_array, STLAppendToString
36
#include "yb/gutil/strings/ascii_ctype.h"
37
#include "yb/gutil/strings/numbers.h"
38
#include "yb/gutil/utf/utf.h"
39
40
using std::copy;
41
using std::max;
42
using std::min;
43
using std::reverse;
44
using std::sort;
45
using std::swap;
46
using std::string;
47
using std::vector;
48
49
50
#ifdef OS_WINDOWS
51
#ifdef min  // windows.h defines this to something silly
52
#undef min
53
#endif
54
#endif
55
56
// Use this instead of gmtime_r if you want to build for Windows.
57
// Windows doesn't have a 'gmtime_r', but it has the similar 'gmtime_s'.
58
// TODO(user): Probably belongs in //base:time_support.{cc|h}.
59
0
static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) {
60
#ifdef OS_WINDOWS
61
  return gmtime_s(result, timep) == 0 ? result : NULL;
62
#else
63
0
  return gmtime_r(timep, result);
64
0
#endif  // OS_WINDOWS
65
0
}
66
67
char* strnstr(const char* haystack, const char* needle,
68
0
                     size_t haystack_len) {
69
0
  if (*needle == '\0') {
70
0
    return const_cast<char*>(haystack);
71
0
  }
72
0
  size_t needle_len = strlen(needle);
73
0
  char* where;
74
0
  while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) {
75
0
    if (where - haystack + needle_len > haystack_len) {
76
0
      return nullptr;
77
0
    }
78
0
    if (strncmp(where, needle, needle_len) == 0) {
79
0
      return where;
80
0
    }
81
0
    haystack_len -= where + 1 - haystack;
82
0
    haystack = where + 1;
83
0
  }
84
0
  return nullptr;
85
0
}
86
87
const char* strnprefix(const char* haystack, int haystack_size,
88
0
                              const char* needle, int needle_size) {
89
0
  if (needle_size > haystack_size) {
90
0
    return nullptr;
91
0
  } else {
92
0
    if (strncmp(haystack, needle, needle_size) == 0) {
93
0
      return haystack + needle_size;
94
0
    } else {
95
0
      return nullptr;
96
0
    }
97
0
  }
98
0
}
99
100
const char* strncaseprefix(const char* haystack, int haystack_size,
101
0
                                  const char* needle, int needle_size) {
102
0
  if (needle_size > haystack_size) {
103
0
    return nullptr;
104
0
  } else {
105
0
    if (strncasecmp(haystack, needle, needle_size) == 0) {
106
0
      return haystack + needle_size;
107
0
    } else {
108
0
      return nullptr;
109
0
    }
110
0
  }
111
0
}
112
113
0
char* strcasesuffix(char* str, const char* suffix) {
114
0
  const auto lenstr = strlen(str);
115
0
  const auto lensuffix = strlen(suffix);
116
0
  char* strbeginningoftheend = str + lenstr - lensuffix;
117
118
0
  if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) {
119
0
    return (strbeginningoftheend);
120
0
  } else {
121
0
    return (nullptr);
122
0
  }
123
0
}
124
125
const char* strnsuffix(const char* haystack, int haystack_size,
126
0
                              const char* needle, int needle_size) {
127
0
  if (needle_size > haystack_size) {
128
0
    return nullptr;
129
0
  } else {
130
0
    const char* start = haystack + haystack_size - needle_size;
131
0
    if (strncmp(start, needle, needle_size) == 0) {
132
0
      return start;
133
0
    } else {
134
0
      return nullptr;
135
0
    }
136
0
  }
137
0
}
138
139
const char* strncasesuffix(const char* haystack, int haystack_size,
140
0
                           const char* needle, int needle_size) {
141
0
  if (needle_size > haystack_size) {
142
0
    return nullptr;
143
0
  } else {
144
0
    const char* start = haystack + haystack_size - needle_size;
145
0
    if (strncasecmp(start, needle, needle_size) == 0) {
146
0
      return start;
147
0
    } else {
148
0
      return nullptr;
149
0
    }
150
0
  }
151
0
}
152
153
0
char* strchrnth(const char* str, const char& c, int n) {
154
0
  if (str == nullptr)
155
0
    return nullptr;
156
0
  if (n <= 0)
157
0
    return const_cast<char*>(str);
158
0
  const char* sp;
159
0
  int k = 0;
160
0
  for (sp = str; *sp != '\0'; sp ++) {
161
0
    if (*sp == c) {
162
0
      ++k;
163
0
      if (k >= n)
164
0
        break;
165
0
    }
166
0
  }
167
0
  return (k < n) ? nullptr : const_cast<char*>(sp);
168
0
}
169
170
0
char* AdjustedLastPos(const char* str, char separator, int n) {
171
0
  if ( str == nullptr )
172
0
    return nullptr;
173
0
  const char* pos = nullptr;
174
0
  if ( n > 0 )
175
0
    pos = strchrnth(str, separator, n);
176
177
  // if n <= 0 or separator appears fewer than n times, get the last occurrence
178
0
  if ( pos == nullptr)
179
0
    pos = strrchr(str, separator);
180
0
  return const_cast<char*>(pos);
181
0
}
182
183
184
// ----------------------------------------------------------------------
185
// Misc. routines
186
// ----------------------------------------------------------------------
187
188
0
bool IsAscii(const char* str, size_t len) {
189
0
  const char* end = str + len;
190
0
  while (str < end) {
191
0
    if (!ascii_isascii(*str++)) {
192
0
      return false;
193
0
    }
194
0
  }
195
0
  return true;
196
0
}
197
198
// ----------------------------------------------------------------------
199
// StringReplace()
200
//    Give me a string and two patterns "old" and "new", and I replace
201
//    the first instance of "old" in the string with "new", if it
202
//    exists.  If "replace_all" is true then call this repeatedly until it
203
//    fails.  RETURN a new string, regardless of whether the replacement
204
//    happened or not.
205
// ----------------------------------------------------------------------
206
207
string StringReplace(const GStringPiece& s, const GStringPiece& oldsub,
208
15.2k
                     const GStringPiece& newsub, bool replace_all) {
209
15.2k
  string ret;
210
15.2k
  StringReplace(s, oldsub, newsub, replace_all, &ret);
211
15.2k
  return ret;
212
15.2k
}
213
214
215
// ----------------------------------------------------------------------
216
// StringReplace()
217
//    Replace the "old" pattern with the "new" pattern in a string,
218
//    and append the result to "res".  If replace_all is false,
219
//    it only replaces the first instance of "old."
220
// ----------------------------------------------------------------------
221
222
void StringReplace(const GStringPiece& s, const GStringPiece& oldsub,
223
                   const GStringPiece& newsub, bool replace_all,
224
15.2k
                   string* res) {
225
15.2k
  if (oldsub.empty()) {
226
0
    res->append(s.data(), s.length());  // If empty, append the given string.
227
0
    return;
228
0
  }
229
230
15.2k
  GStringPiece::size_type start_pos = 0;
231
15.2k
  GStringPiece::size_type pos;
232
15.7k
  do {
233
15.7k
    pos = s.find(oldsub, start_pos);
234
15.7k
    if (pos == GStringPiece::npos) {
235
15.2k
      break;
236
15.2k
    }
237
472
    res->append(s.data() + start_pos, pos - start_pos);
238
472
    res->append(newsub.data(), newsub.length());
239
    // Start searching again after the "old".
240
472
    start_pos = pos + oldsub.length();
241
472
  } while (replace_all);
242
15.2k
  res->append(s.data() + start_pos, s.length() - start_pos);
243
15.2k
}
244
245
// ----------------------------------------------------------------------
246
// GlobalReplaceSubstring()
247
//    Replaces all instances of a substring in a string.  Does nothing
248
//    if 'substring' is empty.  Returns the number of replacements.
249
//
250
//    NOTE: The string pieces must not overlap s.
251
// ----------------------------------------------------------------------
252
253
int GlobalReplaceSubstring(const GStringPiece& substring,
254
                           const GStringPiece& replacement,
255
0
                           string* s) {
256
0
  CHECK(s != nullptr);
257
0
  if (s->empty() || substring.empty())
258
0
    return 0;
259
0
  string tmp;
260
0
  int num_replacements = 0;
261
0
  size_t pos = 0;
262
0
  for (size_t match_pos = s->find(substring.data(), pos, substring.length());
263
0
       match_pos != string::npos;
264
0
       pos = match_pos + substring.length(),
265
0
           match_pos = s->find(substring.data(), pos, substring.length())) {
266
0
    ++num_replacements;
267
    // Append the original content before the match.
268
0
    tmp.append(*s, pos, match_pos - pos);
269
    // Append the replacement for the match.
270
0
    tmp.append(replacement.begin(), replacement.end());
271
0
  }
272
  // Append the content after the last match. If no replacements were made, the
273
  // original string is left untouched.
274
0
  if (num_replacements > 0) {
275
0
    tmp.append(*s, pos, s->length() - pos);
276
0
    s->swap(tmp);
277
0
  }
278
0
  return num_replacements;
279
0
}
280
281
//---------------------------------------------------------------------------
282
// RemoveStrings()
283
//   Remove the strings from v given by the (sorted least -> greatest)
284
//   numbers in indices.
285
//   Order of v is *not* preserved.
286
//---------------------------------------------------------------------------
287
0
void RemoveStrings(vector<string>* v, const vector<size_t>& indices) {
288
0
  (void)DCHECK_NOTNULL(v);
289
0
  DCHECK_LE(indices.size(), v->size());
290
  // go from largest index to smallest so that smaller indices aren't
291
  // invalidated
292
0
  for (auto lcv = indices.size(); lcv > 0;) {
293
0
    --lcv;
294
0
#ifndef NDEBUG
295
    // verify that indices is sorted least->greatest
296
0
    if (indices.size() >= 2 && lcv > 0)
297
      // use LT and not LE because we should never see repeat indices
298
0
      CHECK_LT(indices[lcv-1], indices[lcv]);
299
0
#endif
300
0
    DCHECK_GE(indices[lcv], 0);
301
0
    DCHECK_LT(indices[lcv], v->size());
302
0
    swap((*v)[indices[lcv]], v->back());
303
0
    v->pop_back();
304
0
  }
305
0
}
306
307
// ----------------------------------------------------------------------
308
// gstrcasestr is a case-insensitive strstr. Eventually we should just
309
// use the GNU libc version of strcasestr, but it isn't compiled into
310
// RedHat Linux by default in version 6.1.
311
//
312
// This function uses ascii_tolower() instead of tolower(), for speed.
313
// ----------------------------------------------------------------------
314
315
0
char *gstrcasestr(const char* haystack, const char* needle) {
316
0
  char c, sc;
317
0
  size_t len;
318
319
0
  if ((c = *needle++) != 0) {
320
0
    c = ascii_tolower(c);
321
0
    len = strlen(needle);
322
0
    do {
323
0
      do {
324
0
        if ((sc = *haystack++) == 0)
325
0
          return nullptr;
326
0
      } while (ascii_tolower(sc) != c);
327
0
    } while (strncasecmp(haystack, needle, len) != 0);
328
0
    haystack--;
329
0
  }
330
  // This is a const violation but strstr() also returns a char*.
331
0
  return const_cast<char*>(haystack);
332
0
}
333
334
// ----------------------------------------------------------------------
335
// gstrncasestr is a case-insensitive strnstr.
336
//    Finds the occurence of the (null-terminated) needle in the
337
//    haystack, where no more than len bytes of haystack is searched.
338
//    Characters that appear after a '\0' in the haystack are not searched.
339
//
340
// This function uses ascii_tolower() instead of tolower(), for speed.
341
// ----------------------------------------------------------------------
342
0
const char *gstrncasestr(const char* haystack, const char* needle, size_t len) {
343
0
  char c, sc;
344
345
0
  if ((c = *needle++) != 0) {
346
0
    c = ascii_tolower(c);
347
0
    size_t needle_len = strlen(needle);
348
0
    do {
349
0
      do {
350
0
        if (len-- <= needle_len
351
0
            || 0 == (sc = *haystack++))
352
0
          return nullptr;
353
0
      } while (ascii_tolower(sc) != c);
354
0
    } while (strncasecmp(haystack, needle, needle_len) != 0);
355
0
    haystack--;
356
0
  }
357
0
  return haystack;
358
0
}
359
360
// ----------------------------------------------------------------------
361
// gstrncasestr is a case-insensitive strnstr.
362
//    Finds the occurence of the (null-terminated) needle in the
363
//    haystack, where no more than len bytes of haystack is searched.
364
//    Characters that appear after a '\0' in the haystack are not searched.
365
//
366
//    This function uses ascii_tolower() instead of tolower(), for speed.
367
// ----------------------------------------------------------------------
368
0
char *gstrncasestr(char* haystack, const char* needle, size_t len) {
369
0
  return const_cast<char *>(gstrncasestr(static_cast<const char *>(haystack),
370
0
                                         needle, len));
371
0
}
372
// ----------------------------------------------------------------------
373
// gstrncasestr_split performs a case insensitive search
374
// on (prefix, non_alpha, suffix).
375
// ----------------------------------------------------------------------
376
char *gstrncasestr_split(const char* str,
377
                         const char* prefix, char non_alpha,
378
                         const char* suffix,
379
0
                         size_t n) {
380
0
  auto prelen = prefix == nullptr ? 0 : strlen(prefix);
381
0
  auto suflen = suffix == nullptr ? 0 : strlen(suffix);
382
383
  // adjust the string and its length to avoid unnessary searching.
384
  // an added benefit is to avoid unnecessary range checks in the if
385
  // statement in the inner loop.
386
0
  if (suflen + prelen >= n)  return nullptr;
387
0
  str += prelen;
388
0
  n -= prelen;
389
0
  n -= suflen;
390
391
0
  const char* where = nullptr;
392
393
  // for every occurance of non_alpha in the string ...
394
0
  while ((where = static_cast<const char*>(
395
0
            memchr(str, non_alpha, n))) != nullptr) {
396
    // ... test whether it is followed by suffix and preceded by prefix
397
0
    if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) &&
398
0
        (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) {
399
0
      return const_cast<char*>(where - prelen);
400
0
    }
401
    // if not, advance the pointer, and adjust the length according
402
0
    n -= (where + 1) - str;
403
0
    str = where + 1;
404
0
  }
405
406
0
  return nullptr;
407
0
}
408
409
// ----------------------------------------------------------------------
410
// strcasestr_alnum is like a case-insensitive strstr, except that it
411
// ignores non-alphanumeric characters in both strings for the sake of
412
// comparison.
413
//
414
// This function uses ascii_isalnum() instead of isalnum() and
415
// ascii_tolower() instead of tolower(), for speed.
416
//
417
// E.g. strcasestr_alnum("i use google all the time", " !!Google!! ")
418
// returns pointer to "google all the time"
419
// ----------------------------------------------------------------------
420
0
char *strcasestr_alnum(const char *haystack, const char *needle) {
421
0
  const char *haystack_ptr;
422
0
  const char *needle_ptr;
423
424
  // Skip non-alnums at beginning
425
0
  while ( !ascii_isalnum(*needle) )
426
0
    if ( *needle++ == '\0' )
427
0
      return const_cast<char*>(haystack);
428
0
  needle_ptr = needle;
429
430
  // Skip non-alnums at beginning
431
0
  while ( !ascii_isalnum(*haystack) )
432
0
    if ( *haystack++ == '\0' )
433
0
      return nullptr;
434
0
  haystack_ptr = haystack;
435
436
0
  while ( *needle_ptr != '\0' ) {
437
    // Non-alnums - advance
438
0
    while ( !ascii_isalnum(*needle_ptr) )
439
0
      if ( *needle_ptr++ == '\0' )
440
0
        return const_cast<char *>(haystack);
441
442
0
    while ( !ascii_isalnum(*haystack_ptr) )
443
0
      if ( *haystack_ptr++ == '\0' )
444
0
        return nullptr;
445
446
0
    if ( ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr) ) {
447
      // Case-insensitive match - advance
448
0
      needle_ptr++;
449
0
      haystack_ptr++;
450
0
    } else {
451
      // No match - rollback to next start point in haystack
452
0
      haystack++;
453
0
      while ( !ascii_isalnum(*haystack) )
454
0
        if ( *haystack++ == '\0' )
455
0
          return nullptr;
456
0
      haystack_ptr = haystack;
457
0
      needle_ptr = needle;
458
0
    }
459
0
  }
460
0
  return const_cast<char *>(haystack);
461
0
}
462
463
464
// ----------------------------------------------------------------------
465
// CountSubstring()
466
//    Return the number times a "substring" appears in the "text"
467
//    NOTE: this function's complexity is O(|text| * |substring|)
468
//          It is meant for short "text" (such as to ensure the
469
//          printf format string has the right number of arguments).
470
//          DO NOT pass in long "text".
471
// ----------------------------------------------------------------------
472
0
int CountSubstring(GStringPiece text, GStringPiece substring) {
473
0
  CHECK_GT(substring.length(), 0);
474
475
0
  int count = 0;
476
0
  GStringPiece::size_type curr = 0;
477
0
  while (GStringPiece::npos != (curr = text.find(substring, curr))) {
478
0
    ++count;
479
0
    ++curr;
480
0
  }
481
0
  return count;
482
0
}
483
484
// ----------------------------------------------------------------------
485
// strstr_delimited()
486
//    Just like strstr(), except it ensures that the needle appears as
487
//    a complete item (or consecutive series of items) in a delimited
488
//    list.
489
//
490
//    Like strstr(), returns haystack if needle is empty, or NULL if
491
//    either needle/haystack is NULL.
492
// ----------------------------------------------------------------------
493
const char* strstr_delimited(const char* haystack,
494
                             const char* needle,
495
0
                             char delim) {
496
0
  if (!needle || !haystack) return nullptr;
497
0
  if (*needle == '\0') return haystack;
498
499
0
  auto needle_len = strlen(needle);
500
501
0
  while (true) {
502
    // Skip any leading delimiters.
503
0
    while (*haystack == delim) ++haystack;
504
505
    // Walk down the haystack, matching every character in the needle.
506
0
    const char* this_match = haystack;
507
0
    size_t i = 0;
508
0
    for (; i < needle_len; i++) {
509
0
      if (*haystack != needle[i]) {
510
        // We ran out of haystack or found a non-matching character.
511
0
        break;
512
0
      }
513
0
      ++haystack;
514
0
    }
515
516
    // If we matched the whole needle, ensure that it's properly delimited.
517
0
    if (i == needle_len && (*haystack == '\0' || *haystack == delim)) {
518
0
      return this_match;
519
0
    }
520
521
    // No match. Consume non-delimiter characters until we run out of them.
522
0
    while (*haystack != delim) {
523
0
      if (*haystack == '\0') return nullptr;
524
0
      ++haystack;
525
0
    }
526
0
  }
527
0
  LOG(FATAL) << "Unreachable statement";
528
0
  return nullptr;
529
0
}
530
531
532
// ----------------------------------------------------------------------
533
// Older versions of libc have a buggy strsep.
534
// ----------------------------------------------------------------------
535
536
0
char* gstrsep(char** stringp, const char* delim) {
537
0
  char *s;
538
0
  const char *spanp;
539
0
  int c, sc;
540
0
  char *tok;
541
542
0
  if ((s = *stringp) == nullptr)
543
0
    return nullptr;
544
545
0
  tok = s;
546
0
  while (true) {
547
0
    c = *s++;
548
0
    spanp = delim;
549
0
    do {
550
0
      if ((sc = *spanp++) == c) {
551
0
        if (c == 0)
552
0
          s = nullptr;
553
0
        else
554
0
          s[-1] = 0;
555
0
        *stringp = s;
556
0
        return tok;
557
0
      }
558
0
    } while (sc != 0);
559
0
  }
560
561
0
  return nullptr; /* should not happen */
562
0
}
563
564
0
void FastStringAppend(string* s, const char* data, int len) {
565
0
  STLAppendToString(s, data, len);
566
0
}
567
568
569
// TODO(user): add a microbenchmark and revisit
570
// the optimizations done here.
571
//
572
// Several converters use this table to reduce
573
// division and modulo operations.
574
extern const char two_ASCII_digits[100][2];
575
576
const char two_ASCII_digits[100][2] = {
577
  {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
578
  {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
579
  {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
580
  {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
581
  {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
582
  {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
583
  {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
584
  {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
585
  {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
586
  {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
587
  {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
588
  {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
589
  {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
590
  {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
591
  {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
592
  {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
593
  {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
594
  {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
595
  {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
596
  {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
597
};
598
599
0
static inline void PutTwoDigits(int i, char* p) {
600
0
  DCHECK_GE(i, 0);
601
0
  DCHECK_LT(i, 100);
602
0
  p[0] = two_ASCII_digits[i][0];
603
0
  p[1] = two_ASCII_digits[i][1];
604
0
}
605
606
0
char* FastTimeToBuffer(time_t s, char* buffer) {
607
0
  if (s == 0) {
608
0
    time(&s);
609
0
  }
610
611
0
  struct tm tm;
612
0
  if (PortableSafeGmtime(&s, &tm) == nullptr) {
613
    // Error message must fit in 30-char buffer.
614
0
    memcpy(buffer, "Invalid:", sizeof("Invalid:"));
615
0
    FastInt64ToBufferLeft(s, buffer+strlen(buffer));
616
0
    return buffer;
617
0
  }
618
619
  // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
620
  // but strftime does locale stuff which we do not want
621
  // plus strftime takes > 10x the time of hard code
622
623
0
  const char* weekday_name = "Xxx";
624
0
  switch (tm.tm_wday) {
625
0
    default: { DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday; } break;
626
0
    case 0:  weekday_name = "Sun"; break;
627
0
    case 1:  weekday_name = "Mon"; break;
628
0
    case 2:  weekday_name = "Tue"; break;
629
0
    case 3:  weekday_name = "Wed"; break;
630
0
    case 4:  weekday_name = "Thu"; break;
631
0
    case 5:  weekday_name = "Fri"; break;
632
0
    case 6:  weekday_name = "Sat"; break;
633
0
  }
634
635
0
  const char* month_name = "Xxx";
636
0
  switch (tm.tm_mon) {
637
0
    default:  { DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon; } break;
638
0
    case 0:   month_name = "Jan"; break;
639
0
    case 1:   month_name = "Feb"; break;
640
0
    case 2:   month_name = "Mar"; break;
641
0
    case 3:   month_name = "Apr"; break;
642
0
    case 4:   month_name = "May"; break;
643
0
    case 5:   month_name = "Jun"; break;
644
0
    case 6:   month_name = "Jul"; break;
645
0
    case 7:   month_name = "Aug"; break;
646
0
    case 8:   month_name = "Sep"; break;
647
0
    case 9:   month_name = "Oct"; break;
648
0
    case 10:  month_name = "Nov"; break;
649
0
    case 11:  month_name = "Dec"; break;
650
0
  }
651
652
  // Write out the buffer.
653
654
0
  memcpy(buffer+0, weekday_name, 3);
655
0
  buffer[3] = ',';
656
0
  buffer[4] = ' ';
657
658
0
  PutTwoDigits(tm.tm_mday, buffer+5);
659
0
  buffer[7] = ' ';
660
661
0
  memcpy(buffer+8, month_name, 3);
662
0
  buffer[11] = ' ';
663
664
0
  int32 year = tm.tm_year + 1900;
665
0
  PutTwoDigits(year/100, buffer+12);
666
0
  PutTwoDigits(year%100, buffer+14);
667
0
  buffer[16] = ' ';
668
669
0
  PutTwoDigits(tm.tm_hour, buffer+17);
670
0
  buffer[19] = ':';
671
672
0
  PutTwoDigits(tm.tm_min, buffer+20);
673
0
  buffer[22] = ':';
674
675
0
  PutTwoDigits(tm.tm_sec, buffer+23);
676
677
  // includes ending NUL
678
0
  memcpy(buffer+25, " GMT", 5);
679
680
0
  return buffer;
681
0
}
682
683
// ----------------------------------------------------------------------
684
// strdup_with_new()
685
// strndup_with_new()
686
//
687
//    strdup_with_new() is the same as strdup() except that the memory
688
//    is allocated by new[] and hence an exception will be generated
689
//    if out of memory.
690
//
691
//    strndup_with_new() is the same as strdup_with_new() except that it will
692
//    copy up to the specified number of characters.  This function
693
//    is useful when we want to copy a substring out of a string
694
//    and didn't want to (or cannot) modify the string
695
// ----------------------------------------------------------------------
696
0
char* strdup_with_new(const char* the_string) {
697
0
  if (the_string == nullptr)
698
0
    return nullptr;
699
0
  else
700
0
    return strndup_with_new(the_string, strlen(the_string));
701
0
}
702
703
0
char* strndup_with_new(const char* the_string, size_t max_length) {
704
0
  if (the_string == nullptr)
705
0
    return nullptr;
706
707
0
  auto result = new char[max_length + 1];
708
0
  result[max_length] = '\0';  // terminate the string because strncpy might not
709
0
  return strncpy(result, the_string, max_length);
710
0
}
711
712
713
714
715
// ----------------------------------------------------------------------
716
// ScanForFirstWord()
717
//    This function finds the first word in the string "the_string" given.
718
//    A word is defined by consecutive !ascii_isspace() characters.
719
//    If no valid words are found,
720
//        return NULL and *end_ptr will contain junk
721
//    else
722
//        return the beginning of the first word and
723
//        *end_ptr will store the address of the first invalid character
724
//        (ascii_isspace() or '\0').
725
//
726
//    Precondition: (end_ptr != NULL)
727
// ----------------------------------------------------------------------
728
0
const char* ScanForFirstWord(const char* the_string, const char** end_ptr) {
729
0
  CHECK(end_ptr != nullptr) << ": precondition violated";
730
731
0
  if (the_string == nullptr)  // empty string
732
0
    return nullptr;
733
734
0
  const char* curr = the_string;
735
0
  while ((*curr != '\0') && ascii_isspace(*curr))  // skip initial spaces
736
0
    ++curr;
737
738
0
  if (*curr == '\0')  // no valid word found
739
0
    return nullptr;
740
741
  // else has a valid word
742
0
  const char* first_word = curr;
743
744
  // now locate the end of the word
745
0
  while ((*curr != '\0') && !ascii_isspace(*curr))
746
0
    ++curr;
747
748
0
  *end_ptr = curr;
749
0
  return first_word;
750
0
}
751
752
// ----------------------------------------------------------------------
753
// AdvanceIdentifier()
754
//    This function returns a pointer past the end of the longest C-style
755
//    identifier that is a prefix of str or NULL if str does not start with
756
//    one.  A C-style identifier begins with an ASCII letter or underscore
757
//    and continues with ASCII letters, digits, or underscores.
758
// ----------------------------------------------------------------------
759
0
const char *AdvanceIdentifier(const char *str) {
760
  // Not using isalpha and isalnum so as not to rely on the locale.
761
  // We could have used ascii_isalpha and ascii_isalnum.
762
0
  char ch = *str++;
763
0
  if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'))
764
0
    return nullptr;
765
0
  while (true) {
766
0
    ch = *str;
767
0
    if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
768
0
          (ch >= '0' && ch <= '9') || ch == '_'))
769
0
      return str;
770
0
    str++;
771
0
  }
772
0
}
773
774
775
// ----------------------------------------------------------------------
776
// IsIdentifier()
777
//    This function returns true if str is a C-style identifier.
778
//    A C-style identifier begins with an ASCII letter or underscore
779
//    and continues with ASCII letters, digits, or underscores.
780
// ----------------------------------------------------------------------
781
0
bool IsIdentifier(const char *str) {
782
0
  const char *end = AdvanceIdentifier(str);
783
0
  return end && *end == '\0';
784
0
}
785
786
134k
static bool IsWildcard(Rune character) {
787
134k
  return character == '*' || character == '?';
788
134k
}
789
790
// Move the strings pointers to the point where they start to differ.
791
template <typename CHAR, typename NEXT>
792
static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
793
                         const CHAR** string, const CHAR* string_end,
794
135k
                         NEXT next) {
795
135k
  const CHAR* escape = nullptr;
796
135k
  while (*pattern != pattern_end && *string != string_end) {
797
134k
    if (!escape && IsWildcard(**pattern)) {
798
      // We don't want to match wildcard here, except if it's escaped.
799
131k
      return;
800
131k
    }
801
802
    // Check if the escapement char is found. If so, skip it and move to the
803
    // next character.
804
3.02k
    if (!escape && **pattern == '\\') {
805
1
      escape = *pattern;
806
1
      next(pattern, pattern_end);
807
1
      continue;
808
1
    }
809
810
    // Check if the chars match, if so, increment the ptrs.
811
3.02k
    const CHAR* pattern_next = *pattern;
812
3.02k
    const CHAR* string_next = *string;
813
3.02k
    Rune pattern_char = next(&pattern_next, pattern_end);
814
3.02k
    if (pattern_char == next(&string_next, string_end) &&
815
196
        pattern_char != Runeerror &&
816
196
        pattern_char <= Runemax) {
817
195
      *pattern = pattern_next;
818
195
      *string = string_next;
819
2.82k
    } else {
820
      // Uh ho, it did not match, we are done. If the last char was an
821
      // escapement, that means that it was an error to advance the ptr here,
822
      // let's put it back where it was. This also mean that the MatchPattern
823
      // function will return false because if we can't match an escape char
824
      // here, then no one will.
825
2.82k
      if (escape) {
826
0
        *pattern = escape;
827
0
      }
828
2.82k
      return;
829
2.82k
    }
830
831
195
    escape = nullptr;
832
195
  }
833
135k
}
834
835
template <typename CHAR, typename NEXT>
836
419
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
837
648
  while (*pattern != end) {
838
503
    if (!IsWildcard(**pattern))
839
274
      return;
840
229
    next(pattern, end);
841
229
  }
842
419
}
843
844
template <typename CHAR, typename NEXT>
845
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
846
                          const CHAR* pattern, const CHAR* pattern_end,
847
                          int depth,
848
266k
                          NEXT next) {
849
266k
  const int kMaxDepth = 16;
850
266k
  if (depth > kMaxDepth)
851
131k
    return false;
852
853
  // Eat all the matching chars.
854
135k
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
855
856
  // If the string is empty, then the pattern must be empty too, or contains
857
  // only wildcards.
858
135k
  if (eval == eval_end) {
859
26
    EatWildcard(&pattern, pattern_end, next);
860
26
    return pattern == pattern_end;
861
26
  }
862
863
  // Pattern is empty but not string, this is not a match.
864
135k
  if (pattern == pattern_end)
865
991
    return false;
866
867
  // If this is a question mark, then we need to compare the rest with
868
  // the current string or the string with one character eaten.
869
134k
  const CHAR* next_pattern = pattern;
870
134k
  next(&next_pattern, pattern_end);
871
134k
  if (pattern[0] == '?') {
872
131k
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
873
131k
                      depth + 1, next))
874
0
      return true;
875
131k
    const CHAR* next_eval = eval;
876
131k
    next(&next_eval, eval_end);
877
131k
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
878
131k
                      depth + 1, next))
879
6
      return true;
880
134k
  }
881
882
  // This is a *, try to match all the possible substrings with the remainder
883
  // of the pattern.
884
134k
  if (pattern[0] == '*') {
885
    // Collapse duplicate wild cards (********** into *) so that the
886
    // method does not recurse unnecessarily. http://crbug.com/52839
887
198
    EatWildcard(&next_pattern, pattern_end, next);
888
889
3.67k
    while (eval != eval_end) {
890
3.48k
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
891
3.48k
                        depth + 1, next))
892
3
        return true;
893
3.48k
      eval++;
894
3.48k
    }
895
896
    // We reached the end of the string, let see if the pattern contains only
897
    // wildcards.
898
195
    if (eval == eval_end) {
899
195
      EatWildcard(&pattern, pattern_end, next);
900
195
      if (pattern != pattern_end)
901
135
        return false;
902
60
      return true;
903
60
    }
904
195
  }
905
906
133k
  return false;
907
133k
}
908
909
struct NextCharUTF8 {
910
271k
  Rune operator()(const char** p, const char* end) {
911
271k
    Rune c;
912
271k
    int offset = charntorune(&c, *p, static_cast<int>(end - *p));
913
271k
    *p += offset;
914
271k
    return c;
915
271k
  }
916
};
917
918
bool MatchPattern(const GStringPiece& eval,
919
554
                  const GStringPiece& pattern) {
920
554
  return MatchPatternT(eval.data(), eval.data() + eval.size(),
921
554
                       pattern.data(), pattern.data() + pattern.size(),
922
554
                       0, NextCharUTF8());
923
554
}
924
925
926
927
// ----------------------------------------------------------------------
928
// FindTagValuePair
929
//    Given a string of the form
930
//    <something><attr_sep><tag><tag_value_sep><value><attr_sep>...<string_term>
931
//    where the part before the first attr_sep is optional,
932
//    this function extracts the first tag and value, if any.
933
//    The function returns true if successful, in which case "tag" and "value"
934
//    are set to point to the beginning of the tag and the value, respectively,
935
//    and "tag_len" and "value_len" are set to the respective lengths.
936
// ----------------------------------------------------------------------
937
938
bool FindTagValuePair(const char* arg_str, char tag_value_separator,
939
                      char attribute_separator, char string_terminal,
940
                      char **tag, size_t *tag_len,
941
0
                      char **value, size_t *value_len) {
942
0
  char* in_str = const_cast<char*>(arg_str);  // For msvc8.
943
0
  if (in_str == nullptr)
944
0
    return false;
945
0
  char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'};
946
0
  char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'};
947
948
  // Look for beginning of tag
949
0
  *tag = strpbrk(in_str, attr_sep_or_term);
950
  // If string_terminal is '\0', strpbrk won't find it but return null.
951
0
  if (*tag == nullptr || **tag == string_terminal)
952
0
    *tag = in_str;
953
0
  else
954
0
    (*tag)++;   // Move past separator
955
  // Now look for value...
956
0
  char *tv_sep_pos = strpbrk(*tag, tv_sep_or_term);
957
0
  if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal)
958
0
    return false;
959
  // ...and end of value
960
0
  char *attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term);
961
962
0
  *tag_len = tv_sep_pos - *tag;
963
0
  *value = tv_sep_pos + 1;
964
0
  if (attr_sep_pos != nullptr)
965
0
    *value_len = attr_sep_pos - *value;
966
0
  else
967
0
    *value_len = strlen(*value);
968
0
  return true;
969
0
}
970
971
0
void UniformInsertString(string* s, int interval, const char* separator) {
972
0
  const size_t separator_len = strlen(separator);
973
974
0
  if (interval < 1 ||      // invalid interval
975
0
      s->empty() ||        // nothing to do
976
0
      separator_len == 0)  // invalid separator
977
0
    return;
978
979
0
  auto num_inserts = (s->size() - 1) / interval;  // -1 to avoid appending at end
980
0
  if (num_inserts == 0)  // nothing to do
981
0
    return;
982
983
0
  string tmp;
984
0
  tmp.reserve(s->size() + num_inserts * separator_len + 1);
985
986
0
  for (size_t i = 0; i < num_inserts ; ++i) {
987
    // append this interval
988
0
    tmp.append(*s, i * interval, interval);
989
    // append a separator
990
0
    tmp.append(separator, separator_len);
991
0
  }
992
993
  // append the tail
994
0
  const size_t tail_pos = num_inserts * interval;
995
0
  tmp.append(*s, tail_pos, s->size() - tail_pos);
996
997
0
  s->swap(tmp);
998
0
}
999
1000
void InsertString(string *const s,
1001
                  const vector<uint32> &indices,
1002
0
                  char const *const separator) {
1003
0
  const auto num_indices = indices.size();
1004
0
  if (num_indices == 0) {
1005
0
    return;  // nothing to do...
1006
0
  }
1007
1008
0
  const auto separator_len = strlen(separator);
1009
0
  if (separator_len == 0) {
1010
0
    return;  // still nothing to do...
1011
0
  }
1012
1013
0
  string tmp;
1014
0
  const auto s_len = s->size();
1015
0
  tmp.reserve(s_len + separator_len * num_indices);
1016
1017
0
  vector<uint32>::const_iterator const ind_end(indices.end());
1018
0
  auto ind_pos(indices.begin());
1019
1020
0
  uint32 last_pos(0);
1021
0
  while (ind_pos != ind_end) {
1022
0
    const uint32 pos(*ind_pos);
1023
0
    DCHECK_GE(pos, last_pos);
1024
0
    DCHECK_LE(pos, s_len);
1025
1026
0
    tmp.append(s->substr(last_pos, pos - last_pos));
1027
0
    tmp.append(separator);
1028
1029
0
    last_pos = pos;
1030
0
    ++ind_pos;
1031
0
  }
1032
0
  tmp.append(s->substr(last_pos));
1033
1034
0
  s->swap(tmp);
1035
0
}
1036
1037
//------------------------------------------------------------------------
1038
// FindNth()
1039
//  return index of nth occurrence of c in the string,
1040
//  or string::npos if n > number of occurrences of c.
1041
//  (returns string::npos = -1 if n <= 0)
1042
//------------------------------------------------------------------------
1043
0
size_t FindNth(GStringPiece s, char c, size_t n) {
1044
0
  size_t pos = string::npos;
1045
1046
0
  for (size_t i = 0; i < n; ++i) {
1047
0
    pos = s.find_first_of(c, pos + 1);
1048
0
    if ( pos == GStringPiece::npos ) {
1049
0
      break;
1050
0
    }
1051
0
  }
1052
0
  return pos;
1053
0
}
1054
1055
//------------------------------------------------------------------------
1056
// ReverseFindNth()
1057
//  return index of nth-to-last occurrence of c in the string,
1058
//  or string::npos if n > number of occurrences of c.
1059
//  (returns string::npos if n <= 0)
1060
//------------------------------------------------------------------------
1061
0
size_t ReverseFindNth(GStringPiece s, char c, size_t n) {
1062
0
  if ( n <= 0 ) {
1063
0
    return static_cast<int>(GStringPiece::npos);
1064
0
  }
1065
1066
0
  size_t pos = s.size();
1067
1068
0
  for (size_t i = 0; i < n; ++i) {
1069
    // If pos == 0, we return GStringPiece::npos right away. Otherwise,
1070
    // the following find_last_of call would take (pos - 1) as string::npos,
1071
    // which means it would again search the entire input string.
1072
0
    if (pos == 0) {
1073
0
      return static_cast<int>(GStringPiece::npos);
1074
0
    }
1075
0
    pos = s.find_last_of(c, pos - 1);
1076
0
    if (pos == string::npos) {
1077
0
      break;
1078
0
    }
1079
0
  }
1080
0
  return pos;
1081
0
}
1082
1083
namespace strings {
1084
1085
// FindEol()
1086
// Returns the location of the next end-of-line sequence.
1087
1088
0
GStringPiece FindEol(GStringPiece s) {
1089
0
  for (size_t i = 0; i < s.length(); ++i) {
1090
0
    if (s[i] == '\n') {
1091
0
      return GStringPiece(s.data() + i, 1);
1092
0
    }
1093
0
    if (s[i] == '\r') {
1094
0
      if (i+1 < s.length() && s[i+1] == '\n') {
1095
0
        return GStringPiece(s.data() + i, 2);
1096
0
      } else {
1097
0
        return GStringPiece(s.data() + i, 1);
1098
0
      }
1099
0
    }
1100
0
  }
1101
0
  return GStringPiece(s.data() + s.length(), 0);
1102
0
}
1103
1104
}  // namespace strings
1105
1106
//------------------------------------------------------------------------
1107
// OnlyWhitespace()
1108
//  return true if string s contains only whitespace characters
1109
//------------------------------------------------------------------------
1110
0
bool OnlyWhitespace(const GStringPiece& s) {
1111
0
  for (const auto& c : s) {
1112
0
    if ( !ascii_isspace(c) ) return false;
1113
0
  }
1114
0
  return true;
1115
0
}
1116
1117
0
string ImmediateSuccessor(const GStringPiece& s) {
1118
  // Return the input string, with an additional NUL byte appended.
1119
0
  string out;
1120
0
  out.reserve(s.size() + 1);
1121
0
  out.append(s.data(), s.size());
1122
0
  out.push_back('\0');
1123
0
  return out;
1124
0
}
1125
1126
0
int SafeSnprintf(char *str, size_t size, const char *format, ...) {
1127
0
  va_list printargs;
1128
0
  va_start(printargs, format);
1129
0
  int ncw = vsnprintf(str, size, format, printargs);
1130
0
  va_end(printargs);
1131
0
  return (ncw < narrow_cast<int>(size) && ncw >= 0) ? ncw : 0;
1132
0
}
1133
1134
0
bool GetlineFromStdioFile(FILE* file, string* str, char delim) {
1135
0
  str->erase();
1136
0
  while (true) {
1137
0
    if (feof(file) || ferror(file)) {
1138
0
      return false;
1139
0
    }
1140
0
    int c = getc(file);
1141
0
    if (c == EOF) return false;
1142
0
    if (c == delim) return true;
1143
0
    str->push_back(c);
1144
0
  }
1145
0
}
1146
1147
namespace {
1148
1149
template <typename CHAR>
1150
13
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
1151
346
  for (size_t i = 0; i < dst_size; ++i) {
1152
346
    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
1153
13
      return i;
1154
346
  }
1155
1156
  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
1157
0
  if (dst_size != 0)
1158
0
    dst[dst_size - 1] = 0;
1159
1160
  // Count the rest of the |src|, and return it's length in characters.
1161
0
  while (src[dst_size]) ++dst_size;
1162
0
  return dst_size;
1163
13
}
1164
1165
}  // namespace
1166
1167
13
size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) {
1168
13
  return lcpyT<char>(dst, src, dst_size);
1169
13
}