YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// The following only applies to changes made to this file as part of YugaByte development.
5
//
6
// Portions Copyright (c) YugaByte, Inc.
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
9
// in compliance with the License.  You may obtain a copy of the License at
10
//
11
// http://www.apache.org/licenses/LICENSE-2.0
12
//
13
// Unless required by applicable law or agreed to in writing, software distributed under the License
14
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15
// or implied.  See the License for the specific language governing permissions and limitations
16
// under the License.
17
//
18
// This file contains string processing functions related to
19
// numeric values.
20
21
#include "yb/gutil/strings/numbers.h"
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <errno.h>
26
#include <float.h>          // for DBL_DIG and FLT_DIG
27
#include <math.h>           // for HUGE_VAL
28
#include <stdio.h>
29
#include <stdlib.h>
30
#include <string.h>
31
32
#include <iomanip>
33
#include <limits>
34
#include <sstream>
35
36
#include <glog/logging.h>
37
38
#include "yb/gutil/casts.h"
39
#include "yb/gutil/int128.h"
40
#include "yb/gutil/integral_types.h"
41
#include "yb/gutil/stringprintf.h"
42
#include "yb/gutil/strings/ascii_ctype.h"
43
#include "yb/gutil/strtoint.h"
44
45
using std::numeric_limits;
46
using std::string;
47
48
49
// Consumes one floating-point literal from the front of *text.
//
//   text            in/out cursor; advanced past the consumed characters on
//                   success, untouched on failure.
//   len             in/out count of readable characters, or -1 when *text is
//                   '\0'-terminated (which avoids a temporary copy).  When it
//                   is not -1 it is reduced by the number of characters
//                   consumed.
//   allow_question  when true, a leading '?' is consumed as a placeholder and
//                   *val is left unchanged.
//   val             receives the parsed value.
//   initial_minus   optional.  When non-null, a leading '-' is skipped rather
//                   than parsed as a sign, and *initial_minus records whether
//                   one was seen.
//   final_period    optional.  When non-null, a '.' ending the literal is
//                   handed back to the caller (not consumed), and
//                   *final_period records whether that happened.
//
// The two optional flags exist for callers in which '-' or '.' may also act
// as a separator.  Leading whitespace and "inf"-style spellings are rejected.
// Returns true iff something was consumed.
static inline bool EatADouble(const char** text, ssize_t* len, bool allow_question,
                              double* val, bool* initial_minus,
                              bool* final_period) {
  const char* cursor = *text;
  ssize_t remaining = *len;  // -1 => input is '\0'-terminated

  if (cursor == nullptr || remaining == 0) {
    return false;
  }

  // '?' placeholder: eat exactly one character and leave *val alone.
  if (allow_question && *cursor == '?') {
    *text = cursor + 1;
    if (remaining != -1) {
      *len = remaining - 1;
    }
    return true;
  }

  if (initial_minus != nullptr) {
    *initial_minus = (*cursor == '-');
    if (*initial_minus) {
      if (remaining == 1) {
        return false;  // a lone '-' with nothing after it
      }
      ++cursor;
      if (remaining != -1) {
        --remaining;
      }
    }
  }

  // Quick reject: a literal must start with a sign, a digit or a '.'.
  if (strchr("-+.0123456789", *cursor) == nullptr) {
    return false;
  }

  double parsed;
  const char* stop;
  if (remaining == -1) {
    char* parse_end;  // strtod insists on a non-const char**
    parsed = strtod(cursor, &parse_end);
    stop = parse_end;
  } else {
    // strtod needs a terminator, so parse a '\0'-terminated copy and map the
    // stopping point back onto the caller's buffer.
    std::unique_ptr<char[]> scratch(new char[remaining + 1]);
    memcpy(scratch.get(), cursor, remaining);
    scratch[remaining] = '\0';
    char* parse_end;
    parsed = strtod(scratch.get(), &parse_end);
    stop = cursor + (parse_end - scratch.get());
  }

  if (stop == cursor) {
    return false;  // strtod consumed nothing
  }

  if (final_period != nullptr) {
    *final_period = (stop[-1] == '.');
    if (*final_period) {
      --stop;  // hand the trailing '.' back to the caller
    }
  }

  *text = stop;
  *val = parsed;
  if (remaining != -1) {
    *len = remaining - (stop - cursor);
  }
  return true;
}
122
123
// Tests whether the next character of *text is one of acceptable_chars and
// returns it if so; returns '\0' when it is not.
//
//   text, len  cursor and remaining length (-1 => '\0'-terminated).
//   update     when true, a matching character is consumed: *text advances
//              and *len (if not -1) shrinks by one.  When false neither is
//              modified.
//   null_ok    "predicate mode": if the input is already exhausted, return
//              '\1' (an arbitrary non-zero value) instead of '\0'.  Must not
//              be combined with update.
static inline char EatAChar(const char** text, ssize_t* len,
                            const char* acceptable_chars,
                            bool update, bool null_ok) {
  assert(!(update && null_ok));

  const bool exhausted = (*len == 0) || (**text == '\0');
  if (exhausted) {
    return null_ok ? '\1' : '\0';
  }

  const char candidate = **text;
  if (strchr(acceptable_chars, candidate) == nullptr) {
    return '\0';  // not acceptable; nothing consumed
  }

  if (update) {
    ++(*text);
    if (*len != -1) {
      --(*len);
    }
  }
  return candidate;
}
147
148
// Parse an expression in 'text' of the form: <comparator><double> or
149
// <double><sep><double> See full comments in header file.
150
bool ParseDoubleRange(const char* text, ssize_t len, const char** end,
151
                      double* from, double* to, bool* is_currency,
152
0
                      const DoubleRangeOptions& opts) {
153
0
  const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
154
155
0
  if (!opts.dont_modify_unbounded) {
156
0
    *from = -HUGE_VAL;
157
0
    *to = HUGE_VAL;
158
0
  }
159
0
  if (opts.allow_currency && (is_currency != nullptr))
160
0
    *is_currency = false;
161
162
0
  assert(len >= -1);
163
0
  assert(opts.separators && (*opts.separators != '\0'));
164
  // these aren't valid separators
165
0
  assert(strlen(opts.separators) ==
166
0
         strcspn(opts.separators, "+0123456789eE$"));
167
0
  assert(opts.num_required_bounds <= 2);
168
169
  // Handle easier cases of comparators (<, >) first
170
0
  if (opts.allow_comparators) {
171
0
    char comparator = EatAChar(&text, &len, "<>", true, false);
172
0
    if (comparator) {
173
0
      double* dest = (comparator == '>') ? from : to;
174
0
      EatAChar(&text, &len, "=", true, false);
175
0
      if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
176
0
        if (is_currency != nullptr)
177
0
          *is_currency = true;
178
0
      if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr,
179
0
                      nullptr))
180
0
        return false;
181
0
      *end = text;
182
0
      return EatAChar(&text, &len, opts.acceptable_terminators, false,
183
0
                      opts.null_terminator_ok);
184
0
    }
185
0
  }
186
187
0
  bool seen_dollar = (opts.allow_currency &&
188
0
                      EatAChar(&text, &len, "$", true, false));
189
190
  // If we see a '-', two things could be happening: -<to> or
191
  // <from>... where <from> is negative. Treat initial minus sign as a
192
  // separator if '-' is a valid separator.
193
  // Similarly, we prepare for the possibility of seeing a '.' at the
194
  // end of the number, in case '.' (which really means '..') is a
195
  // separator.
196
0
  bool initial_minus_sign = false;
197
0
  bool final_period = false;
198
0
  bool* check_initial_minus = (strchr(opts.separators, '-') && !seen_dollar
199
0
                               && (opts.num_required_bounds < 2)) ?
200
0
                              (&initial_minus_sign) : nullptr;
201
0
  bool* check_final_period = strchr(opts.separators, '.') ? (&final_period)
202
0
                             : nullptr;
203
0
  bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers,
204
0
                                from, check_initial_minus, check_final_period);
205
206
  // if 2 bounds required, must see a double (or '?' if allowed)
207
0
  if ((opts.num_required_bounds == 2) && !double_seen) return false;
208
209
0
  if (seen_dollar && !double_seen) {
210
0
      --text;
211
0
      if (len != -1)
212
0
        ++len;
213
0
      seen_dollar = false;
214
0
  }
215
  // If we're here, we've read the first double and now expect a
216
  // separator and another <double>.
217
0
  char separator = EatAChar(&text, &len, opts.separators, true, false);
218
0
  if (separator == '.') {
219
    // seen one '.' as separator; must check for another; perhaps set seplen=2
220
0
    if (EatAChar(&text, &len, ".", true, false)) {
221
0
      if (final_period) {
222
        // We may have three periods in a row. The first is part of the
223
        // first number, the others are a separator. Policy: 234...567
224
        // is "234." to "567", not "234" to ".567".
225
0
        EatAChar(&text, &len, ".", true, false);
226
0
      }
227
0
    } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
228
      // just one '.' and no other separator; uneat the first '.' we saw
229
0
      --text;
230
0
      if (len != -1)
231
0
        ++len;
232
0
      separator = '\0';
233
0
    }
234
0
  }
235
  // By now, we've consumed whatever separator there may have been,
236
  // and separator is true iff there was one.
237
0
  if (!separator) {
238
0
    if (final_period)  // final period now considered part of first double
239
0
      EatAChar(&text, &len, ".", true, false);
240
0
    if (initial_minus_sign && double_seen) {
241
0
      *to = *from;
242
0
      *from = from_default;
243
0
    } else if (opts.require_separator ||
244
0
               (opts.num_required_bounds > 0 && !double_seen) ||
245
0
               (opts.num_required_bounds > 1) ) {
246
0
      return false;
247
0
    }
248
0
  } else {
249
0
    if (initial_minus_sign && double_seen)
250
0
      *from = -(*from);
251
    // read second <double>
252
0
    bool second_dollar_seen = (seen_dollar
253
0
                               || (opts.allow_currency && !double_seen))
254
0
                              && EatAChar(&text, &len, "$", true, false);
255
0
    bool second_double_seen = EatADouble(
256
0
      &text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
257
0
    if (opts.num_required_bounds > static_cast<uint32_t>(double_seen + second_double_seen))
258
0
      return false;
259
0
    if (second_dollar_seen && !second_double_seen) {
260
0
      --text;
261
0
      if (len != -1)
262
0
        ++len;
263
0
      second_dollar_seen = false;
264
0
    }
265
0
    seen_dollar = seen_dollar || second_dollar_seen;
266
0
  }
267
268
0
  if (seen_dollar && (is_currency != nullptr))
269
0
    *is_currency = true;
270
  // We're done. But we have to check that the next char is a proper
271
  // terminator.
272
0
  *end = text;
273
0
  char terminator = EatAChar(&text, &len, opts.acceptable_terminators, false,
274
0
                             opts.null_terminator_ok);
275
0
  if (terminator == '.')
276
0
    --(*end);
277
0
  return terminator;
278
0
}
279
280
// ----------------------------------------------------------------------
281
// ConsumeStrayLeadingZeroes
282
//    Eliminates all leading zeroes (unless the string itself is composed
283
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
284
// --------------------------------------------------------------------
285
286
0
// Strips redundant leading '0' characters from *str in place.  A string made
// up solely of zeroes is reduced to a single "0"; strings of length <= 1 or
// not starting with '0' are left untouched.
void ConsumeStrayLeadingZeroes(string *const str) {
  const string::size_type length = str->size();
  if (length <= 1 || (*str)[0] != '0') {
    return;
  }

  const char* const first = str->c_str();
  const char* const last = first + length;

  // The first character is already known to be '0'; find the end of the run.
  const char* scan = first + 1;
  while (scan != last && *scan == '0') {
    ++scan;
  }
  DCHECK_GT(scan, first);

  string::size_type zeroes = scan - first;
  if (zeroes == length) {
    --zeroes;  // every character is '0': keep one of them
  }
  str->erase(0, zeroes);
}
304
305
// ----------------------------------------------------------------------
306
// ParseLeadingInt32Value()
307
// ParseLeadingUInt32Value()
308
//    A simple parser for [u]int32 values. Returns the parsed value
309
//    if a valid value is found; else returns deflt
310
//    This cannot handle decimal numbers with leading 0s.
311
// --------------------------------------------------------------------
312
313
1
// Parses a leading integer from `str` with strtol in base 0 (so "0x" and "0"
// prefixes select hex and octal).  Returns `deflt` when no characters could be
// converted; otherwise returns the value clamped to the int32 range.
int32 ParseLeadingInt32Value(const char *str, int32 deflt) {
  char *parse_end = nullptr;
  const auto parsed = strtol(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;
  }
  // On LP64 `long` is wider than int32, so saturate by hand; on 32-bit
  // platforms these comparisons can never be true.
  if (parsed > numeric_limits<int32>::max()) {
    return numeric_limits<int32>::max();
  }
  if (parsed < numeric_limits<int32>::min()) {
    return numeric_limits<int32>::min();
  }
  return narrow_cast<int32>(parsed);
}
324
325
0
// Parses a leading unsigned integer from `str` in base 0 (prefix-selected
// radix).  Returns `deflt` when nothing could be converted.  On the wide path,
// values whose magnitude exceeds the uint32 maximum are pegged to that
// maximum; smaller negative values wrap on truncation (e.g. "-2" becomes
// UINT_MAX - 1).
uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) {
  char *parse_end = nullptr;

  if (numeric_limits<size_t>::max() == numeric_limits<uint32>::max()) {
    // 32-bit platform: strtoul already has the desired width.
    const uint32 narrow = strtoul(str, &parse_end, 0);
    return (parse_end == str) ? deflt : narrow;
  }

  // 64-bit platform: parse as a signed 64-bit value and apply the limits by
  // hand.  A 64-bit strtoul cannot be used because it could not tell "-2"
  // (which should wrap to UINT_MAX-1) apart from a string holding ULONG_MAX-1
  // (which should be pegged to UINT_MAX as an overflow).
  int64 wide = strto64(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;
  }
  const int64 limit = numeric_limits<uint32>::max();
  if (wide > limit || wide < -limit) {
    wide = limit;
  }
  // Inside [-limit, limit] truncation to 32 bits treats negatives correctly.
  return narrow_cast<uint32>(wide);
}
347
348
// ----------------------------------------------------------------------
349
// ParseLeadingDec32Value
350
// ParseLeadingUDec32Value
351
//    A simple parser for [u]int32 values. Returns the parsed value
352
//    if a valid value is found; else returns deflt
353
//    The string passed in is treated as *10 based*.
354
//    This can handle strings with leading 0s.
355
// --------------------------------------------------------------------
356
357
0
// Parses a leading base-10 integer from `str` with strtol (leading zeroes are
// therefore fine).  Returns `deflt` when no characters could be converted;
// otherwise returns the value clamped to the int32 range.
int32 ParseLeadingDec32Value(const char *str, int32 deflt) {
  char *parse_end = nullptr;
  const auto parsed = strtol(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;
  }
  // On LP64 `long` is wider than int32, so saturate by hand; on 32-bit
  // platforms these comparisons can never be true.
  if (parsed > numeric_limits<int32>::max()) {
    return numeric_limits<int32>::max();
  }
  if (parsed < numeric_limits<int32>::min()) {
    return numeric_limits<int32>::min();
  }
  return narrow_cast<int32>(parsed);
}
368
369
0
// Parses a leading base-10 unsigned integer from `str`.  Returns `deflt` when
// nothing could be converted.  On the wide path, values whose magnitude
// exceeds the uint32 maximum are pegged to that maximum; smaller negative
// values wrap on truncation (e.g. "-2" becomes UINT_MAX - 1).
uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) {
  char *parse_end = nullptr;

  if (numeric_limits<size_t>::max() == numeric_limits<uint32>::max()) {
    // 32-bit platform: strtoul already has the desired width.
    const uint32 narrow = strtoul(str, &parse_end, 10);
    return (parse_end == str) ? deflt : narrow;
  }

  // 64-bit platform: parse as a signed 64-bit value and apply the limits by
  // hand.  A 64-bit strtoul cannot be used because it could not tell "-2"
  // (which should wrap to UINT_MAX-1) apart from a string holding ULONG_MAX-1
  // (which should be pegged to UINT_MAX as an overflow).
  int64 wide = strto64(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;
  }
  const int64 limit = numeric_limits<uint32>::max();
  if (wide > limit || wide < -limit) {
    wide = limit;
  }
  // Inside [-limit, limit] truncation to 32 bits treats negatives correctly.
  return narrow_cast<uint32>(wide);
}
391
392
// ----------------------------------------------------------------------
393
// ParseLeadingUInt64Value
394
// ParseLeadingInt64Value
395
// ParseLeadingHex64Value
396
//    A simple parser for 64-bit values. Returns the parsed value if a
397
//    valid integer is found; else returns deflt
398
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
399
// --------------------------------------------------------------------
400
0
// Parses a leading unsigned 64-bit integer from `str` via strtou64 in base 0.
// Returns `deflt` when no characters could be converted.
uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;  // nothing was consumed
  }
  return parsed;
}
405
406
0
// Parses a leading signed 64-bit integer from `str` via strto64 in base 0.
// Returns `deflt` when no characters could be converted.
int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
  char *parse_end = nullptr;
  const int64 parsed = strto64(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;  // nothing was consumed
  }
  return parsed;
}
411
412
0
// Parses a leading hexadecimal (base 16) unsigned 64-bit integer from `str`
// via strtou64.  Returns `deflt` when no characters could be converted.
uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 16);
  if (parse_end == str) {
    return deflt;  // nothing was consumed
  }
  return parsed;
}
417
418
// ----------------------------------------------------------------------
419
// ParseLeadingDec64Value
420
// ParseLeadingUDec64Value
421
//    A simple parser for [u]int64 values. Returns the parsed value
422
//    if a valid value is found; else returns deflt
423
//    The string passed in is treated as *10 based*.
424
//    This can handle strings with leading 0s.
425
// --------------------------------------------------------------------
426
427
0
// Parses a leading base-10 signed 64-bit integer from `str` via strto64.
// Returns `deflt` when no characters could be converted.
int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
  char *parse_end = nullptr;
  const int64 parsed = strto64(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;  // nothing was consumed
  }
  return parsed;
}
432
433
0
// Parses a leading base-10 unsigned 64-bit integer from `str` via strtou64.
// Returns `deflt` when no characters could be converted.
uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;  // nothing was consumed
  }
  return parsed;
}
438
439
// ----------------------------------------------------------------------
440
// ParseLeadingDoubleValue()
441
//    A simple parser for double values. Returns the parsed value
442
//    if a valid value is found; else returns deflt
443
// --------------------------------------------------------------------
444
445
0
// Parses a leading double from `str` with strtod.  Returns `deflt` when no
// characters could be converted or when strtod reported an error through
// errno (overflow/underflow); otherwise returns the parsed value.
double ParseLeadingDoubleValue(const char *str, double deflt) {
  char *parse_end = nullptr;
  errno = 0;  // so a stale error is not mistaken for one raised by strtod
  const double parsed = strtod(str, &parse_end);

  const bool range_error = (errno != 0);
  const bool nothing_parsed = (parse_end == str);
  return (range_error || nothing_parsed) ? deflt : parsed;
}
456
457
// ----------------------------------------------------------------------
458
// ParseLeadingBoolValue()
459
//    A recognizer of boolean string values. Returns the parsed value
460
//    if a valid value is found; else returns deflt.  This skips leading
461
//    whitespace, is case insensitive, and recognizes these forms:
462
//    0/1, false/true, no/yes, n/y
463
// --------------------------------------------------------------------
464
46.1k
bool ParseLeadingBoolValue(const char *str, bool deflt) {
465
46.1k
  static const int kMaxLen = 5;
466
46.1k
  char value[kMaxLen + 1];
467
  // Skip whitespace
468
46.1k
  while (ascii_isspace(*str)) {
469
0
    ++str;
470
0
  }
471
46.1k
  int len = 0;
472
277k
  for (; len <= kMaxLen && ascii_isalnum(*str); ++str)
473
230k
    value[len++] = ascii_tolower(*str);
474
46.1k
  if (len == 0 || len > kMaxLen)
475
0
    return deflt;
476
46.1k
  value[len] = '\0';
477
46.1k
  switch (len) {
478
0
    case 1:
479
0
      if (value[0] == '0' || value[0] == 'n')
480
0
        return false;
481
0
      if (value[0] == '1' || value[0] == 'y')
482
0
        return true;
483
0
      break;
484
0
    case 2:
485
0
      if (!strcmp(value, "no"))
486
0
        return false;
487
0
      break;
488
0
    case 3:
489
0
      if (!strcmp(value, "yes"))
490
0
        return true;
491
0
      break;
492
0
    case 4:
493
0
      if (!strcmp(value, "true"))
494
0
        return true;
495
0
      break;
496
46.1k
    case 5:
497
46.1k
      if (!strcmp(value, "false"))
498
46.1k
        return false;
499
0
      break;
500
0
  }
501
0
  return deflt;
502
0
}
503
504
505
// ----------------------------------------------------------------------
506
// FpToString()
507
// FloatToString()
508
// IntToString()
509
//    Convert various types to their string representation, possibly padded
510
//    with spaces, using snprintf format specifiers.
511
// ----------------------------------------------------------------------
512
513
0
// Formats `fp` as exactly 16 lower-case hexadecimal digits, zero-padded on the
// left.
string FpToString(Fprint fp) {
  char hex[16 + 1];  // 16 digits plus the terminating '\0'
  snprintf(hex, sizeof(hex), "%016" PRIx64, fp);
  return string(hex);
}
518
519
// Default arguments
520
0
// Formats `ui128` as 32 lower-case hexadecimal digits: the high 64 bits first,
// then the low 64 bits, each zero-padded to 16 digits.
string Uint128ToHexString(uint128 ui128) {
  char hex[32 + 1];  // 32 digits plus the terminating '\0'
  snprintf(hex, sizeof(hex), "%016" PRIx64 "%016" PRIx64,
           Uint128High64(ui128), Uint128Low64(ui128));
  return string(hex);
}
528
529
9.36M
// Formats `ui16` as exactly 4 upper-case hexadecimal digits, zero-padded on
// the left.
string Uint16ToHexString(uint16_t ui16) {
  char hex[4 + 1];  // 4 digits plus the terminating '\0'
  snprintf(hex, sizeof(hex), "%04X", ui16);
  return string(hex);
}
534
535
namespace {
536
537
// Represents integer values of digits.
538
// Uses 36 to indicate an invalid character since we support
539
// bases up to 36.
540
static const int8 kAsciiToInt[256] = {
541
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
542
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
543
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
544
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
545
  36, 36, 36, 36, 36, 36, 36,
546
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
547
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
548
  36, 36, 36, 36, 36, 36,
549
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
550
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
551
  36, 36, 36, 36, 36,
552
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
553
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
554
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
555
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
556
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
557
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
558
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
559
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36 };
560
561
// Input format based on POSIX.1-2008 strtol
562
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
563
template<typename IntType>
564
bool safe_int_internal(const char* start, const char* end, int base,
565
0
                       IntType* value_p) {
566
  // Consume whitespace.
567
0
  while (start < end && ascii_isspace(start[0])) {
568
0
    ++start;
569
0
  }
570
0
  while (start < end && ascii_isspace(end[-1])) {
571
0
    --end;
572
0
  }
573
0
  if (start >= end) {
574
0
    return false;
575
0
  }
576
577
  // Consume sign.
578
0
  const bool negative = (start[0] == '-');
579
0
  if (negative || start[0] == '+') {
580
0
    ++start;
581
0
    if (start >= end) {
582
0
      return false;
583
0
    }
584
0
  }
585
586
  // Consume base-dependent prefix.
587
  //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
588
  //  base 16: "0x" -> base 16
589
  // Also validate the base.
590
0
  if (base == 0) {
591
0
    if (end - start >= 2 && start[0] == '0' &&
592
0
        (start[1] == 'x' || start[1] == 'X')) {
593
0
      base = 16;
594
0
      start += 2;
595
0
    } else if (end - start >= 1 && start[0] == '0') {
596
0
      base = 8;
597
0
      start += 1;
598
0
    } else {
599
0
      base = 10;
600
0
    }
601
0
  } else if (base == 16) {
602
0
    if (end - start >= 2 && start[0] == '0' &&
603
0
        (start[1] == 'x' || start[1] == 'X')) {
604
0
      start += 2;
605
0
    }
606
0
  } else if (base >= 2 && base <= 36) {
607
    // okay
608
0
  } else {
609
0
    return false;
610
0
  }
611
612
  // Consume digits.
613
  //
614
  // The classic loop:
615
  //
616
  //   for each digit
617
  //     value = value * base + digit
618
  //   value *= sign
619
  //
620
  // The classic loop needs overflow checking.  It also fails on the most
621
  // negative integer, -2147483648 in 32-bit two's complement representation.
622
  //
623
  // My improved loop:
624
  //
625
  //  if (!negative)
626
  //    for each digit
627
  //      value = value * base
628
  //      value = value + digit
629
  //  else
630
  //    for each digit
631
  //      value = value * base
632
  //      value = value - digit
633
  //
634
  // Overflow checking becomes simple.
635
  //
636
  // I present the positive code first for easier reading.
637
0
  IntType value = 0;
638
0
  if (!negative) {
639
0
    const IntType vmax = std::numeric_limits<IntType>::max();
640
0
    assert(vmax > 0);
641
0
    assert(vmax >= base);
642
0
    const IntType vmax_over_base = vmax / base;
643
    // loop over digits
644
    // loop body is interleaved for perf, not readability
645
0
    for (; start < end; ++start) {
646
0
      unsigned char c = static_cast<unsigned char>(start[0]);
647
0
      int digit = kAsciiToInt[c];
648
0
      if (value > vmax_over_base) return false;
649
0
      value *= base;
650
0
      if (digit >= base) return false;
651
0
      if (value > vmax - digit) return false;
652
0
      value += digit;
653
0
    }
654
0
  } else {
655
0
    const IntType vmin = std::numeric_limits<IntType>::min();
656
0
    assert(vmin < 0);
657
0
    assert(vmin <= 0 - base);
658
0
    IntType vmin_over_base = vmin / base;
659
    // 2003 c++ standard [expr.mul]
660
    // "... the sign of the remainder is implementation-defined."
661
    // Although (vmin/base)*base + vmin%base is always vmin.
662
    // 2011 c++ standard tightens the spec but we cannot rely on it.
663
0
    if (vmin % base > 0) {
664
0
      vmin_over_base += 1;
665
0
    }
666
    // loop over digits
667
    // loop body is interleaved for perf, not readability
668
0
    for (; start < end; ++start) {
669
0
      unsigned char c = static_cast<unsigned char>(start[0]);
670
0
      int digit = kAsciiToInt[c];
671
0
      if (value < vmin_over_base) return false;
672
0
      value *= base;
673
0
      if (digit >= base) return false;
674
0
      if (value < vmin + digit) return false;
675
0
      value -= digit;
676
0
    }
677
0
  }
678
679
  // Store output.
680
0
  *value_p = value;
681
0
  return true;
682
0
}
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIxEEbPKcS2_iPT_
683
684
}  // anonymous namespace
685
686
bool safe_strto32_base(const char* startptr, const int buffer_size,
687
0
                       int32* v, int base) {
688
0
  return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
689
0
}
690
691
bool safe_strto64_base(const char* startptr, const int buffer_size,
692
0
                       int64* v, int base) {
693
0
  return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
694
0
}
695
696
0
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
697
0
  return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
698
0
}
699
700
0
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
701
0
  return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
702
0
}
703
704
559k
bool safe_strto32_base(const char* str, int32* value, int base) {
705
559k
  char* endptr;
706
559k
  errno = 0;  // errno only gets set on errors
707
559k
  *value = strto32(str, &endptr, base);
708
559k
  if (endptr != str) {
709
559k
    while (ascii_isspace(*endptr)) ++endptr;
710
559k
  }
711
559k
  return *str != '\0' && *endptr == '\0' && errno == 0;
712
559k
}
713
714
9
bool safe_strto64_base(const char* str, int64* value, int base) {
715
9
  char* endptr;
716
9
  errno = 0;  // errno only gets set on errors
717
9
  *value = strto64(str, &endptr, base);
718
9
  if (endptr != str) {
719
9
    while (ascii_isspace(*endptr)) ++endptr;
720
9
  }
721
9
  return *str != '\0' && *endptr == '\0' && errno == 0;
722
9
}
723
724
235k
bool safe_strtou32_base(const char* str, uint32* value, int base) {
725
  // strtoul does not give any errors on negative numbers, so we have to
726
  // search the string for '-' manually.
727
235k
  while (ascii_isspace(*str)) ++str;
728
235k
  if (*str == '-') return false;
729
730
235k
  char* endptr;
731
235k
  errno = 0;  // errno only gets set on errors
732
235k
  *value = strtou32(str, &endptr, base);
733
235k
  if (endptr != str) {
734
235k
    while (ascii_isspace(*endptr)) ++endptr;
735
235k
  }
736
235k
  return *str != '\0' && *endptr == '\0' && errno == 0;
737
235k
}
738
739
2
bool safe_strtou64_base(const char* str, uint64* value, int base) {
740
  // strtou64 does not give any errors on negative numbers, so we have to
741
  // search the string for '-' manually.
742
2
  while (ascii_isspace(*str)) ++str;
743
2
  if (*str == '-') return false;
744
745
2
  char* endptr;
746
2
  errno = 0;  // errno only gets set on errors
747
2
  *value = strtou64(str, &endptr, base);
748
2
  if (endptr != str) {
749
2
    while (ascii_isspace(*endptr)) ++endptr;
750
2
  }
751
2
  return *str != '\0' && *endptr == '\0' && errno == 0;
752
2
}
753
754
// ----------------------------------------------------------------------
755
// u64tostr_base36()
756
//    Converts unsigned number to string representation in base-36.
757
// --------------------------------------------------------------------
758
0
// Writes `number` in base 36 (digits 0-9 then a-z) into `buffer` as a
// '\0'-terminated string and returns the number of digits written, not
// counting the terminator.  `buffer` must be non-null and `buf_size` must be
// greater than zero (both CHECKed).  Returns 0 when the digits plus the
// terminator do not fit in `buf_size` bytes; the buffer contents are then
// partially overwritten.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
  CHECK_GT(buf_size, 0);
  CHECK(buffer);
  static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

  // Digits come out least-significant first, so build the string backwards
  // from the end of the buffer and slide it to the front afterwards.
  char* const nul_pos = buffer + buf_size - 1;
  *nul_pos = '\0';
  char* head = nul_pos;  // most recently written character

  do {
    if (head == buffer) {  // Ran out of space.
      return 0;
    }
    *--head = kAlphabet[number % 36];
    number /= 36;
  } while (number);

  const size_t digits = nul_pos - head;
  memmove(buffer, head, digits + 1);  // +1 moves the terminator as well
  return digits;
}
780
781
// Generate functions that wrap safe_strtoXXX_base.
782
#define GEN_SAFE_STRTO(name, type)                           \
783
0
bool name##_base(const string& str, type* value, int base) { \
784
0
  return name##_base(str.c_str(), value, base);              \
785
0
}                                                            \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPxi
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPyi
786
794k
bool name(const char* str, type* value) {                    \
787
794k
  return name##_base(str, value, 10);                        \
788
794k
}                                                            \
_Z12safe_strto32PKcPi
Line
Count
Source
786
559k
bool name(const char* str, type* value) {                    \
787
559k
  return name##_base(str, value, 10);                        \
788
559k
}                                                            \
_Z13safe_strtou32PKcPj
Line
Count
Source
786
235k
bool name(const char* str, type* value) {                    \
787
235k
  return name##_base(str, value, 10);                        \
788
235k
}                                                            \
Unexecuted instantiation: _Z12safe_strto64PKcPx
Unexecuted instantiation: _Z13safe_strtou64PKcPy
789
9
bool name(const string& str, type* value) {                  \
790
9
  return name##_base(str.c_str(), value, 10);                \
791
9
}
Unexecuted instantiation: _Z12safe_strto32RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPj
_Z12safe_strto64RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPx
Line
Count
Source
789
9
bool name(const string& str, type* value) {                  \
790
9
  return name##_base(str.c_str(), value, 10);                \
791
9
}
Unexecuted instantiation: _Z13safe_strtou64RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPy
792
GEN_SAFE_STRTO(safe_strto32, int32);
793
GEN_SAFE_STRTO(safe_strtou32, uint32);
794
GEN_SAFE_STRTO(safe_strto64, int64);
795
GEN_SAFE_STRTO(safe_strtou64, uint64);
796
#undef GEN_SAFE_STRTO
797
798
5
bool safe_strtof(const char* str, float* value) {
799
5
  char* endptr;
800
#ifdef _MSC_VER  // has no strtof()
801
  *value = strtod(str, &endptr);
802
#else
803
5
  *value = strtof(str, &endptr);
804
5
#endif
805
5
  if (endptr != str) {
806
5
    while (ascii_isspace(*endptr)) ++endptr;
807
5
  }
808
  // Ignore range errors from strtod/strtof.
809
  // The values it returns on underflow and
810
  // overflow are the right fallback in a
811
  // robust setting.
812
5
  return *str != '\0' && *endptr == '\0';
813
5
}
814
815
0
bool safe_strtod(const char* str, double* value) {
816
0
  char* endptr;
817
0
  *value = strtod(str, &endptr);
818
0
  if (endptr != str) {
819
0
    while (ascii_isspace(*endptr)) ++endptr;
820
0
  }
821
  // Ignore range errors from strtod.  The values it
822
  // returns on underflow and overflow are the right
823
  // fallback in a robust setting.
824
0
  return *str != '\0' && *endptr == '\0';
825
0
}
826
827
0
bool safe_strtof(const string& str, float* value) {
828
0
  return safe_strtof(str.c_str(), value);
829
0
}
830
831
0
bool safe_strtod(const string& str, double* value) {
832
0
  return safe_strtod(str.c_str(), value);
833
0
}
834
835
0
// Parses a base-10 unsigned integer optionally followed by a single
// case-insensitive binary-unit mnemonic and returns the scaled value:
//   K = 2^10, M = 2^20, G = 2^30, T = 2^40.
// Only the first character after the number is inspected.  Any other
// non-NUL character there is reported through LOG(FATAL).
// The multiplication is not checked for overflow.
uint64 atoi_kmgt(const char* s) {
  char* suffix;
  const uint64 magnitude = strtou64(s, &suffix, 10);
  if (*suffix == '\0') {
    // No mnemonic: the value is taken as-is.
    return magnitude;
  }

  const char c = ascii_toupper(*suffix);
  int shift = 0;
  if (c == 'K') {
    shift = 10;
  } else if (c == 'M') {
    shift = 20;
  } else if (c == 'G') {
    shift = 30;
  } else if (c == 'T') {
    shift = 40;
  } else {
    LOG(FATAL) << "Invalid mnemonic: `" << c << "';"
               << " should be one of `K', `M', `G', and `T'.";
  }
  return magnitude * (GG_ULONGLONG(1) << shift);
}
862
863
// ----------------------------------------------------------------------
864
// FastIntToBuffer()
865
// FastInt64ToBuffer()
866
// FastHexToBuffer()
867
// FastHex64ToBuffer()
868
// FastHex32ToBuffer()
869
// FastTimeToBuffer()
870
//    These are intended for speed.  FastHexToBuffer() assumes the
871
//    integer is non-negative.  FastHexToBuffer() puts output in
872
//    hex rather than decimal.  FastTimeToBuffer() puts the output
873
//    into RFC822 format.  If time is 0, uses the current time.
874
//
875
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
876
//    padded to exactly 16 bytes (plus one byte for '\0')
877
//
878
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
879
//    padded to exactly 8 bytes (plus one byte for '\0')
880
//
881
//       All functions take the output buffer as an arg.  FastInt()
882
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
883
//    They all return a pointer to the beginning of the output,
884
//    which may not be the beginning of the input buffer.  (Though
885
//    for FastTimeToBuffer(), we guarantee that it is.)
886
// ----------------------------------------------------------------------
887
888
0
// Writes the decimal text of `i` at the start of `buffer` (NUL-terminated) and
// returns `buffer`.  The caller must supply a buffer large enough for the
// output.
char *FastInt64ToBuffer(int64 i, char* buffer) {
  // The left-aligned writer returns a pointer to the terminating NUL; this
  // wrapper hands back the start of the text instead.
  static_cast<void>(FastInt64ToBufferLeft(i, buffer));
  return buffer;
}
892
893
0
// Writes the decimal text of `i` at the start of `buffer` (NUL-terminated) and
// returns `buffer`.  The caller must supply a buffer large enough for the
// output.
char *FastInt32ToBuffer(int32 i, char* buffer) {
  // The left-aligned writer returns a pointer to the terminating NUL; this
  // wrapper hands back the start of the text instead.
  static_cast<void>(FastInt32ToBufferLeft(i, buffer));
  return buffer;
}
897
898
0
char *FastHexToBuffer(int i, char* buffer) {
899
0
  CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
900
901
0
  static const char *hexdigits = "0123456789abcdef";
902
0
  char *p = buffer + 21;
903
0
  *p-- = '\0';
904
0
  do {
905
0
    *p-- = hexdigits[i & 15];   // mod by 16
906
0
    i >>= 4;                    // divide by 16
907
0
  } while (i > 0);
908
0
  return p + 1;
909
0
}
910
911
5.81M
// Writes exactly `num_byte` lowercase hex characters for `value` into
// `buffer`, zero-padded on the left, followed by a NUL at buffer[num_byte].
// Only the low 4 * num_byte bits of `value` are represented.
// Returns `buffer`.
char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
  static const char *hexdigits = "0123456789abcdef";
  buffer[num_byte] = '\0';
  // Fill from the last character towards the first, one nibble at a time.
  int pos = num_byte;
  while (pos > 0) {
    --pos;
    buffer[pos] = hexdigits[value & 0xf];
    value >>= 4;
  }
  return buffer;
}
920
921
5.81M
// Writes `value` as exactly 16 zero-padded lowercase hex characters plus a
// terminating NUL (17 bytes in total) into `buffer` and returns `buffer`.
char *FastHex64ToBuffer(uint64 value, char* buffer) {
  const int kHexChars = 16;  // two hex characters per byte of a 64-bit value
  return InternalFastHexToBuffer(value, buffer, kHexChars);
}
924
925
8
// Returns `value` as a 16-character, zero-padded, lowercase hex string.
std::string FastHex64ToString(uint64 value) {
  const int kHexChars = 16;  // two hex characters per byte of a 64-bit value
  std::string hex(kHexChars, '\0');
  // The helper also stores '\0' at index 16, i.e. on top of the string's own
  // terminator, which leaves the string unchanged.
  InternalFastHexToBuffer(value, &hex[0], kHexChars);
  return hex;
}
931
932
1
// Writes `value` as exactly 8 zero-padded lowercase hex characters plus a
// terminating NUL (9 bytes in total) into `buffer` and returns `buffer`.
char *FastHex32ToBuffer(uint32 value, char* buffer) {
  const int kHexChars = 8;  // two hex characters per byte of a 32-bit value
  return InternalFastHexToBuffer(value, buffer, kHexChars);
}
935
936
// TODO(user): revisit the two_ASCII_digits optimization.
937
//
938
// Several converters use this table to reduce
939
// division and modulo operations.
940
extern const char two_ASCII_digits[100][2];  // from strutil.cc
941
942
// ----------------------------------------------------------------------
943
// FastInt32ToBufferLeft()
944
// FastUInt32ToBufferLeft()
945
// FastInt64ToBufferLeft()
946
// FastUInt64ToBufferLeft()
947
//
948
// Like the Fast*ToBuffer() functions above, these are intended for speed.
949
// Unlike the Fast*ToBuffer() functions, however, these functions write
950
// their output to the beginning of the buffer (hence the name, as the
951
// output is left-aligned).  The caller is responsible for ensuring that
952
// the buffer has enough space to hold the output.
953
//
954
// Returns a pointer to the end of the string (i.e. the null character
955
// terminating the string).
956
// ----------------------------------------------------------------------
957
958
226M
// Writes the decimal text of `u` at the start of `buffer`, without leading
// zeros, NUL-terminates it and returns a pointer to that NUL.
//
// The idea of this implementation is to trim the number of divides to as few
// as possible by using multiplication and subtraction rather than mod (%),
// and by outputting two digits at a time (via the two_ASCII_digits table)
// rather than one.
//
// Control flow: the ten-digit case is written out as one straight-line chain
// that emits the remaining digits pair by pair.  Shorter numbers first emit
// their leading digit (if the digit count is odd) and then jump into that
// chain at the label matching the number of digits still to be written.
// The huge-number case is first, in the hopes that the compiler will output
// that case in one branch-free block of code, and only output conditional
// branches into it from below.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
  // Value of the one- or two-digit group currently being emitted.
  uint32 group;
  if (u >= 1000000000) {  // >= 1,000,000,000
    group = u / 100000000;  // 100,000,000
    memcpy(buffer, two_ASCII_digits[group], 2);
    buffer += 2;
 strip_above_8_digits:
    u -= group * 100000000;  // 100,000,000
 emit_8_digits:
    group = u / 1000000;  // 1,000,000
    memcpy(buffer, two_ASCII_digits[group], 2);
    buffer += 2;
 strip_above_6_digits:
    u -= group * 1000000;  // 1,000,000
 emit_6_digits:
    group = u / 10000;  // 10,000
    memcpy(buffer, two_ASCII_digits[group], 2);
    buffer += 2;
 strip_above_4_digits:
    u -= group * 10000;  // 10,000
 emit_4_digits:
    group = u / 100;
    memcpy(buffer, two_ASCII_digits[group], 2);
    buffer += 2;
 strip_above_2_digits:
    u -= group * 100;
 emit_2_digits:
    group = u;
    memcpy(buffer, two_ASCII_digits[group], 2);
    buffer += 2;
 terminate:
    *buffer = 0;
    return buffer;
  }

  if (u < 100) {
    group = u;
    if (u >= 10) goto emit_2_digits;
    *buffer++ = '0' + group;
    goto terminate;
  }
  if (u < 10000) {   // 10,000
    if (u >= 1000) goto emit_4_digits;
    group = u / 100;
    *buffer++ = '0' + group;
    goto strip_above_2_digits;
  }
  if (u < 1000000) {   // 1,000,000
    if (u >= 100000) goto emit_6_digits;
    group = u / 10000;  //    10,000
    *buffer++ = '0' + group;
    goto strip_above_4_digits;
  }
  if (u < 100000000) {   // 100,000,000
    if (u >= 10000000) goto emit_8_digits;
    group = u / 1000000;  //   1,000,000
    *buffer++ = '0' + group;
    goto strip_above_6_digits;
  }
  // we already know that u < 1,000,000,000
  group = u / 100000000;   // 100,000,000
  *buffer++ = '0' + group;
  goto strip_above_8_digits;
}
1039
1040
128M
// Writes the decimal text of `i` (with a leading '-' when negative) at the
// start of `buffer`, NUL-terminates it and returns a pointer to that NUL.
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
  const uint32 bits = i;
  if (i >= 0) {
    return FastUInt32ToBufferLeft(bits, buffer);
  }
  *buffer = '-';
  // We need to do the negation in modular (i.e., "unsigned") arithmetic so
  // that the most negative value is handled too; MSVC++ apparently warns for
  // plain "-bits", so we write the equivalent expression "0 - bits" instead.
  return FastUInt32ToBufferLeft(0 - bits, buffer + 1);
}
1051
1052
90.6M
// Writes the decimal text of `u64` at the start of `buffer`, without leading
// zeros, NUL-terminates it and returns a pointer to that NUL.
//
// Values that fit in 32 bits are delegated to FastUInt32ToBufferLeft().
// Otherwise everything above the lowest nine decimal digits is written first
// (recursively), followed by exactly nine zero-padded digits.
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
  const uint32 low_bits = static_cast<uint32>(u64);
  if (low_bits == u64) return FastUInt32ToBufferLeft(low_bits, buffer);

  const uint64 top_11_digits = u64 / 1000000000;
  buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
  // Lowest nine decimal digits, i.e. u64 % 1,000,000,000.
  uint32 rest = narrow_cast<uint32>(u64 - (top_11_digits * 1000000000));

  DCHECK_LT(rest / 10000000, 100);

  // Emits the two digits of `rest` that sit just above `divisor` and removes
  // them from `rest`.
  const auto emit_pair = [&buffer, &rest](uint32 divisor) {
    const uint32 pair = rest / divisor;
    memcpy(buffer, two_ASCII_digits[pair], 2);
    buffer += 2;
    rest -= pair * divisor;
  };
  emit_pair(10000000);  // 10,000,000
  emit_pair(100000);    // 100,000
  emit_pair(1000);      // 1,000
  emit_pair(10);

  // The ninth and final digit.
  *buffer++ = '0' + rest;
  *buffer = 0;
  return buffer;
}
1093
1094
22.0M
// Writes the decimal text of `i` (with a leading '-' when negative) at the
// start of `buffer`, NUL-terminates it and returns a pointer to that NUL.
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
  const uint64 bits = i;
  if (i >= 0) {
    return FastUInt64ToBufferLeft(bits, buffer);
  }
  *buffer = '-';
  // Negate in unsigned (modular) arithmetic so the most negative value is
  // handled without signed overflow.
  return FastUInt64ToBufferLeft(0 - bits, buffer + 1);
}
1102
1103
0
int HexDigitsPrefix(const char* buf, int num_digits) {
1104
0
  for (int i = 0; i < num_digits; i++)
1105
0
    if (!ascii_isxdigit(buf[i]))
1106
0
      return 0;  // This also detects end of string as '\0' is not xdigit.
1107
0
  return 1;
1108
0
}
1109
1110
// ----------------------------------------------------------------------
1111
// AutoDigitStrCmp
1112
// AutoDigitLessThan
1113
// StrictAutoDigitLessThan
1114
// autodigit_less
1115
// autodigit_greater
1116
// strict_autodigit_less
1117
// strict_autodigit_greater
1118
//    These are like less<string> and greater<string>, except when a
1119
//    run of digits is encountered at corresponding points in the two
1120
//    arguments.  Such digit strings are compared numerically instead
1121
//    of lexicographically.  Therefore if you sort by
1122
//    "autodigit_less", some machine names might get sorted as:
1123
//        exaf1
1124
//        exaf2
1125
//        exaf10
1126
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
1127
//    set to true, or the strict version of the other functions),
1128
//    strings that represent equal numbers will not be considered equal if
1129
//    the string representations are not identical.  That is, "01" < "1" in
1130
//    strict mode, but "01" == "1" otherwise.
1131
// ----------------------------------------------------------------------
1132
1133
// Three-way comparison of the byte ranges [a, a+alen) and [b, b+blen) in which
// runs of decimal digits that start at corresponding positions are compared
// by numeric value rather than character by character.
//
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if they are
// equal.  When `strict` is true, digit runs of equal numeric value are
// further ordered by their number of leading zeroes (more zeroes sorts
// first), so "01" < "1"; when false, "01" == "1".
int AutoDigitStrCmp(const char* a, size_t alen,
                    const char* b, size_t blen,
                    bool strict) {
  // isdigit() has undefined behavior when handed a negative value other than
  // EOF, which is what a plain (signed) char holding a byte >= 0x80 converts
  // to.  Route every byte through unsigned char first.
  const auto is_digit = [](char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  };

  size_t aindex = 0;
  size_t bindex = 0;
  while ((aindex < alen) && (bindex < blen)) {
    if (is_digit(a[aindex]) && is_digit(b[bindex])) {
      // Compare runs of digits.  Instead of extracting numbers, we
      // just skip leading zeroes, and then get the run-lengths.  This
      // allows us to handle arbitrary precision numbers.  We remember
      // how many zeroes we found so that we can differentiate between
      // "1" and "01" in strict mode.

      // Skip leading zeroes, but remember how many we found
      size_t azeroes = aindex;
      size_t bzeroes = bindex;
      while ((aindex < alen) && (a[aindex] == '0')) aindex++;
      while ((bindex < blen) && (b[bindex] == '0')) bindex++;
      azeroes = aindex - azeroes;
      bzeroes = bindex - bzeroes;

      // Count digit lengths
      const size_t astart = aindex;
      const size_t bstart = bindex;
      while ((aindex < alen) && is_digit(a[aindex])) aindex++;
      while ((bindex < blen) && is_digit(b[bindex])) bindex++;
      const size_t adigits = aindex - astart;
      const size_t bdigits = bindex - bstart;
      if (adigits < bdigits) {
        // a has shorter run of digits: so smaller
        return -1;
      } else if (adigits > bdigits) {
        // a has longer run of digits: so larger
        return 1;
      } else {
        // Same lengths, so compare digit by digit
        for (size_t i = 0; i < adigits; i++) {
          if (a[astart+i] < b[bstart+i]) {
            return -1;
          } else if (a[astart+i] > b[bstart+i]) {
            return 1;
          }
        }
        // Equal: did one have more leading zeroes?
        if (strict && azeroes != bzeroes) {
          if (azeroes > bzeroes) {
            // a has more leading zeroes: a < b
            return -1;
          } else {
            // b has more leading zeroes: a > b
            return 1;
          }
        }
        // Equal: so continue scanning
      }
    } else if (a[aindex] < b[bindex]) {
      return -1;
    } else if (a[aindex] > b[bindex]) {
      return 1;
    } else {
      aindex++;
      bindex++;
    }
  }

  if (aindex < alen) {
    // b is prefix of a
    return 1;
  } else if (bindex < blen) {
    // a is prefix of b
    return -1;
  } else {
    // a is equal to b
    return 0;
  }
}
1207
1208
0
bool AutoDigitLessThan(const char* a, size_t alen, const char* b, size_t blen) {
1209
0
  return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
1210
0
}
1211
1212
bool StrictAutoDigitLessThan(const char* a, size_t alen,
1213
0
                             const char* b, size_t blen) {
1214
0
  return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
1215
0
}
1216
1217
// ----------------------------------------------------------------------
1218
// SimpleDtoa()
1219
// SimpleFtoa()
1220
// DoubleToBuffer()
1221
// FloatToBuffer()
1222
//    We want to print the value without losing precision, but we also do
1223
//    not want to print more digits than necessary.  This turns out to be
1224
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
1225
//    exactly in binary.  If we print 0.2 with a very large precision,
1226
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1227
//    On the other hand, if we set the precision too low, we lose
1228
//    significant digits when printing numbers that actually need them.
1229
//    It turns out there is no precision value that does the right thing
1230
//    for all numbers.
1231
//
1232
//    Our strategy is to first try printing with a precision that is never
1233
//    over-precise, then parse the result with strtod() to see if it
1234
//    matches.  If not, we print again with a precision that will always
1235
//    give a precise result, but may use more digits than necessary.
1236
//
1237
//    An arguably better strategy would be to use the algorithm described
1238
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
1239
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
1240
//    however, that the following implementation is about as fast as
1241
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
1242
//    will not scale well on multi-core machines.  DMG's code is slightly
1243
//    more accurate (in that it will never use more digits than
1244
//    necessary), but this is probably irrelevant for most users.
1245
//
1246
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
1247
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
1248
//    one in that it makes guesses and then uses strtod() to check them.
1249
//    Their implementation is faster because they use their own code to
1250
//    generate the digits in the first place rather than use snprintf(),
1251
//    thus avoiding format string parsing overhead.  However, this makes
1252
//    it considerably more complicated than the following implementation,
1253
//    and it is embedded in a larger library.  If speed turns out to be
1254
//    an issue, we could re-implement this in terms of their
1255
//    implementation.
1256
// ----------------------------------------------------------------------
1257
1258
31
// Returns the text DoubleToBuffer() produces for `value`, as a string.
string SimpleDtoa(double value) {
  char scratch[kDoubleToBufferSize];
  DoubleToBuffer(value, scratch);  // fills and returns `scratch`
  return string(scratch);
}
1262
1263
5
// Returns the text FloatToBuffer() produces for `value`, as a string.
string SimpleFtoa(float value) {
  char scratch[kFloatToBufferSize];
  FloatToBuffer(value, scratch);  // fills and returns `scratch`
  return string(scratch);
}
1267
1268
39
char* DoubleToBuffer(double value, char* buffer) {
1269
  // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1270
  // platforms these days.  Just in case some system exists where DBL_DIG
1271
  // is significantly larger -- and risks overflowing our buffer -- we have
1272
  // this assert.
1273
39
  COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1274
1275
39
  if (value == std::numeric_limits<double>::infinity()) {
1276
6
    strncpy(buffer, "inf", kDoubleToBufferSize);
1277
6
    return buffer;
1278
33
  } else if (value == -std::numeric_limits<double>::infinity()) {
1279
4
    strncpy(buffer, "-inf", kDoubleToBufferSize);
1280
4
    return buffer;
1281
29
  } else if (isnan(value)) {
1282
10
    strncpy(buffer, "nan", kDoubleToBufferSize);
1283
10
    return buffer;
1284
10
  }
1285
1286
19
  int snprintf_result =
1287
19
    snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1288
1289
  // The snprintf should never overflow because the buffer is significantly
1290
  // larger than the precision we asked for.
1291
19
  DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1292
1293
  // We need to make parsed_value volatile in order to force the compiler to
1294
  // write it out to the stack.  Otherwise, it may keep the value in a
1295
  // register, and if it does that, it may keep it as a long double instead
1296
  // of a double.  This long double may have extra bits that make it compare
1297
  // unequal to "value" even though it would be exactly equal if it were
1298
  // truncated to a double.
1299
19
  volatile double parsed_value = strtod(buffer, NULL);
1300
19
  if (parsed_value != value) {
1301
6
    int snprintf_result =
1302
6
      snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
1303
1304
    // Should never overflow; see above.
1305
6
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1306
6
  }
1307
1308
19
  return buffer;
1309
19
}
1310
1311
5
char* FloatToBuffer(float value, char* buffer) {
1312
  // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1313
  // platforms these days.  Just in case some system exists where FLT_DIG
1314
  // is significantly larger -- and risks overflowing our buffer -- we have
1315
  // this assert.
1316
5
  COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1317
1318
5
  if (value == std::numeric_limits<double>::infinity()) {
1319
0
    strncpy(buffer, "inf", kFloatToBufferSize);
1320
0
    return buffer;
1321
5
  } else if (value == -std::numeric_limits<double>::infinity()) {
1322
0
    strncpy(buffer, "-inf", kFloatToBufferSize);
1323
0
    return buffer;
1324
5
  } else if (isnan(value)) {
1325
0
    strncpy(buffer, "nan", kFloatToBufferSize);
1326
0
    return buffer;
1327
0
  }
1328
1329
5
  int snprintf_result =
1330
5
    snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1331
1332
  // The snprintf should never overflow because the buffer is significantly
1333
  // larger than the precision we asked for.
1334
5
  DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1335
1336
5
  float parsed_value;
1337
5
  if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1338
5
    int snprintf_result =
1339
5
      snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+3, value);
1340
1341
    // Should never overflow; see above.
1342
5
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1343
5
  }
1344
1345
5
  return buffer;
1346
5
}
1347
1348
// ----------------------------------------------------------------------
1349
// SimpleItoaWithCommas()
1350
//    Description: converts an integer to a string.
1351
//    Puts commas every 3 spaces.
1352
//    Faster than printf("%d")?
1353
//
1354
//    Return value: string
1355
// ----------------------------------------------------------------------
1356
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right) and a leading '-' for negative values,
// e.g. -2147483648 -> "-2,147,483,648".
string SimpleItoaWithCommas(int32 i) {
  // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
  // Longest is -2,147,483,648.
  char local[14];
  char* const local_end = local + sizeof(local);
  char* out = local_end;

  // Need to use uint32 instead of int32 to correctly handle
  // -2,147,483,648: negate the unsigned value to avoid overflow.
  uint32 magnitude = i;
  if (i < 0) {
    magnitude = 0 - magnitude;
  }

  // Digits are produced right to left; a comma goes in front of every
  // completed group of three when more digits follow.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--out = ',';
      digits_in_group = 0;
    }
    *--out = '0' + magnitude % 10;
    magnitude /= 10;
    ++digits_in_group;
  } while (magnitude != 0);

  if (i < 0) {
    *--out = '-';
  }
  return string(out, local_end);
}
1386
1387
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
1388
// compile.
1389
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right), e.g. 4294967295 -> "4,294,967,295".
string SimpleItoaWithCommas(uint32 i) {
  // 10 digits and 3 commas are good for 32-bit or smaller ints.
  // Longest is 4,294,967,295.
  char local[13];
  char* const local_end = local + sizeof(local);
  char* out = local_end;

  // Digits are produced right to left; a comma goes in front of every
  // completed group of three when more digits follow.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--out = ',';
      digits_in_group = 0;
    }
    *--out = '0' + i % 10;
    i /= 10;
    ++digits_in_group;
  } while (i != 0);

  return string(out, local_end);
}
1412
1413
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right) and a leading '-' for negative values,
// e.g. the minimum value -> "-9,223,372,036,854,775,808".
string SimpleItoaWithCommas(int64 i) {
  // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
  char local[26];
  char* const local_end = local + sizeof(local);
  char* out = local_end;

  // Need to use uint64 instead of int64 to correctly handle
  // -9,223,372,036,854,775,808: negate the unsigned value to avoid overflow.
  uint64 magnitude = i;
  if (i < 0) {
    magnitude = 0 - magnitude;
  }

  // Digits are produced right to left; a comma goes in front of every
  // completed group of three when more digits follow.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--out = ',';
      digits_in_group = 0;
    }
    *--out = '0' + magnitude % 10;
    magnitude /= 10;
    ++digits_in_group;
  } while (magnitude != 0);

  if (i < 0) {
    *--out = '-';
  }
  return string(out, local_end);
}
1442
1443
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
1444
// compile.
1445
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right),
// e.g. the maximum value -> "18,446,744,073,709,551,615".
string SimpleItoaWithCommas(uint64 i) {
  // 20 digits and 6 commas are good for 64-bit or smaller ints.
  // Longest is 18,446,744,073,709,551,615.
  char local[26];
  char* const local_end = local + sizeof(local);
  char* out = local_end;

  // Digits are produced right to left; a comma goes in front of every
  // completed group of three when more digits follow.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--out = ',';
      digits_in_group = 0;
    }
    *--out = '0' + i % 10;
    i /= 10;
    ++digits_in_group;
  } while (i != 0);

  return string(out, local_end);
}
1468
1469
// ----------------------------------------------------------------------
1470
// ItoaKMGT()
1471
//    Description: converts an integer to a string
1472
//    Truncates values to a readable unit: K, G, M or T
1473
//    Opposite of atoi_kmgt()
1474
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   57185920 -> "45M"
1475
//
1476
//    Return value: string
1477
// ----------------------------------------------------------------------
1478
0
// Formats `i` for human consumption using the largest binary unit
// (T = 2^40, G = 2^30, M = 2^20, K = 2^10) for which the truncated quotient
// is greater than 1; smaller magnitudes are printed unscaled.  Negative
// values are formatted by magnitude with a leading '-'.
//   100 -> "100"   1500 -> "1500"   4000 -> "3K"   57185920 -> "54M"
string ItoaKMGT(int64 i) {
  const char* sign = "";
  if (i < 0) {
    // We lose some accuracy if the caller passes LONG_LONG_MIN, but
    // that's OK as this function is only for human readability
    if (i == numeric_limits<int64>::min()) {
      ++i;
    }
    i = -i;
    sign = "-";
  }

  struct Unit {
    int shift;
    const char* suffix;
  };
  // Ordered from the largest unit to the smallest; the first match wins.
  const Unit kUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}, {10, "K"}};

  int64 val = i;
  const char* suffix = "";
  for (const Unit& unit : kUnits) {
    const int64 scaled = i >> unit.shift;
    if (scaled > 1) {
      val = scaled;
      suffix = unit.suffix;
      break;
    }
  }

  return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
1504
1505
// DEPRECATED(wadetregaskis).
1506
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1507
1508
0
// Returns StringPrintf(format, f).  `format` is used verbatim as the format
// string, with `f` as its only argument.
string FloatToString(float f, const char* format) {
  string formatted = StringPrintf(format, f);
  return formatted;
}
1511
1512
0
// Returns StringPrintf(format, i).  `format` is used verbatim as the format
// string, with `i` as its only argument.
string IntToString(int i, const char* format) {
  string formatted = StringPrintf(format, i);
  return formatted;
}
1515
1516
0
// Returns StringPrintf(format, i64).  `format` is used verbatim as the format
// string, with `i64` as its only argument.
string Int64ToString(int64 i64, const char* format) {
  string formatted = StringPrintf(format, i64);
  return formatted;
}
1519
1520
0
// Returns StringPrintf(format, ui64).  `format` is used verbatim as the format
// string, with `ui64` as its only argument.
string UInt64ToString(uint64 ui64, const char* format) {
  string formatted = StringPrintf(format, ui64);
  return formatted;
}
1523
1524
namespace {
  // Decimal (power-of-1000) unit sizes used by HumanizeBytes().
  constexpr int64_t kBytesPerGB = 1000000000;
  constexpr int64_t kBytesPerMB = 1000000;
  constexpr int64_t kBytesPerKB = 1000;
}

// Renders a byte count as text in the largest decimal unit it reaches:
// "<x> GB", "<x> MB" or "<x> KB", where <x> is printed in fixed notation with
// `precision` digits after the decimal point.  Counts below 1000 are printed
// as an integer followed by " B".
std::string HumanizeBytes(uint64_t bytes, int precision) {
  // Pick the unit first, then format once.
  int64_t unit_size = 0;
  const char* unit_label = nullptr;
  if (bytes >= static_cast<uint64_t>(kBytesPerGB)) {
    unit_size = kBytesPerGB;
    unit_label = " GB";
  } else if (bytes >= static_cast<uint64_t>(kBytesPerMB)) {
    unit_size = kBytesPerMB;
    unit_label = " MB";
  } else if (bytes >= static_cast<uint64_t>(kBytesPerKB)) {
    unit_size = kBytesPerKB;
    unit_label = " KB";
  }

  std::ostringstream op_stream;
  op_stream << std::fixed << std::setprecision(precision);
  if (unit_label == nullptr) {
    // Below 1 KB: the integer count is printed, so `precision` has no effect.
    op_stream << bytes << " B";
  } else {
    op_stream << static_cast<double>(bytes) / unit_size << unit_label;
  }
  return op_stream.str();
}