YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// The following only applies to changes made to this file as part of YugaByte development.
5
//
6
// Portions Copyright (c) YugaByte, Inc.
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
9
// in compliance with the License.  You may obtain a copy of the License at
10
//
11
// http://www.apache.org/licenses/LICENSE-2.0
12
//
13
// Unless required by applicable law or agreed to in writing, software distributed under the License
14
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15
// or implied.  See the License for the specific language governing permissions and limitations
16
// under the License.
17
//
18
// This file contains string processing functions related to
19
// numeric values.
20
21
#include "yb/gutil/strings/numbers.h"
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <errno.h>
26
#include <float.h>          // for DBL_DIG and FLT_DIG
27
#include <math.h>           // for HUGE_VAL
28
#include <stdio.h>
29
#include <stdlib.h>
30
#include <string.h>
31
32
#include <iomanip>
33
#include <limits>
34
#include <sstream>
35
36
#include <glog/logging.h>
37
38
#include "yb/gutil/casts.h"
39
#include "yb/gutil/int128.h"
40
#include "yb/gutil/integral_types.h"
41
#include "yb/gutil/stringprintf.h"
42
#include "yb/gutil/strings/ascii_ctype.h"
43
#include "yb/gutil/strtoint.h"
44
45
using std::numeric_limits;
46
using std::string;
47
48
49
// Reads a <double> in *text, which may not be whitespace-initiated.
50
// *len is the length, or -1 if text is '\0'-terminated, which is more
51
// efficient.  Sets *text to the end of the double, and val to the
52
// converted value, and the length of the double is subtracted from
53
// *len. <double> may also be a '?', in which case val will be
54
// unchanged. Returns true upon success.  If initial_minus is
55
// non-NULL, then *initial_minus will indicate whether the first
56
// symbol seen was a '-', which will be ignored. Similarly, if
57
// final_period is non-NULL, then *final_period will indicate whether
58
// the last symbol seen was a '.', which will be ignored. This is
59
// useful in case that an initial '-' or final '.' would have another
60
// meaning (as a separator, e.g.).
61
// Consumes one number (or, when allow_question is set, a single '?') from
// the front of *text.
//
//   text / len      In/out cursor. *len == -1 means *text is NUL-terminated;
//                   otherwise it is the number of readable bytes and is
//                   reduced by however much was consumed.
//   allow_question  Accept '?' as a placeholder; *val is left untouched.
//   val             Receives the parsed value on success.
//   initial_minus   Optional. When non-null, a leading '-' is skipped rather
//                   than parsed, and *initial_minus records whether one was
//                   present.
//   final_period    Optional. When non-null, a '.' that ends the parsed
//                   number is handed back to the caller (not consumed), and
//                   *final_period records whether that happened.
//
// Returns true when something was consumed. On failure *text, *len and *val
// are not modified (*initial_minus may still have been written).
static inline bool EatADouble(const char** text, ssize_t* len, bool allow_question,
                              double* val, bool* initial_minus,
                              bool* final_period) {
  const char* cursor = *text;
  ssize_t remaining = *len;  // bytes left, or -1 for NUL-terminated input

  if (cursor == nullptr || remaining == 0) {
    return false;
  }

  // Placeholder for an unbounded value: consume it and keep *val as is.
  if (allow_question && *cursor == '?') {
    *text = cursor + 1;
    if (remaining != -1) {
      *len = remaining - 1;
    }
    return true;
  }

  if (initial_minus != nullptr) {
    *initial_minus = (*cursor == '-');
    if (*initial_minus) {
      if (remaining == 1) {
        return false;  // nothing follows the '-'
      }
      ++cursor;
      if (remaining != -1) {
        --remaining;
      }
    }
  }

  // Cheap pre-filter on the first character; in particular this keeps
  // leading whitespace and a bare "inf" away from strtod.
  if (strchr("-+.0123456789", *cursor) == nullptr) {
    return false;
  }

  const char* number_end = nullptr;
  double parsed = 0;
  if (remaining == -1) {
    char* stop = nullptr;  // strtod wants a non-const char**
    parsed = strtod(cursor, &stop);
    number_end = stop;
  } else {
    // The input is not NUL-terminated, so strtod has to run on a
    // terminated copy; the end offset is then mapped back onto the input.
    std::unique_ptr<char[]> scratch(new char[remaining + 1]);
    memcpy(scratch.get(), cursor, remaining);
    scratch[remaining] = '\0';
    char* stop = nullptr;
    parsed = strtod(scratch.get(), &stop);
    number_end = cursor + (stop - scratch.get());
  }

  if (number_end == cursor) {
    return false;  // strtod consumed nothing
  }

  if (final_period != nullptr) {
    const bool ends_with_dot = (number_end[-1] == '.');
    *final_period = ends_with_dot;
    if (ends_with_dot) {
      --number_end;  // leave the '.' for the caller
    }
  }

  *text = number_end;
  *val = parsed;
  if (remaining != -1) {
    *len = remaining - (number_end - cursor);
  }
  return true;
}
122
123
// If update, consume one of acceptable_chars from string *text of
124
// length len and return that char, or '\0' otherwise. If len is -1,
125
// *text is null-terminated. If update is false, don't alter *text and
126
// *len. If null_ok, then update must be false, and, if text has no
127
// more chars, then return '\1' (arbitrary nonzero).
128
// Tests whether the next character of *text is one of acceptable_chars.
//
// Returns the matching character, or '\0' when the next character is not
// in the set. When the input is exhausted (*len == 0 or a NUL is reached)
// the result is '\1' if null_ok is set and '\0' otherwise.
//
// With update set, a match is also consumed: *text advances by one and
// *len (unless it is -1, meaning NUL-terminated) shrinks by one.
// update and null_ok must not both be set.
static inline char EatAChar(const char** text, ssize_t* len,
                            const char* acceptable_chars,
                            bool update, bool null_ok) {
  assert(!(update && null_ok));

  // Check the length first so that **text is never read past a bounded
  // buffer.
  const bool exhausted = (*len == 0) || (**text == '\0');
  if (exhausted) {
    return null_ok ? '\1' : '\0';  // '\1' is an arbitrary "true" value
  }

  const char candidate = **text;
  if (strchr(acceptable_chars, candidate) == nullptr) {
    return '\0';  // no match; nothing consumed
  }

  if (update) {
    ++(*text);
    if (*len != -1) {
      --(*len);
    }
  }
  return candidate;
}
147
148
// Parse an expression in 'text' of the form: <comparator><double> or
149
// <double><sep><double> See full comments in header file.
150
bool ParseDoubleRange(const char* text, ssize_t len, const char** end,
151
                      double* from, double* to, bool* is_currency,
152
0
                      const DoubleRangeOptions& opts) {
153
0
  const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
154
155
0
  if (!opts.dont_modify_unbounded) {
156
0
    *from = -HUGE_VAL;
157
0
    *to = HUGE_VAL;
158
0
  }
159
0
  if (opts.allow_currency && (is_currency != nullptr))
160
0
    *is_currency = false;
161
162
0
  assert(len >= -1);
163
0
  assert(opts.separators && (*opts.separators != '\0'));
164
  // these aren't valid separators
165
0
  assert(strlen(opts.separators) ==
166
0
         strcspn(opts.separators, "+0123456789eE$"));
167
0
  assert(opts.num_required_bounds <= 2);
168
169
  // Handle easier cases of comparators (<, >) first
170
0
  if (opts.allow_comparators) {
171
0
    char comparator = EatAChar(&text, &len, "<>", true, false);
172
0
    if (comparator) {
173
0
      double* dest = (comparator == '>') ? from : to;
174
0
      EatAChar(&text, &len, "=", true, false);
175
0
      if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
176
0
        if (is_currency != nullptr)
177
0
          *is_currency = true;
178
0
      if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr,
179
0
                      nullptr))
180
0
        return false;
181
0
      *end = text;
182
0
      return EatAChar(&text, &len, opts.acceptable_terminators, false,
183
0
                      opts.null_terminator_ok);
184
0
    }
185
0
  }
186
187
0
  bool seen_dollar = (opts.allow_currency &&
188
0
                      EatAChar(&text, &len, "$", true, false));
189
190
  // If we see a '-', two things could be happening: -<to> or
191
  // <from>... where <from> is negative. Treat initial minus sign as a
192
  // separator if '-' is a valid separator.
193
  // Similarly, we prepare for the possibility of seeing a '.' at the
194
  // end of the number, in case '.' (which really means '..') is a
195
  // separator.
196
0
  bool initial_minus_sign = false;
197
0
  bool final_period = false;
198
0
  bool* check_initial_minus = (strchr(opts.separators, '-') && !seen_dollar
199
0
                               && (opts.num_required_bounds < 2)) ?
200
0
                              (&initial_minus_sign) : nullptr;
201
0
  bool* check_final_period = strchr(opts.separators, '.') ? (&final_period)
202
0
                             : nullptr;
203
0
  bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers,
204
0
                                from, check_initial_minus, check_final_period);
205
206
  // if 2 bounds required, must see a double (or '?' if allowed)
207
0
  if ((opts.num_required_bounds == 2) && !double_seen) return false;
208
209
0
  if (seen_dollar && !double_seen) {
210
0
      --text;
211
0
      if (len != -1)
212
0
        ++len;
213
0
      seen_dollar = false;
214
0
  }
215
  // If we're here, we've read the first double and now expect a
216
  // separator and another <double>.
217
0
  char separator = EatAChar(&text, &len, opts.separators, true, false);
218
0
  if (separator == '.') {
219
    // seen one '.' as separator; must check for another; perhaps set seplen=2
220
0
    if (EatAChar(&text, &len, ".", true, false)) {
221
0
      if (final_period) {
222
        // We may have three periods in a row. The first is part of the
223
        // first number, the others are a separator. Policy: 234...567
224
        // is "234." to "567", not "234" to ".567".
225
0
        EatAChar(&text, &len, ".", true, false);
226
0
      }
227
0
    } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
228
      // just one '.' and no other separator; uneat the first '.' we saw
229
0
      --text;
230
0
      if (len != -1)
231
0
        ++len;
232
0
      separator = '\0';
233
0
    }
234
0
  }
235
  // By now, we've consumed whatever separator there may have been,
236
  // and separator is true iff there was one.
237
0
  if (!separator) {
238
0
    if (final_period)  // final period now considered part of first double
239
0
      EatAChar(&text, &len, ".", true, false);
240
0
    if (initial_minus_sign && double_seen) {
241
0
      *to = *from;
242
0
      *from = from_default;
243
0
    } else if (opts.require_separator ||
244
0
               (opts.num_required_bounds > 0 && !double_seen) ||
245
0
               (opts.num_required_bounds > 1) ) {
246
0
      return false;
247
0
    }
248
0
  } else {
249
0
    if (initial_minus_sign && double_seen)
250
0
      *from = -(*from);
251
    // read second <double>
252
0
    bool second_dollar_seen = (seen_dollar
253
0
                               || (opts.allow_currency && !double_seen))
254
0
                              && EatAChar(&text, &len, "$", true, false);
255
0
    bool second_double_seen = EatADouble(
256
0
      &text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
257
0
    if (opts.num_required_bounds > static_cast<uint32_t>(double_seen + second_double_seen))
258
0
      return false;
259
0
    if (second_dollar_seen && !second_double_seen) {
260
0
      --text;
261
0
      if (len != -1)
262
0
        ++len;
263
0
      second_dollar_seen = false;
264
0
    }
265
0
    seen_dollar = seen_dollar || second_dollar_seen;
266
0
  }
267
268
0
  if (seen_dollar && (is_currency != nullptr))
269
0
    *is_currency = true;
270
  // We're done. But we have to check that the next char is a proper
271
  // terminator.
272
0
  *end = text;
273
0
  char terminator = EatAChar(&text, &len, opts.acceptable_terminators, false,
274
0
                             opts.null_terminator_ok);
275
0
  if (terminator == '.')
276
0
    --(*end);
277
0
  return terminator;
278
0
}
279
280
// ----------------------------------------------------------------------
281
// ConsumeStrayLeadingZeroes
282
//    Eliminates all leading zeroes (unless the string itself is composed
283
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
284
// --------------------------------------------------------------------
285
286
0
// Strips the run of '0' characters at the front of *str in place. A string
// made up only of zeroes is reduced to a single "0"; strings shorter than
// two characters, or not starting with '0', are left alone.
void ConsumeStrayLeadingZeroes(string *const str) {
  const string::size_type total = str->size();
  if (total <= 1 || (*str)[0] != '0') {
    return;
  }

  // Index of the first non-'0' character == length of the leading zero run
  // (at least 1 here, since position 0 is known to be '0').
  string::size_type zero_run = str->find_first_not_of('0', 1);
  if (zero_run == string::npos) {
    zero_run = total - 1;  // all zeroes: keep the last one
  }
  str->erase(0, zero_run);
}
304
305
// ----------------------------------------------------------------------
306
// ParseLeadingInt32Value()
307
// ParseLeadingUInt32Value()
308
//    A simple parser for [u]int32 values. Returns the parsed value
309
//    if a valid value is found; else returns deflt
310
//    This cannot handle decimal numbers with leading 0s.
311
// --------------------------------------------------------------------
312
313
1
// Parses a leading integer from str with strtol in base 0 (so "0x" and a
// leading "0" select hex and octal). Returns deflt when no digits could be
// parsed; otherwise the value clamped to the int32 range.
int32 ParseLeadingInt32Value(const char *str, int32 deflt) {
  char *parse_end = nullptr;
  auto parsed = strtol(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;  // strtol consumed nothing
  }

  // long may be wider than 32 bits, so saturate before narrowing.
  const auto kHigh = numeric_limits<int32>::max();
  const auto kLow = numeric_limits<int32>::min();
  if (parsed > kHigh) {
    parsed = kHigh;
  } else if (parsed < kLow) {
    parsed = kLow;
  }
  return narrow_cast<int32>(parsed);
}
324
325
0
// Parses a leading unsigned integer from str in base 0. Returns deflt when
// no digits could be parsed. In the 64-bit path, values whose magnitude
// exceeds the uint32 maximum are pegged to that maximum, while smaller
// negative inputs wrap around (e.g. "-2" yields UINT_MAX - 1).
uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) {
  char *parse_end = nullptr;

  if (numeric_limits<size_t>::max() == numeric_limits<uint32>::max()) {
    // 32-bit platform: strtoul already works at the right width.
    const uint32 parsed = strtoul(str, &parse_end, 0);
    return (parse_end == str) ? deflt : parsed;
  }

  // 64-bit platform: parse as a signed 64-bit value and apply the limits by
  // hand. An unsigned 64-bit parse could not tell "-2" (which must wrap to
  // UINT_MAX - 1) apart from ULONG_MAX - 1 (which must be pegged to
  // UINT_MAX).
  int64 parsed = strto64(str, &parse_end, 0);
  if (parsed > numeric_limits<uint32>::max() ||
      parsed < -static_cast<int64>(numeric_limits<uint32>::max())) {
    parsed = numeric_limits<uint32>::max();
  }
  if (parse_end == str) {
    return deflt;
  }
  // Inside these limits, truncating to 32 bits gives the wrapped value for
  // negative inputs.
  return narrow_cast<uint32>(parsed);
}
347
348
// ----------------------------------------------------------------------
349
// ParseLeadingDec32Value
350
// ParseLeadingUDec32Value
351
//    A simple parser for [u]int32 values. Returns the parsed value
352
//    if a valid value is found; else returns deflt
353
//    The string passed in is treated as *10 based*.
354
//    This can handle strings with leading 0s.
355
// --------------------------------------------------------------------
356
357
0
// Base-10 counterpart of the base-0 int32 parser: leading zeroes are plain
// digits here. Returns deflt when strtol consumes nothing, otherwise the
// parsed value saturated to the int32 range.
int32 ParseLeadingDec32Value(const char *str, int32 deflt) {
  char *parse_end = nullptr;
  auto parsed = strtol(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;
  }

  // Needed where long is 64 bits; never triggers where long is 32 bits.
  if (parsed < numeric_limits<int32>::min()) {
    parsed = numeric_limits<int32>::min();
  } else if (parsed > numeric_limits<int32>::max()) {
    parsed = numeric_limits<int32>::max();
  }
  return narrow_cast<int32>(parsed);
}
368
369
0
// Base-10 parser for a leading unsigned 32-bit value. Returns deflt when no
// digits could be parsed. In the 64-bit path, magnitudes above the uint32
// maximum are pegged to that maximum and smaller negative inputs wrap.
uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) {
  char *parse_end = nullptr;
  const bool narrow_platform =
      (numeric_limits<size_t>::max() == numeric_limits<uint32>::max());

  if (narrow_platform) {
    // strtoul already works at 32 bits here.
    const uint32 parsed = strtoul(str, &parse_end, 10);
    return (parse_end == str) ? deflt : parsed;
  }

  // Parse signed at 64 bits and enforce the limits manually; a 64-bit
  // strtoul could not distinguish "-2" (wraps to UINT_MAX - 1) from
  // ULONG_MAX - 1 (overflow, pegged to UINT_MAX).
  int64 parsed = strto64(str, &parse_end, 10);
  const bool too_big = parsed > numeric_limits<uint32>::max();
  const bool too_small =
      parsed < -static_cast<int64>(numeric_limits<uint32>::max());
  if (too_big || too_small) {
    parsed = numeric_limits<uint32>::max();
  }
  // Within the limits, truncation to 32 bits handles negatives correctly.
  return (parse_end == str) ? deflt : narrow_cast<uint32>(parsed);
}
391
392
// ----------------------------------------------------------------------
393
// ParseLeadingUInt64Value
394
// ParseLeadingInt64Value
395
// ParseLeadingHex64Value
396
//    A simple parser for 64-bit values. Returns the parsed value if a
397
//    valid integer is found; else returns deflt
398
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
399
// --------------------------------------------------------------------
400
0
// Parses a leading unsigned 64-bit value in base 0 via strtou64.
// Returns deflt if no characters were consumed.
uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;
  }
  return parsed;
}
405
406
0
// Parses a leading signed 64-bit value in base 0 via strto64.
// Returns deflt if no characters were consumed.
int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
  char *parse_end = nullptr;
  const int64 parsed = strto64(str, &parse_end, 0);
  if (parse_end == str) {
    return deflt;
  }
  return parsed;
}
411
412
0
// Parses a leading unsigned 64-bit value in base 16 via strtou64.
// Returns deflt if no characters were consumed.
uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 16);
  if (parse_end == str) {
    return deflt;
  }
  return parsed;
}
417
418
// ----------------------------------------------------------------------
419
// ParseLeadingDec64Value
420
// ParseLeadingUDec64Value
421
//    A simple parser for [u]int64 values. Returns the parsed value
422
//    if a valid value is found; else returns deflt
423
//    The string passed in is treated as *10 based*.
424
//    This can handle strings with leading 0s.
425
// --------------------------------------------------------------------
426
427
0
// Parses a leading signed 64-bit value in base 10 via strto64.
// Returns deflt if no characters were consumed.
int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
  char *parse_end = nullptr;
  const int64 parsed = strto64(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;
  }
  return parsed;
}
432
433
0
// Parses a leading unsigned 64-bit value in base 10 via strtou64.
// Returns deflt if no characters were consumed.
uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
  char *parse_end = nullptr;
  const uint64 parsed = strtou64(str, &parse_end, 10);
  if (parse_end == str) {
    return deflt;
  }
  return parsed;
}
438
439
// ----------------------------------------------------------------------
440
// ParseLeadingDoubleValue()
441
//    A simple parser for double values. Returns the parsed value
442
//    if a valid value is found; else returns deflt
443
// --------------------------------------------------------------------
444
445
0
// Parses a leading double from str with strtod. Returns deflt when nothing
// could be parsed or when strtod reported an error through errno (overflow
// or underflow); otherwise returns the parsed value. errno is reset to 0
// before the call.
double ParseLeadingDoubleValue(const char *str, double deflt) {
  char *parse_end = nullptr;
  errno = 0;
  const double parsed = strtod(str, &parse_end);

  const bool range_error = (errno != 0);
  const bool nothing_parsed = (parse_end == str);
  return (range_error || nothing_parsed) ? deflt : parsed;
}
456
457
// ----------------------------------------------------------------------
458
// ParseLeadingBoolValue()
459
//    A recognizer of boolean string values. Returns the parsed value
460
//    if a valid value is found; else returns deflt.  This skips leading
461
//    whitespace, is case insensitive, and recognizes these forms:
462
//    0/1, false/true, no/yes, n/y
463
// --------------------------------------------------------------------
464
49.8k
bool ParseLeadingBoolValue(const char *str, bool deflt) {
465
49.8k
  static const int kMaxLen = 5;
466
49.8k
  char value[kMaxLen + 1];
467
  // Skip whitespace
468
49.8k
  while (ascii_isspace(*str)) {
469
0
    ++str;
470
0
  }
471
49.8k
  int len = 0;
472
299k
  for (; len <= kMaxLen && ascii_isalnum(*str); 
++str249k
)
473
249k
    value[len++] = ascii_tolower(*str);
474
49.8k
  if (len == 0 || len > kMaxLen)
475
0
    return deflt;
476
49.8k
  value[len] = '\0';
477
49.8k
  switch (len) {
478
0
    case 1:
479
0
      if (value[0] == '0' || value[0] == 'n')
480
0
        return false;
481
0
      if (value[0] == '1' || value[0] == 'y')
482
0
        return true;
483
0
      break;
484
0
    case 2:
485
0
      if (!strcmp(value, "no"))
486
0
        return false;
487
0
      break;
488
0
    case 3:
489
0
      if (!strcmp(value, "yes"))
490
0
        return true;
491
0
      break;
492
0
    case 4:
493
0
      if (!strcmp(value, "true"))
494
0
        return true;
495
0
      break;
496
49.8k
    case 5:
497
49.8k
      if (!strcmp(value, "false"))
498
49.8k
        return false;
499
0
      break;
500
49.8k
  }
501
0
  return deflt;
502
49.8k
}
503
504
505
// ----------------------------------------------------------------------
506
// FpToString()
507
// FloatToString()
508
// IntToString()
509
//    Convert various types to their string representation, possibly padded
510
//    with spaces, using snprintf format specifiers.
511
// ----------------------------------------------------------------------
512
513
0
// Formats fp as exactly 16 lower-case hex digits, zero-padded on the left.
// NOTE(review): relies on Fprint matching the PRIx64 conversion — confirm
// against its declaration.
string FpToString(Fprint fp) {
  char hex[17];  // 16 digits plus the terminating NUL
  snprintf(hex, sizeof(hex), "%016" PRIx64, fp);
  string formatted(hex);
  return formatted;
}
518
519
// Fixed-width hexadecimal formatting of unsigned integers.
520
0
// Formats ui128 as 32 lower-case hex digits: the high 64 bits first, then
// the low 64 bits, each zero-padded to 16 digits.
string Uint128ToHexString(uint128 ui128) {
  char hex[33];  // 32 digits plus the terminating NUL
  char* const low_half = hex + 16;

  snprintf(hex, sizeof(hex), "%016" PRIx64,
           Uint128High64(ui128));
  // The second call overwrites the NUL left by the first one.
  snprintf(low_half, sizeof(hex) - 16, "%016" PRIx64,
           Uint128Low64(ui128));
  return string(hex);
}
528
529
8.95M
// Formats ui16 as exactly four upper-case hex digits, zero-padded.
string Uint16ToHexString(uint16_t ui16) {
  char hex[5];  // 4 digits plus the terminating NUL
  snprintf(hex, sizeof(hex), "%04X", ui16);
  string formatted(hex);
  return formatted;
}
534
535
namespace {
536
537
// Represents integer values of digits.
538
// Uses 36 to indicate an invalid character since we support
539
// bases up to 36.
540
static const int8 kAsciiToInt[256] = {
541
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
542
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
543
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
544
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
545
  36, 36, 36, 36, 36, 36, 36,
546
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
547
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
548
  36, 36, 36, 36, 36, 36,
549
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
550
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
551
  36, 36, 36, 36, 36,
552
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
553
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
554
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
555
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
556
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
557
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
558
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
559
  36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36 };
560
561
// Input format based on POSIX.1-2008 strtol
562
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
563
template<typename IntType>
564
bool safe_int_internal(const char* start, const char* end, int base,
565
0
                       IntType* value_p) {
566
  // Consume whitespace.
567
0
  while (start < end && ascii_isspace(start[0])) {
568
0
    ++start;
569
0
  }
570
0
  while (start < end && ascii_isspace(end[-1])) {
571
0
    --end;
572
0
  }
573
0
  if (start >= end) {
574
0
    return false;
575
0
  }
576
577
  // Consume sign.
578
0
  const bool negative = (start[0] == '-');
579
0
  if (negative || start[0] == '+') {
580
0
    ++start;
581
0
    if (start >= end) {
582
0
      return false;
583
0
    }
584
0
  }
585
586
  // Consume base-dependent prefix.
587
  //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
588
  //  base 16: "0x" -> base 16
589
  // Also validate the base.
590
0
  if (base == 0) {
591
0
    if (end - start >= 2 && start[0] == '0' &&
592
0
        (start[1] == 'x' || start[1] == 'X')) {
593
0
      base = 16;
594
0
      start += 2;
595
0
    } else if (end - start >= 1 && start[0] == '0') {
596
0
      base = 8;
597
0
      start += 1;
598
0
    } else {
599
0
      base = 10;
600
0
    }
601
0
  } else if (base == 16) {
602
0
    if (end - start >= 2 && start[0] == '0' &&
603
0
        (start[1] == 'x' || start[1] == 'X')) {
604
0
      start += 2;
605
0
    }
606
0
  } else if (base >= 2 && base <= 36) {
607
    // okay
608
0
  } else {
609
0
    return false;
610
0
  }
611
612
  // Consume digits.
613
  //
614
  // The classic loop:
615
  //
616
  //   for each digit
617
  //     value = value * base + digit
618
  //   value *= sign
619
  //
620
  // The classic loop needs overflow checking.  It also fails on the most
621
  // negative integer, -2147483648 in 32-bit two's complement representation.
622
  //
623
  // My improved loop:
624
  //
625
  //  if (!negative)
626
  //    for each digit
627
  //      value = value * base
628
  //      value = value + digit
629
  //  else
630
  //    for each digit
631
  //      value = value * base
632
  //      value = value - digit
633
  //
634
  // Overflow checking becomes simple.
635
  //
636
  // I present the positive code first for easier reading.
637
0
  IntType value = 0;
638
0
  if (!negative) {
639
0
    const IntType vmax = std::numeric_limits<IntType>::max();
640
0
    assert(vmax > 0);
641
0
    assert(vmax >= base);
642
0
    const IntType vmax_over_base = vmax / base;
643
    // loop over digits
644
    // loop body is interleaved for perf, not readability
645
0
    for (; start < end; ++start) {
646
0
      unsigned char c = static_cast<unsigned char>(start[0]);
647
0
      int digit = kAsciiToInt[c];
648
0
      if (value > vmax_over_base) return false;
649
0
      value *= base;
650
0
      if (digit >= base) return false;
651
0
      if (value > vmax - digit) return false;
652
0
      value += digit;
653
0
    }
654
0
  } else {
655
0
    const IntType vmin = std::numeric_limits<IntType>::min();
656
0
    assert(vmin < 0);
657
0
    assert(vmin <= 0 - base);
658
0
    IntType vmin_over_base = vmin / base;
659
    // 2003 c++ standard [expr.mul]
660
    // "... the sign of the remainder is implementation-defined."
661
    // Although (vmin/base)*base + vmin%base is always vmin.
662
    // 2011 c++ standard tightens the spec but we cannot rely on it.
663
0
    if (vmin % base > 0) {
664
0
      vmin_over_base += 1;
665
0
    }
666
    // loop over digits
667
    // loop body is interleaved for perf, not readability
668
0
    for (; start < end; ++start) {
669
0
      unsigned char c = static_cast<unsigned char>(start[0]);
670
0
      int digit = kAsciiToInt[c];
671
0
      if (value < vmin_over_base) return false;
672
0
      value *= base;
673
0
      if (digit >= base) return false;
674
0
      if (value < vmin + digit) return false;
675
0
      value -= digit;
676
0
    }
677
0
  }
678
679
  // Store output.
680
0
  *value_p = value;
681
0
  return true;
682
0
}
Unexecuted instantiation: numbers.cc:bool (anonymous namespace)::safe_int_internal<int>(char const*, char const*, int, int*)
Unexecuted instantiation: numbers.cc:bool (anonymous namespace)::safe_int_internal<long long>(char const*, char const*, int, long long*)
683
684
}  // anonymous namespace
685
686
bool safe_strto32_base(const char* startptr, const int buffer_size,
687
0
                       int32* v, int base) {
688
0
  return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
689
0
}
690
691
bool safe_strto64_base(const char* startptr, const int buffer_size,
692
0
                       int64* v, int base) {
693
0
  return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
694
0
}
695
696
0
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
697
0
  return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
698
0
}
699
700
0
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
701
0
  return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
702
0
}
703
704
1.13M
bool safe_strto32_base(const char* str, int32* value, int base) {
705
1.13M
  char* endptr;
706
1.13M
  errno = 0;  // errno only gets set on errors
707
1.13M
  *value = strto32(str, &endptr, base);
708
1.13M
  if (endptr != str) {
709
1.13M
    while (ascii_isspace(*endptr)) 
++endptr0
;
710
1.13M
  }
711
1.13M
  return *str != '\0' && *endptr == '\0' && errno == 0;
712
1.13M
}
713
714
9
bool safe_strto64_base(const char* str, int64* value, int base) {
715
9
  char* endptr;
716
9
  errno = 0;  // errno only gets set on errors
717
9
  *value = strto64(str, &endptr, base);
718
9
  if (endptr != str) {
719
9
    while (ascii_isspace(*endptr)) 
++endptr0
;
720
9
  }
721
9
  return *str != '\0' && *endptr == '\0' && errno == 0;
722
9
}
723
724
361k
bool safe_strtou32_base(const char* str, uint32* value, int base) {
725
  // strtoul does not give any errors on negative numbers, so we have to
726
  // search the string for '-' manually.
727
361k
  while (ascii_isspace(*str)) 
++str0
;
728
361k
  if (*str == '-') 
return false0
;
729
730
361k
  char* endptr;
731
361k
  errno = 0;  // errno only gets set on errors
732
361k
  *value = strtou32(str, &endptr, base);
733
361k
  if (
endptr != str361k
) {
734
361k
    while (ascii_isspace(*endptr)) 
++endptr0
;
735
361k
  }
736
361k
  return 
*str != '\0'361k
&& *endptr == '\0' && errno
== 0361k
;
737
361k
}
738
739
2
bool safe_strtou64_base(const char* str, uint64* value, int base) {
740
  // strtou64 does not give any errors on negative numbers, so we have to
741
  // search the string for '-' manually.
742
2
  while (ascii_isspace(*str)) 
++str0
;
743
2
  if (*str == '-') 
return false0
;
744
745
2
  char* endptr;
746
2
  errno = 0;  // errno only gets set on errors
747
2
  *value = strtou64(str, &endptr, base);
748
2
  if (endptr != str) {
749
2
    while (ascii_isspace(*endptr)) 
++endptr0
;
750
2
  }
751
2
  return *str != '\0' && *endptr == '\0' && errno == 0;
752
2
}
753
754
// ----------------------------------------------------------------------
755
// u64tostr_base36()
756
//    Converts unsigned number to string representation in base-36.
757
// --------------------------------------------------------------------
758
0
// Writes number in base 36 (digits 0-9 then a-z) into buffer as a
// NUL-terminated string and returns the number of digits written, not
// counting the NUL. buffer must be non-null and buf_size greater than 0
// (both CHECKed). Returns 0 when the digits plus the NUL do not fit in
// buf_size bytes; the tail of the buffer has been overwritten in that case.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
  CHECK_GT(buf_size, 0);
  CHECK(buffer);
  static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

  // Digits come out least-significant first, so build the string backwards
  // from the end of the buffer and shift it to the front afterwards.
  buffer[buf_size - 1] = '\0';
  size_t used = 1;  // bytes occupied at the tail, including the NUL

  do {
    if (used == buf_size) {
      return 0;  // no room for another digit
    }
    const int digit = number % 36;
    number /= 36;
    buffer[buf_size - used - 1] = kAlphabet[digit];
    ++used;
  } while (number != 0);

  memmove(buffer, buffer + buf_size - used, used);

  return used - 1;
}
780
781
// Generate functions that wrap safe_strtoXXX_base.
782
#define GEN_SAFE_STRTO(name, type)                           \
783
0
bool name##_base(const string& str, type* value, int base) { \
784
0
  return name##_base(str.c_str(), value, base);              \
785
0
}                                                            \
Unexecuted instantiation: safe_strto32_base(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int*, int)
Unexecuted instantiation: safe_strtou32_base(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int*, int)
Unexecuted instantiation: safe_strto64_base(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long long*, int)
Unexecuted instantiation: safe_strtou64_base(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long long*, int)
786
1.50M
bool name(const char* str, type* value) {                    \
787
1.50M
  return name##_base(str, value, 10);                        \
788
1.50M
}                                                            \
safe_strto32(char const*, int*)
Line
Count
Source
786
1.13M
bool name(const char* str, type* value) {                    \
787
1.13M
  return name##_base(str, value, 10);                        \
788
1.13M
}                                                            \
safe_strtou32(char const*, unsigned int*)
Line
Count
Source
786
361k
bool name(const char* str, type* value) {                    \
787
361k
  return name##_base(str, value, 10);                        \
788
361k
}                                                            \
Unexecuted instantiation: safe_strto64(char const*, long long*)
Unexecuted instantiation: safe_strtou64(char const*, unsigned long long*)
789
9
bool name(const string& str, type* value) {                  \
790
9
  return name##_base(str.c_str(), value, 10);                \
791
9
}
Unexecuted instantiation: safe_strto32(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int*)
Unexecuted instantiation: safe_strtou32(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int*)
safe_strto64(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long long*)
Line
Count
Source
789
9
bool name(const string& str, type* value) {                  \
790
9
  return name##_base(str.c_str(), value, 10);                \
791
9
}
Unexecuted instantiation: safe_strtou64(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long long*)
792
GEN_SAFE_STRTO(safe_strto32, int32);
793
GEN_SAFE_STRTO(safe_strtou32, uint32);
794
GEN_SAFE_STRTO(safe_strto64, int64);
795
GEN_SAFE_STRTO(safe_strtou64, uint64);
796
#undef GEN_SAFE_STRTO
797
798
5
bool safe_strtof(const char* str, float* value) {
799
5
  char* endptr;
800
#ifdef _MSC_VER  // has no strtof()
801
  *value = strtod(str, &endptr);
802
#else
803
5
  *value = strtof(str, &endptr);
804
5
#endif
805
5
  if (endptr != str) {
806
5
    while (ascii_isspace(*endptr)) 
++endptr0
;
807
5
  }
808
  // Ignore range errors from strtod/strtof.
809
  // The values it returns on underflow and
810
  // overflow are the right fallback in a
811
  // robust setting.
812
5
  return *str != '\0' && *endptr == '\0';
813
5
}
814
815
0
bool safe_strtod(const char* str, double* value) {
816
0
  char* endptr;
817
0
  *value = strtod(str, &endptr);
818
0
  if (endptr != str) {
819
0
    while (ascii_isspace(*endptr)) ++endptr;
820
0
  }
821
  // Ignore range errors from strtod.  The values it
822
  // returns on underflow and overflow are the right
823
  // fallback in a robust setting.
824
0
  return *str != '\0' && *endptr == '\0';
825
0
}
826
827
0
bool safe_strtof(const string& str, float* value) {
828
0
  return safe_strtof(str.c_str(), value);
829
0
}
830
831
0
bool safe_strtod(const string& str, double* value) {
832
0
  return safe_strtod(str.c_str(), value);
833
0
}
834
835
0
// Parses a base-10 unsigned integer optionally followed by a single
// case-insensitive binary-unit letter: K (2^10), M (2^20), G (2^30) or
// T (2^40), and returns the scaled value.  Any other character directly
// after the digits is a fatal error.  Characters after the unit letter are
// not examined, and the multiplication is not checked for overflow.
uint64 atoi_kmgt(const char* s) {
  char* suffix;
  const uint64 magnitude = strtou64(s, &suffix, 10);

  int shift = 0;  // No unit letter: the value is returned unscaled.
  if (*suffix != '\0') {
    const char c = ascii_toupper(*suffix);
    if (c == 'K') {
      shift = 10;
    } else if (c == 'M') {
      shift = 20;
    } else if (c == 'G') {
      shift = 30;
    } else if (c == 'T') {
      shift = 40;
    } else {
      LOG(FATAL) << "Invalid mnemonic: `" << c << "';"
                 << " should be one of `K', `M', `G', and `T'.";
    }
  }
  return magnitude * (GG_ULONGLONG(1) << shift);
}
862
863
// ----------------------------------------------------------------------
864
// FastIntToBuffer()
865
// FastInt64ToBuffer()
866
// FastHexToBuffer()
867
// FastHex64ToBuffer()
868
// FastHex32ToBuffer()
869
// FastTimeToBuffer()
870
//    These are intended for speed.  FastHexToBuffer() assumes the
871
//    integer is non-negative.  FastHexToBuffer() puts output in
872
//    hex rather than decimal.  FastTimeToBuffer() puts the output
873
//    into RFC822 format.  If time is 0, uses the current time.
874
//
875
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
876
//    padded to exactly 16 bytes (plus one byte for '\0')
877
//
878
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
879
//    padded to exactly 8 bytes (plus one byte for '\0')
880
//
881
//       All functions take the output buffer as an arg.  FastInt()
882
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
883
//    They all return a pointer to the beginning of the output,
884
//    which may not be the beginning of the input buffer.  (Though
885
//    for FastTimeToBuffer(), we guarantee that it is.)
886
// ----------------------------------------------------------------------
887
888
0
// Formats `i` in decimal at the start of `buffer` and returns `buffer`
// itself; the end pointer produced by FastInt64ToBufferLeft() is discarded.
char *FastInt64ToBuffer(int64 i, char* buffer) {
  char* const start = buffer;
  FastInt64ToBufferLeft(i, start);
  return start;
}
892
893
0
// Formats `i` in decimal at the start of `buffer` and returns `buffer`
// itself; the end pointer produced by FastInt32ToBufferLeft() is discarded.
char *FastInt32ToBuffer(int32 i, char* buffer) {
  char* const start = buffer;
  FastInt32ToBufferLeft(i, start);
  return start;
}
897
898
0
char *FastHexToBuffer(int i, char* buffer) {
899
0
  CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
900
901
0
  static const char *hexdigits = "0123456789abcdef";
902
0
  char *p = buffer + 21;
903
0
  *p-- = '\0';
904
0
  do {
905
0
    *p-- = hexdigits[i & 15];   // mod by 16
906
0
    i >>= 4;                    // divide by 16
907
0
  } while (i > 0);
908
0
  return p + 1;
909
0
}
910
911
6.99M
// Writes exactly `num_byte` lowercase hex digits of `value` (zero-padded on
// the left, higher nibbles discarded) followed by '\0' into `buffer`, and
// returns `buffer`.  Needs num_byte + 1 bytes of space.
char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
  static const char *hexdigits = "0123456789abcdef";
  buffer[num_byte] = '\0';
  // Fill from the least-significant nibble at the right edge leftwards.
  int pos = num_byte;
  while (pos > 0) {
    --pos;
    buffer[pos] = hexdigits[value & 0xf];
    value >>= 4;
  }
  return buffer;
}
920
921
6.99M
// Formats `value` as exactly 16 zero-padded hex digits plus '\0' (17 bytes)
// and returns `buffer`.
char *FastHex64ToBuffer(uint64 value, char* buffer) {
  const int kHexDigits = 16;  // Two digits per byte of a 64-bit value.
  return InternalFastHexToBuffer(value, buffer, kHexDigits);
}
924
925
8
// Returns `value` as a 16-character, zero-padded, lowercase hex string.
std::string FastHex64ToString(uint64 value) {
  const int kHexDigits = 16;
  std::string hex(kHexDigits, '\0');
  // The formatter also stores '\0' at index 16, i.e. on top of the string's
  // own terminator.
  InternalFastHexToBuffer(value, &hex[0], kHexDigits);
  return hex;
}
931
932
1
// Formats `value` as exactly 8 zero-padded hex digits plus '\0' (9 bytes)
// and returns `buffer`.
char *FastHex32ToBuffer(uint32 value, char* buffer) {
  const int kHexDigits = 8;  // Two digits per byte of a 32-bit value.
  return InternalFastHexToBuffer(value, buffer, kHexDigits);
}
935
936
// TODO(user): revisit the two_ASCII_digits optimization.
937
//
938
// Several converters use this table to reduce
939
// division and modulo operations.
940
extern const char two_ASCII_digits[100][2];  // from strutil.cc
941
942
// ----------------------------------------------------------------------
943
// FastInt32ToBufferLeft()
944
// FastUInt32ToBufferLeft()
945
// FastInt64ToBufferLeft()
946
// FastUInt64ToBufferLeft()
947
//
948
// Like the Fast*ToBuffer() functions above, these are intended for speed.
949
// Unlike the Fast*ToBuffer() functions, however, these functions write
950
// their output to the beginning of the buffer (hence the name, as the
951
// output is left-aligned).  The caller is responsible for ensuring that
952
// the buffer has enough space to hold the output.
953
//
954
// Returns a pointer to the end of the string (i.e. the null character
955
// terminating the string).
956
// ----------------------------------------------------------------------
957
958
496M
// Writes the decimal digits of `u` (no leading zeros) at the start of
// `buffer`, appends '\0', and returns a pointer to that terminator.
//
// Control flow: the first `if` body is a fall-through chain that emits two
// digits per step via the two_ASCII_digits table.  The size tests below it
// jump into the middle of that chain:
//   - `ltN` labels are entered when the remaining value is below N and its
//     leading pair of digits is still to be emitted;
//   - `subltN` labels are entered after a leading single digit (odd digit
//     count) or pair has been emitted and must first be subtracted from `u`.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
  uint digits;
  const char *ASCII_digits = nullptr;
  // The idea of this implementation is to trim the number of divides to as few
  // as possible by using multiplication and subtraction rather than mod (%),
  // and by outputting two digits at a time rather than one.
  // The huge-number case is first, in the hopes that the compiler will output
  // that case in one branch-free block of code, and only output conditional
  // branches into it from below.
  if (u >= 1000000000) {  // >= 1,000,000,000
    // Ten digits: emit the top pair, then fall through the whole chain.
    digits = u / 100000000;  // 100,000,000
    ASCII_digits = two_ASCII_digits[digits];
    buffer[0] = ASCII_digits[0];
    buffer[1] = ASCII_digits[1];
    buffer += 2;
 sublt100_000_000:
    u -= digits * 100000000;  // 100,000,000
 lt100_000_000:
    digits = u / 1000000;  // 1,000,000
    ASCII_digits = two_ASCII_digits[digits];
    buffer[0] = ASCII_digits[0];
    buffer[1] = ASCII_digits[1];
    buffer += 2;
 sublt1_000_000:
    u -= digits * 1000000;  // 1,000,000
 lt1_000_000:
    digits = u / 10000;  // 10,000
    ASCII_digits = two_ASCII_digits[digits];
    buffer[0] = ASCII_digits[0];
    buffer[1] = ASCII_digits[1];
    buffer += 2;
 sublt10_000:
    u -= digits * 10000;  // 10,000
 lt10_000:
    digits = u / 100;
    ASCII_digits = two_ASCII_digits[digits];
    buffer[0] = ASCII_digits[0];
    buffer[1] = ASCII_digits[1];
    buffer += 2;
 sublt100:
    u -= digits * 100;
 lt100:
    digits = u;
    ASCII_digits = two_ASCII_digits[digits];
    buffer[0] = ASCII_digits[0];
    buffer[1] = ASCII_digits[1];
    buffer += 2;
 done:
    *buffer = 0;
    return buffer;
  }

  // Fewer than ten digits.  Each test handles two digit counts: the even
  // count jumps straight to the matching `lt` label, the odd count emits its
  // single leading digit here and jumps to the matching `sublt` label.
  if (u < 100) {
    digits = u;
    if (u >= 10) goto lt100;
    *buffer++ = '0' + digits;
    goto done;
  }
  if (u  <  10000) {   // 10,000
    if (u >= 1000) goto lt10_000;
    digits = u / 100;
    *buffer++ = '0' + digits;
    goto sublt100;
  }
  if (u  <  1000000) {   // 1,000,000
    if (u >= 100000) goto lt1_000_000;
    digits = u / 10000;  //    10,000
    *buffer++ = '0' + digits;
    goto sublt10_000;
  }
  if (u  <  100000000) {   // 100,000,000
    if (u >= 10000000) goto lt100_000_000;
    digits = u / 1000000;  //   1,000,000
    *buffer++ = '0' + digits;
    goto sublt1_000_000;
  }
  // we already know that u < 1,000,000,000
  // Nine digits: emit the single leading digit, then join the chain.
  digits = u / 100000000;   // 100,000,000
  *buffer++ = '0' + digits;
  goto sublt100_000_000;
}
1039
1040
253M
// Signed counterpart of FastUInt32ToBufferLeft(): writes an optional '-'
// followed by the decimal magnitude of `i`, and returns a pointer to the
// terminating '\0'.
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
  const uint32 bits = i;
  if (i >= 0) {
    return FastUInt32ToBufferLeft(bits, buffer);
  }
  *buffer = '-';
  // The negation has to happen in modular (i.e., "unsigned") arithmetic so
  // that the most negative value is handled; MSVC++ apparently warns for a
  // plain "-bits", so the equivalent expression "0 - bits" is used instead.
  return FastUInt32ToBufferLeft(0 - bits, buffer + 1);
}
1051
1052
232M
// Writes the decimal digits of `u64` at the start of `buffer`, appends '\0',
// and returns a pointer to that terminator.  Values that fit in 32 bits are
// delegated to FastUInt32ToBufferLeft(); larger values are split into the
// digits above the low nine (formatted recursively) and the low nine digits,
// which are always emitted in full, including leading zeros.
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
  const uint32 low32 = static_cast<uint32>(u64);
  if (low32 == u64) return FastUInt32ToBufferLeft(low32, buffer);

  const uint64 top_11_digits = u64 / 1000000000;
  buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
  uint32 rest = narrow_cast<uint32>(u64 - (top_11_digits * 1000000000));

  // Emits the two digits of rest / divisor and removes them from `rest`.
  const auto emit_pair = [&buffer, &rest](uint32 divisor) {
    const uint pair = rest / divisor;
    const char* glyphs = two_ASCII_digits[pair];
    buffer[0] = glyphs[0];
    buffer[1] = glyphs[1];
    buffer += 2;
    rest -= pair * divisor;
  };

  // `rest` is below 1,000,000,000, so its leading pair is a valid table index.
  DCHECK_LT(static_cast<uint>(rest / 10000000), 100);
  emit_pair(10000000);  // 10,000,000
  emit_pair(100000);    // 100,000
  emit_pair(1000);      // 1,000
  emit_pair(10);

  // Ninth and final digit.
  *buffer++ = '0' + rest;
  *buffer = 0;
  return buffer;
}
1093
1094
58.9M
// Signed counterpart of FastUInt64ToBufferLeft(): writes an optional '-'
// followed by the decimal magnitude of `i`, and returns a pointer to the
// terminating '\0'.
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
  const uint64 bits = i;
  if (i >= 0) {
    return FastUInt64ToBufferLeft(bits, buffer);
  }
  *buffer = '-';
  // Negate in unsigned arithmetic so the most negative value is handled.
  return FastUInt64ToBufferLeft(0 - bits, buffer + 1);
}
1102
1103
0
int HexDigitsPrefix(const char* buf, int num_digits) {
1104
0
  for (int i = 0; i < num_digits; i++)
1105
0
    if (!ascii_isxdigit(buf[i]))
1106
0
      return 0;  // This also detects end of string as '\0' is not xdigit.
1107
0
  return 1;
1108
0
}
1109
1110
// ----------------------------------------------------------------------
1111
// AutoDigitStrCmp
1112
// AutoDigitLessThan
1113
// StrictAutoDigitLessThan
1114
// autodigit_less
1115
// autodigit_greater
1116
// strict_autodigit_less
1117
// strict_autodigit_greater
1118
//    These are like less<string> and greater<string>, except when a
1119
//    run of digits is encountered at corresponding points in the two
1120
//    arguments.  Such digit strings are compared numerically instead
1121
//    of lexicographically.  Therefore if you sort by
1122
//    "autodigit_less", some machine names might get sorted as:
1123
//        exaf1
1124
//        exaf2
1125
//        exaf10
1126
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
1127
//    set to true, or the strict version of the other functions),
1128
//    strings that represent equal numbers will not be considered equal if
1129
//    the string representations are not identical.  That is, "01" < "1" in
1130
//    strict mode, but "01" == "1" otherwise.
1131
// ----------------------------------------------------------------------
1132
1133
// Three-way comparison of a[0, alen) and b[0, blen) that orders like a
// byte-wise string comparison, except that when both inputs have a run of
// decimal digits at the current position the runs are compared by numeric
// value (of arbitrary length).  Returns a negative value, zero, or a positive
// value when a sorts before, equal to, or after b.
//
// With `strict` set, digit runs that are numerically equal but differ in
// their number of leading zeroes are not considered equal: the run with more
// leading zeroes sorts first ("01" < "1").
int AutoDigitStrCmp(const char* a, size_t alen,
                    const char* b, size_t blen,
                    bool strict) {
  // isdigit() is only defined for EOF and values representable as unsigned
  // char; passing a negative plain char (any byte >= 0x80 where char is
  // signed) is undefined behavior, so classify through unsigned char.
  const auto is_digit = [](char c) {
    return isdigit(static_cast<unsigned char>(c)) != 0;
  };

  size_t aindex = 0;
  size_t bindex = 0;
  while ((aindex < alen) && (bindex < blen)) {
    if (is_digit(a[aindex]) && is_digit(b[bindex])) {
      // Compare runs of digits.  Instead of extracting numbers, we
      // just skip leading zeroes, and then get the run-lengths.  This
      // allows us to handle arbitrary precision numbers.  We remember
      // how many zeroes we found so that we can differentiate between
      // "1" and "01" in strict mode.

      // Skip leading zeroes, but remember how many we found
      size_t azeroes = aindex;
      size_t bzeroes = bindex;
      while ((aindex < alen) && (a[aindex] == '0')) aindex++;
      while ((bindex < blen) && (b[bindex] == '0')) bindex++;
      azeroes = aindex - azeroes;
      bzeroes = bindex - bzeroes;

      // Count digit lengths
      const size_t astart = aindex;
      const size_t bstart = bindex;
      while ((aindex < alen) && is_digit(a[aindex])) aindex++;
      while ((bindex < blen) && is_digit(b[bindex])) bindex++;
      const size_t arun = aindex - astart;
      const size_t brun = bindex - bstart;
      if (arun < brun) {
        // a has shorter run of digits: so smaller
        return -1;
      } else if (arun > brun) {
        // a has longer run of digits: so larger
        return 1;
      } else {
        // Same lengths, so compare digit by digit
        for (size_t i = 0; i < arun; i++) {
          if (a[astart + i] < b[bstart + i]) {
            return -1;
          } else if (a[astart + i] > b[bstart + i]) {
            return 1;
          }
        }
        // Equal: did one have more leading zeroes?
        if (strict && azeroes != bzeroes) {
          if (azeroes > bzeroes) {
            // a has more leading zeroes: a < b
            return -1;
          } else {
            // b has more leading zeroes: a > b
            return 1;
          }
        }
        // Equal: so continue scanning
      }
    } else if (a[aindex] < b[bindex]) {
      return -1;
    } else if (a[aindex] > b[bindex]) {
      return 1;
    } else {
      aindex++;
      bindex++;
    }
  }

  if (aindex < alen) {
    // b is prefix of a
    return 1;
  } else if (bindex < blen) {
    // a is prefix of b
    return -1;
  } else {
    // a is equal to b
    return 0;
  }
}
1207
1208
0
bool AutoDigitLessThan(const char* a, size_t alen, const char* b, size_t blen) {
1209
0
  return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
1210
0
}
1211
1212
bool StrictAutoDigitLessThan(const char* a, size_t alen,
1213
0
                             const char* b, size_t blen) {
1214
0
  return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
1215
0
}
1216
1217
// ----------------------------------------------------------------------
1218
// SimpleDtoa()
1219
// SimpleFtoa()
1220
// DoubleToBuffer()
1221
// FloatToBuffer()
1222
//    We want to print the value without losing precision, but we also do
1223
//    not want to print more digits than necessary.  This turns out to be
1224
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
1225
//    exactly in binary.  If we print 0.2 with a very large precision,
1226
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1227
//    On the other hand, if we set the precision too low, we lose
1228
//    significant digits when printing numbers that actually need them.
1229
//    It turns out there is no precision value that does the right thing
1230
//    for all numbers.
1231
//
1232
//    Our strategy is to first try printing with a precision that is never
1233
//    over-precise, then parse the result with strtod() to see if it
1234
//    matches.  If not, we print again with a precision that will always
1235
//    give a precise result, but may use more digits than necessary.
1236
//
1237
//    An arguably better strategy would be to use the algorithm described
1238
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
1239
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
1240
//    however, that the following implementation is about as fast as
1241
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
1242
//    will not scale well on multi-core machines.  DMG's code is slightly
1243
//    more accurate (in that it will never use more digits than
1244
//    necessary), but this is probably irrelevant for most users.
1245
//
1246
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
1247
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
1248
//    one in that it makes guesses and then uses strtod() to check them.
1249
//    Their implementation is faster because they use their own code to
1250
//    generate the digits in the first place rather than use snprintf(),
1251
//    thus avoiding format string parsing overhead.  However, this makes
1252
//    it considerably more complicated than the following implementation,
1253
//    and it is embedded in a larger library.  If speed turns out to be
1254
//    an issue, we could re-implement this in terms of their
1255
//    implementation.
1256
// ----------------------------------------------------------------------
1257
1258
31
// Returns the text DoubleToBuffer() produces for `value`, as a string.
string SimpleDtoa(double value) {
  char scratch[kDoubleToBufferSize];
  const char* const text = DoubleToBuffer(value, scratch);
  return text;
}
1262
1263
5
// Returns the text FloatToBuffer() produces for `value`, as a string.
string SimpleFtoa(float value) {
  char scratch[kFloatToBufferSize];
  const char* const text = FloatToBuffer(value, scratch);
  return text;
}
1267
1268
51
char* DoubleToBuffer(double value, char* buffer) {
1269
  // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1270
  // platforms these days.  Just in case some system exists where DBL_DIG
1271
  // is significantly larger -- and risks overflowing our buffer -- we have
1272
  // this assert.
1273
51
  COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1274
1275
51
  if (value == std::numeric_limits<double>::infinity()) {
1276
6
    strncpy(buffer, "inf", kDoubleToBufferSize);
1277
6
    return buffer;
1278
45
  } else if (value == -std::numeric_limits<double>::infinity()) {
1279
4
    strncpy(buffer, "-inf", kDoubleToBufferSize);
1280
4
    return buffer;
1281
41
  } else if (isnan(value)) {
1282
10
    strncpy(buffer, "nan", kDoubleToBufferSize);
1283
10
    return buffer;
1284
10
  }
1285
1286
31
  int snprintf_result =
1287
31
    snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1288
1289
  // The snprintf should never overflow because the buffer is significantly
1290
  // larger than the precision we asked for.
1291
31
  DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1292
1293
  // We need to make parsed_value volatile in order to force the compiler to
1294
  // write it out to the stack.  Otherwise, it may keep the value in a
1295
  // register, and if it does that, it may keep it as a long double instead
1296
  // of a double.  This long double may have extra bits that make it compare
1297
  // unequal to "value" even though it would be exactly equal if it were
1298
  // truncated to a double.
1299
31
  volatile double parsed_value = strtod(buffer, NULL);
1300
31
  if (parsed_value != value) {
1301
6
    int snprintf_result =
1302
6
      snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
1303
1304
    // Should never overflow; see above.
1305
6
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1306
6
  }
1307
1308
31
  return buffer;
1309
51
}
1310
1311
5
char* FloatToBuffer(float value, char* buffer) {
1312
  // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1313
  // platforms these days.  Just in case some system exists where FLT_DIG
1314
  // is significantly larger -- and risks overflowing our buffer -- we have
1315
  // this assert.
1316
5
  COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1317
1318
5
  if (value == std::numeric_limits<double>::infinity()) {
1319
0
    strncpy(buffer, "inf", kFloatToBufferSize);
1320
0
    return buffer;
1321
5
  } else if (value == -std::numeric_limits<double>::infinity()) {
1322
0
    strncpy(buffer, "-inf", kFloatToBufferSize);
1323
0
    return buffer;
1324
5
  } else if (isnan(value)) {
1325
0
    strncpy(buffer, "nan", kFloatToBufferSize);
1326
0
    return buffer;
1327
0
  }
1328
1329
5
  int snprintf_result =
1330
5
    snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1331
1332
  // The snprintf should never overflow because the buffer is significantly
1333
  // larger than the precision we asked for.
1334
5
  DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1335
1336
5
  float parsed_value;
1337
5
  if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1338
5
    int snprintf_result =
1339
5
      snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+3, value);
1340
1341
    // Should never overflow; see above.
1342
5
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1343
5
  }
1344
1345
5
  return buffer;
1346
5
}
1347
1348
// ----------------------------------------------------------------------
1349
// SimpleItoaWithCommas()
1350
//    Description: converts an integer to a string.
1351
//    Puts a comma between every group of 3 digits.
1352
//    Faster than printf("%d")?
1353
//
1354
//    Return value: string
1355
// ----------------------------------------------------------------------
1356
0
namespace {

// Shared worker for the SimpleItoaWithCommas() overloads: formats
// `magnitude` in decimal with a ',' between each group of three digits,
// counted from the right, and prefixes '-' when `negative` is set.
string SimpleItoaWithCommasImpl(uint64 magnitude, bool negative) {
  // The longest outputs are 18,446,744,073,709,551,615 (20 digits and
  // 6 commas) and -9,223,372,036,854,775,808 (19 digits, 6 commas and a
  // sign): 26 characters either way.
  char local[26];
  char* const end = local + sizeof(local);
  char* p = end;

  // Digits are produced right to left.  The do/while guarantees that at
  // least one digit is written, which deals with the number "0".
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--p = ',';
      digits_in_group = 0;
    }
    *--p = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
    ++digits_in_group;
  } while (magnitude != 0);

  if (negative) {
    *--p = '-';
  }
  return string(p, end);
}

}  // namespace

string SimpleItoaWithCommas(int32 i) {
  // Need to use uint32 instead of int32 to correctly handle
  // -2,147,483,648: negate the unsigned value to avoid overflow.
  uint32 n = i;
  if (i < 0) {
    n = 0 - n;
  }
  return SimpleItoaWithCommasImpl(n, i < 0);
}

// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
// compile.
string SimpleItoaWithCommas(uint32 i) {
  return SimpleItoaWithCommasImpl(i, false);
}

string SimpleItoaWithCommas(int64 i) {
  // Need to use uint64 instead of int64 to correctly handle
  // -9,223,372,036,854,775,808.
  uint64 n = i;
  if (i < 0) {
    n = 0 - n;
  }
  return SimpleItoaWithCommasImpl(n, i < 0);
}

// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
// compile.
string SimpleItoaWithCommas(uint64 i) {
  return SimpleItoaWithCommasImpl(i, false);
}
1468
1469
// ----------------------------------------------------------------------
1470
// ItoaKMGT()
1471
//    Description: converts an integer to a string
1472
//    Truncates values to a readable unit: K, M, G or T
1473
//    Opposite of atoi_kmgt()
1474
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   57185920 -> "45M"
1475
//
1476
//    Return value: string
1477
// ----------------------------------------------------------------------
1478
0
// Renders `i` with the largest binary unit (T = 2^40, G = 2^30, M = 2^20,
// K = 2^10) of which the magnitude holds at least two whole units, truncating
// the remainder; smaller magnitudes are printed in full without a suffix.
// Negative values keep a leading '-'.
string ItoaKMGT(int64 i) {
  const char* sign = "";
  if (i < 0) {
    // We lose some accuracy if the caller passes LONG_LONG_MIN, but
    // that's OK as this function is only for human readability
    if (i == numeric_limits<int64>::min()) i++;
    sign = "-";
    i = -i;
  }

  struct Unit {
    int shift;
    const char* suffix;
  };
  static const Unit kUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}, {10, "K"}};

  // Default to the plain value; the first unit, largest first, that leaves
  // more than 1 after shifting wins.
  int64 val = i;
  const char* suffix = "";
  for (const Unit& unit : kUnits) {
    const int64 scaled = i >> unit.shift;
    if (scaled > 1) {
      val = scaled;
      suffix = unit.suffix;
      break;
    }
  }

  return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
1504
1505
// DEPRECATED(wadetregaskis).
1506
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1507
1508
0
// Formats `f` with the caller-supplied printf-style `format`.
// NOTE(review): `format` is passed to StringPrintf() unchecked, so it must
// contain exactly one conversion that is compatible with a float argument.
string FloatToString(float f, const char* format) {
  string formatted = StringPrintf(format, f);
  return formatted;
}
1511
1512
0
// Formats `i` with the caller-supplied printf-style `format`.
// NOTE(review): `format` is passed to StringPrintf() unchecked, so it must
// contain exactly one conversion that is compatible with an int argument.
string IntToString(int i, const char* format) {
  string formatted = StringPrintf(format, i);
  return formatted;
}
1515
1516
0
// Formats `i64` with the caller-supplied printf-style `format`.
// NOTE(review): `format` is passed to StringPrintf() unchecked, so it must
// contain exactly one conversion that is compatible with an int64 argument.
string Int64ToString(int64 i64, const char* format) {
  string formatted = StringPrintf(format, i64);
  return formatted;
}
1519
1520
0
// Formats `ui64` with the caller-supplied printf-style `format`.
// NOTE(review): `format` is passed to StringPrintf() unchecked, so it must
// contain exactly one conversion that is compatible with a uint64 argument.
string UInt64ToString(uint64 ui64, const char* format) {
  string formatted = StringPrintf(format, ui64);
  return formatted;
}
1523
1524
namespace {
  // Decimal (power-of-1000) unit sizes used by HumanizeBytes().
  constexpr int64_t kBytesPerGB = 1000000000;
  constexpr int64_t kBytesPerMB = 1000000;
  constexpr int64_t kBytesPerKB = 1000;
}

// Renders a byte count using the largest decimal unit (GB, MB, KB) that the
// count reaches, with `precision` digits after the decimal point.  Counts
// below 1000 are printed as a whole number followed by " B".
string HumanizeBytes(uint64_t bytes, int precision) {
  // Select the unit first; a zero divisor means "plain bytes".
  int64_t divisor = 0;
  const char* unit = " B";
  if (bytes >= kBytesPerGB) {
    divisor = kBytesPerGB;
    unit = " GB";
  } else if (bytes >= kBytesPerMB) {
    divisor = kBytesPerMB;
    unit = " MB";
  } else if (bytes >= kBytesPerKB) {
    divisor = kBytesPerKB;
    unit = " KB";
  }

  std::ostringstream text;
  text << std::fixed << std::setprecision(precision);
  if (divisor == 0) {
    // Integer output is unaffected by the fixed/precision settings.
    text << bytes;
  } else {
    text << static_cast<double>(bytes) / divisor;
  }
  text << unit;
  return text.str();
}