/Users/deen/code/yugabyte-db/src/yb/gutil/strings/numbers.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.cc |
3 | | // |
4 | | // The following only applies to changes made to this file as part of YugaByte development. |
5 | | // |
6 | | // Portions Copyright (c) YugaByte, Inc. |
7 | | // |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
9 | | // in compliance with the License. You may obtain a copy of the License at |
10 | | // |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
14 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
15 | | // or implied. See the License for the specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // This file contains string processing functions related to |
19 | | // numeric values. |
20 | | |
21 | | #include "yb/gutil/strings/numbers.h" |
22 | | |
23 | | #include <assert.h> |
24 | | #include <ctype.h> |
25 | | #include <errno.h> |
26 | | #include <float.h> // for DBL_DIG and FLT_DIG |
27 | | #include <math.h> // for HUGE_VAL |
28 | | #include <stdio.h> |
29 | | #include <stdlib.h> |
30 | | #include <string.h> |
31 | | |
32 | | #include <iomanip> |
33 | | #include <limits> |
34 | | #include <sstream> |
35 | | |
36 | | #include <glog/logging.h> |
37 | | |
38 | | #include "yb/gutil/casts.h" |
39 | | #include "yb/gutil/int128.h" |
40 | | #include "yb/gutil/integral_types.h" |
41 | | #include "yb/gutil/stringprintf.h" |
42 | | #include "yb/gutil/strings/ascii_ctype.h" |
43 | | #include "yb/gutil/strtoint.h" |
44 | | |
45 | | using std::numeric_limits; |
46 | | using std::string; |
47 | | |
48 | | |
49 | | // Reads a <double> in *text, which may not be whitespace-initiated. |
50 | | // *len is the length, or -1 if text is '\0'-terminated, which is more |
51 | | // efficient. Sets *text to the end of the double, and val to the |
52 | | // converted value, and the length of the double is subtracted from |
53 | | // *len. <double> may also be a '?', in which case val will be |
54 | | // unchanged. Returns true upon success. If initial_minus is |
55 | | // non-NULL, then *initial_minus will indicate whether the first |
56 | | // symbol seen was a '-', which will be ignored. Similarly, if |
57 | | // final_period is non-NULL, then *final_period will indicate whether |
58 | | // the last symbol seen was a '.', which will be ignored. This is |
59 | | // useful in case that an initial '-' or final '.' would have another |
60 | | // meaning (as a separator, e.g.). |
// Parses one double at the front of *text.
//
//   text            in/out cursor; advanced past the consumed characters on
//                   success, left untouched on failure.
//   len             in/out remaining byte count, or -1 when *text is
//                   NUL-terminated (in which case it is never modified).
//   allow_question  when true, a single '?' is accepted in place of a number
//                   and *val is left unchanged.
//   val             receives the parsed value (not written for '?').
//   initial_minus   optional; when non-null, a leading '-' is skipped rather
//                   than parsed, and its presence is reported here.
//   final_period    optional; when non-null, a '.' that ends the parsed
//                   number is handed back to the caller (not consumed) and
//                   its presence is reported here.
//
// Returns true iff a double (or an allowed '?') was consumed.
static inline bool EatADouble(const char** text, ssize_t* len, bool allow_question,
                              double* val, bool* initial_minus,
                              bool* final_period) {
  const char* cursor = *text;
  ssize_t remaining = *len;  // -1 means "NUL-terminated, length unknown"

  if (cursor == nullptr || remaining == 0) {
    return false;
  }

  // '?' stands for "no value": consume it and leave *val alone.
  if (allow_question && *cursor == '?') {
    *text = cursor + 1;
    if (remaining != -1) {
      *len = remaining - 1;
    }
    return true;
  }

  if (initial_minus != nullptr) {
    const bool starts_with_minus = (*cursor == '-');
    *initial_minus = starts_with_minus;
    if (starts_with_minus) {
      if (remaining == 1) {
        return false;  // a lone '-' with nothing after it
      }
      ++cursor;
      if (remaining != -1) {
        --remaining;
      }
    }
  }

  // Cheap pre-filter: a number must start with a sign, a '.', or a digit.
  // This also rejects leading whitespace before strtod gets to skip it.
  if (strchr("-+.0123456789", *cursor) == nullptr) {
    return false;
  }

  // strtod wants a non-const char** for its end pointer.
  char* number_end = nullptr;
  double parsed = 0;
  if (remaining == -1) {
    parsed = strtod(cursor, &number_end);
  } else {
    // The input is length-delimited, so parse a NUL-terminated copy and map
    // the end offset back onto the caller's buffer.
    const std::string scratch(cursor, static_cast<size_t>(remaining));
    char* scratch_end = nullptr;
    parsed = strtod(scratch.c_str(), &scratch_end);
    number_end = const_cast<char*>(cursor) + (scratch_end - scratch.c_str());
  }

  if (number_end == cursor) {
    return false;  // strtod consumed nothing
  }

  if (final_period != nullptr) {
    const bool ends_with_period = (number_end[-1] == '.');
    *final_period = ends_with_period;
    if (ends_with_period) {
      --number_end;  // give the trailing '.' back to the caller
    }
  }

  *text = number_end;
  *val = parsed;
  if (remaining != -1) {
    *len = remaining - (number_end - cursor);
  }
  return true;
}
122 | | |
123 | | // If update, consume one of acceptable_chars from string *text of |
124 | | // length len and return that char, or '\0' otherwise. If len is -1, |
125 | | // *text is null-terminated. If update is false, don't alter *text and |
126 | | // *len. If null_ok, then update must be false, and, if text has no |
127 | | // more chars, then return '\1' (arbitrary nonzero). |
// Tests whether the next character of *text is one of acceptable_chars.
//
// Returns the matching character, or '\0' when the next character does not
// match. When the input is exhausted (*len == 0 or the next byte is NUL) the
// result is '\1' if null_ok is set and '\0' otherwise. With update == true a
// matching character is consumed: *text advances and *len is decremented
// unless it is -1 (NUL-terminated input). update and null_ok must not both be
// true.
static inline char EatAChar(const char** text, ssize_t* len,
                            const char* acceptable_chars,
                            bool update, bool null_ok) {
  assert(!(update && null_ok));

  const bool exhausted = (*len == 0) || (**text == '\0');
  if (exhausted) {
    // In predicate mode (null_ok) end-of-input counts as acceptable.
    return null_ok ? '\1' : '\0';
  }

  const char candidate = **text;
  if (strchr(acceptable_chars, candidate) == nullptr) {
    return '\0';  // no match; nothing consumed
  }

  if (update) {
    ++(*text);
    if (*len != -1) {
      --(*len);
    }
  }
  return candidate;
}
147 | | |
148 | | // Parse an expression in 'text' of the form: <comparator><double> or |
149 | | // <double><sep><double> See full comments in header file. |
// Parses a numeric range at the front of `text` (length `len`, or -1 when
// NUL-terminated). Two shapes are handled:
//   * <comparator><double>     ('<' or '>', optionally followed by '='),
//     only when opts.allow_comparators is set;
//   * <double><sep><double>    where either side may be missing, subject to
//     opts.num_required_bounds and opts.require_separator.
// On success *from / *to hold the bounds that were seen, and *end points at
// the first character after the range. Unless opts.dont_modify_unbounded is
// set, *from and *to are first reset to -HUGE_VAL / +HUGE_VAL. When
// opts.allow_currency is set and is_currency is non-null, *is_currency
// reports whether a '$' prefix was consumed. The return value is the
// (bool-converted) result of checking the character after the range against
// opts.acceptable_terminators.
bool ParseDoubleRange(const char* text, ssize_t len, const char** end,
                      double* from, double* to, bool* is_currency,
                      const DoubleRangeOptions& opts) {
  // Value restored into *from when a lone "-<number>" turns out to be an
  // upper bound (see the no-separator branch below).
  const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;

  if (!opts.dont_modify_unbounded) {
    *from = -HUGE_VAL;
    *to = HUGE_VAL;
  }
  if (opts.allow_currency && (is_currency != nullptr))
    *is_currency = false;

  assert(len >= -1);
  assert(opts.separators && (*opts.separators != '\0'));
  // these aren't valid separators
  assert(strlen(opts.separators) ==
         strcspn(opts.separators, "+0123456789eE$"));
  assert(opts.num_required_bounds <= 2);

  // Handle easier cases of comparators (<, >) first
  if (opts.allow_comparators) {
    char comparator = EatAChar(&text, &len, "<>", true, false);
    if (comparator) {
      // '>' bounds the range from below, '<' from above.
      double* dest = (comparator == '>') ? from : to;
      // An optional '=' is consumed and otherwise ignored.
      EatAChar(&text, &len, "=", true, false);
      if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
        if (is_currency != nullptr)
          *is_currency = true;
      if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr,
                      nullptr))
        return false;
      *end = text;
      // Predicate-mode check (update == false): nothing more is consumed.
      return EatAChar(&text, &len, opts.acceptable_terminators, false,
                      opts.null_terminator_ok);
    }
  }

  bool seen_dollar = (opts.allow_currency &&
                      EatAChar(&text, &len, "$", true, false));

  // If we see a '-', two things could be happening: -<to> or
  // <from>... where <from> is negative. Treat initial minus sign as a
  // separator if '-' is a valid separator.
  // Similarly, we prepare for the possibility of seeing a '.' at the
  // end of the number, in case '.' (which really means '..') is a
  // separator.
  bool initial_minus_sign = false;
  bool final_period = false;
  bool* check_initial_minus = (strchr(opts.separators, '-') && !seen_dollar
                               && (opts.num_required_bounds < 2)) ?
                              (&initial_minus_sign) : nullptr;
  bool* check_final_period = strchr(opts.separators, '.') ? (&final_period)
                                                          : nullptr;
  bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers,
                                from, check_initial_minus, check_final_period);

  // if 2 bounds required, must see a double (or '?' if allowed)
  if ((opts.num_required_bounds == 2) && !double_seen) return false;

  // A '$' that is not followed by a number was not a currency marker after
  // all: put it back.
  if (seen_dollar && !double_seen) {
    --text;
    if (len != -1)
      ++len;
    seen_dollar = false;
  }
  // If we're here, we've read the first double and now expect a
  // separator and another <double>.
  char separator = EatAChar(&text, &len, opts.separators, true, false);
  if (separator == '.') {
    // seen one '.' as separator; must check for another
    if (EatAChar(&text, &len, ".", true, false)) {
      if (final_period) {
        // We may have three periods in a row. The first is part of the
        // first number, the others are a separator. Policy: 234...567
        // is "234." to "567", not "234" to ".567".
        EatAChar(&text, &len, ".", true, false);
      }
    } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
      // just one '.' and no other separator; uneat the first '.' we saw
      --text;
      if (len != -1)
        ++len;
      separator = '\0';
    }
  }
  // By now, we've consumed whatever separator there may have been,
  // and separator is true iff there was one.
  if (!separator) {
    if (final_period)  // final period now considered part of first double
      EatAChar(&text, &len, ".", true, false);
    if (initial_minus_sign && double_seen) {
      // "-<number>" with no separator: the '-' acted as the separator, so
      // the number is the upper bound and the lower bound is unbounded.
      *to = *from;
      *from = from_default;
    } else if (opts.require_separator ||
               (opts.num_required_bounds > 0 && !double_seen) ||
               (opts.num_required_bounds > 1) ) {
      return false;
    }
  } else {
    // A separator followed, so the leading '-' really was a sign.
    if (initial_minus_sign && double_seen)
      *from = -(*from);
    // read second <double>
    // A second '$' is only looked for if the first bound had one, or if
    // currency is allowed and there was no first bound at all.
    bool second_dollar_seen = (seen_dollar
                               || (opts.allow_currency && !double_seen))
                              && EatAChar(&text, &len, "$", true, false);
    bool second_double_seen = EatADouble(
        &text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
    if (opts.num_required_bounds > static_cast<uint32_t>(double_seen + second_double_seen))
      return false;
    // Same "put the '$' back" rule as for the first bound.
    if (second_dollar_seen && !second_double_seen) {
      --text;
      if (len != -1)
        ++len;
      second_dollar_seen = false;
    }
    seen_dollar = seen_dollar || second_dollar_seen;
  }

  if (seen_dollar && (is_currency != nullptr))
    *is_currency = true;
  // We're done. But we have to check that the next char is a proper
  // terminator.
  *end = text;
  char terminator = EatAChar(&text, &len, opts.acceptable_terminators, false,
                             opts.null_terminator_ok);
  // NOTE(review): when the terminator is '.', *end is moved back one
  // character; presumably so a period swallowed as part of the preceding
  // number is left to the caller -- confirm against the header contract.
  if (terminator == '.')
    --(*end);
  return terminator;
}
279 | | |
280 | | // ---------------------------------------------------------------------- |
281 | | // ConsumeStrayLeadingZeroes |
282 | | // Eliminates all leading zeroes (unless the string itself is composed |
283 | | // of nothing but zeroes, in which case one is kept: 0...0 becomes 0). |
284 | | // -------------------------------------------------------------------- |
285 | | |
// Strips leading '0' characters from *str in place. If the string consists of
// nothing but zeroes, exactly one is kept ("0000" -> "0"). Strings of length
// 0 or 1, and strings that do not start with '0', are left unchanged.
// str must not be null.
void ConsumeStrayLeadingZeroes(std::string* const str) {
  if (str->size() <= 1 || (*str)[0] != '0') {
    return;
  }
  const std::string::size_type first_nonzero = str->find_first_not_of('0');
  // npos means every character is '0': drop all but the last one.
  const std::string::size_type remove =
      (first_nonzero == std::string::npos) ? str->size() - 1 : first_nonzero;
  str->erase(0, remove);
}
304 | | |
305 | | // ---------------------------------------------------------------------- |
306 | | // ParseLeadingInt32Value() |
307 | | // ParseLeadingUInt32Value() |
308 | | // A simple parser for [u]int32 values. Returns the parsed value |
309 | | // if a valid value is found; else returns deflt |
310 | | // This cannot handle decimal numbers with leading 0s. |
311 | | // -------------------------------------------------------------------- |
312 | | |
313 | 1 | int32 ParseLeadingInt32Value(const char *str, int32 deflt) { |
314 | 1 | char *error = nullptr; |
315 | 1 | auto value = strtol(str, &error, 0); |
316 | | // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. |
317 | 1 | if (value > numeric_limits<int32>::max()) { |
318 | 0 | value = numeric_limits<int32>::max(); |
319 | 1 | } else if (value < numeric_limits<int32>::min()) { |
320 | 0 | value = numeric_limits<int32>::min(); |
321 | 0 | } |
322 | 1 | return (error == str) ? deflt : narrow_cast<int32>(value); |
323 | 1 | } |
324 | | |
325 | 0 | uint32 ParseLeadingUInt32Value(const char *str, uint32 deflt) { |
326 | 0 | if (numeric_limits<size_t>::max() == numeric_limits<uint32>::max()) { |
327 | | // When long is 32 bits, we can use strtoul. |
328 | 0 | char *error = nullptr; |
329 | 0 | const uint32 value = strtoul(str, &error, 0); |
330 | 0 | return (error == str) ? deflt : value; |
331 | 0 | } else { |
332 | | // When long is 64 bits, we must use strto64 and handle limits |
333 | | // by hand. The reason we cannot use a 64-bit strtoul is that |
334 | | // it would be impossible to differentiate "-2" (that should wrap |
335 | | // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 |
336 | | // (that should be pegged to UINT_MAX due to overflow). |
337 | 0 | char *error = nullptr; |
338 | 0 | int64 value = strto64(str, &error, 0); |
339 | 0 | if (value > numeric_limits<uint32>::max() || |
340 | 0 | value < -static_cast<int64>(numeric_limits<uint32>::max())) { |
341 | 0 | value = numeric_limits<uint32>::max(); |
342 | 0 | } |
343 | | // Within these limits, truncation to 32 bits handles negatives correctly. |
344 | 0 | return (error == str) ? deflt : narrow_cast<uint32>(value); |
345 | 0 | } |
346 | 0 | } |
347 | | |
348 | | // ---------------------------------------------------------------------- |
349 | | // ParseLeadingDec32Value |
350 | | // ParseLeadingUDec32Value |
351 | | // A simple parser for [u]int32 values. Returns the parsed value |
352 | | // if a valid value is found; else returns deflt |
353 | | // The string passed in is treated as *10 based*. |
354 | | // This can handle strings with leading 0s. |
355 | | // -------------------------------------------------------------------- |
356 | | |
357 | 0 | int32 ParseLeadingDec32Value(const char *str, int32 deflt) { |
358 | 0 | char *error = nullptr; |
359 | 0 | auto value = strtol(str, &error, 10); |
360 | | // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. |
361 | 0 | if (value > numeric_limits<int32>::max()) { |
362 | 0 | value = numeric_limits<int32>::max(); |
363 | 0 | } else if (value < numeric_limits<int32>::min()) { |
364 | 0 | value = numeric_limits<int32>::min(); |
365 | 0 | } |
366 | 0 | return (error == str) ? deflt : narrow_cast<int32>(value); |
367 | 0 | } |
368 | | |
369 | 0 | uint32 ParseLeadingUDec32Value(const char *str, uint32 deflt) { |
370 | 0 | if (numeric_limits<size_t>::max() == numeric_limits<uint32>::max()) { |
371 | | // When long is 32 bits, we can use strtoul. |
372 | 0 | char *error = nullptr; |
373 | 0 | const uint32 value = strtoul(str, &error, 10); |
374 | 0 | return (error == str) ? deflt : value; |
375 | 0 | } else { |
376 | | // When long is 64 bits, we must use strto64 and handle limits |
377 | | // by hand. The reason we cannot use a 64-bit strtoul is that |
378 | | // it would be impossible to differentiate "-2" (that should wrap |
379 | | // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 |
380 | | // (that should be pegged to UINT_MAX due to overflow). |
381 | 0 | char *error = nullptr; |
382 | 0 | int64 value = strto64(str, &error, 10); |
383 | 0 | if (value > numeric_limits<uint32>::max() || |
384 | 0 | value < -static_cast<int64>(numeric_limits<uint32>::max())) { |
385 | 0 | value = numeric_limits<uint32>::max(); |
386 | 0 | } |
387 | | // Within these limits, truncation to 32 bits handles negatives correctly. |
388 | 0 | return (error == str) ? deflt : narrow_cast<uint32>(value); |
389 | 0 | } |
390 | 0 | } |
391 | | |
392 | | // ---------------------------------------------------------------------- |
393 | | // ParseLeadingUInt64Value |
394 | | // ParseLeadingInt64Value |
395 | | // ParseLeadingHex64Value |
396 | | // A simple parser for 64-bit values. Returns the parsed value if a |
397 | | // valid integer is found; else returns deflt |
398 | | // UInt64 and Int64 cannot handle decimal numbers with leading 0s. |
399 | | // -------------------------------------------------------------------- |
400 | 0 | uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) { |
401 | 0 | char *error = nullptr; |
402 | 0 | const uint64 value = strtou64(str, &error, 0); |
403 | 0 | return (error == str) ? deflt : value; |
404 | 0 | } |
405 | | |
406 | 0 | int64 ParseLeadingInt64Value(const char *str, int64 deflt) { |
407 | 0 | char *error = nullptr; |
408 | 0 | const int64 value = strto64(str, &error, 0); |
409 | 0 | return (error == str) ? deflt : value; |
410 | 0 | } |
411 | | |
412 | 0 | uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { |
413 | 0 | char *error = nullptr; |
414 | 0 | const uint64 value = strtou64(str, &error, 16); |
415 | 0 | return (error == str) ? deflt : value; |
416 | 0 | } |
417 | | |
418 | | // ---------------------------------------------------------------------- |
419 | | // ParseLeadingDec64Value |
420 | | // ParseLeadingUDec64Value |
421 | | // A simple parser for [u]int64 values. Returns the parsed value |
422 | | // if a valid value is found; else returns deflt |
423 | | // The string passed in is treated as *10 based*. |
424 | | // This can handle strings with leading 0s. |
425 | | // -------------------------------------------------------------------- |
426 | | |
427 | 0 | int64 ParseLeadingDec64Value(const char *str, int64 deflt) { |
428 | 0 | char *error = nullptr; |
429 | 0 | const int64 value = strto64(str, &error, 10); |
430 | 0 | return (error == str) ? deflt : value; |
431 | 0 | } |
432 | | |
433 | 0 | uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) { |
434 | 0 | char *error = nullptr; |
435 | 0 | const uint64 value = strtou64(str, &error, 10); |
436 | 0 | return (error == str) ? deflt : value; |
437 | 0 | } |
438 | | |
439 | | // ---------------------------------------------------------------------- |
440 | | // ParseLeadingDoubleValue() |
441 | | // A simple parser for double values. Returns the parsed value |
442 | | // if a valid value is found; else returns deflt |
443 | | // -------------------------------------------------------------------- |
444 | | |
// Parses the double at the start of str with strtod. Returns deflt when
// nothing could be converted or when strtod reported an error through errno
// (overflow/underflow); otherwise returns the parsed value. Trailing
// characters after the number are ignored. errno is reset to 0 before the
// conversion.
double ParseLeadingDoubleValue(const char *str, double deflt) {
  char* parse_end = nullptr;
  errno = 0;
  const double parsed = strtod(str, &parse_end);
  const bool range_error = (errno != 0);        // overflow/underflow happened
  const bool nothing_parsed = (parse_end == str);  // no valid parse
  return (range_error || nothing_parsed) ? deflt : parsed;
}
456 | | |
457 | | // ---------------------------------------------------------------------- |
458 | | // ParseLeadingBoolValue() |
459 | | // A recognizer of boolean string values. Returns the parsed value |
460 | | // if a valid value is found; else returns deflt. This skips leading |
461 | | // whitespace, is case insensitive, and recognizes these forms: |
462 | | // 0/1, false/true, no/yes, n/y |
463 | | // -------------------------------------------------------------------- |
464 | 46.1k | bool ParseLeadingBoolValue(const char *str, bool deflt) { |
465 | 46.1k | static const int kMaxLen = 5; |
466 | 46.1k | char value[kMaxLen + 1]; |
467 | | // Skip whitespace |
468 | 46.1k | while (ascii_isspace(*str)) { |
469 | 0 | ++str; |
470 | 0 | } |
471 | 46.1k | int len = 0; |
472 | 277k | for (; len <= kMaxLen && ascii_isalnum(*str); ++str) |
473 | 230k | value[len++] = ascii_tolower(*str); |
474 | 46.1k | if (len == 0 || len > kMaxLen) |
475 | 0 | return deflt; |
476 | 46.1k | value[len] = '\0'; |
477 | 46.1k | switch (len) { |
478 | 0 | case 1: |
479 | 0 | if (value[0] == '0' || value[0] == 'n') |
480 | 0 | return false; |
481 | 0 | if (value[0] == '1' || value[0] == 'y') |
482 | 0 | return true; |
483 | 0 | break; |
484 | 0 | case 2: |
485 | 0 | if (!strcmp(value, "no")) |
486 | 0 | return false; |
487 | 0 | break; |
488 | 0 | case 3: |
489 | 0 | if (!strcmp(value, "yes")) |
490 | 0 | return true; |
491 | 0 | break; |
492 | 0 | case 4: |
493 | 0 | if (!strcmp(value, "true")) |
494 | 0 | return true; |
495 | 0 | break; |
496 | 46.1k | case 5: |
497 | 46.1k | if (!strcmp(value, "false")) |
498 | 46.1k | return false; |
499 | 0 | break; |
500 | 0 | } |
501 | 0 | return deflt; |
502 | 0 | } |
503 | | |
504 | | |
505 | | // ---------------------------------------------------------------------- |
506 | | // FpToString() |
507 | | // FloatToString() |
508 | | // IntToString() |
509 | | // Convert various types to their string representation, possibly padded |
510 | | // with spaces, using snprintf format specifiers. |
511 | | // ---------------------------------------------------------------------- |
512 | | |
513 | 0 | string FpToString(Fprint fp) { |
514 | 0 | char buf[17]; |
515 | 0 | snprintf(buf, sizeof(buf), "%016" PRIx64, fp); |
516 | 0 | return string(buf); |
517 | 0 | } |
518 | | |
519 | | // Default arguments |
520 | 0 | string Uint128ToHexString(uint128 ui128) { |
521 | 0 | char buf[33]; |
522 | 0 | snprintf(buf, sizeof(buf), "%016" PRIx64, |
523 | 0 | Uint128High64(ui128)); |
524 | 0 | snprintf(buf + 16, sizeof(buf) - 16, "%016" PRIx64, |
525 | 0 | Uint128Low64(ui128)); |
526 | 0 | return string(buf); |
527 | 0 | } |
528 | | |
// Formats ui16 as exactly four upper-case hexadecimal digits, zero-padded
// (the same text "%04X" produces).
std::string Uint16ToHexString(uint16_t ui16) {
  static const char kHexDigits[] = "0123456789ABCDEF";
  char digits[4];
  // Fill from the least-significant nibble backwards.
  for (int i = 3; i >= 0; --i) {
    digits[i] = kHexDigits[ui16 & 0xF];
    ui16 = static_cast<uint16_t>(ui16 >> 4);
  }
  return std::string(digits, sizeof(digits));
}
534 | | |
535 | | namespace { |
536 | | |
537 | | // Represents integer values of digits. |
538 | | // Uses 36 to indicate an invalid character since we support |
539 | | // bases up to 36. |
540 | | static const int8 kAsciiToInt[256] = { |
541 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. |
542 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
543 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
544 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, |
545 | | 36, 36, 36, 36, 36, 36, 36, |
546 | | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
547 | | 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, |
548 | | 36, 36, 36, 36, 36, 36, |
549 | | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, |
550 | | 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, |
551 | | 36, 36, 36, 36, 36, |
552 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
553 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
554 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
555 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
556 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
557 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
558 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
559 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36 }; |
560 | | |
561 | | // Input format based on POSIX.1-2008 strtol |
562 | | // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html |
563 | | template<typename IntType> |
564 | | bool safe_int_internal(const char* start, const char* end, int base, |
565 | 0 | IntType* value_p) { |
566 | | // Consume whitespace. |
567 | 0 | while (start < end && ascii_isspace(start[0])) { |
568 | 0 | ++start; |
569 | 0 | } |
570 | 0 | while (start < end && ascii_isspace(end[-1])) { |
571 | 0 | --end; |
572 | 0 | } |
573 | 0 | if (start >= end) { |
574 | 0 | return false; |
575 | 0 | } |
576 | | |
577 | | // Consume sign. |
578 | 0 | const bool negative = (start[0] == '-'); |
579 | 0 | if (negative || start[0] == '+') { |
580 | 0 | ++start; |
581 | 0 | if (start >= end) { |
582 | 0 | return false; |
583 | 0 | } |
584 | 0 | } |
585 | | |
586 | | // Consume base-dependent prefix. |
587 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 |
588 | | // base 16: "0x" -> base 16 |
589 | | // Also validate the base. |
590 | 0 | if (base == 0) { |
591 | 0 | if (end - start >= 2 && start[0] == '0' && |
592 | 0 | (start[1] == 'x' || start[1] == 'X')) { |
593 | 0 | base = 16; |
594 | 0 | start += 2; |
595 | 0 | } else if (end - start >= 1 && start[0] == '0') { |
596 | 0 | base = 8; |
597 | 0 | start += 1; |
598 | 0 | } else { |
599 | 0 | base = 10; |
600 | 0 | } |
601 | 0 | } else if (base == 16) { |
602 | 0 | if (end - start >= 2 && start[0] == '0' && |
603 | 0 | (start[1] == 'x' || start[1] == 'X')) { |
604 | 0 | start += 2; |
605 | 0 | } |
606 | 0 | } else if (base >= 2 && base <= 36) { |
607 | | // okay |
608 | 0 | } else { |
609 | 0 | return false; |
610 | 0 | } |
611 | | |
612 | | // Consume digits. |
613 | | // |
614 | | // The classic loop: |
615 | | // |
616 | | // for each digit |
617 | | // value = value * base + digit |
618 | | // value *= sign |
619 | | // |
620 | | // The classic loop needs overflow checking. It also fails on the most |
621 | | // negative integer, -2147483648 in 32-bit two's complement representation. |
622 | | // |
623 | | // My improved loop: |
624 | | // |
625 | | // if (!negative) |
626 | | // for each digit |
627 | | // value = value * base |
628 | | // value = value + digit |
629 | | // else |
630 | | // for each digit |
631 | | // value = value * base |
632 | | // value = value - digit |
633 | | // |
634 | | // Overflow checking becomes simple. |
635 | | // |
636 | | // I present the positive code first for easier reading. |
637 | 0 | IntType value = 0; |
638 | 0 | if (!negative) { |
639 | 0 | const IntType vmax = std::numeric_limits<IntType>::max(); |
640 | 0 | assert(vmax > 0); |
641 | 0 | assert(vmax >= base); |
642 | 0 | const IntType vmax_over_base = vmax / base; |
643 | | // loop over digits |
644 | | // loop body is interleaved for perf, not readability |
645 | 0 | for (; start < end; ++start) { |
646 | 0 | unsigned char c = static_cast<unsigned char>(start[0]); |
647 | 0 | int digit = kAsciiToInt[c]; |
648 | 0 | if (value > vmax_over_base) return false; |
649 | 0 | value *= base; |
650 | 0 | if (digit >= base) return false; |
651 | 0 | if (value > vmax - digit) return false; |
652 | 0 | value += digit; |
653 | 0 | } |
654 | 0 | } else { |
655 | 0 | const IntType vmin = std::numeric_limits<IntType>::min(); |
656 | 0 | assert(vmin < 0); |
657 | 0 | assert(vmin <= 0 - base); |
658 | 0 | IntType vmin_over_base = vmin / base; |
659 | | // 2003 c++ standard [expr.mul] |
660 | | // "... the sign of the remainder is implementation-defined." |
661 | | // Although (vmin/base)*base + vmin%base is always vmin. |
662 | | // 2011 c++ standard tightens the spec but we cannot rely on it. |
663 | 0 | if (vmin % base > 0) { |
664 | 0 | vmin_over_base += 1; |
665 | 0 | } |
666 | | // loop over digits |
667 | | // loop body is interleaved for perf, not readability |
668 | 0 | for (; start < end; ++start) { |
669 | 0 | unsigned char c = static_cast<unsigned char>(start[0]); |
670 | 0 | int digit = kAsciiToInt[c]; |
671 | 0 | if (value < vmin_over_base) return false; |
672 | 0 | value *= base; |
673 | 0 | if (digit >= base) return false; |
674 | 0 | if (value < vmin + digit) return false; |
675 | 0 | value -= digit; |
676 | 0 | } |
677 | 0 | } |
678 | | |
679 | | // Store output. |
680 | 0 | *value_p = value; |
681 | 0 | return true; |
682 | 0 | } Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_ Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIxEEbPKcS2_iPT_ |
683 | | |
684 | | } // anonymous namespace |
685 | | |
686 | | bool safe_strto32_base(const char* startptr, const int buffer_size, |
687 | 0 | int32* v, int base) { |
688 | 0 | return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v); |
689 | 0 | } |
690 | | |
691 | | bool safe_strto64_base(const char* startptr, const int buffer_size, |
692 | 0 | int64* v, int base) { |
693 | 0 | return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v); |
694 | 0 | } |
695 | | |
696 | 0 | bool safe_strto32(const char* startptr, const int buffer_size, int32* value) { |
697 | 0 | return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value); |
698 | 0 | } |
699 | | |
700 | 0 | bool safe_strto64(const char* startptr, const int buffer_size, int64* value) { |
701 | 0 | return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value); |
702 | 0 | } |
703 | | |
704 | 559k | bool safe_strto32_base(const char* str, int32* value, int base) { |
705 | 559k | char* endptr; |
706 | 559k | errno = 0; // errno only gets set on errors |
707 | 559k | *value = strto32(str, &endptr, base); |
708 | 559k | if (endptr != str) { |
709 | 559k | while (ascii_isspace(*endptr)) ++endptr; |
710 | 559k | } |
711 | 559k | return *str != '\0' && *endptr == '\0' && errno == 0; |
712 | 559k | } |
713 | | |
714 | 9 | bool safe_strto64_base(const char* str, int64* value, int base) { |
715 | 9 | char* endptr; |
716 | 9 | errno = 0; // errno only gets set on errors |
717 | 9 | *value = strto64(str, &endptr, base); |
718 | 9 | if (endptr != str) { |
719 | 9 | while (ascii_isspace(*endptr)) ++endptr; |
720 | 9 | } |
721 | 9 | return *str != '\0' && *endptr == '\0' && errno == 0; |
722 | 9 | } |
723 | | |
724 | 235k | bool safe_strtou32_base(const char* str, uint32* value, int base) { |
725 | | // strtoul does not give any errors on negative numbers, so we have to |
726 | | // search the string for '-' manually. |
727 | 235k | while (ascii_isspace(*str)) ++str; |
728 | 235k | if (*str == '-') return false; |
729 | | |
730 | 235k | char* endptr; |
731 | 235k | errno = 0; // errno only gets set on errors |
732 | 235k | *value = strtou32(str, &endptr, base); |
733 | 235k | if (endptr != str) { |
734 | 235k | while (ascii_isspace(*endptr)) ++endptr; |
735 | 235k | } |
736 | 235k | return *str != '\0' && *endptr == '\0' && errno == 0; |
737 | 235k | } |
738 | | |
739 | 2 | bool safe_strtou64_base(const char* str, uint64* value, int base) { |
740 | | // strtou64 does not give any errors on negative numbers, so we have to |
741 | | // search the string for '-' manually. |
742 | 2 | while (ascii_isspace(*str)) ++str; |
743 | 2 | if (*str == '-') return false; |
744 | | |
745 | 2 | char* endptr; |
746 | 2 | errno = 0; // errno only gets set on errors |
747 | 2 | *value = strtou64(str, &endptr, base); |
748 | 2 | if (endptr != str) { |
749 | 2 | while (ascii_isspace(*endptr)) ++endptr; |
750 | 2 | } |
751 | 2 | return *str != '\0' && *endptr == '\0' && errno == 0; |
752 | 2 | } |
753 | | |
754 | | // ---------------------------------------------------------------------- |
755 | | // u64tostr_base36() |
756 | | // Converts unsigned number to string representation in base-36. |
757 | | // -------------------------------------------------------------------- |
758 | 0 | size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) { |
759 | 0 | CHECK_GT(buf_size, 0); |
760 | 0 | CHECK(buffer); |
761 | 0 | static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz"; |
762 | |
|
763 | 0 | buffer[buf_size - 1] = '\0'; |
764 | 0 | size_t result_size = 1; |
765 | |
|
766 | 0 | do { |
767 | 0 | if (buf_size == result_size) { // Ran out of space. |
768 | 0 | return 0; |
769 | 0 | } |
770 | 0 | int remainder = number % 36; |
771 | 0 | number /= 36; |
772 | 0 | buffer[buf_size - result_size - 1] = kAlphabet[remainder]; |
773 | 0 | result_size++; |
774 | 0 | } while (number); |
775 | |
|
776 | 0 | memmove(buffer, buffer + buf_size - result_size, result_size); |
777 | |
|
778 | 0 | return result_size - 1; |
779 | 0 | } |
780 | | |
781 | | // Generate functions that wrap safe_strtoXXX_base. |
782 | | #define GEN_SAFE_STRTO(name, type) \ |
783 | 0 | bool name##_base(const string& str, type* value, int base) { \ |
784 | 0 | return name##_base(str.c_str(), value, base); \ |
785 | 0 | } \ Unexecuted instantiation: _Z17safe_strto32_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPii Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPji Unexecuted instantiation: _Z17safe_strto64_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPxi Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPyi |
786 | 794k | bool name(const char* str, type* value) { \ |
787 | 794k | return name##_base(str, value, 10); \ |
788 | 794k | } \ Line | Count | Source | 786 | 559k | bool name(const char* str, type* value) { \ | 787 | 559k | return name##_base(str, value, 10); \ | 788 | 559k | } \ |
Line | Count | Source | 786 | 235k | bool name(const char* str, type* value) { \ | 787 | 235k | return name##_base(str, value, 10); \ | 788 | 235k | } \ |
Unexecuted instantiation: _Z12safe_strto64PKcPx Unexecuted instantiation: _Z13safe_strtou64PKcPy |
789 | 9 | bool name(const string& str, type* value) { \ |
790 | 9 | return name##_base(str.c_str(), value, 10); \ |
791 | 9 | } Unexecuted instantiation: _Z12safe_strto32RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPi Unexecuted instantiation: _Z13safe_strtou32RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPj _Z12safe_strto64RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPx Line | Count | Source | 789 | 9 | bool name(const string& str, type* value) { \ | 790 | 9 | return name##_base(str.c_str(), value, 10); \ | 791 | 9 | } |
Unexecuted instantiation: _Z13safe_strtou64RKNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEEEPy |
792 | | GEN_SAFE_STRTO(safe_strto32, int32); |
793 | | GEN_SAFE_STRTO(safe_strtou32, uint32); |
794 | | GEN_SAFE_STRTO(safe_strto64, int64); |
795 | | GEN_SAFE_STRTO(safe_strtou64, uint64); |
796 | | #undef GEN_SAFE_STRTO |
797 | | |
798 | 5 | bool safe_strtof(const char* str, float* value) { |
799 | 5 | char* endptr; |
800 | | #ifdef _MSC_VER // has no strtof() |
801 | | *value = strtod(str, &endptr); |
802 | | #else |
803 | 5 | *value = strtof(str, &endptr); |
804 | 5 | #endif |
805 | 5 | if (endptr != str) { |
806 | 5 | while (ascii_isspace(*endptr)) ++endptr; |
807 | 5 | } |
808 | | // Ignore range errors from strtod/strtof. |
809 | | // The values it returns on underflow and |
810 | | // overflow are the right fallback in a |
811 | | // robust setting. |
812 | 5 | return *str != '\0' && *endptr == '\0'; |
813 | 5 | } |
814 | | |
815 | 0 | bool safe_strtod(const char* str, double* value) { |
816 | 0 | char* endptr; |
817 | 0 | *value = strtod(str, &endptr); |
818 | 0 | if (endptr != str) { |
819 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
820 | 0 | } |
821 | | // Ignore range errors from strtod. The values it |
822 | | // returns on underflow and overflow are the right |
823 | | // fallback in a robust setting. |
824 | 0 | return *str != '\0' && *endptr == '\0'; |
825 | 0 | } |
826 | | |
827 | 0 | bool safe_strtof(const string& str, float* value) { |
828 | 0 | return safe_strtof(str.c_str(), value); |
829 | 0 | } |
830 | | |
831 | 0 | bool safe_strtod(const string& str, double* value) { |
832 | 0 | return safe_strtod(str.c_str(), value); |
833 | 0 | } |
834 | | |
835 | 0 | uint64 atoi_kmgt(const char* s) { |
836 | 0 | char* endptr; |
837 | 0 | uint64 n = strtou64(s, &endptr, 10); |
838 | 0 | uint64 scale = 1; |
839 | 0 | char c = *endptr; |
840 | 0 | if (c != '\0') { |
841 | 0 | c = ascii_toupper(c); |
842 | 0 | switch (c) { |
843 | 0 | case 'K': |
844 | 0 | scale = GG_ULONGLONG(1) << 10; |
845 | 0 | break; |
846 | 0 | case 'M': |
847 | 0 | scale = GG_ULONGLONG(1) << 20; |
848 | 0 | break; |
849 | 0 | case 'G': |
850 | 0 | scale = GG_ULONGLONG(1) << 30; |
851 | 0 | break; |
852 | 0 | case 'T': |
853 | 0 | scale = GG_ULONGLONG(1) << 40; |
854 | 0 | break; |
855 | 0 | default: |
856 | 0 | LOG(FATAL) << "Invalid mnemonic: `" << c << "';" |
857 | 0 | << " should be one of `K', `M', `G', and `T'."; |
858 | 0 | } |
859 | 0 | } |
860 | 0 | return n * scale; |
861 | 0 | } |
862 | | |
863 | | // ---------------------------------------------------------------------- |
864 | | // FastIntToBuffer() |
865 | | // FastInt64ToBuffer() |
866 | | // FastHexToBuffer() |
867 | | // FastHex64ToBuffer() |
868 | | // FastHex32ToBuffer() |
869 | | // FastTimeToBuffer() |
870 | | // These are intended for speed. FastHexToBuffer() assumes the |
871 | | // integer is non-negative. FastHexToBuffer() puts output in |
872 | | // hex rather than decimal. FastTimeToBuffer() puts the output |
873 | | // into RFC822 format. If time is 0, uses the current time. |
874 | | // |
875 | | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
876 | | // padded to exactly 16 bytes (plus one byte for '\0') |
877 | | // |
878 | | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
879 | | // padded to exactly 8 bytes (plus one byte for '\0') |
880 | | // |
881 | | // All functions take the output buffer as an arg. FastInt() |
882 | | // uses at most 22 bytes, FastTime() uses exactly 30 bytes. |
883 | | // They all return a pointer to the beginning of the output, |
884 | | // which may not be the beginning of the input buffer. (Though |
885 | | // for FastTimeToBuffer(), we guarantee that it is.) |
886 | | // ---------------------------------------------------------------------- |
887 | | |
888 | 0 | char *FastInt64ToBuffer(int64 i, char* buffer) { |
889 | 0 | FastInt64ToBufferLeft(i, buffer); |
890 | 0 | return buffer; |
891 | 0 | } |
892 | | |
893 | 0 | char *FastInt32ToBuffer(int32 i, char* buffer) { |
894 | 0 | FastInt32ToBufferLeft(i, buffer); |
895 | 0 | return buffer; |
896 | 0 | } |
897 | | |
898 | 0 | char *FastHexToBuffer(int i, char* buffer) { |
899 | 0 | CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i; |
900 | |
|
901 | 0 | static const char *hexdigits = "0123456789abcdef"; |
902 | 0 | char *p = buffer + 21; |
903 | 0 | *p-- = '\0'; |
904 | 0 | do { |
905 | 0 | *p-- = hexdigits[i & 15]; // mod by 16 |
906 | 0 | i >>= 4; // divide by 16 |
907 | 0 | } while (i > 0); |
908 | 0 | return p + 1; |
909 | 0 | } |
910 | | |
// Writes the low |num_byte| hex digits of |value| (lowercase, zero-padded,
// most significant digit first) into buffer[0 .. num_byte-1] and a NUL into
// buffer[num_byte]. Returns |buffer|. Despite its name, |num_byte| counts
// output characters (nibbles), not bytes of |value|.
char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
  static const char *hexdigits = "0123456789abcdef";
  buffer[num_byte] = '\0';
  // Fill from the right: each step emits the current low nibble.
  for (int remaining = num_byte; remaining > 0; --remaining) {
    buffer[remaining - 1] = hexdigits[value & 0xf];
    value >>= 4;
  }
  return buffer;
}
920 | | |
921 | 5.81M | char *FastHex64ToBuffer(uint64 value, char* buffer) { |
922 | 5.81M | return InternalFastHexToBuffer(value, buffer, 16); |
923 | 5.81M | } |
924 | | |
925 | 8 | std::string FastHex64ToString(uint64 value) { |
926 | 8 | std::string result; |
927 | 8 | result.resize(16); |
928 | 8 | InternalFastHexToBuffer(value, &result[0], 16); |
929 | 8 | return result; |
930 | 8 | } |
931 | | |
932 | 1 | char *FastHex32ToBuffer(uint32 value, char* buffer) { |
933 | 1 | return InternalFastHexToBuffer(value, buffer, 8); |
934 | 1 | } |
935 | | |
936 | | // TODO(user): revisit the two_ASCII_digits optimization. |
937 | | // |
938 | | // Several converters use this table to reduce |
939 | | // division and modulo operations. |
940 | | extern const char two_ASCII_digits[100][2]; // from strutil.cc |
941 | | |
942 | | // ---------------------------------------------------------------------- |
943 | | // FastInt32ToBufferLeft() |
944 | | // FastUInt32ToBufferLeft() |
945 | | // FastInt64ToBufferLeft() |
946 | | // FastUInt64ToBufferLeft() |
947 | | // |
948 | | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
949 | | // Unlike the Fast*ToBuffer() functions, however, these functions write |
950 | | // their output to the beginning of the buffer (hence the name, as the |
951 | | // output is left-aligned). The caller is responsible for ensuring that |
952 | | // the buffer has enough space to hold the output. |
953 | | // |
954 | | // Returns a pointer to the end of the string (i.e. the null character |
955 | | // terminating the string). |
956 | | // ---------------------------------------------------------------------- |
957 | | |
// Writes the decimal digits of |u| starting at |buffer|, appends a NUL, and
// returns a pointer to that NUL (the end of the string).
//
// Control flow: the body of the first `if` is a ladder of labelled steps.
// Each `ltN` label emits the next two digits of a value already known to be
// below N (looked up in two_ASCII_digits, presumably the two characters for
// each of 0..99 -- confirm against strutil.cc); each `subltN` label first
// subtracts the digits that were just emitted. The code after that `if`
// classifies smaller values by magnitude and jumps into the ladder at the
// right rung, emitting a single leading digit first when the digit count is
// odd.
958 | 226M | char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { |
959 | 226M | uint digits; |
960 | 226M | const char *ASCII_digits = nullptr; |
961 | | // The idea of this implementation is to trim the number of divides to as few |
962 | | // as possible by using multiplication and subtraction rather than mod (%), |
963 | | // and by outputting two digits at a time rather than one. |
964 | | // The huge-number case is first, in the hopes that the compiler will output |
965 | | // that case in one branch-free block of code, and only output conditional |
966 | | // branches into it from below. |
967 | 226M | if (u >= 1000000000) { // >= 1,000,000,000 |
968 | 2.98M | digits = u / 100000000; // 100,000,000 |
969 | 2.98M | ASCII_digits = two_ASCII_digits[digits]; |
970 | 2.98M | buffer[0] = ASCII_digits[0]; |
971 | 2.98M | buffer[1] = ASCII_digits[1]; |
972 | 2.98M | buffer += 2; |
973 | 3.98M | sublt100_000_000: |
974 | 3.98M | u -= digits * 100000000; // 100,000,000 |
975 | 5.78M | lt100_000_000: |
976 | 5.78M | digits = u / 1000000; // 1,000,000 |
977 | 5.78M | ASCII_digits = two_ASCII_digits[digits]; |
978 | 5.78M | buffer[0] = ASCII_digits[0]; |
979 | 5.78M | buffer[1] = ASCII_digits[1]; |
980 | 5.78M | buffer += 2; |
981 | 48.7M | sublt1_000_000: |
982 | 48.7M | u -= digits * 1000000; // 1,000,000 |
983 | 54.3M | lt1_000_000: |
984 | 54.3M | digits = u / 10000; // 10,000 |
985 | 54.3M | ASCII_digits = two_ASCII_digits[digits]; |
986 | 54.3M | buffer[0] = ASCII_digits[0]; |
987 | 54.3M | buffer[1] = ASCII_digits[1]; |
988 | 54.3M | buffer += 2; |
989 | 77.5M | sublt10_000: |
990 | 77.5M | u -= digits * 10000; // 10,000 |
991 | 86.3M | lt10_000: |
992 | 86.3M | digits = u / 100; |
993 | 86.3M | ASCII_digits = two_ASCII_digits[digits]; |
994 | 86.3M | buffer[0] = ASCII_digits[0]; |
995 | 86.3M | buffer[1] = ASCII_digits[1]; |
996 | 86.3M | buffer += 2; |
997 | 91.1M | sublt100: |
998 | 91.1M | u -= digits * 100; |
999 | 104M | lt100: |
1000 | 104M | digits = u; |
1001 | 104M | ASCII_digits = two_ASCII_digits[digits]; |
1002 | 104M | buffer[0] = ASCII_digits[0]; |
1003 | 104M | buffer[1] = ASCII_digits[1]; |
1004 | 104M | buffer += 2; |
1005 | 226M | done: |
1006 | 226M | *buffer = 0; |
1007 | 226M | return buffer; |
1008 | 223M | } |
1009 | |
// Dispatch for u < 1,000,000,000. Every branch below ends in a goto back
// into the ladder above, so control never falls off the end of the function.
1010 | 223M | if (u < 100) { |
1011 | 135M | digits = u; |
1012 | 135M | if (u >= 10) goto lt100; |
1013 | 121M | *buffer++ = '0' + digits; |
1014 | 121M | goto done; |
1015 | 121M | } |
1016 | 88.1M | if (u < 10000) { // 10,000 |
1017 | 13.6M | if (u >= 1000) goto lt10_000; |
1018 | 4.87M | digits = u / 100; |
1019 | 4.87M | *buffer++ = '0' + digits; |
1020 | 4.87M | goto sublt100; |
1021 | 4.87M | } |
1022 | 74.4M | if (u < 1000000) { // 1,000,000 |
1023 | 28.8M | if (u >= 100000) goto lt1_000_000; |
1024 | 23.2M | digits = u / 10000; // 10,000 |
1025 | 23.2M | *buffer++ = '0' + digits; |
1026 | 23.2M | goto sublt10_000; |
1027 | 23.2M | } |
1028 | 45.6M | if (u < 100000000) { // 100,000,000 |
1029 | 44.7M | if (u >= 10000000) goto lt100_000_000; |
1030 | 42.9M | digits = u / 1000000; // 1,000,000 |
1031 | 42.9M | *buffer++ = '0' + digits; |
1032 | 42.9M | goto sublt1_000_000; |
1033 | 42.9M | } |
1034 | | // we already know that u < 1,000,000,000 |
1035 | 889k | digits = u / 100000000; // 100,000,000 |
1036 | 889k | *buffer++ = '0' + digits; |
1037 | 889k | goto sublt100_000_000; |
1038 | 889k | } |
1039 | | |
1040 | 128M | char* FastInt32ToBufferLeft(int32 i, char* buffer) { |
1041 | 128M | uint32 u = i; |
1042 | 128M | if (i < 0) { |
1043 | 991 | *buffer++ = '-'; |
1044 | | // We need to do the negation in modular (i.e., "unsigned") |
1045 | | // arithmetic; MSVC++ apprently warns for plain "-u", so |
1046 | | // we write the equivalent expression "0 - u" instead. |
1047 | 991 | u = 0 - u; |
1048 | 991 | } |
1049 | 128M | return FastUInt32ToBufferLeft(u, buffer); |
1050 | 128M | } |
1051 | | |
1052 | 90.6M | char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { |
1053 | 90.6M | uint digits; |
1054 | 90.6M | const char *ASCII_digits = nullptr; |
1055 | | |
1056 | 90.6M | uint32 u = static_cast<uint32>(u64); |
1057 | 90.6M | if (u == u64) return FastUInt32ToBufferLeft(u, buffer); |
1058 | | |
1059 | 5.22M | uint64 top_11_digits = u64 / 1000000000; |
1060 | 5.22M | buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); |
1061 | 5.22M | u = narrow_cast<uint32>(u64 - (top_11_digits * 1000000000)); |
1062 | | |
1063 | 5.22M | digits = u / 10000000; // 10,000,000 |
1064 | 5.22M | DCHECK_LT(digits, 100); |
1065 | 5.22M | ASCII_digits = two_ASCII_digits[digits]; |
1066 | 5.22M | buffer[0] = ASCII_digits[0]; |
1067 | 5.22M | buffer[1] = ASCII_digits[1]; |
1068 | 5.22M | buffer += 2; |
1069 | 5.22M | u -= digits * 10000000; // 10,000,000 |
1070 | 5.22M | digits = u / 100000; // 100,000 |
1071 | 5.22M | ASCII_digits = two_ASCII_digits[digits]; |
1072 | 5.22M | buffer[0] = ASCII_digits[0]; |
1073 | 5.22M | buffer[1] = ASCII_digits[1]; |
1074 | 5.22M | buffer += 2; |
1075 | 5.22M | u -= digits * 100000; // 100,000 |
1076 | 5.22M | digits = u / 1000; // 1,000 |
1077 | 5.22M | ASCII_digits = two_ASCII_digits[digits]; |
1078 | 5.22M | buffer[0] = ASCII_digits[0]; |
1079 | 5.22M | buffer[1] = ASCII_digits[1]; |
1080 | 5.22M | buffer += 2; |
1081 | 5.22M | u -= digits * 1000; // 1,000 |
1082 | 5.22M | digits = u / 10; |
1083 | 5.22M | ASCII_digits = two_ASCII_digits[digits]; |
1084 | 5.22M | buffer[0] = ASCII_digits[0]; |
1085 | 5.22M | buffer[1] = ASCII_digits[1]; |
1086 | 5.22M | buffer += 2; |
1087 | 5.22M | u -= digits * 10; |
1088 | 5.22M | digits = u; |
1089 | 5.22M | *buffer++ = '0' + digits; |
1090 | 5.22M | *buffer = 0; |
1091 | 5.22M | return buffer; |
1092 | 5.22M | } |
1093 | | |
1094 | 22.0M | char* FastInt64ToBufferLeft(int64 i, char* buffer) { |
1095 | 22.0M | uint64 u = i; |
1096 | 22.0M | if (i < 0) { |
1097 | 463k | *buffer++ = '-'; |
1098 | 463k | u = 0 - u; |
1099 | 463k | } |
1100 | 22.0M | return FastUInt64ToBufferLeft(u, buffer); |
1101 | 22.0M | } |
1102 | | |
1103 | 0 | int HexDigitsPrefix(const char* buf, int num_digits) { |
1104 | 0 | for (int i = 0; i < num_digits; i++) |
1105 | 0 | if (!ascii_isxdigit(buf[i])) |
1106 | 0 | return 0; // This also detects end of string as '\0' is not xdigit. |
1107 | 0 | return 1; |
1108 | 0 | } |
1109 | | |
1110 | | // ---------------------------------------------------------------------- |
1111 | | // AutoDigitStrCmp |
1112 | | // AutoDigitLessThan |
1113 | | // StrictAutoDigitLessThan |
1114 | | // autodigit_less |
1115 | | // autodigit_greater |
1116 | | // strict_autodigit_less |
1117 | | // strict_autodigit_greater |
1118 | | // These are like less<string> and greater<string>, except when a |
1119 | | // run of digits is encountered at corresponding points in the two |
1120 | | // arguments. Such digit strings are compared numerically instead |
1121 | | // of lexicographically. Therefore if you sort by |
1122 | | // "autodigit_less", some machine names might get sorted as: |
1123 | | // exaf1 |
1124 | | // exaf2 |
1125 | | // exaf10 |
1126 | | // When using "strict" comparison (AutoDigitStrCmp with the strict flag |
1127 | | // set to true, or the strict version of the other functions), |
1128 | | // strings that represent equal numbers will not be considered equal if |
1129 | | // the string representations are not identical. That is, "01" < "1" in |
1130 | | // strict mode, but "01" == "1" otherwise. |
1131 | | // ---------------------------------------------------------------------- |
1132 | | |
// Returns true iff |c| is one of the ASCII characters '0'..'9'.
// Used instead of <ctype.h> isdigit(), whose behaviour is undefined when it
// is handed a negative value -- which is what a plain char holding a byte
// >= 0x80 becomes on platforms where char is signed.
static inline bool AutoDigitIsDigit(char c) {
  return c >= '0' && c <= '9';
}

// Three-way comparison of the byte ranges [a, a+alen) and [b, b+blen).
// Bytes are compared one by one, except that when both inputs have a digit
// at the current position the two digit runs are compared by numeric value
// (of arbitrary length; leading zeroes are ignored).
//
// With |strict| set, two digit runs of equal value but a different number of
// leading zeroes are not equal: the run with more leading zeroes sorts first
// ("01" < "1").
//
// Returns -1 if a sorts before b, 1 if it sorts after, and 0 if they are
// equal. Non-digit bytes are compared as plain char, so the ordering of
// bytes >= 0x80 follows the platform's char signedness.
int AutoDigitStrCmp(const char* a, size_t alen,
                    const char* b, size_t blen,
                    bool strict) {
  size_t aindex = 0;
  size_t bindex = 0;
  while ((aindex < alen) && (bindex < blen)) {
    if (AutoDigitIsDigit(a[aindex]) && AutoDigitIsDigit(b[bindex])) {
      // Compare runs of digits. Instead of extracting numbers, skip the
      // leading zeroes and compare run lengths first; this handles numbers of
      // arbitrary precision. The zero counts are kept so that "1" and "01"
      // can be told apart in strict mode.
      const size_t azero_start = aindex;
      const size_t bzero_start = bindex;
      while ((aindex < alen) && (a[aindex] == '0')) aindex++;
      while ((bindex < blen) && (b[bindex] == '0')) bindex++;
      const size_t azeroes = aindex - azero_start;
      const size_t bzeroes = bindex - bzero_start;

      // Measure the significant part of each run.
      const size_t astart = aindex;
      const size_t bstart = bindex;
      while ((aindex < alen) && AutoDigitIsDigit(a[aindex])) aindex++;
      while ((bindex < blen) && AutoDigitIsDigit(b[bindex])) bindex++;
      const size_t arun = aindex - astart;
      const size_t brun = bindex - bstart;

      // With leading zeroes gone, the shorter run is the smaller number.
      if (arun != brun) {
        return arun < brun ? -1 : 1;
      }

      // Same length, so the first differing digit decides.
      for (size_t i = 0; i < arun; i++) {
        const char adigit = a[astart + i];
        const char bdigit = b[bstart + i];
        if (adigit != bdigit) {
          return adigit < bdigit ? -1 : 1;
        }
      }

      // Numerically equal: in strict mode more leading zeroes sorts first.
      if (strict && azeroes != bzeroes) {
        return azeroes > bzeroes ? -1 : 1;
      }
      // Otherwise keep scanning after both runs.
    } else if (a[aindex] != b[bindex]) {
      return a[aindex] < b[bindex] ? -1 : 1;
    } else {
      aindex++;
      bindex++;
    }
  }

  if (aindex < alen) {
    // b is a prefix of a
    return 1;
  }
  if (bindex < blen) {
    // a is a prefix of b
    return -1;
  }
  // a is equal to b
  return 0;
}
1207 | | |
1208 | 0 | bool AutoDigitLessThan(const char* a, size_t alen, const char* b, size_t blen) { |
1209 | 0 | return AutoDigitStrCmp(a, alen, b, blen, false) < 0; |
1210 | 0 | } |
1211 | | |
1212 | | bool StrictAutoDigitLessThan(const char* a, size_t alen, |
1213 | 0 | const char* b, size_t blen) { |
1214 | 0 | return AutoDigitStrCmp(a, alen, b, blen, true) < 0; |
1215 | 0 | } |
1216 | | |
1217 | | // ---------------------------------------------------------------------- |
1218 | | // SimpleDtoa() |
1219 | | // SimpleFtoa() |
1220 | | // DoubleToBuffer() |
1221 | | // FloatToBuffer() |
1222 | | // We want to print the value without losing precision, but we also do |
1223 | | // not want to print more digits than necessary. This turns out to be |
1224 | | // trickier than it sounds. Numbers like 0.2 cannot be represented |
1225 | | // exactly in binary. If we print 0.2 with a very large precision, |
1226 | | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
1227 | | // On the other hand, if we set the precision too low, we lose |
1228 | | // significant digits when printing numbers that actually need them. |
1229 | | // It turns out there is no precision value that does the right thing |
1230 | | // for all numbers. |
1231 | | // |
1232 | | // Our strategy is to first try printing with a precision that is never |
1233 | | // over-precise, then parse the result with strtod() to see if it |
1234 | | // matches. If not, we print again with a precision that will always |
1235 | | // give a precise result, but may use more digits than necessary. |
1236 | | // |
1237 | | // An arguably better strategy would be to use the algorithm described |
1238 | | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
1239 | | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
1240 | | // however, that the following implementation is about as fast as |
1241 | | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
1242 | | // will not scale well on multi-core machines. DMG's code is slightly |
1243 | | // more accurate (in that it will never use more digits than |
1244 | | // necessary), but this is probably irrelevant for most users. |
1245 | | // |
1246 | | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
1247 | | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
1248 | | // one in that it makes guesses and then uses strtod() to check them. |
1249 | | // Their implementation is faster because they use their own code to |
1250 | | // generate the digits in the first place rather than use snprintf(), |
1251 | | // thus avoiding format string parsing overhead. However, this makes |
1252 | | // it considerably more complicated than the following implementation, |
1253 | | // and it is embedded in a larger library. If speed turns out to be |
1254 | | // an issue, we could re-implement this in terms of their |
1255 | | // implementation. |
1256 | | // ---------------------------------------------------------------------- |
1257 | | |
1258 | 31 | string SimpleDtoa(double value) { |
1259 | 31 | char buffer[kDoubleToBufferSize]; |
1260 | 31 | return DoubleToBuffer(value, buffer); |
1261 | 31 | } |
1262 | | |
1263 | 5 | string SimpleFtoa(float value) { |
1264 | 5 | char buffer[kFloatToBufferSize]; |
1265 | 5 | return FloatToBuffer(value, buffer); |
1266 | 5 | } |
1267 | | |
1268 | 39 | char* DoubleToBuffer(double value, char* buffer) { |
1269 | | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
1270 | | // platforms these days. Just in case some system exists where DBL_DIG |
1271 | | // is significantly larger -- and risks overflowing our buffer -- we have |
1272 | | // this assert. |
1273 | 39 | COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
1274 | | |
1275 | 39 | if (value == std::numeric_limits<double>::infinity()) { |
1276 | 6 | strncpy(buffer, "inf", kDoubleToBufferSize); |
1277 | 6 | return buffer; |
1278 | 33 | } else if (value == -std::numeric_limits<double>::infinity()) { |
1279 | 4 | strncpy(buffer, "-inf", kDoubleToBufferSize); |
1280 | 4 | return buffer; |
1281 | 29 | } else if (isnan(value)) { |
1282 | 10 | strncpy(buffer, "nan", kDoubleToBufferSize); |
1283 | 10 | return buffer; |
1284 | 10 | } |
1285 | | |
1286 | 19 | int snprintf_result = |
1287 | 19 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); |
1288 | | |
1289 | | // The snprintf should never overflow because the buffer is significantly |
1290 | | // larger than the precision we asked for. |
1291 | 19 | DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
1292 | | |
1293 | | // We need to make parsed_value volatile in order to force the compiler to |
1294 | | // write it out to the stack. Otherwise, it may keep the value in a |
1295 | | // register, and if it does that, it may keep it as a long double instead |
1296 | | // of a double. This long double may have extra bits that make it compare |
1297 | | // unequal to "value" even though it would be exactly equal if it were |
1298 | | // truncated to a double. |
1299 | 19 | volatile double parsed_value = strtod(buffer, NULL); |
1300 | 19 | if (parsed_value != value) { |
1301 | 6 | int snprintf_result = |
1302 | 6 | snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); |
1303 | | |
1304 | | // Should never overflow; see above. |
1305 | 6 | DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
1306 | 6 | } |
1307 | | |
1308 | 19 | return buffer; |
1309 | 19 | } |
1310 | | |
1311 | 5 | char* FloatToBuffer(float value, char* buffer) { |
1312 | | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
1313 | | // platforms these days. Just in case some system exists where FLT_DIG |
1314 | | // is significantly larger -- and risks overflowing our buffer -- we have |
1315 | | // this assert. |
1316 | 5 | COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
1317 | | |
1318 | 5 | if (value == std::numeric_limits<double>::infinity()) { |
1319 | 0 | strncpy(buffer, "inf", kFloatToBufferSize); |
1320 | 0 | return buffer; |
1321 | 5 | } else if (value == -std::numeric_limits<double>::infinity()) { |
1322 | 0 | strncpy(buffer, "-inf", kFloatToBufferSize); |
1323 | 0 | return buffer; |
1324 | 5 | } else if (isnan(value)) { |
1325 | 0 | strncpy(buffer, "nan", kFloatToBufferSize); |
1326 | 0 | return buffer; |
1327 | 0 | } |
1328 | | |
1329 | 5 | int snprintf_result = |
1330 | 5 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); |
1331 | | |
1332 | | // The snprintf should never overflow because the buffer is significantly |
1333 | | // larger than the precision we asked for. |
1334 | 5 | DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
1335 | | |
1336 | 5 | float parsed_value; |
1337 | 5 | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
1338 | 5 | int snprintf_result = |
1339 | 5 | snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+3, value); |
1340 | | |
1341 | | // Should never overflow; see above. |
1342 | 5 | DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
1343 | 5 | } |
1344 | | |
1345 | 5 | return buffer; |
1346 | 5 | } |
1347 | | |
// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
//    Description: converts an integer to its decimal string, inserting a
//    comma between every group of three digits (counted from the right).
//
//    Return value: string
// ----------------------------------------------------------------------
string SimpleItoaWithCommas(int32 i) {
  // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
  // Longest is -2,147,483,648.
  char local[14];
  char* const end = local + sizeof(local);
  char* cursor = end;

  // Work on the magnitude as uint32 so that -2,147,483,648 is representable;
  // the negation happens in unsigned arithmetic to avoid overflow.
  uint32 remaining = i;
  if (i < 0) {
    remaining = 0 - remaining;
  }

  // Emit digits right to left. The do/while guarantees "0" gets one digit.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--cursor = ',';
      digits_in_group = 0;
    }
    *--cursor = '0' + remaining % 10;
    remaining /= 10;
    ++digits_in_group;
  } while (remaining != 0);

  if (i < 0) {
    *--cursor = '-';
  }
  return string(cursor, end);
}
1386 | | |
// Unsigned 32-bit overload; without it SimpleItoaWithCommas(5U) wouldn't
// compile. Returns the decimal text of |i| with a comma between every group
// of three digits.
string SimpleItoaWithCommas(uint32 i) {
  // 10 digits and 3 commas are good for 32-bit or smaller ints.
  // Longest is 4,294,967,295.
  char local[13];
  char* const end = local + sizeof(local);
  char* cursor = end;

  // Emit digits right to left. The do/while guarantees "0" gets one digit.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--cursor = ',';
      digits_in_group = 0;
    }
    *--cursor = '0' + i % 10;
    i /= 10;
    ++digits_in_group;
  } while (i != 0);

  return string(cursor, end);
}
1412 | | |
// Signed 64-bit overload. Returns the decimal text of |i| with a comma
// between every group of three digits and a leading '-' for negative values.
string SimpleItoaWithCommas(int64 i) {
  // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
  char local[26];
  char* const end = local + sizeof(local);
  char* cursor = end;

  // Work on the magnitude as uint64 so that -9,223,372,036,854,775,808 is
  // representable; the negation happens in unsigned arithmetic.
  uint64 remaining = i;
  if (i < 0) {
    remaining = 0 - remaining;
  }

  // Emit digits right to left. The do/while guarantees "0" gets one digit.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--cursor = ',';
      digits_in_group = 0;
    }
    *--cursor = '0' + remaining % 10;
    remaining /= 10;
    ++digits_in_group;
  } while (remaining != 0);

  if (i < 0) {
    *--cursor = '-';
  }
  return string(cursor, end);
}
1442 | | |
// Unsigned 64-bit overload; without it SimpleItoaWithCommas(5ULL) wouldn't
// compile. Returns the decimal text of |i| with a comma between every group
// of three digits.
string SimpleItoaWithCommas(uint64 i) {
  // 20 digits and 6 commas are good for 64-bit or smaller ints.
  // Longest is 18,446,744,073,709,551,615.
  char local[26];
  char* const end = local + sizeof(local);
  char* cursor = end;

  // Emit digits right to left. The do/while guarantees "0" gets one digit.
  int digits_in_group = 0;
  do {
    if (digits_in_group == 3) {
      *--cursor = ',';
      digits_in_group = 0;
    }
    *--cursor = '0' + i % 10;
    i /= 10;
    ++digits_in_group;
  } while (i != 0);

  return string(cursor, end);
}
1468 | | |
1469 | | // ---------------------------------------------------------------------- |
1470 | | // ItoaKMGT() |
1471 | | // Description: converts an integer to a string |
1472 | | // Truncates values to a readable unit: K, G, M or T |
1473 | | // Opposite of atoi_kmgt() |
1474 | | // e.g. 100 -> "100" 1500 -> "1500" 4000 -> "3K" 57185920 -> "45M" |
1475 | | // |
1476 | | // Return value: string |
1477 | | // ---------------------------------------------------------------------- |
1478 | 0 | string ItoaKMGT(int64 i) { |
1479 | 0 | const char *sign = "", *suffix = ""; |
1480 | 0 | if (i < 0) { |
1481 | | // We lose some accuracy if the caller passes LONG_LONG_MIN, but |
1482 | | // that's OK as this function is only for human readability |
1483 | 0 | if (i == numeric_limits<int64>::min()) i++; |
1484 | 0 | sign = "-"; |
1485 | 0 | i = -i; |
1486 | 0 | } |
1487 | |
|
1488 | 0 | int64 val; |
1489 | |
|
1490 | 0 | if ((val = (i >> 40)) > 1) { |
1491 | 0 | suffix = "T"; |
1492 | 0 | } else if ((val = (i >> 30)) > 1) { |
1493 | 0 | suffix = "G"; |
1494 | 0 | } else if ((val = (i >> 20)) > 1) { |
1495 | 0 | suffix = "M"; |
1496 | 0 | } else if ((val = (i >> 10)) > 1) { |
1497 | 0 | suffix = "K"; |
1498 | 0 | } else { |
1499 | 0 | val = i; |
1500 | 0 | } |
1501 | |
|
1502 | 0 | return StringPrintf("%s%" PRId64 "%s", sign, val, suffix); |
1503 | 0 | } |
1504 | | |
1505 | | // DEPRECATED(wadetregaskis). |
1506 | | // These are non-inline because some BUILD files turn on -Wformat-non-literal. |
1507 | | |
1508 | 0 | string FloatToString(float f, const char* format) { |
1509 | 0 | return StringPrintf(format, f); |
1510 | 0 | } |
1511 | | |
1512 | 0 | string IntToString(int i, const char* format) { |
1513 | 0 | return StringPrintf(format, i); |
1514 | 0 | } |
1515 | | |
1516 | 0 | string Int64ToString(int64 i64, const char* format) { |
1517 | 0 | return StringPrintf(format, i64); |
1518 | 0 | } |
1519 | | |
1520 | 0 | string UInt64ToString(uint64 ui64, const char* format) { |
1521 | 0 | return StringPrintf(format, ui64); |
1522 | 0 | } |
1523 | | |
namespace {
// Decimal unit sizes used by HumanizeBytes(); each unit is 1000 times the
// previous one.
constexpr int64_t kBytesPerKB = 1000;
constexpr int64_t kBytesPerMB = 1000 * kBytesPerKB;
constexpr int64_t kBytesPerGB = 1000 * kBytesPerMB;
}  // namespace

// Renders a byte count as a human-readable string.
//
// Counts below 1000 are printed as a whole number followed by " B". Larger
// counts are divided by the largest decimal unit (KB, MB or GB) that does not
// exceed them and printed in fixed-point notation with `precision` digits
// after the decimal point, followed by the unit, e.g. (1500, 2) -> "1.50 KB".
string HumanizeBytes(uint64_t bytes, int precision) {
  std::ostringstream formatted;
  formatted << std::fixed << std::setprecision(precision);

  // Plain byte counts stay integral; the precision setting does not affect
  // integer output.
  if (bytes < kBytesPerKB) {
    formatted << bytes << " B";
    return formatted.str();
  }

  // Choose the largest unit that fits; KB is the floor at this point.
  int64_t unit_size = kBytesPerKB;
  const char* unit_label = " KB";
  if (bytes >= kBytesPerGB) {
    unit_size = kBytesPerGB;
    unit_label = " GB";
  } else if (bytes >= kBytesPerMB) {
    unit_size = kBytesPerMB;
    unit_label = " MB";
  }

  formatted << static_cast<double>(bytes) / unit_size << unit_label;
  return formatted.str();
}