YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/util/date_time.cc
Line
Count
Source (jump to first uncovered line)
1
//--------------------------------------------------------------------------------------------------
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
// DateTime parser and serializer
16
//--------------------------------------------------------------------------------------------------
17
18
#include "yb/util/date_time.h"
19
20
#include <unicode/gregocal.h>
21
22
#include <regex>
23
24
#include <boost/date_time/c_local_time_adjustor.hpp>
25
#include <boost/date_time/local_time/local_time.hpp>
26
#include <boost/smart_ptr/make_shared.hpp>
27
28
#include "yb/gutil/casts.h"
29
30
#include "yb/util/result.h"
31
#include "yb/util/status_format.h"
32
33
using std::locale;
34
using std::vector;
35
using std::string;
36
using std::regex;
37
using icu::GregorianCalendar;
38
using icu::TimeZone;
39
using icu::UnicodeString;
40
using boost::gregorian::date;
41
using boost::local_time::local_date_time;
42
using boost::local_time::local_time_facet;
43
using boost::local_time::local_microsec_clock;
44
using boost::local_time::posix_time_zone;
45
using boost::local_time::time_zone_ptr;
46
using boost::posix_time::ptime;
47
using boost::posix_time::microseconds;
48
using boost::posix_time::time_duration;
49
using boost::posix_time::microsec_clock;
50
using boost::posix_time::milliseconds;
51
52
// Selects how StringToTimezone() interprets a non-empty timezone string: when true the string is
// first normalized by GetTimezone() (which may consult ICU); when false it is handed to boost's
// posix_time_zone unchanged.
DEFINE_bool(use_icu_timezones, true, "Use the new ICU library for timezones instead of boost");
53
54
namespace yb {
55
56
namespace {
57
58
// UTC timezone.
59
static const time_zone_ptr kUtcTimezone(new posix_time_zone("UTC"));
60
61
// Unix epoch (time_t 0) at UTC.
62
static const local_date_time kEpoch(boost::posix_time::from_time_t(0), kUtcTimezone);
63
64
// Date offset of Unix epoch (2^31): the encoded date value that corresponds to day 0.
// The shift is done on an unsigned literal so that bit 31 is never shifted into the sign bit of
// a signed int.
static constexpr uint32_t kEpochDateOffset = 1U << 31;
66
67
// Length of one day expressed in milliseconds and in microseconds.
static constexpr int64_t kDayInMilliSeconds = INT64_C(24) * 60 * 60 * 1000;
static constexpr int64_t kDayInMicroSeconds = kDayInMilliSeconds * INT64_C(1000);
70
71
14.8k
// Converts a zoned date-time into a Timestamp holding the number of microseconds between kEpoch
// and `t`.
Timestamp ToTimestamp(const local_date_time& t) {
  const auto since_epoch = t - kEpoch;
  return Timestamp(since_epoch.total_microseconds());
}
74
75
25
// Encodes a signed day count relative to the Unix epoch as an unsigned date value by adding
// kEpochDateOffset.
//
// Returns InvalidArgument("Invalid date") when the shifted value does not fit into uint32_t.
Result<uint32_t> ToDate(const int64_t days_since_epoch) {
  const int64_t shifted = days_since_epoch + kEpochDateOffset;
  const bool fits = shifted >= std::numeric_limits<uint32_t>::min() &&
                    shifted <= std::numeric_limits<uint32_t>::max();
  if (!fits) {
    return STATUS(InvalidArgument, "Invalid date");
  }
  return narrow_cast<uint32_t>(shifted);
}
82
83
34
// Builds a cleared, non-lenient GregorianCalendar in the GMT timezone.
//
// The Gregorian change date is moved to U_DATE_MIN (presumably so that every representable date
// is interpreted with Gregorian rules -- confirm against the ICU documentation).
// Returns InvalidArgument carrying the ICU error name if ICU reports a failure.
Result<GregorianCalendar> CreateCalendar() {
  UErrorCode icu_status = U_ZERO_ERROR;

  GregorianCalendar calendar(*TimeZone::getGMT(), icu_status);
  if (U_FAILURE(icu_status)) {
    return STATUS(InvalidArgument, "Failed to create Gregorian calendar", u_errorName(icu_status));
  }

  calendar.setGregorianChange(U_DATE_MIN, icu_status);
  if (U_FAILURE(icu_status)) {
    return STATUS(InvalidArgument, "Failed to set Gregorian change", u_errorName(icu_status));
  }

  // Reject out-of-range field values instead of rolling them over, and start from empty fields.
  calendar.setLenient(FALSE);
  calendar.clear();
  return calendar;
}
97
98
// Get system (local) time zone.
99
10.8k
string GetSystemTimezone() {
100
  // Get system timezone by getting current UTC time, converting to local time and computing the
101
  // offset.
102
10.8k
  const ptime utc_time = microsec_clock::universal_time();
103
10.8k
  const ptime local_time = boost::date_time::c_local_adjustor<ptime>::utc_to_local(utc_time);
104
10.8k
  const time_duration offset = local_time - utc_time;
105
10.8k
  const int hours = narrow_cast<int>(offset.hours());
106
10.8k
  const int minutes = narrow_cast<int>(offset.minutes());
107
10.8k
  char buffer[7]; // "+HH:MM" or "-HH:MM"
108
10.8k
  const size_t result = snprintf(buffer, sizeof(buffer), "%+2.2d:%2.2d", hours, minutes);
109
0
  CHECK(result > 0 && result < sizeof(buffer)) << "Unexpected snprintf result: " << result;
110
10.8k
  return buffer;
111
10.8k
}
112
113
/* Subset of supported Timezone formats https://docs.oracle.com/cd/E51711_01/DR/ICU_Time_Zones.html
114
 * Full database can be found at https://www.iana.org/time-zones
115
 * We support everything that Cassandra supports, like z/Z, +/-0800, +/-08:30 GMT+/-[0]7:00,
116
 * and we also support UTC+/-[0]9:30 which Cassandra does not support
117
 */
118
1.30k
Result<string> GetTimezone(string timezoneID) {
119
  /* Parse timezone offset from string in most formats of timezones
120
   * Some formats are supported by ICU and some different ones by Boost::PosixTime
121
   * To capture both, return posix supported directly, and for ICU, create ICU Timezone and then
122
   * convert to a supported Posix format.
123
   */
124
  // [+/-]0830 is not supported by ICU TimeZone or Posixtime so need to do some extra work
125
1.30k
  std::smatch m;
126
1.30k
  std::regex rgx = regex("(?:\\+|-)(\\d{2})(\\d{2})");
127
1.30k
  if (timezoneID.empty()) {
128
0
    return GetSystemTimezone();
129
1.30k
  } else if (timezoneID == "z" || timezoneID == "Z") {
130
180
    timezoneID = "GMT";
131
1.12k
  } else if (std::regex_match(timezoneID, m , rgx)) {
132
181
    return m.str(1) + ":" + m.str(2);
133
945
  } else if (timezoneID.at(0) == '+' || timezoneID.at(0) == '-' ||
134
762
             timezoneID.substr(0, 3) == "UTC") {
135
578
    return timezoneID;
136
578
  }
137
547
  std::unique_ptr<TimeZone> tzone(TimeZone::createTimeZone(timezoneID.c_str()));
138
547
  UnicodeString id;
139
547
  tzone->getID(id);
140
547
  string timezone;
141
547
  id.toUTF8String(timezone);
142
547
  if (*tzone == TimeZone::getUnknown()) {
143
3
    return STATUS(InvalidArgument, "Invalid Timezone: " + timezoneID +
144
3
        "\nUse standardized timezone such as \"America/New_York\" or offset such as UTC-07:00.");
145
3
  }
146
544
  time_duration td = milliseconds(tzone->getRawOffset());
147
544
  const int hours = narrow_cast<int>(td.hours());
148
544
  const int minutes = narrow_cast<int>(td.minutes());
149
544
  char buffer[7]; // "+HH:MM" or "-HH:MM"
150
544
  const size_t result = snprintf(buffer, sizeof(buffer), "%+2.2d:%2.2d", hours, abs(minutes));
151
544
  if (result <= 0 || result >= sizeof(buffer)) {
152
0
    return STATUS(Corruption, "Parsing timezone into timezone offset string failed");
153
0
  }
154
544
  return buffer;
155
544
}
156
157
1.33k
// Resolves a timezone string to a boost time_zone_ptr.
//
// An empty string selects UTC when `use_utc` is set and the system timezone otherwise. A
// non-empty string is normalized through GetTimezone() when FLAGS_use_icu_timezones is on, or
// passed to posix_time_zone verbatim when it is off. Errors from GetTimezone() are propagated.
Result<time_zone_ptr> StringToTimezone(const std::string& tz, bool use_utc) {
  if (!tz.empty()) {
    if (!FLAGS_use_icu_timezones) {
      return boost::make_shared<posix_time_zone>(tz);
    }
    return boost::make_shared<posix_time_zone>(VERIFY_RESULT(GetTimezone(tz)));
  }
  if (use_utc) {
    return kUtcTimezone;
  }
  return boost::make_shared<posix_time_zone>(GetSystemTimezone());
}
166
167
} // namespace
168
169
//------------------------------------------------------------------------------------------------
170
// Parses `str` into a Timestamp using the first regex of `input_format` that matches the whole
// string. Capture groups are, in order: year, month, day, hours, minutes, seconds, fractional
// seconds and timezone; empty time/fraction groups default to zero.
//
// Returns InvalidArgument if no regex matches, if the timezone cannot be resolved, or if boost
// rejects the date/time components.
Result<Timestamp> DateTime::TimestampFromString(const string& str,
                                                const InputFormat& input_format) {
  std::smatch match;
  // Numeric value of a capture group, or 0 when the group is empty.
  const auto group_or_zero = [&match](size_t index) -> int {
    const string text = match.str(index);
    return text.empty() ? 0 : stoi(text);
  };

  for (const auto& pattern : input_format.regexes) {
    if (!std::regex_match(str, match, pattern)) {
      continue;
    }
    const int year = stoi(match.str(1));
    const int month = stoi(match.str(2));
    const int day = stoi(match.str(3));
    const int hours = group_or_zero(4);
    const int minutes = group_or_zero(5);
    const int seconds = group_or_zero(6);
    // Scale the fraction from the number of digits supplied to boost's internal resolution.
    const int64_t frac = AdjustPrecision(
        group_or_zero(7), match.str(7).size(), time_duration::num_fractional_digits());

    // Build the zoned date-time; boost signals invalid components by throwing.
    try {
      const date d(year, month, day);
      const time_duration t(hours, minutes, seconds, frac);
      time_zone_ptr tz = VERIFY_RESULT(StringToTimezone(match.str(8), input_format.use_utc));
      return ToTimestamp(local_date_time(d, t, tz, local_date_time::NOT_DATE_TIME_ON_ERROR));
    } catch (std::exception& e) {
      return STATUS(InvalidArgument, "Invalid timestamp", e.what());
    }
  }
  return STATUS_FORMAT(InvalidArgument, "Invalid timestamp $0: Wrong format of input string", str);
}
198
199
567
// Builds a Timestamp from an integer expressed at `input_format.input_precision`, rescaling it
// to kInternalPrecision.
Timestamp DateTime::TimestampFromInt(const int64_t val, const InputFormat& input_format) {
  const int64_t rescaled = AdjustPrecision(val, input_format.input_precision, kInternalPrecision);
  return Timestamp(rescaled);
}
202
203
17.1k
// Renders `timestamp` with the locale of `output_format`, relative to the UTC epoch when
// `use_utc` is set and to the epoch in the system timezone otherwise.
//
// If formatting throws, the raw timestamp value is written instead.
string DateTime::TimestampToString(const Timestamp timestamp, const OutputFormat& output_format) {
  std::ostringstream out;
  out.imbue(output_format.output_locale);
  // Epoch in the system timezone; computed once on first use.
  static const local_date_time kSystemEpoch(
      boost::posix_time::from_time_t(0), boost::make_shared<posix_time_zone>(GetSystemTimezone()));
  try {
    const local_date_time& base = output_format.use_utc ? kEpoch : kSystemEpoch;
    out << base + microseconds(timestamp.value());
  } catch (...) {
    // If we cannot produce a valid date, default to showing the exact timestamp value.
    // This can happen if timestamp value is outside the standard year range (1400..10000).
    out << timestamp.value();
  }
  return out.str();
}
218
219
13.5k
// Returns the current time, read from boost's microsecond clock in UTC, as a Timestamp.
Timestamp DateTime::TimestampNow() {
  const local_date_time now_utc = local_microsec_clock::local_time(kUtcTimezone);
  return ToTimestamp(now_utc);
}
222
223
//------------------------------------------------------------------------------------------------
224
27
// Parses a date of the form "yyyy-mm-dd" (year may be negative and up to 7 digits) into the
// unsigned date encoding produced by ToDate().
//
// Years <= 0 are mapped to the BC era with year-of-era `1 - year`. Returns InvalidArgument for
// a malformed string, an out-of-range month or day, or an ICU failure.
Result<uint32_t> DateTime::DateFromString(const std::string& str) {
  static const regex date_format("(-?\\d{1,7})-(\\d{1,2})-(\\d{1,2})");
  std::smatch parts;
  if (!std::regex_match(str, parts, date_format)) {
    return STATUS(InvalidArgument, "Invalid date format");
  }

  const int year = stoi(parts.str(1));
  const int month = stoi(parts.str(2));
  const int day = stoi(parts.str(3));
  if (!(month >= 1 && month <= 12)) {
    return STATUS(InvalidArgument, "Invalid month");
  }
  if (!(day >= 1 && day <= 31)) {
    return STATUS(InvalidArgument, "Invalid day of month");
  }

  GregorianCalendar cal = VERIFY_RESULT(CreateCalendar());
  if (year <= 0) {
    cal.set(UCAL_ERA, GregorianCalendar::EEras::BC);
    cal.set(1 - year, month - 1, day);
  } else {
    cal.set(UCAL_ERA, GregorianCalendar::EEras::AD);
    cal.set(year, month - 1, day);
  }

  UErrorCode icu_status = U_ZERO_ERROR;
  const int64_t ms_since_epoch = cal.getTime(icu_status);
  if (U_FAILURE(icu_status)) {
    return STATUS(InvalidArgument, "Failed to get time", u_errorName(icu_status));
  }
  return ToDate(ms_since_epoch / kDayInMilliSeconds);
}
252
253
2
// Converts a Timestamp (microseconds since the epoch) to its encoded date.
// The day count is obtained by integer division, i.e. it truncates toward zero.
Result<uint32_t> DateTime::DateFromTimestamp(const Timestamp timestamp) {
  const int64_t whole_days = timestamp.ToInt64() / kDayInMicroSeconds;
  return ToDate(whole_days);
}
256
257
0
// Converts a Unix timestamp in milliseconds to its encoded date.
// The day count is obtained by integer division, i.e. it truncates toward zero.
Result<uint32_t> DateTime::DateFromUnixTimestamp(const int64_t unix_timestamp) {
  const int64_t whole_days = unix_timestamp / kDayInMilliSeconds;
  return ToDate(whole_days);
}
260
261
11
// Formats an encoded date as "y-mm-dd". Dates in the BC era are printed with a non-positive
// year computed as -(year_of_era - 1).
//
// Returns InvalidArgument if ICU fails to set the time or to read back the calendar fields.
Result<string> DateTime::DateToString(const uint32_t date) {
  GregorianCalendar cal = VERIFY_RESULT(CreateCalendar());
  UErrorCode icu_status = U_ZERO_ERROR;
  cal.setTime(DateToUnixTimestamp(date), icu_status);
  if (U_FAILURE(icu_status)) {
    return STATUS(InvalidArgument, "Failed to set time", u_errorName(icu_status));
  }

  // Field reads accumulate into the same status, which is checked once afterwards.
  const bool is_bc = cal.get(UCAL_ERA, icu_status) == GregorianCalendar::EEras::BC;
  const int year_of_era = cal.get(UCAL_YEAR, icu_status);
  const int year = is_bc ? -(year_of_era - 1) : year_of_era;
  const int month = cal.get(UCAL_MONTH, icu_status) + 1;  // ICU months are zero-based here.
  const int day = cal.get(UCAL_DATE, icu_status);
  if (U_FAILURE(icu_status)) {
    return STATUS(InvalidArgument, "Failed to get date", u_errorName(icu_status));
  }

  char buffer[15]; // Between "-5877641-06-23" and "5881580-07-11".
  const size_t result = snprintf(buffer, sizeof(buffer), "%d-%2.2d-%2.2d", year, month, day);
  CHECK(result > 0 && result < sizeof(buffer)) << "Unexpected snprintf result: " << result;
  return buffer;
}
280
281
2
// Converts an encoded date to the Timestamp (microseconds since the epoch) of that day's start.
// NOTE(review): the multiplication looks like it can exceed int64_t for dates far from the
// epoch -- confirm whether callers restrict the range.
Timestamp DateTime::DateToTimestamp(uint32_t date) {
  const int64_t days_since_epoch = static_cast<int64_t>(date) - kEpochDateOffset;
  return Timestamp(days_since_epoch * kDayInMicroSeconds);
}
284
285
12
int64_t DateTime::DateToUnixTimestamp(uint32_t date) {
286
12
  return (static_cast<int64_t>(date) - kEpochDateOffset) * kDayInMilliSeconds;
287
12
}
288
289
1
uint32_t DateTime::DateNow() {
290
1
  return narrow_cast<uint32_t>(TimestampNow().ToInt64() / kDayInMicroSeconds + kEpochDateOffset);
291
1
}
292
293
//------------------------------------------------------------------------------------------------
294
11
// Parses a time of day in the format "hh:mm:ss[.fffffffff]" and returns it as nanoseconds since
// midnight.
//
// Returns InvalidArgument when the string does not match the format or when the hour, minute or
// second is out of range.
Result<int64_t> DateTime::TimeFromString(const std::string& str) {
  // Regex for time format "hh:mm:ss[.fffffffff]"
  static const regex time_format("(\\d{1,2}):(\\d{1,2}):(\\d{1,2})(\\.(\\d{0,9}))?");
  std::smatch m;
  if (!std::regex_match(str, m, time_format)) {
    return STATUS(InvalidArgument, "Invalid time format");
  }
  const int64_t hour = stoi(m.str(1));
  const int64_t minute = stoi(m.str(2));
  const int64_t second = stoi(m.str(3));
  // Scale the fraction from the number of digits given up to nanoseconds (9 digits) using exact
  // integer arithmetic rather than floating-point pow().
  const string fraction = m.str(5);
  const int64_t nano_sec =
      fraction.empty() ? 0 : AdjustPrecision(stoi(fraction), fraction.size(), 9);
  // The regex only admits digits, so the components cannot be negative; only the upper bounds
  // need checking.
  if (hour > 23) {
    return STATUS(InvalidArgument, "Invalid hour");
  }
  if (minute > 59) {
    return STATUS(InvalidArgument, "Invalid minute");
  }
  if (second > 59) {
    return STATUS(InvalidArgument, "Invalid second");
  }
  return ((hour * 60 + minute) * 60 + second) * 1000000000 + nano_sec;
}
316
317
4
// Formats a time of day given in nanoseconds since midnight as "hh:mm:ss.fffffffff".
//
// Returns InvalidArgument for a negative value or one whose hour component exceeds 23.
Result<string> DateTime::TimeToString(int64_t time) {
  if (time < 0) {
    return STATUS(InvalidArgument, "Invalid time");
  }
  constexpr int64_t kNanosPerSecond = 1000000000;
  const int nanos = static_cast<int>(time % kNanosPerSecond);
  const int64_t total_seconds = time / kNanosPerSecond;
  const int secs = static_cast<int>(total_seconds % 60);
  const int64_t total_minutes = total_seconds / 60;
  const int mins = static_cast<int>(total_minutes % 60);
  const int64_t total_hours = total_minutes / 60;
  if (total_hours > 23) {
    return STATUS(InvalidArgument, "Invalid hour");
  }
  const int hrs = narrow_cast<int>(total_hours);

  char buffer[19]; // "hh:mm:ss[.fffffffff]"
  const size_t result = snprintf(buffer, sizeof(buffer), "%2.2d:%2.2d:%2.2d.%9.9d",
                                 hrs, mins, secs, nanos);
  CHECK(result > 0 && result < sizeof(buffer)) << "Unexpected snprintf result: " << result;
  return buffer;
}
334
335
1
int64_t DateTime::TimeNow() {
336
1
  return (TimestampNow().ToInt64() % kDayInMicroSeconds) * 1000;
337
1
}
338
339
//------------------------------------------------------------------------------------------------
340
20
// Parses an interval made of days, hours, minutes and seconds into a MonoDelta.
//
// Accepted spellings (case-insensitive):
//   ISO 8601:             '3d 4h 5m 6s'
//   Abbreviated Postgres: '3 d 4 hrs 5 mins 6 secs'
//   Traditional Postgres: '3 days 4 hours 5 minutes 6 seconds'
//   SQL Standard:         'D H:M:S'
// Every component of the first form is optional, so an empty string yields a zero interval.
//
// Returns InvalidArgument when no format matches or when a numeric component is too large to
// be converted.
Result<MonoDelta> DateTime::IntervalFromString(const std::string& str) {
  /* See Postgres: DecodeInterval() in datetime.c */
  static const std::vector<std::regex> regexes {
      // ISO 8601 / abbreviated Postgres / traditional Postgres.
      std::regex("(?:(\\d+) ?d(?:ay)?s?)? *(?:(\\d+) ?h(?:ou)?r?s?)? *"
                     "(?:(\\d+) ?m(?:in(?:ute)?s?)?)? *(?:(\\d+) ?s(?:ec(?:ond)?s?)?)?",
                 std::regex_constants::icase),
      // SQL Standard: 'D H:M:S'
      std::regex("(?:(\\d+) )?(\\d{1,2}):(\\d{1,2}):(\\d{1,2})", std::regex_constants::icase),
  };
  // Try each regex to see if one matches.
  for (const auto& reg : regexes) {
    std::smatch m;
    if (!std::regex_match(str, m, reg)) {
      continue;
    }
    // All regexes have the same 4 capture groups, in order: days, hours, minutes, seconds.
    // A group that did not participate in the match counts as zero.
    const auto component = [&m](size_t index) {
      const std::string text = m.str(index);
      return text.empty() ? 0 : stol(text);
    };
    // The digit runs are unbounded, so stol() may throw for oversized values; report that as
    // an invalid argument instead of letting the exception escape.
    try {
      const auto day = component(1);
      const auto hours = component(2);
      const auto minutes = component(3);
      const auto seconds = component(4);
      // Collapse everything into a total number of seconds.
      return MonoDelta::FromSeconds(seconds + (60 * (minutes + 60 * (hours + 24 * day))));
    } catch (const std::exception& e) {
      return STATUS(InvalidArgument, "Invalid interval", e.what());
    }
  }
  return STATUS(InvalidArgument, "Invalid interval", "Wrong format of input string: " + str);
}
367
368
//------------------------------------------------------------------------------------------------
369
// Rescales `val` from `input_precision` decimal digits to `output_precision` decimal digits.
//
// Increasing the precision multiplies by 10 per digit and saturates at INT64_MAX / INT64_MIN
// rather than overflowing; decreasing the precision divides by 10 per digit (truncating).
int64_t DateTime::AdjustPrecision(int64_t val,
                                  size_t input_precision,
                                  const size_t output_precision) {
  for (; input_precision < output_precision; ++input_precision) {
    // In case of overflow we just return max/min values -- this is needed for correctness of
    // comparison operations and is similar to Cassandra behaviour.
    if (val > (INT64_MAX / 10)) {
      return INT64_MAX;
    }
    if (val < (INT64_MIN / 10)) {
      return INT64_MIN;
    }
    val *= 10;
  }
  for (; input_precision > output_precision; --input_precision) {
    val /= 10;
  }
  return val;
}
387
388
namespace {
389
390
41.3k
// Builds the ordered list of regexes accepted for timestamp input.
//
// Every regex exposes the same 8 capture groups (year, month, day, hours, minutes, seconds,
// fraction, timezone); components that a given variant does not accept are represented by an
// empty group "()" so that group numbering stays stable. Date-and-time variants come first
// (separator ' ' then 'T'), followed by the date-only variants.
std::vector<std::regex> InputFormatRegexes() {
  const std::string empty_group = "()";
  const std::string date_part = "(\\d{4})-(\\d{1,2})-(\\d{1,2})";
  const std::string time_with_seconds = "(\\d{1,2}):(\\d{1,2}):(\\d{1,2})";
  const std::string time_without_seconds = "(\\d{1,2}):(\\d{1,2})" + empty_group;
  const std::string no_time = empty_group + empty_group + empty_group;
  const std::string fraction_part = "\\.(\\d{6}|\\d{1,3})";

  const std::vector<std::string> separators = {" ", "T"};
  const std::vector<std::string> time_parts = {time_without_seconds, time_with_seconds};
  const std::vector<std::string> fraction_parts = {empty_group, fraction_part};
  const std::vector<std::string> zone_parts = {
      // No timezone.
      empty_group,
      // Offset, i.e. +/-xx:xx, +/-0000, timezone parser will do additional checking.
      "((?:\\+|-)\\d{2}:?\\d{2})",
      // Zulu timezone: a trailing z or Z with no space in front, equivalent to GMT/UTC.
      "([zZ])",
      // Timezone name, abbreviation, or offset (preceded by space), e.g. PDT, UTC+/-xx:xx, etc.
      // Anything starting with a letter or '+' is accepted here; the timezone parser does the
      // real validation.
      " ([a-zA-Z\\+].+)",
  };

  std::vector<std::regex> patterns;
  patterns.reserve((separators.size() * time_parts.size() * fraction_parts.size() + 1) *
                   zone_parts.size());

  for (const auto& sep : separators) {
    for (const auto& time : time_parts) {
      for (const auto& frac : fraction_parts) {
        const std::string prefix = date_part + sep + time + frac;
        for (const auto& zone : zone_parts) {
          patterns.emplace_back(prefix + zone);
        }
      }
    }
  }

  // Date-only variants: empty time and fraction groups, optional timezone.
  const std::string date_only = date_part + no_time + empty_group;
  for (const auto& zone : zone_parts) {
    patterns.emplace_back(date_only + zone);
  }
  return patterns;
}
423
424
} // namespace
425
426
// Input format for CQL: integer timestamps carry 3 fractional digits and strings without a
// timezone are not forced to UTC.
const DateTime::InputFormat DateTime::CqlInputFormat = DateTime::InputFormat {
  .regexes = InputFormatRegexes(),
  .input_precision = 3, // Cassandra current default
  .use_utc = false,
};
431
432
// Output format for CQL: timestamps are rendered in UTC with a 'T' between date and time.
// The facet is allocated with `new` because std::locale's constructor takes a raw facet pointer.
const DateTime::OutputFormat DateTime::CqlOutputFormat = {
  .output_locale = locale(locale::classic(), new local_time_facet("%Y-%m-%dT%H:%M:%S.%f%q")),
  .use_utc = true,
};
436
437
// Human-readable input format: integer timestamps carry 6 fractional digits and strings without
// a timezone are not forced to UTC.
const DateTime::InputFormat DateTime::HumanReadableInputFormat = {
  .regexes = InputFormatRegexes(),
  .input_precision = 6,
  .use_utc = false,
};
442
443
// Human-readable output format: timestamps are rendered relative to the system timezone with a
// space between date and time.
// The facet is allocated with `new` because std::locale's constructor takes a raw facet pointer.
const DateTime::OutputFormat DateTime::HumanReadableOutputFormat = {
  .output_locale = locale(locale::classic(), new local_time_facet("%Y-%m-%d %H:%M:%S.%f")),
  .use_utc = false,
};
447
448
} // namespace yb