/Users/deen/code/yugabyte-db/src/yb/tools/bulk_load_utils.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/tools/bulk_load_utils.h" |
15 | | |
16 | | #include <boost/algorithm/string.hpp> |
17 | | |
18 | | #include "yb/util/date_time.h" |
19 | | #include "yb/util/stol_utils.h" |
20 | | |
// Command-line flags that control how CSV input is split into columns.

// Column separator. Tokenize(line) uses only the first character of this value, and a validator
// registered further down this file rejects values that are not exactly one character long.
DEFINE_string(csv_delimiter, ",", "The character used to separate different columns.");
// Quote character. As with the delimiter, only the first character is used by Tokenize(line) and
// the same one-character validator is registered for it.
DEFINE_string(
    csv_quote_character, "\"", "The character used to quote a column that may have a delimiter.");
// Default list of column indexes; SkippedColumns() parses this value into a std::set<int>.
DEFINE_string(skipped_cols, "", "Comma separated list of 0-indexed columns to skip");
25 | | |
26 | | namespace { |
27 | 0 | static bool CSVSeparatorValidator(const char* flagname, const string& value) { |
28 | 0 | if (value.size() != 1) { |
29 | 0 | LOG(INFO) << "Expect " << flagname << " to be 1 character long"; |
30 | 0 | return false; |
31 | 0 | } |
32 | 0 | return true; |
33 | 0 | } |
34 | | } |
35 | | |
// Attach CSVSeparatorValidator to both single-character CSV flags so that a value of any other
// length is rejected.
// NOTE(review): __attribute__((unused)) presumably silences an unused-variable warning for the
// registration object that DEFINE_validator declares -- confirm against the gflags macro.
__attribute__((unused))
DEFINE_validator(csv_delimiter, &CSVSeparatorValidator);
__attribute__((unused))
DEFINE_validator(csv_quote_character, &CSVSeparatorValidator);
40 | | |
41 | | namespace yb { |
42 | | namespace tools { |
43 | | |
44 | 0 | Result<Timestamp> TimestampFromString(const std::string& str) { |
45 | 0 | auto val = CheckedStoll(str); |
46 | 0 | if (val.ok()) { |
47 | 0 | return DateTime::TimestampFromInt(*val); |
48 | 0 | } |
49 | | |
50 | 0 | return DateTime::TimestampFromString(str); |
51 | 0 | } |
52 | | |
53 | 0 | bool IsNull(std::string str) { |
54 | 0 | boost::algorithm::to_lower(str); |
55 | 0 | return str == kNullStringEscaped; |
56 | 0 | } |
57 | | |
58 | 0 | CsvTokenizer Tokenize(const std::string& line) { |
59 | 0 | return Tokenize(line, FLAGS_csv_delimiter[0], FLAGS_csv_quote_character[0]); |
60 | 0 | } |
61 | | |
62 | 0 | CsvTokenizer Tokenize(const std::string& line, char delimiter = ',', char quote_char = '"') { |
63 | 0 | boost::escaped_list_separator<char> seps('\\', delimiter, quote_char); |
64 | 0 | CsvTokenizer tokenizer(line, seps); |
65 | 0 | return tokenizer; |
66 | 0 | } |
67 | | |
68 | 0 | std::set<int> SkippedColumns() { |
69 | 0 | return SkippedColumns(FLAGS_skipped_cols); |
70 | 0 | } |
71 | | |
72 | 0 | std::set<int> SkippedColumns(const string& columns_to_skip) { |
73 | 0 | std::set<int> skipped_cols; |
74 | 0 | CsvTokenizer tokenizer = Tokenize(columns_to_skip, ','); |
75 | 0 | for (auto it = tokenizer.begin(); it != tokenizer.end(); it++) { |
76 | 0 | auto col = CheckedStoi(*it); |
77 | 0 | CHECK(col.ok()); |
78 | 0 | skipped_cols.insert(*col); |
79 | 0 | } |
80 | 0 | return skipped_cols; |
81 | 0 | } |
82 | | |
83 | | } // namespace tools |
84 | | } // namespace yb |