/Users/deen/code/yugabyte-db/src/yb/gutil/strings/join.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2008 and onwards Google Inc. All rights reserved. |
2 | | // |
3 | | // The following only applies to changes made to this file as part of YugaByte development. |
4 | | // |
5 | | // Portions Copyright (c) YugaByte, Inc. |
6 | | // |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
8 | | // in compliance with the License. You may obtain a copy of the License at |
9 | | // |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // |
12 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
13 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
14 | | // or implied. See the License for the specific language governing permissions and limitations |
15 | | // under the License. |
16 | | // |
17 | | |
18 | | #include "yb/gutil/strings/join.h" |
19 | | |
20 | | #include <memory> |
21 | | |
22 | | #include <glog/logging.h> |
23 | | |
24 | | #include "yb/gutil/strings/ascii_ctype.h" |
25 | | #include "yb/gutil/strings/escaping.h" |
26 | | |
27 | | // ---------------------------------------------------------------------- |
28 | | // JoinUsing() |
29 | | // This merges a vector of string components with delim inserted |
30 | | // as separators between components. |
31 | | // This is essentially the same as JoinUsingToBuffer except |
32 | | // the return result is dynamically allocated using "new char[]". |
33 | | // It is the caller's responsibility to "delete []" the returned buffer. |
34 | | // |
35 | | // If result_length_p is not NULL, it will contain the length of the |
36 | | // result string (not including the trailing '\0'). |
37 | | // ---------------------------------------------------------------------- |
38 | | char* JoinUsing(const vector<const char*>& components, |
39 | | const char* delim, |
40 | 0 | size_t* result_length_p) { |
41 | 0 | const auto num_components = components.size(); |
42 | 0 | const auto delim_length = strlen(delim); |
43 | 0 | auto num_chars = num_components > 1 ? delim_length * (num_components - 1) : 0; |
44 | 0 | for (size_t i = 0; i < num_components; ++i) |
45 | 0 | num_chars += strlen(components[i]); |
46 | |
|
47 | 0 | auto res_buffer = new char[num_chars + 1]; |
48 | 0 | return JoinUsingToBuffer(components, delim, num_chars+1, |
49 | 0 | res_buffer, result_length_p); |
50 | 0 | } |
51 | | |
52 | | // ---------------------------------------------------------------------- |
53 | | // JoinUsingToBuffer() |
54 | | // This merges a vector of string components with delim inserted |
55 | | // as separators between components. |
56 | | // User supplies the result buffer with specified buffer size. |
57 | | // The result is also returned for convenience. |
58 | | // |
59 | | // If result_length_p is not NULL, it will contain the length of the |
60 | | // result string (not including the trailing '\0'). |
61 | | // ---------------------------------------------------------------------- |
62 | | char* JoinUsingToBuffer(const vector<const char*>& components, |
63 | | const char* delim, |
64 | | size_t result_buffer_size, |
65 | | char* result_buffer, |
66 | 0 | size_t* result_length_p) { |
67 | 0 | CHECK(result_buffer != nullptr); |
68 | 0 | const auto num_components = components.size(); |
69 | 0 | const auto max_str_len = result_buffer_size - 1; |
70 | 0 | char* curr_dest = result_buffer; |
71 | 0 | size_t num_chars = 0; |
72 | 0 | for (size_t i = 0; (i < num_components) && (num_chars < max_str_len); ++i) { |
73 | 0 | const char* curr_src = components[i]; |
74 | 0 | while ((*curr_src != '\0') && (num_chars < max_str_len)) { |
75 | 0 | *curr_dest = *curr_src; |
76 | 0 | ++num_chars; |
77 | 0 | ++curr_dest; |
78 | 0 | ++curr_src; |
79 | 0 | } |
80 | 0 | if (i != (num_components-1)) { // not the last component ==> add separator |
81 | 0 | curr_src = delim; |
82 | 0 | while ((*curr_src != '\0') && (num_chars < max_str_len)) { |
83 | 0 | *curr_dest = *curr_src; |
84 | 0 | ++num_chars; |
85 | 0 | ++curr_dest; |
86 | 0 | ++curr_src; |
87 | 0 | } |
88 | 0 | } |
89 | 0 | } |
90 | |
|
91 | 0 | if (result_buffer_size > 0) |
92 | 0 | *curr_dest = '\0'; // add null termination |
93 | 0 | if (result_length_p != nullptr) // set string length value |
94 | 0 | *result_length_p = num_chars; |
95 | |
|
96 | 0 | return result_buffer; |
97 | 0 | } |
98 | | |
99 | | // ---------------------------------------------------------------------- |
100 | | // JoinStrings() |
101 | | // This merges a vector of string components with delim inserted |
102 | | // as separators between components. |
103 | | // This is essentially the same as JoinUsingToBuffer except |
104 | | // it uses strings instead of char *s. |
105 | | // |
106 | | // ---------------------------------------------------------------------- |
107 | | |
108 | | void JoinStringsInArray(string const* const* components, |
109 | | size_t num_components, |
110 | | const char* delim, |
111 | 0 | string* result) { |
112 | 0 | CHECK(result != nullptr); |
113 | 0 | result->clear(); |
114 | 0 | for (size_t i = 0; i < num_components; i++) { |
115 | 0 | if (i > 0) { |
116 | 0 | (*result) += delim; |
117 | 0 | } |
118 | 0 | (*result) += *(components[i]); |
119 | 0 | } |
120 | 0 | } |
121 | | |
122 | | void JoinStringsInArray(string const *components, |
123 | | size_t num_components, |
124 | | const char *delim, |
125 | 0 | string *result) { |
126 | 0 | JoinStringsIterator(components, |
127 | 0 | components + num_components, |
128 | 0 | delim, |
129 | 0 | result); |
130 | 0 | } |
131 | | |
132 | | // ---------------------------------------------------------------------- |
133 | | // JoinMapKeysAndValues() |
134 | | // JoinVectorKeysAndValues() |
135 | | // This merges the keys and values of a string -> string map or pair |
136 | | // of strings vector, with one delim (intra_delim) between each key |
137 | | // and its associated value and another delim (inter_delim) between |
138 | | // each key/value pair. The result is returned in a string (passed |
139 | | // as the last argument). |
140 | | // ---------------------------------------------------------------------- |
141 | | |
142 | | void JoinMapKeysAndValues(const map<string, string>& components, |
143 | | const GStringPiece& intra_delim, |
144 | | const GStringPiece& inter_delim, |
145 | 0 | string* result) { |
146 | 0 | JoinKeysAndValuesIterator(components.begin(), components.end(), |
147 | 0 | intra_delim, inter_delim, |
148 | 0 | result); |
149 | 0 | } |
150 | | |
151 | | void JoinVectorKeysAndValues(const vector< pair<string, string> >& components, |
152 | | const GStringPiece& intra_delim, |
153 | | const GStringPiece& inter_delim, |
154 | 0 | string* result) { |
155 | 0 | JoinKeysAndValuesIterator(components.begin(), components.end(), |
156 | 0 | intra_delim, inter_delim, |
157 | 0 | result); |
158 | 0 | } |
159 | | |
160 | | // ---------------------------------------------------------------------- |
161 | | // JoinCSVLine() |
162 | | // This function is the inverse of SplitCSVLineWithDelimiter() in that the |
163 | | // string returned by JoinCSVLineWithDelimiter() can be passed to |
164 | | // SplitCSVLineWithDelimiter() to get the original string vector back. |
165 | | // Quotes and escapes the elements of original_cols according to CSV quoting |
166 | | // rules, and then joins the escaped quoted strings with commas using |
167 | | // JoinStrings(). Note that JoinCSVLineWithDelimiter() will not necessarily |
168 | | // return the same string originally passed in to |
169 | | // SplitCSVLineWithDelimiter(), since SplitCSVLineWithDelimiter() can handle |
170 | | // gratuitous spacing and quoting. 'output' must point to an empty string. |
171 | | // |
172 | | // Example: |
173 | | // [Google], [x], [Buchheit, Paul], [string with " quote in it], [ space ] |
174 | | // ---> [Google,x,"Buchheit, Paul","string with "" quote in it"," space "] |
175 | | // ---------------------------------------------------------------------- |
176 | | void JoinCSVLineWithDelimiter(const vector<string>& cols, char delimiter, |
177 | 192 | string* output) { |
178 | 192 | CHECK(output); |
179 | 192 | CHECK(output->empty()); |
180 | 192 | vector<string> quoted_cols; |
181 | | |
182 | 192 | const string delimiter_str(1, delimiter); |
183 | 192 | const string escape_chars = delimiter_str + "\""; |
184 | | |
185 | | // If the string contains the delimiter or " anywhere, or begins or ends with |
186 | | // whitespace (ie ascii_isspace() returns true), escape all double-quotes and |
187 | | // bracket the string in double quotes. string.rbegin() evaluates to the last |
188 | | // character of the string. |
189 | 192 | for (const auto& col : cols) { |
190 | 192 | if ((col.find_first_of(escape_chars) != string::npos) || |
191 | 192 | (!col.empty() && (ascii_isspace(*col.begin()) || |
192 | 192 | ascii_isspace(*col.rbegin())))) { |
193 | | // Double the original size, for escaping, plus two bytes for |
194 | | // the bracketing double-quotes, and one byte for the closing \0. |
195 | 0 | auto size = 2 * col.size() + 3; |
196 | 0 | std::unique_ptr<char[]> buf(new char[size]); |
197 | | |
198 | | // Leave space at beginning and end for bracketing double-quotes. |
199 | 0 | auto escaped_size = strings::EscapeStrForCSV(col.c_str(), buf.get() + 1, size - 2); |
200 | 0 | CHECK_GE(escaped_size, 0) << "Buffer somehow wasn't large enough."; |
201 | 0 | CHECK_GE(size, escaped_size + 3) |
202 | 0 | << "Buffer should have one space at the beginning for a " |
203 | 0 | << "double-quote, one at the end for a double-quote, and " |
204 | 0 | << "one at the end for a closing '\0'"; |
205 | 0 | *buf.get() = '"'; |
206 | 0 | *((buf.get() + 1) + escaped_size) = '"'; |
207 | 0 | *((buf.get() + 1) + escaped_size + 1) = '\0'; |
208 | 0 | quoted_cols.push_back(string(buf.get(), buf.get() + escaped_size + 2)); |
209 | 192 | } else { |
210 | 192 | quoted_cols.push_back(col); |
211 | 192 | } |
212 | 192 | } |
213 | 192 | JoinStrings(quoted_cols, delimiter_str, output); |
214 | 192 | } |
215 | | |
216 | 192 | void JoinCSVLine(const vector<string>& cols, string* output) { |
217 | 192 | JoinCSVLineWithDelimiter(cols, ',', output); |
218 | 192 | } |
219 | | |
220 | 192 | string JoinCSVLine(const vector<string>& cols) { |
221 | 192 | string output; |
222 | 192 | JoinCSVLine(cols, &output); |
223 | 192 | return output; |
224 | 192 | } |