YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/join.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 and onwards Google Inc.  All rights reserved.
2
//
3
// The following only applies to changes made to this file as part of YugaByte development.
4
//
5
// Portions Copyright (c) YugaByte, Inc.
6
//
7
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
8
// in compliance with the License.  You may obtain a copy of the License at
9
//
10
// http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing, software distributed under the License
13
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
14
// or implied.  See the License for the specific language governing permissions and limitations
15
// under the License.
16
//
17
18
#include "yb/gutil/strings/join.h"
19
20
#include <memory>
21
22
#include <glog/logging.h>
23
24
#include "yb/gutil/strings/ascii_ctype.h"
25
#include "yb/gutil/strings/escaping.h"
26
27
// ----------------------------------------------------------------------
28
// JoinUsing()
29
//    This merges a vector of string components with delim inserted
30
//    as separators between components.
31
//    This is essentially the same as JoinUsingToBuffer except
32
//    the return result is dynamically allocated using "new char[]".
33
//    It is the caller's responsibility to "delete []" the returned buffer.
34
//
35
//    If result_length_p is not NULL, it will contain the length of the
36
//    result string (not including the trailing '\0').
37
// ----------------------------------------------------------------------
38
char* JoinUsing(const vector<const char*>& components,
39
                const char* delim,
40
0
                size_t* result_length_p) {
41
0
  const auto num_components = components.size();
42
0
  const auto delim_length = strlen(delim);
43
0
  auto num_chars = num_components > 1 ? delim_length * (num_components - 1) : 0;
44
0
  for (size_t i = 0; i < num_components; ++i)
45
0
    num_chars += strlen(components[i]);
46
47
0
  auto res_buffer = new char[num_chars + 1];
48
0
  return JoinUsingToBuffer(components, delim, num_chars+1,
49
0
                           res_buffer, result_length_p);
50
0
}
51
52
// ----------------------------------------------------------------------
53
// JoinUsingToBuffer()
54
//    This merges a vector of string components with delim inserted
55
//    as separators between components.
56
//    User supplies the result buffer with specified buffer size.
57
//    The result is also returned for convenience.
58
//
59
//    If result_length_p is not NULL, it will contain the length of the
60
//    result string (not including the trailing '\0').
61
// ----------------------------------------------------------------------
62
char* JoinUsingToBuffer(const vector<const char*>& components,
63
                         const char* delim,
64
                         size_t result_buffer_size,
65
                         char* result_buffer,
66
0
                         size_t* result_length_p) {
67
0
  CHECK(result_buffer != nullptr);
68
0
  const auto num_components = components.size();
69
0
  const auto max_str_len = result_buffer_size - 1;
70
0
  char* curr_dest = result_buffer;
71
0
  size_t num_chars = 0;
72
0
  for (size_t i = 0; (i < num_components) && (num_chars < max_str_len); ++i) {
73
0
    const char* curr_src = components[i];
74
0
    while ((*curr_src != '\0') && (num_chars < max_str_len)) {
75
0
      *curr_dest = *curr_src;
76
0
      ++num_chars;
77
0
      ++curr_dest;
78
0
      ++curr_src;
79
0
    }
80
0
    if (i != (num_components-1)) {  // not the last component ==> add separator
81
0
      curr_src = delim;
82
0
      while ((*curr_src != '\0') && (num_chars < max_str_len)) {
83
0
        *curr_dest = *curr_src;
84
0
        ++num_chars;
85
0
        ++curr_dest;
86
0
        ++curr_src;
87
0
      }
88
0
    }
89
0
  }
90
91
0
  if (result_buffer_size > 0)
92
0
    *curr_dest = '\0';  // add null termination
93
0
  if (result_length_p != nullptr)  // set string length value
94
0
    *result_length_p = num_chars;
95
96
0
  return result_buffer;
97
0
}
98
99
// ----------------------------------------------------------------------
100
// JoinStrings()
101
//    This merges a vector of string components with delim inserted
102
//    as separators between components.
103
//    This is essentially the same as JoinUsingToBuffer except
104
//    it uses strings instead of char *s.
105
//
106
// ----------------------------------------------------------------------
107
108
void JoinStringsInArray(string const* const* components,
109
                        size_t num_components,
110
                        const char* delim,
111
0
                        string* result) {
112
0
  CHECK(result != nullptr);
113
0
  result->clear();
114
0
  for (size_t i = 0; i < num_components; i++) {
115
0
    if (i > 0) {
116
0
      (*result) += delim;
117
0
    }
118
0
    (*result) += *(components[i]);
119
0
  }
120
0
}
121
122
void JoinStringsInArray(string const *components,
123
                        size_t num_components,
124
                        const char *delim,
125
0
                        string *result) {
126
0
  JoinStringsIterator(components,
127
0
                      components + num_components,
128
0
                      delim,
129
0
                      result);
130
0
}
131
132
// ----------------------------------------------------------------------
133
// JoinMapKeysAndValues()
134
// JoinVectorKeysAndValues()
135
//    This merges the keys and values of a string -> string map or pair
136
//    of strings vector, with one delim (intra_delim) between each key
137
//    and its associated value and another delim (inter_delim) between
138
//    each key/value pair.  The result is returned in a string (passed
139
//    as the last argument).
140
// ----------------------------------------------------------------------
141
142
void JoinMapKeysAndValues(const map<string, string>& components,
143
                          const GStringPiece& intra_delim,
144
                          const GStringPiece& inter_delim,
145
0
                          string* result) {
146
0
  JoinKeysAndValuesIterator(components.begin(), components.end(),
147
0
                            intra_delim, inter_delim,
148
0
                            result);
149
0
}
150
151
void JoinVectorKeysAndValues(const vector< pair<string, string> >& components,
152
                             const GStringPiece& intra_delim,
153
                             const GStringPiece& inter_delim,
154
0
                             string* result) {
155
0
  JoinKeysAndValuesIterator(components.begin(), components.end(),
156
0
                            intra_delim, inter_delim,
157
0
                            result);
158
0
}
159
160
// ----------------------------------------------------------------------
161
// JoinCSVLine()
162
//    This function is the inverse of SplitCSVLineWithDelimiter() in that the
163
//    string returned by JoinCSVLineWithDelimiter() can be passed to
164
//    SplitCSVLineWithDelimiter() to get the original string vector back.
165
//    Quotes and escapes the elements of original_cols according to CSV quoting
166
//    rules, and then joins the escaped quoted strings with commas using
167
//    JoinStrings().  Note that JoinCSVLineWithDelimiter() will not necessarily
168
//    return the same string originally passed in to
169
//    SplitCSVLineWithDelimiter(), since SplitCSVLineWithDelimiter() can handle
170
//    gratuitous spacing and quoting. 'output' must point to an empty string.
171
//
172
//    Example:
173
//     [Google], [x], [Buchheit, Paul], [string with " quote in it], [ space ]
174
//     --->  [Google,x,"Buchheit, Paul","string with "" quote in it"," space "]
175
// ----------------------------------------------------------------------
176
void JoinCSVLineWithDelimiter(const vector<string>& cols, char delimiter,
177
4
                              string* output) {
178
4
  CHECK(output);
179
4
  CHECK(output->empty());
180
4
  vector<string> quoted_cols;
181
182
4
  const string delimiter_str(1, delimiter);
183
4
  const string escape_chars = delimiter_str + "\"";
184
185
  // If the string contains the delimiter or " anywhere, or begins or ends with
186
  // whitespace (ie ascii_isspace() returns true), escape all double-quotes and
187
  // bracket the string in double quotes. string.rbegin() evaluates to the last
188
  // character of the string.
189
4
  for (const auto& col : cols) {
190
4
    if ((col.find_first_of(escape_chars) != string::npos) ||
191
4
        (!col.empty() && (ascii_isspace(*col.begin()) ||
192
4
                              ascii_isspace(*col.rbegin())))) {
193
      // Double the original size, for escaping, plus two bytes for
194
      // the bracketing double-quotes, and one byte for the closing \0.
195
0
      auto size = 2 * col.size() + 3;
196
0
      std::unique_ptr<char[]> buf(new char[size]);
197
198
      // Leave space at beginning and end for bracketing double-quotes.
199
0
      auto escaped_size = strings::EscapeStrForCSV(col.c_str(), buf.get() + 1, size - 2);
200
0
      CHECK_GE(escaped_size, 0) << "Buffer somehow wasn't large enough.";
201
0
      CHECK_GE(size, escaped_size + 3)
202
0
        << "Buffer should have one space at the beginning for a "
203
0
        << "double-quote, one at the end for a double-quote, and "
204
0
        << "one at the end for a closing '\0'";
205
0
      *buf.get() = '"';
206
0
      *((buf.get() + 1) + escaped_size) = '"';
207
0
      *((buf.get() + 1) + escaped_size + 1) = '\0';
208
0
      quoted_cols.push_back(string(buf.get(), buf.get() + escaped_size + 2));
209
4
    } else {
210
4
      quoted_cols.push_back(col);
211
4
    }
212
4
  }
213
4
  JoinStrings(quoted_cols, delimiter_str, output);
214
4
}
215
216
4
void JoinCSVLine(const vector<string>& cols, string* output) {
217
4
  JoinCSVLineWithDelimiter(cols, ',', output);
218
4
}
219
220
4
string JoinCSVLine(const vector<string>& cols) {
  // Convenience overload: builds the CSV line in a local string and returns
  // it by value instead of writing through an out-parameter.
  string csv_line;
  JoinCSVLine(cols, &csv_line);
  return csv_line;
}