YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/strip.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
// based on contributions of various authors in strings/strutil_unittest.cc
3
//
4
// The following only applies to changes made to this file as part of YugaByte development.
5
//
6
// Portions Copyright (c) YugaByte, Inc.
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
9
// in compliance with the License.  You may obtain a copy of the License at
10
//
11
// http://www.apache.org/licenses/LICENSE-2.0
12
//
13
// Unless required by applicable law or agreed to in writing, software distributed under the License
14
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15
// or implied.  See the License for the specific language governing permissions and limitations
16
// under the License.
17
//
18
// This file contains functions that remove a defined part from the string,
19
// i.e., strip the string.
20
21
#include "yb/gutil/strings/strip.h"
22
23
#include <assert.h>
24
#include <string.h>
25
26
#include <algorithm>
27
28
using std::copy;
29
using std::max;
30
using std::min;
31
using std::swap;
32
using std::string;
33
34
0
string StripPrefixString(GStringPiece str, const GStringPiece& prefix) {
35
0
  if (str.starts_with(prefix))
36
0
    str.remove_prefix(prefix.length());
37
0
  return str.as_string();
38
0
}
39
40
bool TryStripPrefixString(GStringPiece str, const GStringPiece& prefix,
41
2
                                 string* result) {
42
2
  const bool has_prefix = str.starts_with(prefix);
43
2
  if (has_prefix)
44
2
    str.remove_prefix(prefix.length());
45
2
  str.as_string().swap(*result);
46
2
  return has_prefix;
47
2
}
48
49
0
string StripSuffixString(GStringPiece str, const GStringPiece& suffix) {
50
0
  if (str.ends_with(suffix))
51
0
    str.remove_suffix(suffix.length());
52
0
  return str.as_string();
53
0
}
54
55
bool TryStripSuffixString(GStringPiece str, const GStringPiece& suffix,
56
11
                                 string* result) {
57
11
  const bool has_suffix = str.ends_with(suffix);
58
11
  if (has_suffix)
59
10
    str.remove_suffix(suffix.length());
60
11
  str.as_string().swap(*result);
61
11
  return has_suffix;
62
11
}
63
64
// ----------------------------------------------------------------------
65
// StripString
66
//    Replaces any occurrence of the character 'remove' (or the characters
67
//    in 'remove') with the character 'replacewith'.
68
// ----------------------------------------------------------------------
69
0
void StripString(char* str, GStringPiece remove, char replacewith) {
70
0
  for (; *str != '\0'; ++str) {
71
0
    if (remove.find(*str) != GStringPiece::npos) {
72
0
      *str = replacewith;
73
0
    }
74
0
  }
75
0
}
76
77
0
void StripString(char* str, int len, GStringPiece remove, char replacewith) {
78
0
  char* end = str + len;
79
0
  for (; str < end; ++str) {
80
0
    if (remove.find(*str) != GStringPiece::npos) {
81
0
      *str = replacewith;
82
0
    }
83
0
  }
84
0
}
85
86
0
void StripString(string* s, GStringPiece remove, char replacewith) {
87
0
  for (char& c : *s) {
88
0
    if (remove.find(c) != GStringPiece::npos) {
89
0
      c = replacewith;
90
0
    }
91
0
  }
92
0
}
93
94
// ----------------------------------------------------------------------
95
// StripWhiteSpace
96
// ----------------------------------------------------------------------
97
0
void StripWhiteSpace(const char** str, size_t* len) {
98
  // strip off trailing whitespace
99
0
  while ((*len) > 0 && ascii_isspace((*str)[(*len)-1])) {
100
0
    (*len)--;
101
0
  }
102
103
  // strip off leading whitespace
104
0
  while ((*len) > 0 && ascii_isspace((*str)[0])) {
105
0
    (*len)--;
106
0
    (*str)++;
107
0
  }
108
0
}
109
110
0
// Removes one trailing line terminator ("\n" or "\r\n") from '*s', if present.
// Returns true iff something was removed.  A lone trailing '\r' is left alone.
bool StripTrailingNewline(std::string* s) {
  const std::string::size_type size = s->size();
  if (size == 0 || (*s)[size - 1] != '\n') {
    return false;
  }
  // Treat "\r\n" as a single terminator and drop both characters.
  const bool has_carriage_return = size > 1 && (*s)[size - 2] == '\r';
  s->resize(size - (has_carriage_return ? 2 : 1));
  return true;
}
120
121
255k
void StripWhiteSpace(string* str) {
122
255k
  size_t str_length = str->length();
123
124
  // Strip off leading whitespace.
125
255k
  size_t first = 0;
126
255k
  while (first < str_length && ascii_isspace((*str)[first])) {
127
8
    ++first;
128
8
  }
129
  // If entire string is white space.
130
255k
  if (first == str_length) {
131
418
    str->clear();
132
418
    return;
133
418
  }
134
255k
  if (first > 0) {
135
7
    str->erase(0, first);
136
7
    str_length -= first;
137
7
  }
138
139
  // Strip off trailing whitespace.
140
255k
  size_t last = str_length - 1;
141
255k
  while (last >= 0 && ascii_isspace((*str)[last])) {
142
3
    --last;
143
3
  }
144
255k
  if (last != (str_length - 1) && last >= 0) {
145
2
    str->erase(last + 1, string::npos);
146
2
  }
147
255k
}
148
149
// ----------------------------------------------------------------------
150
// Misc. stripping routines
151
// ----------------------------------------------------------------------
152
0
void StripCurlyBraces(string* s) {
153
0
  return StripBrackets('{', '}', s);
154
0
}
155
156
0
// Removes from '*s' every span that starts at an occurrence of 'left' and
// ends at the next occurrence of 'right' (delimiters included).  Processing
// stops at the first 'left' that has no 'right' at or after it; that
// unterminated remainder is kept as is.
void StripBrackets(char left, char right, std::string* s) {
  std::string::size_type open_pos = s->find(left);
  while (open_pos != std::string::npos) {
    // The search for 'right' starts at 'open_pos' itself, so when
    // left == right a single delimiter character forms a complete span.
    const std::string::size_type close_pos = s->find(right, open_pos);
    if (close_pos == std::string::npos) {
      return;
    }
    s->erase(open_pos, close_pos - open_pos + 1);
    // After the erase, 'open_pos' indexes the first character that followed
    // the removed span; resume looking for the next opener from there.
    open_pos = s->find(left, open_pos);
  }
}
166
167
0
// Removes from '*s' every span that starts at '<' and ends at the next '>'
// (delimiters included).  If a '<' has no closing '>', everything from that
// '<' to the end of the string is removed.
void StripMarkupTags(std::string* s) {
  std::string::size_type open_pos = s->find('<');
  while (open_pos != std::string::npos) {
    const std::string::size_type close_pos = s->find('>', open_pos);
    if (close_pos == std::string::npos) {
      // Unterminated tag: discard the rest of the string.
      s->erase(open_pos);
      return;
    }
    s->erase(open_pos, close_pos - open_pos + 1);
    // 'open_pos' now indexes the first character after the removed tag.
    open_pos = s->find('<', open_pos);
  }
}
180
181
0
string OutputWithMarkupTagsStripped(const string& s) {
182
0
  string result(s);
183
0
  StripMarkupTags(&result);
184
0
  return result;
185
0
}
186
187
188
57
size_t TrimStringLeft(string* s, const GStringPiece& remove) {
189
57
  size_t i = 0;
190
102
  while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
191
45
    ++i;
192
45
  }
193
57
  if (i > 0) s->erase(0, i);
194
57
  return i;
195
57
}
196
197
57
size_t TrimStringRight(string* s, const GStringPiece& remove) {
198
57
  size_t i = s->size(), trimmed = 0;
199
102
  while (i > 0 && memchr(remove.data(), (*s)[i-1], remove.size())) {
200
45
    --i;
201
45
  }
202
57
  if (i < s->size()) {
203
45
    trimmed = s->size() - i;
204
45
    s->erase(i);
205
45
  }
206
57
  return trimmed;
207
57
}
208
209
// ----------------------------------------------------------------------
210
// Various removal routines
211
// ----------------------------------------------------------------------
212
0
// Removes every occurrence of 'c' from the NUL-terminated buffer 'str' by
// compacting it in place, re-terminates it, and returns the new length.
size_t strrm(char* str, char c) {
  char* write = str;
  for (const char* read = str; *read != '\0'; ++read) {
    if (*read == c) {
      continue;  // Dropped: do not advance the write position.
    }
    *write++ = *read;
  }
  *write = '\0';
  return static_cast<size_t>(write - str);
}
219
220
0
// Removes every occurrence of 'c' from the first 'strlen' bytes of 'str' by
// compacting them in place.  Returns the number of bytes kept.  No NUL
// terminator is written, and bytes past the returned length are left as is.
size_t memrm(char* str, size_t strlen, char c) {
  char* write = str;
  const char* const stop = str + strlen;
  for (const char* read = str; read != stop; ++read) {
    if (*read == c) {
      continue;  // Dropped: do not advance the write position.
    }
    *write++ = *read;
  }
  return static_cast<size_t>(write - str);
}
228
229
0
// Removes from the NUL-terminated buffer 'str' every character that occurs
// in the NUL-terminated set 'chars', compacting in place.  Re-terminates the
// buffer and returns its new length.
size_t strrmm(char* str, const char* chars) {
  char* write = str;
  for (const char* read = str; *read != '\0'; ++read) {
    // '*read' is never NUL here, so strchr() cannot match the terminator of
    // 'chars'; a non-null result means the character is in the removal set.
    if (strchr(chars, *read) != nullptr) {
      continue;
    }
    *write++ = *read;
  }
  *write = '\0';
  return static_cast<size_t>(write - str);
}
244
245
0
// Removes from '*str' every character that occurs in 'chars', compacting in
// place and shrinking the string.  Returns the resulting length.
size_t strrmm(std::string* str, const std::string& chars) {
  const size_t original_length = str->length();
  const size_t first_hit = str->find_first_of(chars);
  if (first_hit == std::string::npos) {
    return original_length;  // Nothing to remove; leave the string untouched.
  }

  // Everything before 'first_hit' is already in place.  Compact the rest,
  // starting right after the first removed character.
  size_t write = first_hit;
  for (size_t read = first_hit + 1; read < original_length; ++read) {
    const char current = (*str)[read];
    if (chars.find(current) != std::string::npos) {
      continue;
    }
    (*str)[write++] = current;
  }

  str->resize(write);
  return write;
}
262
263
// ----------------------------------------------------------------------
264
// StripDupCharacters
265
//    Replaces any repeated occurrence of the character 'dup_char'
266
//    with single occurrence.  e.g.,
267
//       StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
268
//    Return the number of characters removed
269
// ----------------------------------------------------------------------
270
0
size_t StripDupCharacters(string* s, char dup_char, int64 start_pos) {
271
0
  if (start_pos < 0)
272
0
    start_pos = 0;
273
274
  // remove dups by compaction in-place
275
0
  size_t input_pos = start_pos;   // current reader position
276
0
  size_t output_pos = start_pos;  // current writer position
277
0
  const size_t input_end = s->size();
278
0
  while (input_pos < input_end) {
279
    // keep current character
280
0
    const char curr_char = (*s)[input_pos];
281
0
    if (output_pos != input_pos)  // must copy
282
0
      (*s)[output_pos] = curr_char;
283
0
    ++input_pos;
284
0
    ++output_pos;
285
286
0
    if (curr_char == dup_char) {  // skip subsequent dups
287
0
      while ((input_pos < input_end) && ((*s)[input_pos] == dup_char))
288
0
        ++input_pos;
289
0
    }
290
0
  }
291
0
  const size_t num_deleted = input_pos - output_pos;
292
0
  s->resize(s->size() - num_deleted);
293
0
  return num_deleted;
294
0
}
295
296
// ----------------------------------------------------------------------
297
// RemoveExtraWhitespace()
298
//   Remove leading, trailing, and duplicate internal whitespace.
299
// ----------------------------------------------------------------------
300
0
void RemoveExtraWhitespace(string* s) {
301
0
  assert(s != nullptr);
302
  // Empty strings clearly have no whitespace, and this code assumes that
303
  // string length is greater than 0
304
0
  if (s->empty())
305
0
    return;
306
307
0
  size_t input_pos = 0;   // current reader position
308
0
  size_t output_pos = 0;  // current writer position
309
0
  const size_t input_end = s->size();
310
  // Strip off leading space
311
0
  while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++;
312
313
0
  while (input_pos < input_end - 1) {
314
0
    char c = (*s)[input_pos];
315
0
    char next = (*s)[input_pos + 1];
316
    // Copy each non-whitespace character to the right position.
317
    // For a block of whitespace, print the last one.
318
0
    if (!ascii_isspace(c) || !ascii_isspace(next)) {
319
0
      if (output_pos != input_pos) {  // only copy if needed
320
0
        (*s)[output_pos] = c;
321
0
      }
322
0
      output_pos++;
323
0
    }
324
0
    input_pos++;
325
0
  }
326
  // Pick up the last character if needed.
327
0
  char c = (*s)[input_end - 1];
328
0
  if (!ascii_isspace(c)) (*s)[output_pos++] = c;
329
330
0
  s->resize(output_pos);
331
0
}
332
333
//------------------------------------------------------------------------
334
// See comment in header file for a complete description.
335
//------------------------------------------------------------------------
336
0
void StripLeadingWhiteSpace(string* str) {
337
0
  char const* const leading = StripLeadingWhiteSpace(
338
0
      const_cast<char*>(str->c_str()));
339
0
  if (leading != nullptr) {
340
0
    string const tmp(leading);
341
0
    str->assign(tmp);
342
0
  } else {
343
0
    str->assign("");
344
0
  }
345
0
}
346
347
0
void StripTrailingWhitespace(string* const s) {
348
0
  string::size_type i;
349
0
  for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) {
350
0
  }
351
352
0
  s->resize(i);
353
0
}
354
355
// ----------------------------------------------------------------------
356
// TrimRunsInString
357
//    Removes leading and trailing runs, and collapses middle
358
//    runs of a set of characters into a single character (the
359
//    first one specified in 'remove').  Useful for collapsing
360
//    runs of repeated delimiters, whitespace, etc.  E.g.,
361
//    TrimRunsInString(&s, " :,()") removes leading and trailing
362
//    delimiter chars and collapses and converts internal runs
363
//    of delimiters to single ' ' characters, so, for example,
364
//    "  a:(b):c  " -> "a b c"
365
//    "first,last::(area)phone, ::zip" -> "first last area phone zip"
366
// ----------------------------------------------------------------------
367
0
void TrimRunsInString(string* s, GStringPiece remove) {
368
0
  string::iterator dest = s->begin();
369
0
  string::iterator src_end = s->end();
370
0
  for (string::iterator src = s->begin(); src != src_end; ) {
371
0
    if (remove.find(*src) == GStringPiece::npos) {
372
0
      *(dest++) = *(src++);
373
0
    } else {
374
      // Skip to the end of this run of chars that are in 'remove'.
375
0
      for (++src; src != src_end; ++src) {
376
0
        if (remove.find(*src) == GStringPiece::npos) {
377
0
          if (dest != s->begin()) {
378
            // This is an internal run; collapse it.
379
0
            *(dest++) = remove[0];
380
0
          }
381
0
          *(dest++) = *(src++);
382
0
          break;
383
0
        }
384
0
      }
385
0
    }
386
0
  }
387
0
  s->erase(dest, src_end);
388
0
}
389
390
// ----------------------------------------------------------------------
391
// RemoveNullsInString
392
//    Removes any internal \0 characters from the string.
393
// ----------------------------------------------------------------------
394
0
// Deletes every '\0' character stored in '*s', shortening the string.
void RemoveNullsInString(std::string* s) {
  // std::remove() shifts the kept characters forward and returns the start
  // of the leftover tail, which is then cut off.
  const std::string::iterator leftover =
      std::remove(s->begin(), s->end(), '\0');
  s->erase(leftover, s->end());
}