YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/gutil/strings/strip.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.h
3
//
4
// The following only applies to changes made to this file as part of YugaByte development.
5
//
6
// Portions Copyright (c) YugaByte, Inc.
7
//
8
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
9
// in compliance with the License.  You may obtain a copy of the License at
10
//
11
// http://www.apache.org/licenses/LICENSE-2.0
12
//
13
// Unless required by applicable law or agreed to in writing, software distributed under the License
14
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
15
// or implied.  See the License for the specific language governing permissions and limitations
16
// under the License.
17
//
18
// This file contains functions that remove a defined part from the string,
19
// i.e., strip the string.
20
21
#ifndef YB_GUTIL_STRINGS_STRIP_H
22
#define YB_GUTIL_STRINGS_STRIP_H
23
24
#include <stddef.h>
25
#include <string>
26
using std::string;
27
28
#include "yb/gutil/strings/ascii_ctype.h"
29
#include "yb/gutil/strings/stringpiece.h"
30
31
// Given a string and a putative prefix, returns the string minus the
32
// prefix string if the prefix matches, otherwise the original
33
// string.
34
string StripPrefixString(GStringPiece str, const GStringPiece& prefix);
35
36
// Like StripPrefixString, but return true if the prefix was
37
// successfully matched.  Write the output to *result.
38
// It is safe for result to point back to the input string.
39
bool TryStripPrefixString(GStringPiece str, const GStringPiece& prefix,
40
                          string* result);
41
42
// Given a string and a putative suffix, returns the string minus the
43
// suffix string if the suffix matches, otherwise the original
44
// string.
45
string StripSuffixString(GStringPiece str, const GStringPiece& suffix);
46
47
48
// Like StripSuffixString, but return true if the suffix was
49
// successfully matched.  Write the output to *result.
50
// It is safe for result to point back to the input string.
51
bool TryStripSuffixString(GStringPiece str, const GStringPiece& suffix,
52
                          string* result);
53
54
// ----------------------------------------------------------------------
55
// StripString
56
//    Replaces any occurrence of the character 'remove' (or the characters
57
//    in 'remove') with the character 'replacewith'.
58
//    Good for keeping html characters or protocol characters (\t) out
59
//    of places where they might cause a problem.
60
// ----------------------------------------------------------------------
61
0
// Single-character overload: scans the NUL-terminated buffer 'str' and
// overwrites, in place, every byte equal to 'remove' with 'replacewith'.
// 'str' is dereferenced unconditionally, so it must not be a null pointer.
inline void StripString(char* str, char remove, char replacewith) {
62
0
  for (; *str; str++) {  // stop at the terminating NUL
63
0
    if (*str == remove)
64
0
      *str = replacewith;  // one-for-one substitution; nothing is shifted
65
0
  }
66
0
}
67
68
void StripString(char* str, GStringPiece remove, char replacewith);
69
void StripString(char* str, int len, GStringPiece remove, char replacewith);
70
void StripString(string* s, GStringPiece remove, char replacewith);
71
72
// ----------------------------------------------------------------------
73
// StripDupCharacters
74
//    Replaces any repeated occurrence of the character 'dup_char'
75
//    with single occurrence.  e.g.,
76
//       StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
77
//    Return the number of characters removed
78
// ----------------------------------------------------------------------
79
int StripDupCharacters(string* s, char dup_char, int start_pos);
80
81
// ----------------------------------------------------------------------
82
// StripWhiteSpace
83
//    "Removes" whitespace from both sides of string.  Pass in a pointer to an
84
//    array of characters, and its length.  The function changes the pointer
85
//    and length to refer to a substring that does not contain leading or
86
//    trailing spaces; it does not modify the string itself.  If the caller is
87
//    using NUL-terminated strings, it is the caller's responsibility to insert
88
//    the NUL character at the end of the substring.
89
//
90
//    Note: to be completely type safe, this function should be
91
//    parameterized as a template: template<typename anyChar> void
92
//    StripWhiteSpace(anyChar** str, size_t* len), where the expectation
93
//    is that anyChar could be char, const char, w_char, const w_char,
94
//    unicode_char, or any other character type we want.  However, we
95
//    just provided a version for char and const char.  C++ is
96
//    inconvenient, but correct, here.  Ask Amit if you want to know
97
//    the type safety details.
98
// ----------------------------------------------------------------------
99
void StripWhiteSpace(const char** str, size_t* len);
100
101
//------------------------------------------------------------------------
102
// StripTrailingWhitespace()
103
//   Removes whitespace at the end of the string *s.
104
//------------------------------------------------------------------------
105
void StripTrailingWhitespace(string* s);
106
107
//------------------------------------------------------------------------
108
// StripTrailingNewline(string*)
109
//   Strips the very last trailing newline or CR+newline from its
110
//   input, if one exists.  Useful for dealing with MapReduce's text
111
//   input mode, which appends '\n' to each map input.  Returns true
112
//   if a newline was stripped.
113
//------------------------------------------------------------------------
114
bool StripTrailingNewline(string* s);
115
116
0
// Mutable-pointer overload: forwards to StripWhiteSpace(const char**, size_t*)
// so that callers holding a char* can have *str / *len adjusted without
// writing the cast themselves.
inline void StripWhiteSpace(char** str, size_t* len) {
117
0
  // Conceptually StripWhiteSpace is generic over every char type C: it takes
118
0
  // a (C*, length) pair and hands back an adjusted (C*, length) pair.  The
119
0
  // const_cast below only adds constness to the pointee so that the
120
0
  // const char** overload can be reused for a char* argument.
121
0
  StripWhiteSpace(const_cast<const char**>(str), len);
122
0
}
123
124
0
// GStringPiece overload: copies the piece's data pointer and size into
// locals, lets the (const char**, size_t*) overload adjust them, and then
// re-points *str at the resulting range.
inline void StripWhiteSpace(GStringPiece* str) {
125
0
  const char* data = str->data();
126
0
  size_t len = str->size();
127
0
  StripWhiteSpace(&data, &len);  // updates the local pointer/length pair
128
0
  str->set(data, len);  // store the adjusted range back into the piece
129
0
}
130
131
void StripWhiteSpace(string* str);
132
133
namespace strings {
134
135
// Calls StripWhiteSpace(&element) on every element of *collection, in
// iteration order.  Collection must provide begin()/end() and a nested
// 'iterator' type, and some StripWhiteSpace overload must accept a pointer
// to the element type.
template <typename Collection>
136
inline void StripWhiteSpaceInCollection(Collection* collection) {
137
  for (typename Collection::iterator it = collection->begin();
138
       it != collection->end(); ++it)
139
    StripWhiteSpace(&(*it));  // overload is selected by the element type
140
}
141
142
}  // namespace strings
143
144
// ----------------------------------------------------------------------
145
// StripLeadingWhiteSpace
146
//    "Removes" whitespace from beginning of string. Returns ptr to first
147
//    non-whitespace character if one is present, NULL otherwise. Assumes
148
//    "line" is null-terminated.
149
// ----------------------------------------------------------------------
150
151
0
// Returns a pointer to the first character of 'line' for which
// ascii_isspace() is false, or NULL when that character is the NUL
// terminator (the string is empty or consists only of whitespace).
// No characters are modified.
inline const char* StripLeadingWhiteSpace(const char* line) {
152
0
  // Skip leading whitespace.  NOTE(review): termination relies on
  // ascii_isspace('\0') being false -- confirm in ascii_ctype.h.
153
0
  while (ascii_isspace(*line))
154
0
    ++line;
155
0
156
0
  if ('\0' == *line)  // reached the terminator: no non-whitespace found
157
0
    return NULL;
158
0
159
0
  return line;
160
0
}
161
162
// StripLeadingWhiteSpace for non-const strings.
163
0
// Mutable overload: delegates to the const char* version and casts the
// constness back off the result, so it returns the same pointer (or NULL)
// that the const version would.
inline char* StripLeadingWhiteSpace(char* line) {
164
0
  return const_cast<char*>(
165
0
      // The inner cast forces selection of the const char* overload, which
      // avoids this function calling itself.
      StripLeadingWhiteSpace(const_cast<const char*>(line)));
166
0
}
167
168
void StripLeadingWhiteSpace(string* str);
169
170
// Remove leading, trailing, and duplicate internal whitespace.
171
void RemoveExtraWhitespace(string* s);
172
173
174
// ----------------------------------------------------------------------
175
// SkipLeadingWhiteSpace
176
//    Returns str advanced past white space characters, if any.
177
//    Never returns NULL.  "str" must be terminated by a null character.
178
// ----------------------------------------------------------------------
179
0
// Advances 'str' while ascii_isspace() is true for the current character
// and returns the resulting pointer.  The advanced pointer is always
// returned as-is; there is no separate NULL result for all-whitespace
// input (presumably the result then points at the NUL terminator --
// this assumes ascii_isspace('\0') is false).
inline const char* SkipLeadingWhiteSpace(const char* str) {
180
0
  while (ascii_isspace(*str))
181
0
    ++str;
182
0
  return str;
183
0
}
184
185
0
// Mutable-pointer overload: same loop as the const char* version, returning
// a char* into the caller's buffer.  The loop is written out here rather
// than delegating to the const overload.
inline char* SkipLeadingWhiteSpace(char* str) {
186
0
  while (ascii_isspace(*str))
187
0
    ++str;
188
0
  return str;
189
0
}
190
191
// ----------------------------------------------------------------------
192
// StripCurlyBraces
193
//    Strips everything enclosed in pairs of curly braces and the curly
194
//    braces. Doesn't touch open braces. It doesn't handle nested curly
195
//    braces. This is used for removing things like {:stopword} from
196
//    queries.
197
// StripBrackets does the same, but allows the caller to specify different
198
//    left and right bracket characters, such as '(' and ')'.
199
// ----------------------------------------------------------------------
200
201
void StripCurlyBraces(string* s);
202
void StripBrackets(char left, char right, string* s);
203
204
205
// ----------------------------------------------------------------------
206
// StripMarkupTags
207
//    Strips everything enclosed in pairs of angle brackets and the angle
208
//    brackets.
209
//    This is used for stripping strings of markup; e.g. going from
210
//    "the quick <b>brown</b> fox" to "the quick brown fox."
211
//    If you want to skip entire sections of markup (e.g. the word "brown"
212
//    too in that example), see webutil/pageutil/pageutil.h .
213
//    This function was designed for stripping the bold tags (inserted by the
214
//    docservers) from the titles of news stories being returned by RSS.
215
//    This implementation DOES NOT cover all cases in html documents
216
//    like tags that contain quoted angle-brackets, or HTML comments.
217
//    For example <IMG SRC = "foo.gif" ALT = "A > B">
218
//    or <!-- <A comment> -->
219
//    See "perldoc -q html"
220
// ----------------------------------------------------------------------
221
222
void StripMarkupTags(string* s);
223
string OutputWithMarkupTagsStripped(const string& s);
224
225
// ----------------------------------------------------------------------
226
// TrimStringLeft
227
//    Removes any occurrences of the characters in 'remove' from the start
228
//    of the string.  Returns the number of chars trimmed.
229
// ----------------------------------------------------------------------
230
size_t TrimStringLeft(string* s, const GStringPiece& remove);
231
232
// ----------------------------------------------------------------------
233
// TrimStringRight
234
//    Removes any occurrences of the characters in 'remove' from the end
235
//    of the string.  Returns the number of chars trimmed.
236
// ----------------------------------------------------------------------
237
size_t TrimStringRight(string* s, const GStringPiece& remove);
238
239
// ----------------------------------------------------------------------
240
// TrimString
241
//    Removes any occurrences of the characters in 'remove' from either
242
//    end of the string.
243
// ----------------------------------------------------------------------
244
// Trims characters in 'remove' from both ends of *s.  Returns the sum of
// the values returned by TrimStringRight() and TrimStringLeft(), i.e. the
// total number of characters trimmed from the two ends.
inline size_t TrimString(string* s, const GStringPiece& remove) {
245
  // The right end is trimmed first and its count is stored in a local
  // before the left end is trimmed.
  size_t right_trim = TrimStringRight(s, remove);
246
  return right_trim + TrimStringLeft(s, remove);
247
}
248
249
// ----------------------------------------------------------------------
250
// TrimRunsInString
251
//    Removes leading and trailing runs, and collapses middle
252
//    runs of a set of characters into a single character (the
253
//    first one specified in 'remove').  Useful for collapsing
254
//    runs of repeated delimiters, whitespace, etc.  E.g.,
255
//    TrimRunsInString(&s, " :,()") removes leading and trailing
256
//    delimiter chars and collapses and converts internal runs
257
//    of delimiters to single ' ' characters, so, for example,
258
//    "  a:(b):c  " -> "a b c"
259
//    "first,last::(area)phone, ::zip" -> "first last area phone zip"
260
// ----------------------------------------------------------------------
261
void TrimRunsInString(string* s, GStringPiece remove);
262
263
// ----------------------------------------------------------------------
264
// RemoveNullsInString
265
//    Removes any internal \0 characters from the string.
266
// ----------------------------------------------------------------------
267
void RemoveNullsInString(string* s);
268
269
// ----------------------------------------------------------------------
270
// strrm()
271
// memrm()
272
//    Remove all occurrences of a given character from a string.
273
//    Returns the new length.
274
// ----------------------------------------------------------------------
275
276
size_t strrm(char* str, char c);
277
size_t memrm(char* str, size_t strlen, char c);
278
279
// ----------------------------------------------------------------------
280
// strrmm()
281
//    Remove all occurrences of a given set of characters from a string.
282
//    Returns the new length.
283
// ----------------------------------------------------------------------
284
size_t strrmm(char* str, const char* chars);
285
size_t strrmm(string* str, const string& chars);
286
287
#endif  // YB_GUTIL_STRINGS_STRIP_H