/Users/deen/code/yugabyte-db/src/yb/gutil/strings/strip.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // based on contributions of various authors in strings/strutil_unittest.cc |
3 | | // |
4 | | // The following only applies to changes made to this file as part of YugaByte development. |
5 | | // |
6 | | // Portions Copyright (c) YugaByte, Inc. |
7 | | // |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
9 | | // in compliance with the License. You may obtain a copy of the License at |
10 | | // |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
14 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
15 | | // or implied. See the License for the specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // This file contains functions that remove a defined part from the string, |
19 | | // i.e., strip the string. |
20 | | |
21 | | #include "yb/gutil/strings/strip.h" |
22 | | |
23 | | #include <assert.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include <algorithm> |
27 | | |
28 | | using std::copy; |
29 | | using std::max; |
30 | | using std::min; |
31 | | using std::swap; |
32 | | using std::string; |
33 | | |
34 | 0 | string StripPrefixString(GStringPiece str, const GStringPiece& prefix) { |
35 | 0 | if (str.starts_with(prefix)) |
36 | 0 | str.remove_prefix(prefix.length()); |
37 | 0 | return str.as_string(); |
38 | 0 | } |
39 | | |
40 | | bool TryStripPrefixString(GStringPiece str, const GStringPiece& prefix, |
41 | 2 | string* result) { |
42 | 2 | const bool has_prefix = str.starts_with(prefix); |
43 | 2 | if (has_prefix) |
44 | 2 | str.remove_prefix(prefix.length()); |
45 | 2 | str.as_string().swap(*result); |
46 | 2 | return has_prefix; |
47 | 2 | } |
48 | | |
49 | 0 | string StripSuffixString(GStringPiece str, const GStringPiece& suffix) { |
50 | 0 | if (str.ends_with(suffix)) |
51 | 0 | str.remove_suffix(suffix.length()); |
52 | 0 | return str.as_string(); |
53 | 0 | } |
54 | | |
55 | | bool TryStripSuffixString(GStringPiece str, const GStringPiece& suffix, |
56 | 11 | string* result) { |
57 | 11 | const bool has_suffix = str.ends_with(suffix); |
58 | 11 | if (has_suffix) |
59 | 10 | str.remove_suffix(suffix.length()); |
60 | 11 | str.as_string().swap(*result); |
61 | 11 | return has_suffix; |
62 | 11 | } |
63 | | |
64 | | // ---------------------------------------------------------------------- |
65 | | // StripString |
66 | | // Replaces any occurrence of the character 'remove' (or the characters |
67 | | // in 'remove') with the character 'replacewith'. |
68 | | // ---------------------------------------------------------------------- |
69 | 0 | void StripString(char* str, GStringPiece remove, char replacewith) { |
70 | 0 | for (; *str != '\0'; ++str) { |
71 | 0 | if (remove.find(*str) != GStringPiece::npos) { |
72 | 0 | *str = replacewith; |
73 | 0 | } |
74 | 0 | } |
75 | 0 | } |
76 | | |
77 | 0 | void StripString(char* str, int len, GStringPiece remove, char replacewith) { |
78 | 0 | char* end = str + len; |
79 | 0 | for (; str < end; ++str) { |
80 | 0 | if (remove.find(*str) != GStringPiece::npos) { |
81 | 0 | *str = replacewith; |
82 | 0 | } |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | 0 | void StripString(string* s, GStringPiece remove, char replacewith) { |
87 | 0 | for (char& c : *s) { |
88 | 0 | if (remove.find(c) != GStringPiece::npos) { |
89 | 0 | c = replacewith; |
90 | 0 | } |
91 | 0 | } |
92 | 0 | } |
93 | | |
94 | | // ---------------------------------------------------------------------- |
95 | | // StripWhiteSpace |
96 | | // ---------------------------------------------------------------------- |
97 | 0 | void StripWhiteSpace(const char** str, size_t* len) { |
98 | | // strip off trailing whitespace |
99 | 0 | while ((*len) > 0 && ascii_isspace((*str)[(*len)-1])) { |
100 | 0 | (*len)--; |
101 | 0 | } |
102 | | |
103 | | // strip off leading whitespace |
104 | 0 | while ((*len) > 0 && ascii_isspace((*str)[0])) { |
105 | 0 | (*len)--; |
106 | 0 | (*str)++; |
107 | 0 | } |
108 | 0 | } |
109 | | |
// Removes one trailing line terminator ("\n" or "\r\n") from '*s'.
// Returns true if a terminator was removed, false if '*s' does not end in
// '\n' (in which case '*s' is left untouched).
bool StripTrailingNewline(string* s) {
  const string::size_type size = s->size();
  if (size == 0 || (*s)[size - 1] != '\n') {
    return false;
  }
  // Treat a carriage return immediately before the newline as part of it.
  const bool crlf = size >= 2 && (*s)[size - 2] == '\r';
  s->resize(size - (crlf ? 2 : 1));
  return true;
}
120 | | |
121 | 255k | void StripWhiteSpace(string* str) { |
122 | 255k | size_t str_length = str->length(); |
123 | | |
124 | | // Strip off leading whitespace. |
125 | 255k | size_t first = 0; |
126 | 255k | while (first < str_length && ascii_isspace((*str)[first])) { |
127 | 8 | ++first; |
128 | 8 | } |
129 | | // If entire string is white space. |
130 | 255k | if (first == str_length) { |
131 | 418 | str->clear(); |
132 | 418 | return; |
133 | 418 | } |
134 | 255k | if (first > 0) { |
135 | 7 | str->erase(0, first); |
136 | 7 | str_length -= first; |
137 | 7 | } |
138 | | |
139 | | // Strip off trailing whitespace. |
140 | 255k | size_t last = str_length - 1; |
141 | 255k | while (last >= 0 && ascii_isspace((*str)[last])) { |
142 | 3 | --last; |
143 | 3 | } |
144 | 255k | if (last != (str_length - 1) && last >= 0) { |
145 | 2 | str->erase(last + 1, string::npos); |
146 | 2 | } |
147 | 255k | } |
148 | | |
149 | | // ---------------------------------------------------------------------- |
150 | | // Misc. stripping routines |
151 | | // ---------------------------------------------------------------------- |
152 | 0 | void StripCurlyBraces(string* s) { |
153 | 0 | return StripBrackets('{', '}', s); |
154 | 0 | } |
155 | | |
// Removes from '*s' every span that starts at a 'left' character and runs
// through the next 'right' character found at or after it, delimiters
// included. Processing stops at the first 'left' that has no 'right' at or
// after it; that 'left' and everything following it are kept.
void StripBrackets(char left, char right, string* s) {
  string::size_type open = s->find(left);
  while (open != string::npos) {
    // The search for the closer starts at the opener itself, which matters
    // when 'left' and 'right' are the same character.
    const string::size_type close = s->find(right, open);
    if (close == string::npos) {
      return;
    }
    s->erase(open, close - open + 1);
    // After the erase, 'open' indexes the first character past the removed span.
    open = s->find(left, open);
  }
}
166 | | |
// Removes from '*s' every "<...>" span, angle brackets included. If a '<'
// has no '>' after it, everything from that '<' to the end of the string
// is removed.
void StripMarkupTags(string* s) {
  string::size_type open = s->find('<');
  while (open != string::npos) {
    const string::size_type close = s->find('>', open);
    if (close == string::npos) {
      // Unterminated tag: drop the rest of the string.
      s->erase(open);
      return;
    }
    s->erase(open, close - open + 1);
    // After the erase, 'open' indexes the first character past the removed tag.
    open = s->find('<', open);
  }
}
180 | | |
181 | 0 | string OutputWithMarkupTagsStripped(const string& s) { |
182 | 0 | string result(s); |
183 | 0 | StripMarkupTags(&result); |
184 | 0 | return result; |
185 | 0 | } |
186 | | |
187 | | |
188 | 57 | size_t TrimStringLeft(string* s, const GStringPiece& remove) { |
189 | 57 | size_t i = 0; |
190 | 102 | while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { |
191 | 45 | ++i; |
192 | 45 | } |
193 | 57 | if (i > 0) s->erase(0, i); |
194 | 57 | return i; |
195 | 57 | } |
196 | | |
197 | 57 | size_t TrimStringRight(string* s, const GStringPiece& remove) { |
198 | 57 | size_t i = s->size(), trimmed = 0; |
199 | 102 | while (i > 0 && memchr(remove.data(), (*s)[i-1], remove.size())) { |
200 | 45 | --i; |
201 | 45 | } |
202 | 57 | if (i < s->size()) { |
203 | 45 | trimmed = s->size() - i; |
204 | 45 | s->erase(i); |
205 | 45 | } |
206 | 57 | return trimmed; |
207 | 57 | } |
208 | | |
209 | | // ---------------------------------------------------------------------- |
210 | | // Various removal routines |
211 | | // ---------------------------------------------------------------------- |
// Removes every occurrence of 'c' from the NUL-terminated buffer 'str' by
// compacting it in place, then re-terminates it. Returns the new length
// (excluding the terminating NUL).
size_t strrm(char* str, char c) {
  char* write = str;
  for (const char* read = str; *read != '\0'; ++read) {
    if (*read == c) {
      continue;
    }
    *write = *read;
    ++write;
  }
  *write = '\0';
  return static_cast<size_t>(write - str);
}
219 | | |
// Removes every occurrence of 'c' from the first 'strlen' bytes of 'str' by
// compacting them in place. No terminator is written and bytes past the
// compacted prefix are left as they were. Returns the number of bytes kept.
size_t memrm(char* str, size_t strlen, char c) {
  char* write = str;
  const char* const end = str + strlen;
  for (const char* read = str; read != end; ++read) {
    if (*read == c) {
      continue;
    }
    *write = *read;
    ++write;
  }
  return static_cast<size_t>(write - str);
}
228 | | |
// Removes from the NUL-terminated buffer 'str' every character that occurs
// in the NUL-terminated set 'chars', compacting in place and
// re-terminating. Returns the new length (excluding the terminating NUL).
size_t strrmm(char* str, const char* chars) {
  char* write = str;
  for (const char* read = str; *read != '\0'; ++read) {
    // '*read' is never NUL here, so strchr() cannot match the terminator of 'chars'.
    if (strchr(chars, *read) != nullptr) {
      continue;
    }
    *write = *read;
    ++write;
  }
  *write = '\0';
  return static_cast<size_t>(write - str);
}
244 | | |
// Removes from '*str' every character that occurs in 'chars', compacting in
// place and shrinking the string. Returns the resulting length.
size_t strrmm(string* str, const string& chars) {
  const size_t length = str->length();
  const size_t first_hit = str->find_first_of(chars);
  if (first_hit == string::npos) {
    // Nothing to remove.
    return length;
  }

  // Everything before 'first_hit' is already in place; compact the rest.
  size_t write = first_hit;
  for (size_t read = first_hit + 1; read < length; ++read) {
    const char ch = (*str)[read];
    if (chars.find(ch) != string::npos) {
      continue;
    }
    (*str)[write] = ch;
    ++write;
  }

  str->resize(write);
  return write;
}
262 | | |
// ----------------------------------------------------------------------
// StripDupCharacters
//    Collapses each run of consecutive 'dup_char' characters found at or
//    after index 'start_pos' of '*s' into a single occurrence, e.g.
//      StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
//    A negative 'start_pos' is treated as 0. Characters before
//    'start_pos' are left untouched.
//    Returns the number of characters removed.
// ----------------------------------------------------------------------
size_t StripDupCharacters(string* s, char dup_char, int64 start_pos) {
  const size_t begin = start_pos < 0 ? 0 : static_cast<size_t>(start_pos);
  const size_t size = s->size();

  // Compact in place: 'read' scans the input, 'write' trails behind it.
  size_t read = begin;
  size_t write = begin;
  while (read < size) {
    const char ch = (*s)[read];
    if (write != read) {
      (*s)[write] = ch;  // only copy once the two positions have diverged
    }
    ++read;
    ++write;

    if (ch != dup_char) {
      continue;
    }
    // The first 'dup_char' of a run was just kept; skip the rest of the run.
    while (read < size && (*s)[read] == dup_char) {
      ++read;
    }
  }

  const size_t removed = read - write;
  s->resize(size - removed);
  return removed;
}
295 | | |
296 | | // ---------------------------------------------------------------------- |
297 | | // RemoveExtraWhitespace() |
298 | | // Remove leading, trailing, and duplicate internal whitespace. |
299 | | // ---------------------------------------------------------------------- |
300 | 0 | void RemoveExtraWhitespace(string* s) { |
301 | 0 | assert(s != nullptr); |
302 | | // Empty strings clearly have no whitespace, and this code assumes that |
303 | | // string length is greater than 0 |
304 | 0 | if (s->empty()) |
305 | 0 | return; |
306 | | |
307 | 0 | size_t input_pos = 0; // current reader position |
308 | 0 | size_t output_pos = 0; // current writer position |
309 | 0 | const size_t input_end = s->size(); |
310 | | // Strip off leading space |
311 | 0 | while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++; |
312 | |
|
313 | 0 | while (input_pos < input_end - 1) { |
314 | 0 | char c = (*s)[input_pos]; |
315 | 0 | char next = (*s)[input_pos + 1]; |
316 | | // Copy each non-whitespace character to the right position. |
317 | | // For a block of whitespace, print the last one. |
318 | 0 | if (!ascii_isspace(c) || !ascii_isspace(next)) { |
319 | 0 | if (output_pos != input_pos) { // only copy if needed |
320 | 0 | (*s)[output_pos] = c; |
321 | 0 | } |
322 | 0 | output_pos++; |
323 | 0 | } |
324 | 0 | input_pos++; |
325 | 0 | } |
326 | | // Pick up the last character if needed. |
327 | 0 | char c = (*s)[input_end - 1]; |
328 | 0 | if (!ascii_isspace(c)) (*s)[output_pos++] = c; |
329 | |
|
330 | 0 | s->resize(output_pos); |
331 | 0 | } |
332 | | |
333 | | //------------------------------------------------------------------------ |
334 | | // See comment in header file for a complete description. |
335 | | //------------------------------------------------------------------------ |
336 | 0 | void StripLeadingWhiteSpace(string* str) { |
337 | 0 | char const* const leading = StripLeadingWhiteSpace( |
338 | 0 | const_cast<char*>(str->c_str())); |
339 | 0 | if (leading != nullptr) { |
340 | 0 | string const tmp(leading); |
341 | 0 | str->assign(tmp); |
342 | 0 | } else { |
343 | 0 | str->assign(""); |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | 0 | void StripTrailingWhitespace(string* const s) { |
348 | 0 | string::size_type i; |
349 | 0 | for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) { |
350 | 0 | } |
351 | |
|
352 | 0 | s->resize(i); |
353 | 0 | } |
354 | | |
355 | | // ---------------------------------------------------------------------- |
356 | | // TrimRunsInString |
357 | | // Removes leading and trailing runs, and collapses middle |
358 | | // runs of a set of characters into a single character (the |
359 | | // first one specified in 'remove'). Useful for collapsing |
360 | | // runs of repeated delimiters, whitespace, etc. E.g., |
361 | | // TrimRunsInString(&s, " :,()") removes leading and trailing |
362 | | // delimiter chars and collapses and converts internal runs |
363 | | // of delimiters to single ' ' characters, so, for example, |
364 | | // " a:(b):c " -> "a b c" |
365 | | // "first,last::(area)phone, ::zip" -> "first last area phone zip" |
366 | | // ---------------------------------------------------------------------- |
367 | 0 | void TrimRunsInString(string* s, GStringPiece remove) { |
368 | 0 | string::iterator dest = s->begin(); |
369 | 0 | string::iterator src_end = s->end(); |
370 | 0 | for (string::iterator src = s->begin(); src != src_end; ) { |
371 | 0 | if (remove.find(*src) == GStringPiece::npos) { |
372 | 0 | *(dest++) = *(src++); |
373 | 0 | } else { |
374 | | // Skip to the end of this run of chars that are in 'remove'. |
375 | 0 | for (++src; src != src_end; ++src) { |
376 | 0 | if (remove.find(*src) == GStringPiece::npos) { |
377 | 0 | if (dest != s->begin()) { |
378 | | // This is an internal run; collapse it. |
379 | 0 | *(dest++) = remove[0]; |
380 | 0 | } |
381 | 0 | *(dest++) = *(src++); |
382 | 0 | break; |
383 | 0 | } |
384 | 0 | } |
385 | 0 | } |
386 | 0 | } |
387 | 0 | s->erase(dest, src_end); |
388 | 0 | } |
389 | | |
// ----------------------------------------------------------------------
// RemoveNullsInString
//    Removes every '\0' character from '*s', shrinking the string
//    accordingly. The relative order of the other characters is kept.
// ----------------------------------------------------------------------
void RemoveNullsInString(string* s) {
  // Erase-remove: compact the kept characters to the front, then drop the tail.
  const string::iterator new_end = std::remove(s->begin(), s->end(), '\0');
  s->erase(new_end, s->end());
}