/Users/deen/code/yugabyte-db/src/yb/gutil/strings/strip.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.h |
3 | | // |
4 | | // The following only applies to changes made to this file as part of YugaByte development. |
5 | | // |
6 | | // Portions Copyright (c) YugaByte, Inc. |
7 | | // |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
9 | | // in compliance with the License. You may obtain a copy of the License at |
10 | | // |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // |
13 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
14 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
15 | | // or implied. See the License for the specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // This file contains functions that remove a defined part from the string, |
19 | | // i.e., strip the string. |
20 | | |
21 | | #ifndef YB_GUTIL_STRINGS_STRIP_H |
22 | | #define YB_GUTIL_STRINGS_STRIP_H |
23 | | |
24 | | #include <stddef.h> |
25 | | #include <string> |
26 | | using std::string; |
27 | | |
28 | | #include "yb/gutil/strings/ascii_ctype.h" |
29 | | #include "yb/gutil/strings/stringpiece.h" |
30 | | |
31 | | // Given a string and a putative prefix, returns the string minus the |
32 | | // prefix string if the prefix matches, otherwise the original |
33 | | // string. |
34 | | string StripPrefixString(GStringPiece str, const GStringPiece& prefix); |
35 | | |
36 | | // Like StripPrefixString, but return true if the prefix was |
37 | | // successfully matched. Write the output to *result. |
38 | | // It is safe for result to point back to the input string. |
39 | | bool TryStripPrefixString(GStringPiece str, const GStringPiece& prefix, |
40 | | string* result); |
41 | | |
42 | | // Given a string and a putative suffix, returns the string minus the |
43 | | // suffix string if the suffix matches, otherwise the original |
44 | | // string. |
45 | | string StripSuffixString(GStringPiece str, const GStringPiece& suffix); |
46 | | |
47 | | |
48 | | // Like StripSuffixString, but return true if the suffix was |
49 | | // successfully matched. Write the output to *result. |
50 | | // It is safe for result to point back to the input string. |
51 | | bool TryStripSuffixString(GStringPiece str, const GStringPiece& suffix, |
52 | | string* result); |
53 | | |
54 | | // ---------------------------------------------------------------------- |
55 | | // StripString |
56 | | // Replaces any occurrence of the character 'remove' (or the characters |
57 | | // in 'remove') with the character 'replacewith'. |
58 | | // Good for keeping html characters or protocol characters (\t) out |
59 | | // of places where they might cause a problem. |
60 | | // ---------------------------------------------------------------------- |
// Single-character overload: walks the NUL-terminated buffer 'str' in place
// and overwrites every byte equal to 'remove' with 'replacewith'. The
// length of the string never changes; the terminating NUL is never examined
// as a candidate, so it is left untouched.
inline void StripString(char* str, char remove, char replacewith) {
  char* cursor = str;
  while (*cursor != '\0') {
    if (*cursor == remove) {
      *cursor = replacewith;
    }
    ++cursor;
  }
}
67 | | |
68 | | void StripString(char* str, GStringPiece remove, char replacewith); |
69 | | void StripString(char* str, int len, GStringPiece remove, char replacewith); |
70 | | void StripString(string* s, GStringPiece remove, char replacewith); |
71 | | |
72 | | // ---------------------------------------------------------------------- |
73 | | // StripDupCharacters |
74 | | // Replaces any repeated occurrence of the character 'dup_char' |
75 | | // with single occurrence. e.g., |
76 | | // StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" |
77 | | // Return the number of characters removed |
78 | | // ---------------------------------------------------------------------- |
79 | | int StripDupCharacters(string* s, char dup_char, int start_pos); |
80 | | |
81 | | // ---------------------------------------------------------------------- |
82 | | // StripWhiteSpace |
83 | | // "Removes" whitespace from both sides of string. Pass in a pointer to an |
84 | | // array of characters, and its length. The function changes the pointer |
85 | | // and length to refer to a substring that does not contain leading or |
86 | | // trailing spaces; it does not modify the string itself. If the caller is |
87 | | // using NUL-terminated strings, it is the caller's responsibility to insert |
88 | | // the NUL character at the end of the substring. |
89 | | // |
90 | | // Note: to be completely type safe, this function should be |
91 | | // parameterized as a template: template<typename anyChar> void |
92 | | // StripWhiteSpace(anyChar** str, int* len), where the expectation |
93 | | // is that anyChar could be char, const char, w_char, const w_char, |
94 | | // unicode_char, or any other character type we want. However, we |
95 | | // just provided a version for char and const char. C++ is |
96 | | // inconvenient, but correct, here. Ask Amit if you want to know |
97 | | // the type safety details. |
98 | | // ---------------------------------------------------------------------- |
99 | | void StripWhiteSpace(const char** str, size_t* len); |
100 | | |
101 | | //------------------------------------------------------------------------ |
102 | | // StripTrailingWhitespace() |
103 | | // Removes whitespace at the end of the string *s. |
104 | | //------------------------------------------------------------------------ |
105 | | void StripTrailingWhitespace(string* s); |
106 | | |
107 | | //------------------------------------------------------------------------ |
108 | | // StripTrailingNewline(string*) |
109 | | // Strips the very last trailing newline or CR+newline from its |
110 | | // input, if one exists. Useful for dealing with MapReduce's text |
111 | | // input mode, which appends '\n' to each map input. Returns true |
112 | | // if a newline was stripped. |
113 | | //------------------------------------------------------------------------ |
114 | | bool StripTrailingNewline(string* s); |
115 | | |
116 | 0 | inline void StripWhiteSpace(char** str, size_t* len) { |
117 | 0 | // The "real" type for StripWhiteSpace is ForAll char types C, take |
118 | 0 | // (C, int) as input and return (C, int) as output. We're using the |
119 | 0 | // cast here to assert that we can take a char*, even though the |
120 | 0 | // function thinks it's assigning to const char*. |
121 | 0 | StripWhiteSpace(const_cast<const char**>(str), len); |
122 | 0 | } |
123 | | |
124 | 0 | inline void StripWhiteSpace(GStringPiece* str) { |
125 | 0 | const char* data = str->data(); |
126 | 0 | size_t len = str->size(); |
127 | 0 | StripWhiteSpace(&data, &len); |
128 | 0 | str->set(data, len); |
129 | 0 | } |
130 | | |
131 | | void StripWhiteSpace(string* str); |
132 | | |
namespace strings {

// Applies StripWhiteSpace to every element of *collection, in iteration
// order. Collection must expose a nested 'iterator' type plus begin()/end(),
// and a StripWhiteSpace overload accepting a pointer to the element type
// must be available.
template <typename Collection>
inline void StripWhiteSpaceInCollection(Collection* collection) {
  typename Collection::iterator cursor = collection->begin();
  while (cursor != collection->end()) {
    StripWhiteSpace(&*cursor);
    ++cursor;
  }
}

}  // namespace strings
143 | | |
144 | | // ---------------------------------------------------------------------- |
145 | | // StripLeadingWhiteSpace |
146 | | // "Removes" whitespace from beginning of string. Returns ptr to first |
147 | | // non-whitespace character if one is present, NULL otherwise. Assumes |
148 | | // "line" is null-terminated. |
149 | | // ---------------------------------------------------------------------- |
150 | | |
151 | 0 | inline const char* StripLeadingWhiteSpace(const char* line) { |
152 | 0 | // skip leading whitespace |
153 | 0 | while (ascii_isspace(*line)) |
154 | 0 | ++line; |
155 | 0 |
|
156 | 0 | if ('\0' == *line) // end of line, no non-whitespace |
157 | 0 | return NULL; |
158 | 0 |
|
159 | 0 | return line; |
160 | 0 | } |
161 | | |
162 | | // StripLeadingWhiteSpace for non-const strings. |
163 | 0 | inline char* StripLeadingWhiteSpace(char* line) { |
164 | 0 | return const_cast<char*>( |
165 | 0 | StripLeadingWhiteSpace(const_cast<const char*>(line))); |
166 | 0 | } |
167 | | |
168 | | void StripLeadingWhiteSpace(string* str); |
169 | | |
170 | | // Remove leading, trailing, and duplicate internal whitespace. |
171 | | void RemoveExtraWhitespace(string* s); |
172 | | |
173 | | |
174 | | // ---------------------------------------------------------------------- |
175 | | // SkipLeadingWhiteSpace |
176 | | // Returns str advanced past white space characters, if any. |
177 | | // Never returns NULL. "str" must be terminated by a null character. |
178 | | // ---------------------------------------------------------------------- |
179 | 0 | inline const char* SkipLeadingWhiteSpace(const char* str) { |
180 | 0 | while (ascii_isspace(*str)) |
181 | 0 | ++str; |
182 | 0 | return str; |
183 | 0 | } |
184 | | |
185 | 0 | inline char* SkipLeadingWhiteSpace(char* str) { |
186 | 0 | while (ascii_isspace(*str)) |
187 | 0 | ++str; |
188 | 0 | return str; |
189 | 0 | } |
190 | | |
191 | | // ---------------------------------------------------------------------- |
192 | | // StripCurlyBraces |
193 | | // Strips everything enclosed in pairs of curly braces and the curly |
194 | | // braces. Doesn't touch open braces. It doesn't handle nested curly |
195 | | // braces. This is used for removing things like {:stopword} from |
196 | | // queries. |
197 | | // StripBrackets does the same, but allows the caller to specify different |
198 | | // left and right bracket characters, such as '(' and ')'. |
199 | | // ---------------------------------------------------------------------- |
200 | | |
201 | | void StripCurlyBraces(string* s); |
202 | | void StripBrackets(char left, char right, string* s); |
203 | | |
204 | | |
205 | | // ---------------------------------------------------------------------- |
206 | | // StripMarkupTags |
207 | | // Strips everything enclosed in pairs of angle brackets and the angle |
208 | | // brackets. |
209 | | // This is used for stripping strings of markup; e.g. going from |
210 | | // "the quick <b>brown</b> fox" to "the quick brown fox." |
211 | | // If you want to skip entire sections of markup (e.g. the word "brown" |
212 | | // too in that example), see webutil/pageutil/pageutil.h . |
213 | | // This function was designed for stripping the bold tags (inserted by the |
214 | | // docservers) from the titles of news stories being returned by RSS. |
215 | | // This implementation DOES NOT cover all cases in html documents |
216 | | // like tags that contain quoted angle-brackets, or HTML comments. |
217 | | // For example <IMG SRC = "foo.gif" ALT = "A > B"> |
218 | | // or <!-- <A comment> --> |
219 | | // See "perldoc -q html" |
220 | | // ---------------------------------------------------------------------- |
221 | | |
222 | | void StripMarkupTags(string* s); |
223 | | string OutputWithMarkupTagsStripped(const string& s); |
224 | | |
225 | | // ---------------------------------------------------------------------- |
226 | | // TrimStringLeft |
227 | | // Removes any occurrences of the characters in 'remove' from the start |
228 | | // of the string. Returns the number of chars trimmed. |
229 | | // ---------------------------------------------------------------------- |
230 | | size_t TrimStringLeft(string* s, const GStringPiece& remove); |
231 | | |
232 | | // ---------------------------------------------------------------------- |
233 | | // TrimStringRight |
234 | | // Removes any occurrences of the characters in 'remove' from the end |
235 | | // of the string. Returns the number of chars trimmed. |
236 | | // ---------------------------------------------------------------------- |
237 | | size_t TrimStringRight(string* s, const GStringPiece& remove); |
238 | | |
239 | | // ---------------------------------------------------------------------- |
240 | | // TrimString |
241 | | // Removes any occurrences of the characters in 'remove' from either |
242 | | // end of the string. |
243 | | // ---------------------------------------------------------------------- |
244 | | inline size_t TrimString(string* s, const GStringPiece& remove) { |
245 | | size_t right_trim = TrimStringRight(s, remove); |
246 | | return right_trim + TrimStringLeft(s, remove); |
247 | | } |
248 | | |
249 | | // ---------------------------------------------------------------------- |
250 | | // TrimRunsInString |
251 | | // Removes leading and trailing runs, and collapses middle |
252 | | // runs of a set of characters into a single character (the |
253 | | // first one specified in 'remove'). Useful for collapsing |
254 | | // runs of repeated delimiters, whitespace, etc. E.g., |
255 | | // TrimRunsInString(&s, " :,()") removes leading and trailing |
256 | | // delimiter chars and collapses and converts internal runs |
257 | | // of delimiters to single ' ' characters, so, for example, |
258 | | // " a:(b):c " -> "a b c" |
259 | | // "first,last::(area)phone, ::zip" -> "first last area phone zip" |
260 | | // ---------------------------------------------------------------------- |
261 | | void TrimRunsInString(string* s, GStringPiece remove); |
262 | | |
263 | | // ---------------------------------------------------------------------- |
264 | | // RemoveNullsInString |
265 | | // Removes any internal \0 characters from the string. |
266 | | // ---------------------------------------------------------------------- |
267 | | void RemoveNullsInString(string* s); |
268 | | |
269 | | // ---------------------------------------------------------------------- |
270 | | // strrm() |
271 | | // memrm() |
272 | | // Remove all occurrences of a given character from a string. |
273 | | // Returns the new length. |
274 | | // ---------------------------------------------------------------------- |
275 | | |
276 | | size_t strrm(char* str, char c); |
277 | | size_t memrm(char* str, size_t strlen, char c); |
278 | | |
279 | | // ---------------------------------------------------------------------- |
280 | | // strrmm() |
281 | | // Remove all occurrences of a given set of characters from a string. |
282 | | // Returns the new length. |
283 | | // ---------------------------------------------------------------------- |
284 | | size_t strrmm(char* str, const char* chars); |
285 | | size_t strrmm(string* str, const string& chars); |
286 | | |
287 | | #endif // YB_GUTIL_STRINGS_STRIP_H |