/Users/deen/code/yugabyte-db/src/yb/util/url-coding.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
33 | | // you may not use this file except in compliance with the License. |
34 | | // You may obtain a copy of the License at |
35 | | // |
36 | | // http://www.apache.org/licenses/LICENSE-2.0 |
37 | | // |
38 | | // Unless required by applicable law or agreed to in writing, software |
39 | | // distributed under the License is distributed on an "AS IS" BASIS, |
40 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
41 | | // See the License for the specific language governing permissions and |
42 | | // limitations under the License. |
43 | | |
44 | | #include "yb/util/url-coding.h" |
45 | | |
46 | | |
47 | | #include <boost/algorithm/string.hpp> |
48 | | #include <boost/archive/iterators/base64_from_binary.hpp> |
49 | | #include <boost/archive/iterators/binary_from_base64.hpp> |
50 | | #include <boost/archive/iterators/transform_width.hpp> |
51 | | |
52 | | #include <glog/logging.h> |
53 | | |
54 | | using std::string; |
55 | | using std::vector; |
56 | | using namespace boost::archive::iterators; // NOLINT(*) |
57 | | |
58 | | namespace yb { |
59 | | |
namespace {

// Hive selectively encodes characters. This is the list of characters that
// are percent-escaped in Hive-compat mode.
// See common/src/java/org/apache/hadoop/hive/common/FileUtils.java
// in the Hive source code for the source of this list.
// NOTE(review): "\u00FF" inside a narrow string literal is stored in the
// ordinary literal encoding; with UTF-8 that is the two bytes 0xC3 0xBF rather
// than the single byte 0xFF. The literal is kept byte-for-byte so the escape
// set does not change here -- confirm which byte(s) were actually intended.
constexpr char kHiveEscapedChars[] = "\"#%\\*/:=?\u00FF";  // NOLINT(*)

// It is more convenient to maintain the complement of the set of
// characters to escape when not in Hive-compat mode.
constexpr char kUnescapedPunctuation[] = "-_.~";  // NOLINT(*)

// Returns true iff 'ch' occurs in the NUL-terminated string 'set'. The
// terminating NUL itself is never considered a member of the set.
inline bool ContainsChar(const char* set, char ch) {
  for (; *set != '\0'; ++set) {
    if (*set == ch) {
      return true;
    }
  }
  return false;
}

// True iff 'ch' must be escaped in Hive-compat mode.
inline bool HiveShouldEscape(char ch) {
  return ContainsChar(kHiveEscapedChars, ch);
}

// True iff 'ch' is one of the punctuation characters left as-is outside
// Hive-compat mode.
inline bool ShouldNotEscape(char ch) {
  return ContainsChar(kUnescapedPunctuation, ch);
}

// True iff 'ch' is an ASCII letter or digit. Deliberately independent of the
// process locale, and well defined for bytes >= 0x80 even where 'char' is
// signed (passing such a value straight to isalnum() is undefined behaviour).
inline bool IsAsciiAlnum(char ch) {
  return (ch >= '0' && ch <= '9') ||
         (ch >= 'A' && ch <= 'Z') ||
         (ch >= 'a' && ch <= 'z');
}

// Percent-encodes the 'in_len' bytes starting at 'in' and stores the result in
// '*out', replacing any previous contents.
//
// A byte is escaped iff a) we are in Hive-compat mode and the byte is in the
// Hive escape list, or b) we are not in Hive-compat mode and the byte is
// neither an ASCII alphanumeric nor one of "-_.~".
//
// Every escaped byte is written as '%' followed by exactly two upper-case hex
// digits of its unsigned value (e.g. '\n' -> "%0A", 0xE9 -> "%E9"), so that a
// decoder reading two hex digits per escape recovers the original byte.
//
// The result is assembled in a local buffer and swapped into '*out' at the
// end, so '*out' may safely be the string that owns 'in'.
inline void UrlEncode(const char* in, size_t in_len, std::string* out, bool hive_compat) {
  constexpr char kHexDigits[] = "0123456789ABCDEF";

  std::string encoded;
  encoded.reserve(in_len);
  for (size_t i = 0; i < in_len; ++i) {
    const char ch = in[i];
    const bool escape = hive_compat ? HiveShouldEscape(ch)
                                    : !(IsAsciiAlnum(ch) || ShouldNotEscape(ch));
    if (escape) {
      // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
      const unsigned char byte = static_cast<unsigned char>(ch);
      encoded += '%';
      encoded += kHexDigits[byte >> 4];
      encoded += kHexDigits[byte & 0x0F];
    } else {
      encoded += ch;
    }
  }
  out->swap(encoded);
}

}  // namespace
94 | | |
95 | 7 | void UrlEncode(const vector<uint8_t>& in, string* out, bool hive_compat) { |
96 | 7 | if (in.empty()) { |
97 | 2 | *out = ""; |
98 | 5 | } else { |
99 | 5 | UrlEncode(reinterpret_cast<const char*>(&in[0]), in.size(), out, hive_compat); |
100 | 5 | } |
101 | 7 | } |
102 | | |
103 | 20 | void UrlEncode(const string& in, string* out, bool hive_compat) { |
104 | 20 | UrlEncode(in.c_str(), in.size(), out, hive_compat); |
105 | 20 | } |
106 | | |
107 | 13 | string UrlEncodeToString(const std::string& in, bool hive_compat) { |
108 | 13 | string ret; |
109 | 13 | UrlEncode(in, &ret, hive_compat); |
110 | 13 | return ret; |
111 | 13 | } |
112 | | |
// Returns the numeric value (0-15) of the hexadecimal digit 'c', or -1 if 'c'
// is not one of [0-9A-Fa-f].
static int HexDigitValue(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  return -1;
}

// Adapted from
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
// example/http/server3/request_handler.cpp
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.
//
// Decodes the percent-encoded string 'in' into '*out'.
//
// - "%XY" is replaced by the byte whose value is the two hex digits XY
//   (upper or lower case).
// - Unless 'hive_compat' is set, '+' is decoded as a space (Hive does not
//   encode ' ' as '+').
// - Every other character is copied through unchanged.
//
// Returns false if a '%' is not followed by exactly two hexadecimal digits
// (truncated input, or anything else such as "%4G", "%-1" or "% 4"). In that
// case '*out' holds whatever was decoded before the malformed escape.
//
// '*out' is cleared before 'in' is read, so it must not alias 'in'.
bool UrlDecode(const std::string& in, std::string* out, bool hive_compat) {
  out->clear();
  out->reserve(in.size());
  const size_t len = in.size();
  for (size_t i = 0; i < len; ++i) {
    const char ch = in[i];
    if (ch == '%') {
      // An escape needs the '%' plus two more characters.
      if (len - i < 3) {
        return false;
      }
      const int hi = HexDigitValue(in[i + 1]);
      const int lo = HexDigitValue(in[i + 2]);
      if (hi < 0 || lo < 0) {
        return false;
      }
      out->push_back(static_cast<char>((hi << 4) | lo));
      i += 2;  // Skip the two hex digits just consumed.
    } else if (!hive_compat && ch == '+') {  // Hive does not encode ' ' as '+'
      out->push_back(' ');
    } else {
      out->push_back(ch);
    }
  }
  return true;
}
142 | | |
143 | 12 | static inline void Base64Encode(const char* in, size_t in_len, std::stringstream* out) { |
144 | 12 | typedef base64_from_binary<transform_width<const char*, 6, 8> > base64_encode; |
145 | | // Base64 encodes 8 byte chars as 6 bit values. |
146 | 12 | std::stringstream::pos_type len_before = out->tellp(); |
147 | 12 | copy(base64_encode(in), base64_encode(in + in_len), std::ostream_iterator<char>(*out)); |
148 | 12 | auto bytes_written = out->tellp() - len_before; |
149 | | // Pad with = to make it valid base64 encoded string |
150 | 12 | int num_pad = bytes_written % 4; |
151 | 12 | if (num_pad != 0) { |
152 | 8 | num_pad = 4 - num_pad; |
153 | 20 | for (int i = 0; i < num_pad; ++i12 ) { |
154 | 12 | (*out) << "="; |
155 | 12 | } |
156 | 8 | } |
157 | 12 | DCHECK_EQ(out->str().size() % 4, 0); |
158 | 12 | } |
159 | | |
160 | 6 | void Base64Encode(const vector<uint8_t>& in, string* out) { |
161 | 6 | if (in.empty()) { |
162 | 0 | *out = ""; |
163 | 6 | } else { |
164 | 6 | std::stringstream ss; |
165 | 6 | Base64Encode(in, &ss); |
166 | 6 | *out = ss.str(); |
167 | 6 | } |
168 | 6 | } |
169 | | |
170 | 6 | void Base64Encode(const vector<uint8_t>& in, std::stringstream* out) { |
171 | 6 | if (!in.empty()) { |
172 | | // Boost does not like non-null terminated strings |
173 | 6 | string tmp(reinterpret_cast<const char*>(&in[0]), in.size()); |
174 | 6 | Base64Encode(tmp.c_str(), tmp.size(), out); |
175 | 6 | } |
176 | 6 | } |
177 | | |
178 | 6 | void Base64Encode(const string& in, string* out) { |
179 | 6 | std::stringstream ss; |
180 | 6 | Base64Encode(in.c_str(), in.size(), &ss); |
181 | 6 | *out = ss.str(); |
182 | 6 | } |
183 | | |
184 | 0 | void Base64Encode(const string& in, std::stringstream* out) { |
185 | 0 | Base64Encode(in.c_str(), in.size(), out); |
186 | 0 | } |
187 | | |
188 | 6 | bool Base64Decode(const string& in, string* out) { |
189 | 6 | typedef transform_width<binary_from_base64<string::const_iterator>, 8, 6> base64_decode; |
190 | 6 | string tmp = in; |
191 | | // Replace padding with base64 encoded NULL |
192 | 6 | replace(tmp.begin(), tmp.end(), '=', 'A'); |
193 | 6 | try { |
194 | 6 | *out = string(base64_decode(tmp.begin()), base64_decode(tmp.end())); |
195 | 6 | } catch(std::exception& e) { |
196 | 0 | return false; |
197 | 0 | } |
198 | | |
199 | | // Remove trailing '\0' that were added as padding. Since \0 is special, |
200 | | // the boost functions get confused so do this manually. |
201 | 6 | int num_padded_chars = 0; |
202 | 12 | for (size_t i = out->size(); i > 0;) { |
203 | 12 | --i; |
204 | 12 | if ((*out)[i] != '\0') break6 ; |
205 | 6 | ++num_padded_chars; |
206 | 6 | } |
207 | 6 | out->resize(out->size() - num_padded_chars); |
208 | 6 | return true; |
209 | 6 | } |
210 | | |
211 | 632 | void EscapeForHtml(const string& in, std::stringstream* out) { |
212 | 632 | DCHECK(out != nullptr); |
213 | 42.6k | for (const char& c : in) { |
214 | 42.6k | switch (c) { |
215 | 8 | case '<': (*out) << "<"; |
216 | 8 | break; |
217 | 8 | case '>': (*out) << ">"; |
218 | 8 | break; |
219 | 1 | case '&': (*out) << "&"; |
220 | 1 | break; |
221 | 42.6k | default: (*out) << c; |
222 | 42.6k | } |
223 | 42.6k | } |
224 | 632 | } |
225 | | |
226 | 631 | std::string EscapeForHtmlToString(const std::string& in) { |
227 | 631 | std::stringstream str; |
228 | 631 | EscapeForHtml(in, &str); |
229 | 631 | return str.str(); |
230 | 631 | } |
231 | | |
232 | | } // namespace yb |