YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/util/url-coding.cc
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// The following only applies to changes made to this file as part of YugaByte development.
19
//
20
// Portions Copyright (c) YugaByte, Inc.
21
//
22
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
23
// in compliance with the License.  You may obtain a copy of the License at
24
//
25
// http://www.apache.org/licenses/LICENSE-2.0
26
//
27
// Unless required by applicable law or agreed to in writing, software distributed under the License
28
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
29
// or implied.  See the License for the specific language governing permissions and limitations
30
// under the License.
31
//
32
// Licensed under the Apache License, Version 2.0 (the "License");
33
// you may not use this file except in compliance with the License.
34
// You may obtain a copy of the License at
35
//
36
// http://www.apache.org/licenses/LICENSE-2.0
37
//
38
// Unless required by applicable law or agreed to in writing, software
39
// distributed under the License is distributed on an "AS IS" BASIS,
40
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
41
// See the License for the specific language governing permissions and
42
// limitations under the License.
43
44
#include "yb/util/url-coding.h"
45
46
47
#include <boost/algorithm/string.hpp>
48
#include <boost/archive/iterators/base64_from_binary.hpp>
49
#include <boost/archive/iterators/binary_from_base64.hpp>
50
#include <boost/archive/iterators/transform_width.hpp>
51
52
#include <glog/logging.h>
53
54
using std::string;
55
using std::vector;
56
using namespace boost::archive::iterators; // NOLINT(*)
57
58
namespace yb {
59
60
namespace {
61
62
// Returns true if 'c' is one of the characters that get percent-escaped in
// Hive-compat mode. Hive only escapes a specific set of characters; see
// common/src/java/org/apache/hadoop/hive/common/FileUtils.java in the Hive
// source code for the source of this list.
// NOTE(review): "\u00FF" inside a narrow string literal is encoded in the
// execution character set, so in a UTF-8 build it presumably contributes the
// two bytes 0xC3 0xBF rather than the single byte 0xFF -- confirm the intent.
inline bool HiveShouldEscape(char c) {
  static const char kEscaped[] = "\"#%\\*/:=?\u00FF";  // NOLINT(*)
  // 'i + 1 < sizeof' skips the terminating NUL, which is not part of the set.
  for (size_t i = 0; i + 1 < sizeof(kEscaped); ++i) {
    if (kEscaped[i] == c) {
      return true;
    }
  }
  return false;
}

// Returns true if 'c' is one of the four punctuation characters that are left
// untouched when not in Hive-compat mode. It is more convenient to maintain
// the complement of the set of characters to escape in that mode.
inline bool ShouldNotEscape(char c) {
  return c == '-' || c == '_' || c == '.' || c == '~';
}

// Percent-encodes the 'in_len' bytes starting at 'in' and stores the result in
// '*out', replacing its previous contents.
//
// A byte is escaped iff
//   a) hive_compat is true and the byte is in the Hive escape set, or
//   b) hive_compat is false and the byte is neither alphanumeric nor one of
//      the four commonly excluded characters "-_.~".
//
// Every escaped byte is written as '%' followed by exactly two upper-case hex
// digits of its unsigned value, so bytes below 0x10 keep their leading zero
// and bytes above 0x7F are not sign-extended.
inline void UrlEncode(const char* in, size_t in_len, std::string* out, bool hive_compat) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  // Build into a local buffer so that 'in' may point into '*out'.
  std::string encoded;
  encoded.reserve(in_len);
  for (size_t i = 0; i < in_len; ++i) {
    const char ch = in[i];
    // <cctype> functions require a value representable as unsigned char.
    const unsigned char byte = static_cast<unsigned char>(ch);
    const bool escape = hive_compat ? HiveShouldEscape(ch)
                                    : !(isalnum(byte) || ShouldNotEscape(ch));
    if (escape) {
      encoded += '%';
      encoded += kHexDigits[byte >> 4];
      encoded += kHexDigits[byte & 0x0F];
    } else {
      encoded += ch;
    }
  }

  out->swap(encoded);
}
92
93
} // namespace
94
95
7
void UrlEncode(const vector<uint8_t>& in, string* out, bool hive_compat) {
96
7
  if (in.empty()) {
97
2
    *out = "";
98
5
  } else {
99
5
    UrlEncode(reinterpret_cast<const char*>(&in[0]), in.size(), out, hive_compat);
100
5
  }
101
7
}
102
103
20
void UrlEncode(const string& in, string* out, bool hive_compat) {
104
20
  UrlEncode(in.c_str(), in.size(), out, hive_compat);
105
20
}
106
107
13
string UrlEncodeToString(const std::string& in, bool hive_compat) {
108
13
  string ret;
109
13
  UrlEncode(in, &ret, hive_compat);
110
13
  return ret;
111
13
}
112
113
// Adapted from
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
//   example/http/server3/request_handler.cpp
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.

// Returns the numeric value (0-15) of the hexadecimal digit 'c', or -1 if 'c'
// is not a hexadecimal digit.
static inline int UrlDecodeHexDigit(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

// Decodes the percent-encoded string 'in' into '*out'.
//
// Each "%XY" sequence, where X and Y are hexadecimal digits of either case, is
// replaced by the byte with that value. Unless 'hive_compat' is set, '+' is
// decoded as a space (Hive does not encode ' ' as '+'). All other characters
// are copied through unchanged.
//
// Returns false if a '%' is not followed by exactly two hexadecimal digits
// (including a '%' that is truncated by the end of the input). In that case
// '*out' holds whatever was decoded before the malformed escape.
bool UrlDecode(const std::string& in, std::string* out, bool hive_compat) {
  out->clear();
  out->reserve(in.size());
  const size_t len = in.size();
  for (size_t i = 0; i < len; ++i) {
    const char ch = in[i];
    if (ch == '%') {
      // Need two more characters after the '%'.
      if (len - i < 3) {
        return false;
      }
      const int hi = UrlDecodeHexDigit(in[i + 1]);
      const int lo = UrlDecodeHexDigit(in[i + 2]);
      if (hi < 0 || lo < 0) {
        return false;
      }
      (*out) += static_cast<char>((hi << 4) | lo);
      i += 2;  // Skip the two digits just consumed.
    } else if (!hive_compat && ch == '+') {  // Hive does not encode ' ' as '+'
      (*out) += ' ';
    } else {
      (*out) += ch;
    }
  }
  return true;
}
142
143
12
static inline void Base64Encode(const char* in, size_t in_len, std::stringstream* out) {
144
12
  typedef base64_from_binary<transform_width<const char*, 6, 8> > base64_encode;
145
  // Base64 encodes 8 byte chars as 6 bit values.
146
12
  std::stringstream::pos_type len_before = out->tellp();
147
12
  copy(base64_encode(in), base64_encode(in + in_len), std::ostream_iterator<char>(*out));
148
12
  auto bytes_written = out->tellp() - len_before;
149
  // Pad with = to make it valid base64 encoded string
150
12
  int num_pad = bytes_written % 4;
151
12
  if (num_pad != 0) {
152
8
    num_pad = 4 - num_pad;
153
20
    for (int i = 0; i < num_pad; 
++i12
) {
154
12
      (*out) << "=";
155
12
    }
156
8
  }
157
12
  DCHECK_EQ(out->str().size() % 4, 0);
158
12
}
159
160
6
void Base64Encode(const vector<uint8_t>& in, string* out) {
161
6
  if (in.empty()) {
162
0
    *out = "";
163
6
  } else {
164
6
    std::stringstream ss;
165
6
    Base64Encode(in, &ss);
166
6
    *out = ss.str();
167
6
  }
168
6
}
169
170
6
void Base64Encode(const vector<uint8_t>& in, std::stringstream* out) {
171
6
  if (!in.empty()) {
172
    // Boost does not like non-null terminated strings
173
6
    string tmp(reinterpret_cast<const char*>(&in[0]), in.size());
174
6
    Base64Encode(tmp.c_str(), tmp.size(), out);
175
6
  }
176
6
}
177
178
6
void Base64Encode(const string& in, string* out) {
179
6
  std::stringstream ss;
180
6
  Base64Encode(in.c_str(), in.size(), &ss);
181
6
  *out = ss.str();
182
6
}
183
184
0
void Base64Encode(const string& in, std::stringstream* out) {
185
0
  Base64Encode(in.c_str(), in.size(), out);
186
0
}
187
188
6
bool Base64Decode(const string& in, string* out) {
189
6
  typedef transform_width<binary_from_base64<string::const_iterator>, 8, 6> base64_decode;
190
6
  string tmp = in;
191
  // Replace padding with base64 encoded NULL
192
6
  replace(tmp.begin(), tmp.end(), '=', 'A');
193
6
  try {
194
6
    *out = string(base64_decode(tmp.begin()), base64_decode(tmp.end()));
195
6
  } catch(std::exception& e) {
196
0
    return false;
197
0
  }
198
199
  // Remove trailing '\0' that were added as padding.  Since \0 is special,
200
  // the boost functions get confused so do this manually.
201
6
  int num_padded_chars = 0;
202
12
  for (size_t i = out->size(); i > 0;) {
203
12
    --i;
204
12
    if ((*out)[i] != '\0') 
break6
;
205
6
    ++num_padded_chars;
206
6
  }
207
6
  out->resize(out->size() - num_padded_chars);
208
6
  return true;
209
6
}
210
211
632
void EscapeForHtml(const string& in, std::stringstream* out) {
212
632
  DCHECK(out != nullptr);
213
42.6k
  for (const char& c : in) {
214
42.6k
    switch (c) {
215
8
      case '<': (*out) << "&lt;";
216
8
                break;
217
8
      case '>': (*out) << "&gt;";
218
8
                break;
219
1
      case '&': (*out) << "&amp;";
220
1
                break;
221
42.6k
      default: (*out) << c;
222
42.6k
    }
223
42.6k
  }
224
632
}
225
226
631
std::string EscapeForHtmlToString(const std::string& in) {
227
631
  std::stringstream str;
228
631
  EscapeForHtml(in, &str);
229
631
  return str.str();
230
631
}
231
232
} // namespace yb