/Users/deen/code/yugabyte-db/src/yb/util/varint.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/util/varint.h" |
15 | | |
16 | | #include <openssl/bn.h> |
17 | | |
18 | | #include "yb/gutil/casts.h" |
19 | | |
20 | | #include "yb/util/result.h" |
21 | | #include "yb/util/status_format.h" |
22 | | #include "yb/util/status_log.h" |
23 | | |
24 | | namespace yb { |
25 | | namespace util { |
26 | | |
27 | 1.10M | void BigNumDeleter::operator()(BIGNUM* bn) const { |
28 | 1.10M | BN_free(bn); |
29 | 1.10M | } |
30 | | |
31 | 123k | ATTRIBUTE_NO_SANITIZE_UNDEFINED VarInt::VarInt(int64_t int64_val) : impl_(BN_new()) { |
32 | 123k | if (int64_val >= 0) { |
33 | 110k | BN_set_word(impl_.get(), int64_val); |
34 | 110k | } else { |
35 | 12.8k | BN_set_word(impl_.get(), -int64_val); |
36 | 12.8k | BN_set_negative(impl_.get(), 1); |
37 | 12.8k | } |
38 | 123k | } |
39 | | |
40 | 482k | VarInt::VarInt() : impl_(BN_new()) {} |
41 | | |
42 | 422 | VarInt::VarInt(const VarInt& var_int) : impl_(BN_dup(var_int.impl_.get())) {} |
43 | | |
44 | 228 | VarInt& VarInt::operator=(const VarInt& rhs) { |
45 | 228 | impl_.reset(BN_dup(rhs.impl_.get())); |
46 | 228 | return *this; |
47 | 228 | } |
48 | | |
49 | 2.11k | std::string VarInt::ToString() const { |
50 | 2.11k | char* temp = BN_bn2dec(impl_.get()); |
51 | 2.11k | std::string result(temp); |
52 | 2.11k | OPENSSL_free(temp); |
53 | 2.11k | return result; |
54 | 2.11k | } |
55 | | |
56 | 7.96k | Result<int64_t> VarInt::ToInt64() const { |
57 | 7.96k | BN_ULONG value = BN_get_word(impl_.get()); |
58 | 7.96k | bool negative = BN_is_negative(impl_.get()); |
59 | | // Casting minimal signed value to unsigned type of the same size returns its absolute value. |
60 | 7.96k | BN_ULONG bound = negative ? std::numeric_limits<int64_t>::min()315 |
61 | 7.96k | : std::numeric_limits<int64_t>::max()7.65k ; |
62 | | |
63 | 7.96k | if (value > bound) { |
64 | 12 | return STATUS_FORMAT( |
65 | 12 | InvalidArgument, "VarInt $0 cannot be converted to int64 due to overflow", *this); |
66 | 12 | } |
67 | 7.95k | return negative ? -value307 : value7.64k ; |
68 | 7.96k | } |
69 | | |
70 | 2.26k | Status VarInt::FromString(const std::string& str) { |
71 | 2.26k | return FromString(str.c_str()); |
72 | 2.26k | } |
73 | | |
74 | 14.3k | Status VarInt::FromString(const char* cstr) { |
75 | 14.3k | if (*cstr == '+') { |
76 | 424 | ++cstr; |
77 | 424 | } |
78 | 14.3k | BIGNUM* temp = nullptr; |
79 | 14.3k | size_t parsed = BN_dec2bn(&temp, cstr); |
80 | 14.3k | impl_.reset(temp); |
81 | 14.3k | if (parsed == 0 || parsed != strlen(cstr)14.3k ) { |
82 | 2 | return STATUS_FORMAT(InvalidArgument, "Cannot parse varint: $0", cstr); |
83 | 2 | } |
84 | 14.3k | return Status::OK(); |
85 | 14.3k | } |
86 | | |
87 | 67.3k | int VarInt::CompareTo(const VarInt& other) const { |
88 | 67.3k | return BN_cmp(impl_.get(), other.impl_.get()); |
89 | 67.3k | } |
90 | | |
91 | | template <class T> |
92 | 248k | void FlipBits(T* data, size_t count) { |
93 | 5.99M | for (auto end = data + count; data != end; ++data5.74M ) { |
94 | 5.74M | *data = ~*data; |
95 | 5.74M | } |
96 | 248k | } |
97 | | |
98 | | constexpr auto kWordSize = sizeof(uint8_t); |
99 | | constexpr auto kBitsPerWord = 8 * kWordSize; |
100 | | |
101 | 324k | std::string VarInt::EncodeToComparable(size_t num_reserved_bits) const { |
102 | | // Actually we use EncodeToComparable with num_reserved_bits equals to 0 or 2, so don't have |
103 | | // to handle complex case when it wraps byte. |
104 | 324k | DCHECK_LT(num_reserved_bits, 8); |
105 | | |
106 | 324k | if (BN_is_zero(impl_.get())) { |
107 | | // Zero is encoded as positive value of length 0. |
108 | 1.00k | return std::string(1, 0x80 >> num_reserved_bits); |
109 | 1.00k | } |
110 | | |
111 | 323k | auto num_bits = BN_num_bits(impl_.get()); |
112 | | // The minimal number of bits that is required to encode this number: |
113 | | // sign bit, bits in value representation and reserved bits. |
114 | 323k | size_t total_num_bits = num_bits + 1 + num_reserved_bits; |
115 | | |
116 | | // Number of groups of 7 becomes number of bytes, because of the unary size prefix. |
117 | 323k | size_t num_bytes = (total_num_bits + 6) / 7; |
118 | | // If total_num_bits is not a multiple of seven, then the numerical part is prefixed with zeros. |
119 | 323k | size_t rounding_padding = num_bytes * 7 - total_num_bits; |
120 | | // Header consists of sign bit, then number of ones that equals to num_bytes and padding zeroes. |
121 | 323k | size_t header_length = 1 + num_bytes + rounding_padding; |
122 | | |
123 | | // Number of words required to encode this, i.e. header and bits in value rounded up to word size. |
124 | 323k | auto words_count = (num_bits + header_length + kBitsPerWord - 1) / kBitsPerWord; |
125 | | // Offset of the first byte that would contain number representation. |
126 | 323k | auto offset = words_count - (num_bits + kBitsPerWord - 1) / kBitsPerWord; |
127 | 323k | std::string result(words_count, 0); |
128 | 323k | if (offset > 0) { |
129 | | // This value could be merged with ones, so should cleanup it. |
130 | 185k | result[offset - 1] = 0; |
131 | 185k | } |
132 | 323k | auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data())); |
133 | 323k | BN_bn2bin(impl_.get(), data + offset); |
134 | 323k | size_t idx = 0; |
135 | | // Fill header with ones. We also fill reserved bits with ones for simplicity, then it will be |
136 | | // reset to zero. |
137 | 323k | size_t num_ones = num_bytes + num_reserved_bits; |
138 | | // Fill complete bytes with ones. |
139 | 516k | while (num_ones > 8) { |
140 | 192k | data[idx] = 0xff; |
141 | 192k | num_ones -= 8; |
142 | 192k | ++idx; |
143 | 192k | } |
144 | | // Merge last byte of header with possible first byte of number body. |
145 | 323k | data[idx] |= 0xff ^ ((1 << (8 - num_ones)) - 1); |
146 | | // Negative number is inverted. |
147 | 323k | if (BN_is_negative(impl_.get())) { |
148 | 92.0k | FlipBits(data, result.size()); |
149 | 92.0k | } |
150 | | // Set reserved bits to 0. |
151 | 323k | if (num_reserved_bits) { |
152 | 209k | data[0] &= (1 << (8 - num_reserved_bits)) - 1; |
153 | 209k | } |
154 | | |
155 | 323k | return result; |
156 | 324k | } |
157 | | |
158 | | Status VarInt::DecodeFromComparable(const Slice &slice, size_t *num_decoded_bytes, |
159 | 455k | size_t num_reserved_bits) { |
160 | 455k | DCHECK_LT(num_reserved_bits, 8); |
161 | | |
162 | 455k | size_t len = slice.size(); |
163 | 455k | if (len == 0) { |
164 | 199 | return STATUS(Corruption, "Cannot decode varint from empty slice"); |
165 | 199 | } |
166 | 455k | bool negative = (slice[0] & (0x80 >> num_reserved_bits)) == 0; |
167 | 455k | std::vector<uint8_t> buffer(slice.data(), slice.end()); |
168 | 455k | if (negative) { |
169 | 142k | FlipBits(buffer.data(), buffer.size()); |
170 | 142k | } |
171 | 455k | if (num_reserved_bits) { |
172 | 289k | buffer[0] |= ~((1 << (8 - num_reserved_bits)) - 1); |
173 | 289k | } |
174 | 455k | size_t idx = 0; |
175 | 455k | size_t num_ones = 0; |
176 | 795k | while (buffer[idx] == 0xff) { |
177 | 339k | ++idx; |
178 | 339k | if (idx >= len) { |
179 | 0 | return STATUS_FORMAT( |
180 | 0 | Corruption, "Encoded varint failure, no prefix termination: $0", |
181 | 0 | slice.ToDebugHexString()); |
182 | 0 | } |
183 | 339k | num_ones += 8; |
184 | 339k | } |
185 | 455k | uint8_t temp = 0x80; |
186 | 2.08M | while (buffer[idx] & temp) { |
187 | 1.62M | buffer[idx] ^= temp; |
188 | 1.62M | ++num_ones; |
189 | 1.62M | temp >>= 1; |
190 | 1.62M | } |
191 | 455k | num_ones -= num_reserved_bits; |
192 | 455k | if (num_ones > len) { |
193 | 0 | return STATUS_FORMAT( |
194 | 0 | Corruption, "Not enough data in encoded varint: $0, $1", |
195 | 0 | slice.ToDebugHexString(), num_ones); |
196 | 0 | } |
197 | 455k | *num_decoded_bytes = num_ones; |
198 | 455k | impl_.reset(BN_bin2bn(buffer.data() + idx, narrow_cast<int>(num_ones - idx), nullptr /* ret */)); |
199 | 455k | if (negative) { |
200 | 142k | BN_set_negative(impl_.get(), 1); |
201 | 142k | } |
202 | | |
203 | 455k | return Status::OK(); |
204 | 455k | } |
205 | | |
206 | 232 | Status VarInt::DecodeFromComparable(const Slice& slice) { |
207 | 232 | size_t num_decoded_bytes; |
208 | 232 | return DecodeFromComparable(slice, &num_decoded_bytes); |
209 | 232 | } |
210 | | |
211 | 33.5k | std::string VarInt::EncodeToTwosComplement() const { |
212 | 33.5k | if (BN_is_zero(impl_.get())) { |
213 | 239 | return std::string(1, 0); |
214 | 239 | } |
215 | 33.2k | size_t num_bits = BN_num_bits(impl_.get()); |
216 | 33.2k | size_t offset = num_bits % kBitsPerWord == 0 ? 14.06k : 029.2k ; |
217 | 33.2k | size_t count = (num_bits + kBitsPerWord - 1) / kBitsPerWord + offset; |
218 | 33.2k | std::string result(count, 0); |
219 | 33.2k | auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data())); |
220 | 33.2k | if (offset) { |
221 | 4.06k | *data = 0; |
222 | 4.06k | } |
223 | 33.2k | BN_bn2bin(impl_.get(), data + offset); |
224 | 33.2k | if (BN_is_negative(impl_.get())) { |
225 | 13.5k | FlipBits(data, result.size()); |
226 | 13.5k | unsigned char* back = data + result.size(); |
227 | 13.6k | while (--back >= data && *back == 0xff) { |
228 | 119 | *back = 0; |
229 | 119 | } |
230 | 13.5k | ++*back; |
231 | | // The case when number has form of 10000000 00000000 ... |
232 | | // It is easier to detect it at this point, instead of doing preallocation check. |
233 | 13.5k | if (offset == 1 && back > data1.61k && (data[1] & 0x80)1.61k ) { |
234 | 7 | result.erase(0, 1); |
235 | 7 | } |
236 | 13.5k | } |
237 | 33.2k | return result; |
238 | 33.5k | } |
239 | | |
240 | 458 | Status VarInt::DecodeFromTwosComplement(const std::string& input) { |
241 | 458 | if (input.size() == 0) { |
242 | 0 | return STATUS(Corruption, "Cannot decode varint from empty slice"); |
243 | 0 | } |
244 | 458 | bool negative = (input[0] & 0x80) != 0; |
245 | 458 | if (!negative) { |
246 | 360 | impl_.reset(BN_bin2bn( |
247 | 360 | pointer_cast<const unsigned char*>(input.data()), narrow_cast<int>(input.size()), |
248 | 360 | nullptr /* ret */)); |
249 | 360 | return Status::OK(); |
250 | 360 | } |
251 | 98 | std::string copy(input); |
252 | 98 | auto data = pointer_cast<unsigned char*>(const_cast<char*>(copy.data())); |
253 | 98 | unsigned char* back = data + copy.size(); |
254 | 114 | while (--back >= data && *back == 0x00) { |
255 | 16 | *back = 0xff; |
256 | 16 | } |
257 | 98 | --*back; |
258 | 98 | FlipBits(data, copy.size()); |
259 | 98 | impl_.reset(BN_bin2bn(data, narrow_cast<int>(input.size()), nullptr /* ret */)); |
260 | 98 | BN_set_negative(impl_.get(), 1); |
261 | | |
262 | 98 | return Status::OK(); |
263 | 458 | } |
264 | | |
265 | 6.62k | const VarInt& VarInt::Negate() { |
266 | 6.62k | BN_set_negative(impl_.get(), 1 - BN_is_negative(impl_.get())); |
267 | 6.62k | return *this; |
268 | 6.62k | } |
269 | | |
270 | 13.3k | int VarInt::Sign() const { |
271 | 13.3k | if (BN_is_zero(impl_.get())) { |
272 | 247 | return 0; |
273 | 247 | } |
274 | 13.1k | return BN_is_negative(impl_.get()) ? -13.44k : 19.67k ; |
275 | 13.3k | } |
276 | | |
277 | 98 | Result<VarInt> VarInt::CreateFromString(const std::string& input) { |
278 | 98 | VarInt result; |
279 | 98 | RETURN_NOT_OK(result.FromString(input)); |
280 | 98 | return result; |
281 | 98 | } |
282 | | |
283 | 11.3k | Result<VarInt> VarInt::CreateFromString(const char* input) { |
284 | 11.3k | VarInt result; |
285 | 11.3k | RETURN_NOT_OK(result.FromString(input)); |
286 | 11.3k | return result; |
287 | 11.3k | } |
288 | | |
289 | 0 | std::ostream& operator<<(ostream& os, const VarInt& v) { |
290 | 0 | os << v.ToString(); |
291 | 0 | return os; |
292 | 0 | } |
293 | | |
294 | 9.35k | VarInt operator+(const VarInt& lhs, const VarInt& rhs) { |
295 | 9.35k | BigNumPtr temp(BN_new()); |
296 | 9.35k | CHECK(BN_add(temp.get(), lhs.impl_.get(), rhs.impl_.get())); |
297 | 9.35k | return VarInt(std::move(temp)); |
298 | 9.35k | } |
299 | | |
300 | 14.0k | VarInt operator-(const VarInt& lhs, const VarInt& rhs) { |
301 | 14.0k | BigNumPtr temp(BN_new()); |
302 | 14.0k | CHECK(BN_sub(temp.get(), lhs.impl_.get(), rhs.impl_.get())); |
303 | 14.0k | return VarInt(std::move(temp)); |
304 | 14.0k | } |
305 | | |
306 | | } // namespace util |
307 | | } // namespace yb |