/Users/deen/code/yugabyte-db/src/yb/util/varint.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/util/varint.h" |
15 | | |
16 | | #include <openssl/bn.h> |
17 | | |
18 | | #include "yb/gutil/casts.h" |
19 | | |
20 | | #include "yb/util/result.h" |
21 | | #include "yb/util/status_format.h" |
22 | | #include "yb/util/status_log.h" |
23 | | |
24 | | namespace yb { |
25 | | namespace util { |
26 | | |
27 | 1.02M | void BigNumDeleter::operator()(BIGNUM* bn) const { |
28 | 1.02M | BN_free(bn); |
29 | 1.02M | } |
30 | | |
31 | 116k | ATTRIBUTE_NO_SANITIZE_UNDEFINED VarInt::VarInt(int64_t int64_val) : impl_(BN_new()) { |
32 | 116k | if (int64_val >= 0) { |
33 | 105k | BN_set_word(impl_.get(), int64_val); |
34 | 11.0k | } else { |
35 | 11.0k | BN_set_word(impl_.get(), -int64_val); |
36 | 11.0k | BN_set_negative(impl_.get(), 1); |
37 | 11.0k | } |
38 | 116k | } |
39 | | |
40 | 447k | VarInt::VarInt() : impl_(BN_new()) {} |
41 | | |
42 | 422 | VarInt::VarInt(const VarInt& var_int) : impl_(BN_dup(var_int.impl_.get())) {} |
43 | | |
44 | 229 | VarInt& VarInt::operator=(const VarInt& rhs) { |
45 | 229 | impl_.reset(BN_dup(rhs.impl_.get())); |
46 | 229 | return *this; |
47 | 229 | } |
48 | | |
49 | 324 | std::string VarInt::ToString() const { |
50 | 324 | char* temp = BN_bn2dec(impl_.get()); |
51 | 324 | std::string result(temp); |
52 | 324 | OPENSSL_free(temp); |
53 | 324 | return result; |
54 | 324 | } |
55 | | |
56 | 8.35k | Result<int64_t> VarInt::ToInt64() const { |
57 | 8.35k | BN_ULONG value = BN_get_word(impl_.get()); |
58 | 8.35k | bool negative = BN_is_negative(impl_.get()); |
59 | | // Casting minimal signed value to unsigned type of the same size returns its absolute value. |
60 | 34 | BN_ULONG bound = negative ? std::numeric_limits<int64_t>::min() |
61 | 8.32k | : std::numeric_limits<int64_t>::max(); |
62 | | |
63 | 8.35k | if (value > bound) { |
64 | 12 | return STATUS_FORMAT( |
65 | 12 | InvalidArgument, "VarInt $0 cannot be converted to int64 due to overflow", *this); |
66 | 12 | } |
67 | 8.34k | return negative ? -value : value; |
68 | 8.34k | } |
69 | | |
70 | 1.79k | Status VarInt::FromString(const std::string& str) { |
71 | 1.79k | return FromString(str.c_str()); |
72 | 1.79k | } |
73 | | |
74 | 14.3k | Status VarInt::FromString(const char* cstr) { |
75 | 14.3k | if (*cstr == '+') { |
76 | 381 | ++cstr; |
77 | 381 | } |
78 | 14.3k | BIGNUM* temp = nullptr; |
79 | 14.3k | size_t parsed = BN_dec2bn(&temp, cstr); |
80 | 14.3k | impl_.reset(temp); |
81 | 14.3k | if (parsed == 0 || parsed != strlen(cstr)) { |
82 | 2 | return STATUS_FORMAT(InvalidArgument, "Cannot parse varint: $0", cstr); |
83 | 2 | } |
84 | 14.3k | return Status::OK(); |
85 | 14.3k | } |
86 | | |
87 | 67.3k | int VarInt::CompareTo(const VarInt& other) const { |
88 | 67.3k | return BN_cmp(impl_.get(), other.impl_.get()); |
89 | 67.3k | } |
90 | | |
91 | | template <class T> |
92 | 243k | void FlipBits(T* data, size_t count) { |
93 | 5.67M | for (auto end = data + count; data != end; ++data) { |
94 | 5.43M | *data = ~*data; |
95 | 5.43M | } |
96 | 243k | } |
97 | | |
98 | | constexpr auto kWordSize = sizeof(uint8_t); |
99 | | constexpr auto kBitsPerWord = 8 * kWordSize; |
100 | | |
101 | 294k | std::string VarInt::EncodeToComparable(size_t num_reserved_bits) const { |
102 | | // Actually we use EncodeToComparable with num_reserved_bits equals to 0 or 2, so don't have |
103 | | // to handle complex case when it wraps byte. |
104 | 294k | DCHECK_LT(num_reserved_bits, 8); |
105 | | |
106 | 294k | if (BN_is_zero(impl_.get())) { |
107 | | // Zero is encoded as positive value of length 0. |
108 | 656 | return std::string(1, 0x80 >> num_reserved_bits); |
109 | 656 | } |
110 | | |
111 | 294k | auto num_bits = BN_num_bits(impl_.get()); |
112 | | // The minimal number of bits that is required to encode this number: |
113 | | // sign bit, bits in value representation and reserved bits. |
114 | 294k | size_t total_num_bits = num_bits + 1 + num_reserved_bits; |
115 | | |
116 | | // Number of groups of 7 becomes number of bytes, because of the unary size prefix. |
117 | 294k | size_t num_bytes = (total_num_bits + 6) / 7; |
118 | | // If total_num_bits is not a multiple of seven, then the numerical part is prefixed with zeros. |
119 | 294k | size_t rounding_padding = num_bytes * 7 - total_num_bits; |
120 | | // Header consists of sign bit, then number of ones that equals to num_bytes and padding zeroes. |
121 | 294k | size_t header_length = 1 + num_bytes + rounding_padding; |
122 | | |
123 | | // Number of words required to encode this, i.e. header and bits in value rounded up to word size. |
124 | 294k | auto words_count = (num_bits + header_length + kBitsPerWord - 1) / kBitsPerWord; |
125 | | // Offset of the first byte that would contain number representation. |
126 | 294k | auto offset = words_count - (num_bits + kBitsPerWord - 1) / kBitsPerWord; |
127 | 294k | std::string result(words_count, 0); |
128 | 294k | if (offset > 0) { |
129 | | // This value could be merged with ones, so should cleanup it. |
130 | 174k | result[offset - 1] = 0; |
131 | 174k | } |
132 | 294k | auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data())); |
133 | 294k | BN_bn2bin(impl_.get(), data + offset); |
134 | 294k | size_t idx = 0; |
135 | | // Fill header with ones. We also fill reserved bits with ones for simplicity, then it will be |
136 | | // reset to zero. |
137 | 294k | size_t num_ones = num_bytes + num_reserved_bits; |
138 | | // Fill complete bytes with ones. |
139 | 498k | while (num_ones > 8) { |
140 | 203k | data[idx] = 0xff; |
141 | 203k | num_ones -= 8; |
142 | 203k | ++idx; |
143 | 203k | } |
144 | | // Merge last byte of header with possible first byte of number body. |
145 | 294k | data[idx] |= 0xff ^ ((1 << (8 - num_ones)) - 1); |
146 | | // Negative number is inverted. |
147 | 294k | if (BN_is_negative(impl_.get())) { |
148 | 89.0k | FlipBits(data, result.size()); |
149 | 89.0k | } |
150 | | // Set reserved bits to 0. |
151 | 294k | if (num_reserved_bits) { |
152 | 180k | data[0] &= (1 << (8 - num_reserved_bits)) - 1; |
153 | 180k | } |
154 | | |
155 | 294k | return result; |
156 | 294k | } |
157 | | |
158 | | Status VarInt::DecodeFromComparable(const Slice &slice, size_t *num_decoded_bytes, |
159 | 426k | size_t num_reserved_bits) { |
160 | 426k | DCHECK_LT(num_reserved_bits, 8); |
161 | | |
162 | 426k | size_t len = slice.size(); |
163 | 426k | if (len == 0) { |
164 | 122 | return STATUS(Corruption, "Cannot decode varint from empty slice"); |
165 | 122 | } |
166 | 426k | bool negative = (slice[0] & (0x80 >> num_reserved_bits)) == 0; |
167 | 426k | std::vector<uint8_t> buffer(slice.data(), slice.end()); |
168 | 426k | if (negative) { |
169 | 139k | FlipBits(buffer.data(), buffer.size()); |
170 | 139k | } |
171 | 426k | if (num_reserved_bits) { |
172 | 260k | buffer[0] |= ~((1 << (8 - num_reserved_bits)) - 1); |
173 | 260k | } |
174 | 426k | size_t idx = 0; |
175 | 426k | size_t num_ones = 0; |
176 | 786k | while (buffer[idx] == 0xff) { |
177 | 360k | ++idx; |
178 | 360k | if (idx >= len) { |
179 | 0 | return STATUS_FORMAT( |
180 | 0 | Corruption, "Encoded varint failure, no prefix termination: $0", |
181 | 0 | slice.ToDebugHexString()); |
182 | 0 | } |
183 | 360k | num_ones += 8; |
184 | 360k | } |
185 | 426k | uint8_t temp = 0x80; |
186 | 1.95M | while (buffer[idx] & temp) { |
187 | 1.53M | buffer[idx] ^= temp; |
188 | 1.53M | ++num_ones; |
189 | 1.53M | temp >>= 1; |
190 | 1.53M | } |
191 | 426k | num_ones -= num_reserved_bits; |
192 | 426k | if (num_ones > len) { |
193 | 0 | return STATUS_FORMAT( |
194 | 0 | Corruption, "Not enough data in encoded varint: $0, $1", |
195 | 0 | slice.ToDebugHexString(), num_ones); |
196 | 0 | } |
197 | 426k | *num_decoded_bytes = num_ones; |
198 | 426k | impl_.reset(BN_bin2bn(buffer.data() + idx, narrow_cast<int>(num_ones - idx), nullptr /* ret */)); |
199 | 426k | if (negative) { |
200 | 139k | BN_set_negative(impl_.get(), 1); |
201 | 139k | } |
202 | | |
203 | 426k | return Status::OK(); |
204 | 426k | } |
205 | | |
206 | 147 | Status VarInt::DecodeFromComparable(const Slice& slice) { |
207 | 147 | size_t num_decoded_bytes; |
208 | 147 | return DecodeFromComparable(slice, &num_decoded_bytes); |
209 | 147 | } |
210 | | |
211 | 33.5k | std::string VarInt::EncodeToTwosComplement() const { |
212 | 33.5k | if (BN_is_zero(impl_.get())) { |
213 | 141 | return std::string(1, 0); |
214 | 141 | } |
215 | 33.3k | size_t num_bits = BN_num_bits(impl_.get()); |
216 | 29.5k | size_t offset = num_bits % kBitsPerWord == 0 ? 1 : 0; |
217 | 33.3k | size_t count = (num_bits + kBitsPerWord - 1) / kBitsPerWord + offset; |
218 | 33.3k | std::string result(count, 0); |
219 | 33.3k | auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data())); |
220 | 33.3k | if (offset) { |
221 | 3.83k | *data = 0; |
222 | 3.83k | } |
223 | 33.3k | BN_bn2bin(impl_.get(), data + offset); |
224 | 33.3k | if (BN_is_negative(impl_.get())) { |
225 | 14.7k | FlipBits(data, result.size()); |
226 | 14.7k | unsigned char* back = data + result.size(); |
227 | 14.7k | while (--back >= data && *back == 0xff) { |
228 | 21 | *back = 0; |
229 | 21 | } |
230 | 14.7k | ++*back; |
231 | | // The case when number has form of 10000000 00000000 ... |
232 | | // It is easier to detect it at this point, instead of doing preallocation check. |
233 | 14.7k | if (offset == 1 && back > data && (data[1] & 0x80)) { |
234 | 8 | result.erase(0, 1); |
235 | 8 | } |
236 | 14.7k | } |
237 | 33.3k | return result; |
238 | 33.3k | } |
239 | | |
240 | 458 | Status VarInt::DecodeFromTwosComplement(const std::string& input) { |
241 | 458 | if (input.size() == 0) { |
242 | 0 | return STATUS(Corruption, "Cannot decode varint from empty slice"); |
243 | 0 | } |
244 | 458 | bool negative = (input[0] & 0x80) != 0; |
245 | 458 | if (!negative) { |
246 | 258 | impl_.reset(BN_bin2bn( |
247 | 258 | pointer_cast<const unsigned char*>(input.data()), narrow_cast<int>(input.size()), |
248 | 258 | nullptr /* ret */)); |
249 | 258 | return Status::OK(); |
250 | 258 | } |
251 | 200 | std::string copy(input); |
252 | 200 | auto data = pointer_cast<unsigned char*>(const_cast<char*>(copy.data())); |
253 | 200 | unsigned char* back = data + copy.size(); |
254 | 216 | while (--back >= data && *back == 0x00) { |
255 | 16 | *back = 0xff; |
256 | 16 | } |
257 | 200 | --*back; |
258 | 200 | FlipBits(data, copy.size()); |
259 | 200 | impl_.reset(BN_bin2bn(data, narrow_cast<int>(input.size()), nullptr /* ret */)); |
260 | 200 | BN_set_negative(impl_.get(), 1); |
261 | | |
262 | 200 | return Status::OK(); |
263 | 200 | } |
264 | | |
265 | 7.76k | const VarInt& VarInt::Negate() { |
266 | 7.76k | BN_set_negative(impl_.get(), 1 - BN_is_negative(impl_.get())); |
267 | 7.76k | return *this; |
268 | 7.76k | } |
269 | | |
270 | 11.5k | int VarInt::Sign() const { |
271 | 11.5k | if (BN_is_zero(impl_.get())) { |
272 | 137 | return 0; |
273 | 137 | } |
274 | 11.4k | return BN_is_negative(impl_.get()) ? -1 : 1; |
275 | 11.4k | } |
276 | | |
277 | 98 | Result<VarInt> VarInt::CreateFromString(const std::string& input) { |
278 | 98 | VarInt result; |
279 | 98 | RETURN_NOT_OK(result.FromString(input)); |
280 | 98 | return result; |
281 | 98 | } |
282 | | |
283 | 11.3k | Result<VarInt> VarInt::CreateFromString(const char* input) { |
284 | 11.3k | VarInt result; |
285 | 11.3k | RETURN_NOT_OK(result.FromString(input)); |
286 | 11.3k | return result; |
287 | 11.3k | } |
288 | | |
289 | 0 | std::ostream& operator<<(ostream& os, const VarInt& v) { |
290 | 0 | os << v.ToString(); |
291 | 0 | return os; |
292 | 0 | } |
293 | | |
294 | 7.19k | VarInt operator+(const VarInt& lhs, const VarInt& rhs) { |
295 | 7.19k | BigNumPtr temp(BN_new()); |
296 | 7.19k | CHECK(BN_add(temp.get(), lhs.impl_.get(), rhs.impl_.get())); |
297 | 7.19k | return VarInt(std::move(temp)); |
298 | 7.19k | } |
299 | | |
300 | 15.2k | VarInt operator-(const VarInt& lhs, const VarInt& rhs) { |
301 | 15.2k | BigNumPtr temp(BN_new()); |
302 | 15.2k | CHECK(BN_sub(temp.get(), lhs.impl_.get(), rhs.impl_.get())); |
303 | 15.2k | return VarInt(std::move(temp)); |
304 | 15.2k | } |
305 | | |
306 | | } // namespace util |
307 | | } // namespace yb |