/Users/deen/code/yugabyte-db/src/yb/util/varint.cc
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright (c) YugaByte, Inc.  | 
2  |  | //  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except  | 
4  |  | // in compliance with the License.  You may obtain a copy of the License at  | 
5  |  | //  | 
6  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
7  |  | //  | 
8  |  | // Unless required by applicable law or agreed to in writing, software distributed under the License  | 
9  |  | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express  | 
10  |  | // or implied.  See the License for the specific language governing permissions and limitations  | 
11  |  | // under the License.  | 
12  |  | //  | 
13  |  |  | 
14  |  | #include "yb/util/varint.h"  | 
15  |  |  | 
16  |  | #include <openssl/bn.h>  | 
17  |  |  | 
18  |  | #include "yb/gutil/casts.h"  | 
19  |  |  | 
20  |  | #include "yb/util/result.h"  | 
21  |  | #include "yb/util/status_format.h"  | 
22  |  | #include "yb/util/status_log.h"  | 
23  |  |  | 
24  |  | namespace yb { | 
25  |  | namespace util { | 
26  |  |  | 
27  | 1.02M  | void BigNumDeleter::operator()(BIGNUM* bn) const { | 
28  | 1.02M  |   BN_free(bn);  | 
29  | 1.02M  | }  | 
30  |  |  | 
31  | 116k  | ATTRIBUTE_NO_SANITIZE_UNDEFINED VarInt::VarInt(int64_t int64_val) : impl_(BN_new()) { | 
32  | 116k  |   if (int64_val >= 0) { | 
33  | 105k  |     BN_set_word(impl_.get(), int64_val);  | 
34  | 11.0k  |   } else { | 
35  | 11.0k  |     BN_set_word(impl_.get(), -int64_val);  | 
36  | 11.0k  |     BN_set_negative(impl_.get(), 1);  | 
37  | 11.0k  |   }  | 
38  | 116k  | }  | 
39  |  |  | 
40  | 447k  | VarInt::VarInt() : impl_(BN_new()) {} | 
41  |  |  | 
42  | 422  | VarInt::VarInt(const VarInt& var_int) : impl_(BN_dup(var_int.impl_.get())) {} | 
43  |  |  | 
44  | 229  | VarInt& VarInt::operator=(const VarInt& rhs) { | 
45  | 229  |   impl_.reset(BN_dup(rhs.impl_.get()));  | 
46  | 229  |   return *this;  | 
47  | 229  | }  | 
48  |  |  | 
49  | 324  | std::string VarInt::ToString() const { | 
50  | 324  |   char* temp = BN_bn2dec(impl_.get());  | 
51  | 324  |   std::string result(temp);  | 
52  | 324  |   OPENSSL_free(temp);  | 
53  | 324  |   return result;  | 
54  | 324  | }  | 
55  |  |  | 
56  | 8.35k  | Result<int64_t> VarInt::ToInt64() const { | 
57  | 8.35k  |   BN_ULONG value = BN_get_word(impl_.get());  | 
58  | 8.35k  |   bool negative = BN_is_negative(impl_.get());  | 
59  |  |   // Casting minimal signed value to unsigned type of the same size returns its absolute value.  | 
60  | 34  |   BN_ULONG bound = negative ? std::numeric_limits<int64_t>::min()  | 
61  | 8.32k  |                             : std::numeric_limits<int64_t>::max();  | 
62  |  |  | 
63  | 8.35k  |   if (value > bound) { | 
64  | 12  |     return STATUS_FORMAT(  | 
65  | 12  |         InvalidArgument, "VarInt $0 cannot be converted to int64 due to overflow", *this);  | 
66  | 12  |   }  | 
67  | 8.34k  |   return negative ? -value : value;  | 
68  | 8.34k  | }  | 
69  |  |  | 
70  | 1.79k  | Status VarInt::FromString(const std::string& str) { | 
71  | 1.79k  |   return FromString(str.c_str());  | 
72  | 1.79k  | }  | 
73  |  |  | 
74  | 14.3k  | Status VarInt::FromString(const char* cstr) { | 
75  | 14.3k  |   if (*cstr == '+') { | 
76  | 381  |     ++cstr;  | 
77  | 381  |   }  | 
78  | 14.3k  |   BIGNUM* temp = nullptr;  | 
79  | 14.3k  |   size_t parsed = BN_dec2bn(&temp, cstr);  | 
80  | 14.3k  |   impl_.reset(temp);  | 
81  | 14.3k  |   if (parsed == 0 || parsed != strlen(cstr)) { | 
82  | 2  |     return STATUS_FORMAT(InvalidArgument, "Cannot parse varint: $0", cstr);  | 
83  | 2  |   }  | 
84  | 14.3k  |   return Status::OK();  | 
85  | 14.3k  | }  | 
86  |  |  | 
87  | 67.3k  | int VarInt::CompareTo(const VarInt& other) const { | 
88  | 67.3k  |   return BN_cmp(impl_.get(), other.impl_.get());  | 
89  | 67.3k  | }  | 
90  |  |  | 
91  |  | template <class T>  | 
92  | 243k  | void FlipBits(T* data, size_t count) { | 
93  | 5.67M  |   for (auto end = data + count; data != end; ++data) { | 
94  | 5.43M  |     *data = ~*data;  | 
95  | 5.43M  |   }  | 
96  | 243k  | }  | 
97  |  |  | 
98  |  | constexpr auto kWordSize = sizeof(uint8_t);  | 
99  |  | constexpr auto kBitsPerWord = 8 * kWordSize;  | 
100  |  |  | 
101  | 294k  | std::string VarInt::EncodeToComparable(size_t num_reserved_bits) const { | 
102  |  |   // Actually we use EncodeToComparable with num_reserved_bits equals to 0 or 2, so don't have  | 
103  |  |   // to handle complex case when it wraps byte.  | 
104  | 294k  |   DCHECK_LT(num_reserved_bits, 8);  | 
105  |  |  | 
106  | 294k  |   if (BN_is_zero(impl_.get())) { | 
107  |  |     // Zero is encoded as positive value of length 0.  | 
108  | 656  |     return std::string(1, 0x80 >> num_reserved_bits);  | 
109  | 656  |   }  | 
110  |  |  | 
111  | 294k  |   auto num_bits = BN_num_bits(impl_.get());  | 
112  |  |   // The minimal number of bits that is required to encode this number:  | 
113  |  |   // sign bit, bits in value representation and reserved bits.  | 
114  | 294k  |   size_t total_num_bits = num_bits + 1 + num_reserved_bits;  | 
115  |  |  | 
116  |  |   // Number of groups of 7 becomes number of bytes, because of the unary size prefix.  | 
117  | 294k  |   size_t num_bytes = (total_num_bits + 6) / 7;  | 
118  |  |   // If total_num_bits is not a multiple of seven, then the numerical part is prefixed with zeros.  | 
119  | 294k  |   size_t rounding_padding = num_bytes * 7 - total_num_bits;  | 
120  |  |   // Header consists of sign bit, then number of ones that equals to num_bytes and padding zeroes.  | 
121  | 294k  |   size_t header_length = 1 + num_bytes + rounding_padding;  | 
122  |  |  | 
123  |  |   // Number of words required to encode this, i.e. header and bits in value rounded up to word size.  | 
124  | 294k  |   auto words_count = (num_bits + header_length + kBitsPerWord - 1) / kBitsPerWord;  | 
125  |  |   // Offset of the first byte that would contain number representation.  | 
126  | 294k  |   auto offset = words_count - (num_bits + kBitsPerWord - 1) / kBitsPerWord;  | 
127  | 294k  |   std::string result(words_count, 0);  | 
128  | 294k  |   if (offset > 0) { | 
129  |  |     // This value could be merged with ones, so should cleanup it.  | 
130  | 174k  |     result[offset - 1] = 0;  | 
131  | 174k  |   }  | 
132  | 294k  |   auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data()));  | 
133  | 294k  |   BN_bn2bin(impl_.get(), data + offset);  | 
134  | 294k  |   size_t idx = 0;  | 
135  |  |   // Fill header with ones. We also fill reserved bits with ones for simplicity, then it will be  | 
136  |  |   // reset to zero.  | 
137  | 294k  |   size_t num_ones = num_bytes + num_reserved_bits;  | 
138  |  |   // Fill complete bytes with ones.  | 
139  | 498k  |   while (num_ones > 8) { | 
140  | 203k  |     data[idx] = 0xff;  | 
141  | 203k  |     num_ones -= 8;  | 
142  | 203k  |     ++idx;  | 
143  | 203k  |   }  | 
144  |  |   // Merge last byte of header with possible first byte of number body.  | 
145  | 294k  |   data[idx] |= 0xff ^ ((1 << (8 - num_ones)) - 1);  | 
146  |  |   // Negative number is inverted.  | 
147  | 294k  |   if (BN_is_negative(impl_.get())) { | 
148  | 89.0k  |     FlipBits(data, result.size());  | 
149  | 89.0k  |   }  | 
150  |  |   // Set reserved bits to 0.  | 
151  | 294k  |   if (num_reserved_bits) { | 
152  | 180k  |     data[0] &= (1 << (8 - num_reserved_bits)) - 1;  | 
153  | 180k  |   }  | 
154  |  |  | 
155  | 294k  |   return result;  | 
156  | 294k  | }  | 
157  |  |  | 
158  |  | Status VarInt::DecodeFromComparable(const Slice &slice, size_t *num_decoded_bytes,  | 
159  | 426k  |                                     size_t num_reserved_bits) { | 
160  | 426k  |   DCHECK_LT(num_reserved_bits, 8);  | 
161  |  |  | 
162  | 426k  |   size_t len = slice.size();  | 
163  | 426k  |   if (len == 0) { | 
164  | 122  |     return STATUS(Corruption, "Cannot decode varint from empty slice");  | 
165  | 122  |   }  | 
166  | 426k  |   bool negative = (slice[0] & (0x80 >> num_reserved_bits)) == 0;  | 
167  | 426k  |   std::vector<uint8_t> buffer(slice.data(), slice.end());  | 
168  | 426k  |   if (negative) { | 
169  | 139k  |     FlipBits(buffer.data(), buffer.size());  | 
170  | 139k  |   }  | 
171  | 426k  |   if (num_reserved_bits) { | 
172  | 260k  |     buffer[0] |= ~((1 << (8 - num_reserved_bits)) - 1);  | 
173  | 260k  |   }  | 
174  | 426k  |   size_t idx = 0;  | 
175  | 426k  |   size_t num_ones = 0;  | 
176  | 786k  |   while (buffer[idx] == 0xff) { | 
177  | 360k  |     ++idx;  | 
178  | 360k  |     if (idx >= len) { | 
179  | 0  |       return STATUS_FORMAT(  | 
180  | 0  |           Corruption, "Encoded varint failure, no prefix termination: $0",  | 
181  | 0  |           slice.ToDebugHexString());  | 
182  | 0  |     }  | 
183  | 360k  |     num_ones += 8;  | 
184  | 360k  |   }  | 
185  | 426k  |   uint8_t temp = 0x80;  | 
186  | 1.95M  |   while (buffer[idx] & temp) { | 
187  | 1.53M  |     buffer[idx] ^= temp;  | 
188  | 1.53M  |     ++num_ones;  | 
189  | 1.53M  |     temp >>= 1;  | 
190  | 1.53M  |   }  | 
191  | 426k  |   num_ones -= num_reserved_bits;  | 
192  | 426k  |   if (num_ones > len) { | 
193  | 0  |     return STATUS_FORMAT(  | 
194  | 0  |         Corruption, "Not enough data in encoded varint: $0, $1",  | 
195  | 0  |         slice.ToDebugHexString(), num_ones);  | 
196  | 0  |   }  | 
197  | 426k  |   *num_decoded_bytes = num_ones;  | 
198  | 426k  |   impl_.reset(BN_bin2bn(buffer.data() + idx, narrow_cast<int>(num_ones - idx), nullptr /* ret */));  | 
199  | 426k  |   if (negative) { | 
200  | 139k  |     BN_set_negative(impl_.get(), 1);  | 
201  | 139k  |   }  | 
202  |  |  | 
203  | 426k  |   return Status::OK();  | 
204  | 426k  | }  | 
205  |  |  | 
206  | 147  | Status VarInt::DecodeFromComparable(const Slice& slice) { | 
207  | 147  |   size_t num_decoded_bytes;  | 
208  | 147  |   return DecodeFromComparable(slice, &num_decoded_bytes);  | 
209  | 147  | }  | 
210  |  |  | 
211  | 33.5k  | std::string VarInt::EncodeToTwosComplement() const { | 
212  | 33.5k  |   if (BN_is_zero(impl_.get())) { | 
213  | 141  |     return std::string(1, 0);  | 
214  | 141  |   }  | 
215  | 33.3k  |   size_t num_bits = BN_num_bits(impl_.get());  | 
216  | 29.5k  |   size_t offset = num_bits % kBitsPerWord == 0 ? 1 : 0;  | 
217  | 33.3k  |   size_t count = (num_bits + kBitsPerWord - 1) / kBitsPerWord + offset;  | 
218  | 33.3k  |   std::string result(count, 0);  | 
219  | 33.3k  |   auto data = pointer_cast<unsigned char*>(const_cast<char*>(result.data()));  | 
220  | 33.3k  |   if (offset) { | 
221  | 3.83k  |     *data = 0;  | 
222  | 3.83k  |   }  | 
223  | 33.3k  |   BN_bn2bin(impl_.get(), data + offset);  | 
224  | 33.3k  |   if (BN_is_negative(impl_.get())) { | 
225  | 14.7k  |     FlipBits(data, result.size());  | 
226  | 14.7k  |     unsigned char* back = data + result.size();  | 
227  | 14.7k  |     while (--back >= data && *back == 0xff) { | 
228  | 21  |       *back = 0;  | 
229  | 21  |     }  | 
230  | 14.7k  |     ++*back;  | 
231  |  |     // The case when number has form of 10000000 00000000 ...  | 
232  |  |     // It is easier to detect it at this point, instead of doing preallocation check.  | 
233  | 14.7k  |     if (offset == 1 && back > data && (data[1] & 0x80)) { | 
234  | 8  |       result.erase(0, 1);  | 
235  | 8  |     }  | 
236  | 14.7k  |   }  | 
237  | 33.3k  |   return result;  | 
238  | 33.3k  | }  | 
239  |  |  | 
240  | 458  | Status VarInt::DecodeFromTwosComplement(const std::string& input) { | 
241  | 458  |   if (input.size() == 0) { | 
242  | 0  |     return STATUS(Corruption, "Cannot decode varint from empty slice");  | 
243  | 0  |   }  | 
244  | 458  |   bool negative = (input[0] & 0x80) != 0;  | 
245  | 458  |   if (!negative) { | 
246  | 258  |     impl_.reset(BN_bin2bn(  | 
247  | 258  |         pointer_cast<const unsigned char*>(input.data()), narrow_cast<int>(input.size()),  | 
248  | 258  |         nullptr /* ret */));  | 
249  | 258  |     return Status::OK();  | 
250  | 258  |   }  | 
251  | 200  |   std::string copy(input);  | 
252  | 200  |   auto data = pointer_cast<unsigned char*>(const_cast<char*>(copy.data()));  | 
253  | 200  |   unsigned char* back = data + copy.size();  | 
254  | 216  |   while (--back >= data && *back == 0x00) { | 
255  | 16  |     *back = 0xff;  | 
256  | 16  |   }  | 
257  | 200  |   --*back;  | 
258  | 200  |   FlipBits(data, copy.size());  | 
259  | 200  |   impl_.reset(BN_bin2bn(data, narrow_cast<int>(input.size()), nullptr /* ret */));  | 
260  | 200  |   BN_set_negative(impl_.get(), 1);  | 
261  |  |  | 
262  | 200  |   return Status::OK();  | 
263  | 200  | }  | 
264  |  |  | 
265  | 7.76k  | const VarInt& VarInt::Negate() { | 
266  | 7.76k  |   BN_set_negative(impl_.get(), 1 - BN_is_negative(impl_.get()));  | 
267  | 7.76k  |   return *this;  | 
268  | 7.76k  | }  | 
269  |  |  | 
270  | 11.5k  | int VarInt::Sign() const { | 
271  | 11.5k  |   if (BN_is_zero(impl_.get())) { | 
272  | 137  |     return 0;  | 
273  | 137  |   }  | 
274  | 11.4k  |   return BN_is_negative(impl_.get()) ? -1 : 1;  | 
275  | 11.4k  | }  | 
276  |  |  | 
277  | 98  | Result<VarInt> VarInt::CreateFromString(const std::string& input) { | 
278  | 98  |   VarInt result;  | 
279  | 98  |   RETURN_NOT_OK(result.FromString(input));  | 
280  | 98  |   return result;  | 
281  | 98  | }  | 
282  |  |  | 
283  | 11.3k  | Result<VarInt> VarInt::CreateFromString(const char* input) { | 
284  | 11.3k  |   VarInt result;  | 
285  | 11.3k  |   RETURN_NOT_OK(result.FromString(input));  | 
286  | 11.3k  |   return result;  | 
287  | 11.3k  | }  | 
288  |  |  | 
289  | 0  | std::ostream& operator<<(ostream& os, const VarInt& v) { | 
290  | 0  |   os << v.ToString();  | 
291  | 0  |   return os;  | 
292  | 0  | }  | 
293  |  |  | 
294  | 7.19k  | VarInt operator+(const VarInt& lhs, const VarInt& rhs) { | 
295  | 7.19k  |   BigNumPtr temp(BN_new());  | 
296  | 7.19k  |   CHECK(BN_add(temp.get(), lhs.impl_.get(), rhs.impl_.get()));  | 
297  | 7.19k  |   return VarInt(std::move(temp));  | 
298  | 7.19k  | }  | 
299  |  |  | 
300  | 15.2k  | VarInt operator-(const VarInt& lhs, const VarInt& rhs) { | 
301  | 15.2k  |   BigNumPtr temp(BN_new());  | 
302  | 15.2k  |   CHECK(BN_sub(temp.get(), lhs.impl_.get(), rhs.impl_.get()));  | 
303  | 15.2k  |   return VarInt(std::move(temp));  | 
304  | 15.2k  | }  | 
305  |  |  | 
306  |  | } // namespace util  | 
307  |  | } // namespace yb  |