/Users/deen/code/yugabyte-db/src/yb/util/varint.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #ifndef YB_UTIL_VARINT_H |
15 | | #define YB_UTIL_VARINT_H |
16 | | |
17 | | #include <openssl/ossl_typ.h> |
18 | | |
19 | | #include "yb/util/slice.h" |
20 | | |
21 | | namespace yb { |
22 | | namespace util { |
23 | | |
24 | | class BigNumDeleter { |
25 | | public: |
26 | | void operator()(BIGNUM* bn) const; |
27 | | }; |
28 | | |
29 | | typedef std::unique_ptr<BIGNUM, BigNumDeleter> BigNumPtr; |
30 | | |
31 | | // VarInt holds a sequence of digits d1, d2 ... and a radix r. |
32 | | // The digits should always to be in range 0 <= d < r, and satisfy d_n-1 > 0, 0 < r < 2^15 |
33 | | // (So that int multiplication never overflows). |
34 | | // The representation is little endian: The underlying number is d1 + d2 * r + d3 * r^2 + ... |
35 | | // |
36 | | // The provided EncodeToComparable() function gives a byte sequence from which the size can be |
37 | | // decoded, and it is guaranteed that a < b if and only if Enc(a) < Enc(b) in lexicographical byte |
38 | | // order. |
39 | | // |
40 | | // Two other ways to encode VarInt are also provided, which are used for Decimal encodings. |
41 | | |
42 | | // For signed int this metafunction returns int64_t, for unsigned - uint64_t. |
43 | | template<class T> |
44 | | class CoveringInt { |
45 | | public: |
46 | | typedef typename std::decay<T>::type CleanedT; |
47 | | typedef typename std::conditional<std::is_signed<CleanedT>::value, int64_t, uint64_t>::type type; |
48 | | }; |
49 | | |
50 | | class VarInt { |
51 | | public: |
52 | | VarInt(); |
53 | | VarInt(const VarInt& var_int); |
54 | 22.9k | VarInt(VarInt&& rhs) = default; |
55 | | explicit VarInt(int64_t int64_val); |
56 | | |
57 | | VarInt& operator=(const VarInt& rhs); |
58 | 17.1k | VarInt& operator=(VarInt&& rhs) = default; |
59 | | |
60 | | std::string ToString() const; |
61 | | |
62 | | Result<int64_t> ToInt64() const; |
63 | | |
64 | | // The input is expected to be of the form (-)?[0-9]+, whitespace is not allowed. Use this |
65 | | // after removing whitespace. |
66 | | CHECKED_STATUS FromString(const std::string& input); |
67 | | CHECKED_STATUS FromString(const char* cstr); |
68 | | |
69 | | static Result<VarInt> CreateFromString(const std::string& input); |
70 | | |
71 | | static Result<VarInt> CreateFromString(const char* input); |
72 | | |
73 | | // <0, =0, >0 if this <,=,> other numerically. |
74 | | int CompareTo(const VarInt& other) const; |
75 | | |
76 | | // Note: operator== is numerical comparison, and can yield equality between different bases. |
77 | 65.8k | bool operator==(const VarInt& other) const { return CompareTo(other) == 0; } |
78 | 0 | bool operator!=(const VarInt& other) const { return CompareTo(other) != 0; } |
79 | 601 | bool operator<(const VarInt& other) const { return CompareTo(other) < 0; } |
80 | 256 | bool operator<=(const VarInt& other) const { return CompareTo(other) <= 0; } |
81 | 256 | bool operator>(const VarInt& other) const { return CompareTo(other) > 0; } |
82 | 273 | bool operator>=(const VarInt& other) const { return CompareTo(other) >= 0; } |
83 | | |
84 | | /** |
85 | | * (1) Encoding algorithm for unsigned varint (with no reserved bits): |
86 | | * --------------------------------------------------------------------------- |
87 | | * |
88 | | * - Convert the number to base 128 (Or look at binary representation in groups of 7) |
89 | | * - Prefix the representation by number of digits in base 128 |
90 | | * (Or number of binary digits / 7) - 1 in unary format followed by a zero as delimiter |
91 | | * unary format is a bunch of 1's (eg 5 = 11111) |
92 | | * |
93 | | * Bytes Max Value Representation |
94 | | * ----- --------- --------- |
95 | | * 1 2^7-1 (127) 0[v] |
96 | | * 2 2^14-1 (16383) 10[v] |
97 | | * 3 2^21-1 110[v] |
98 | | * ... |
99 | | * 8 2^56-1 11111110[v] |
100 | | * 9 2^63-1 111111110[v] |
101 | | * ... |
102 | | * |
103 | | * [v] denotes the binary representation of v. |
104 | | * |
105 | | * (2) Encoding of Signed VarInt: |
106 | | * --------------------------------------------------------------------------- |
107 | | * |
108 | | * - First bit is sign bit (0 for negative, 1 for positive) |
109 | | * - The rest is the unsigned representation of the absolute value, except for 2 differences: |
110 | | * > The size prefix is (|v|+1)/7 instead of |v|/7 (Where |v| is the bit length of v) |
111 | | * > For negative numbers, we have to complement everything (Note that we don't add 1, this |
112 | | * is one's complement, not two's complement. So comp(v) = 2^|v|-1-v, not 2^|v|-v) |
113 | | * |
114 | | * Bytes Max Magnitude Positives Negatives |
115 | | * ----- --------- --------- -------- |
116 | | * 1 2^6-1 (63) 10[v] 01{-v} |
117 | | * 2 2^13-1 (8191) 110[v] 001{-v} |
118 | | * 3 2^20-1 1110[v] 0001{-v} |
119 | | * ... |
120 | | * |
121 | | * {v} denotes the bitwise negation of the binary representation of v. |
122 | | * |
123 | | * - Note: Negative zero technically has a different encoding than positive zero: |
124 | | * 01111111 vs 10000000, But we forcibly canonicalize it to 10000000. |
125 | | * |
126 | | * (3) Encoding with reserved bits: |
127 | | * --------------------------------------------------------------------------- |
128 | | * |
129 | | * Above specs assume that the encoded form is integer number of bytes long, but for |
130 | | * decimal implementation, we only have 7+8k bits for the exponent varint since the first bit |
131 | | * is used up by the global sign bit. Hence we need a way to reserve some bits in the beginning |
132 | | * of the encoding, |
133 | | * |
134 | | * This is done by computing the size prefix as (|v|+1 + reserved_bits)/7. This way, we would have |
135 | | * some guaranteed bits in the beginning of [v], we can move those to the beginning of the |
136 | | * encoding without harming comparison. |
137 | | * |
138 | | * (*) Anatomy of the final encoding (before complement) |
139 | | * |
140 | | * [reserved bits] [sign bit] [unary size prefix (bunch of ones)] [delimeter (one zero)] [binary] |
141 | | * |
142 | | * Example -1000 with 3 reserved bits: |
143 | | * Before complement: |
144 | | * [000][0][11][0][00000001111101000] |
145 | | * After complement: |
146 | | * [000][0][00][1][11111110000010111] |
147 | | * |
148 | | * In practice, we first encode as [111][0][11][0][00000001111101000] |
149 | | */ |
150 | | |
151 | | |
152 | | // is_signed and reserved_bits are parameters, Decode and Encode are inverse operations |
153 | | // if and only if parameters are the same. |
154 | | // num_reserved_bits = 5 ensures that first 5 bits of the encoded form are zero. |
155 | | // This is a comparable encoding. |
156 | | // |
157 | | // In the above description of the encode algorithms, |
158 | | // section 1 corresponds to is_signed = false, num_reserved_bits = 0, section 2 corresponds to |
159 | | // is_Signed = true, num_reserved_bits = 0, and section 3 addresses the general case with |
160 | | // num_reserved_bits > 0. |
161 | | // Note that the first <num_reserved_bits> bits of the encoding is guaranteed to be zero. |
162 | | std::string EncodeToComparable(size_t num_reserved_bits = 0) const; |
163 | | |
164 | | // Convert the number to base 256 and encode each digit as a byte from high order to low order. |
165 | | // If negative x, encode 2^(8t) + x for the smallest value of t that ensures first bit is one. |
166 | | // If num_bytes is -1 then choose it based on number of digits in base 256 |
167 | | CHECKED_STATUS DecodeFromComparable( |
168 | | const Slice& slice, size_t *num_decoded_bytes, size_t num_reserved_bits = 0); |
169 | | |
170 | | CHECKED_STATUS DecodeFromComparable(const Slice& string); |
171 | | |
172 | | // Each byte in the encoding encodes two digits, and a continuation bit in the beginning. |
173 | | // The continuation bit is zero if and only if this is the last byte of the encoding. |
174 | | std::string EncodeToTwosComplement() const; |
175 | | |
176 | | CHECKED_STATUS DecodeFromTwosComplement(const std::string& string); |
177 | | |
178 | | const VarInt& Negate(); |
179 | | |
180 | | int Sign() const; |
181 | | |
182 | | private: |
183 | 22.4k | explicit VarInt(BigNumPtr&& rhs) : impl_(std::move(rhs)) {} |
184 | | |
185 | | BigNumPtr impl_; |
186 | | |
187 | | friend VarInt operator+(const VarInt& lhs, const VarInt& rhs); |
188 | | friend VarInt operator-(const VarInt& lhs, const VarInt& rhs); |
189 | | }; |
190 | | |
191 | | std::ostream& operator<<(std::ostream& os, const VarInt& v); |
192 | | |
193 | | |
194 | | } // namespace util |
195 | | } // namespace yb |
196 | | |
197 | | |
198 | | #endif // YB_UTIL_VARINT_H |