YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/util/varint.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_UTIL_VARINT_H
15
#define YB_UTIL_VARINT_H
16
17
#include <openssl/ossl_typ.h>
18
19
#include "yb/util/slice.h"
20
21
namespace yb {
22
namespace util {
23
24
class BigNumDeleter {
25
 public:
26
  void operator()(BIGNUM* bn) const;
27
};
28
29
typedef std::unique_ptr<BIGNUM, BigNumDeleter> BigNumPtr;
30
31
// VarInt holds a sequence of digits d1, d2 ... and a radix r.
32
// The digits should always to be in range 0 <= d < r, and satisfy d_n-1 > 0, 0 < r < 2^15
33
// (So that int multiplication never overflows).
34
// The representation is little endian: The underlying number is d1 + d2 * r + d3 * r^2 + ...
35
//
36
// The provided EncodeToComparable() function gives a byte sequence from which the size can be
37
// decoded, and it is guaranteed that a < b if and only if Enc(a) < Enc(b) in lexicographical byte
38
// order.
39
//
40
// Two other ways to encode VarInt are also provided, which are used for Decimal encodings.
41
42
// For signed int this metafunction returns int64_t, for unsigned - uint64_t.
43
template<class T>
44
class CoveringInt {
45
 public:
46
  typedef typename std::decay<T>::type CleanedT;
47
  typedef typename std::conditional<std::is_signed<CleanedT>::value, int64_t, uint64_t>::type type;
48
};
49
50
class VarInt {
51
 public:
52
  VarInt();
53
  VarInt(const VarInt& var_int);
54
22.9k
  VarInt(VarInt&& rhs) = default;
55
  explicit VarInt(int64_t int64_val);
56
57
  VarInt& operator=(const VarInt& rhs);
58
22.2k
  VarInt& operator=(VarInt&& rhs) = default;
59
60
  std::string ToString() const;
61
62
  Result<int64_t> ToInt64() const;
63
64
  // The input is expected to be of the form (-)?[0-9]+, whitespace is not allowed. Use this
65
  // after removing whitespace.
66
  CHECKED_STATUS FromString(const std::string& input);
67
  CHECKED_STATUS FromString(const char* cstr);
68
69
  static Result<VarInt> CreateFromString(const std::string& input);
70
71
  static Result<VarInt> CreateFromString(const char* input);
72
73
  // <0, =0, >0 if this <,=,> other numerically.
74
  int CompareTo(const VarInt& other) const;
75
76
  // Note: operator== is numerical comparison, and can yield equality between different bases.
77
65.8k
  bool operator==(const VarInt& other) const { return CompareTo(other) == 0; }
78
0
  bool operator!=(const VarInt& other) const { return CompareTo(other) != 0; }
79
601
  bool operator<(const VarInt& other) const { return CompareTo(other) < 0; }
80
  bool operator<=(const VarInt& other) const { return CompareTo(other) <= 0; }
81
  bool operator>(const VarInt& other) const { return CompareTo(other) > 0; }
82
273
  bool operator>=(const VarInt& other) const { return CompareTo(other) >= 0; }
83
84
  /**
85
   * (1) Encoding algorithm for unsigned varint (with no reserved bits):
86
   * ---------------------------------------------------------------------------
87
   *
88
   * - Convert the number to base 128 (Or look at binary representation in groups of 7)
89
   * - Prefix the representation by number of digits in base 128
90
   * (Or number of binary digits / 7) - 1 in unary format followed by a zero as delimiter
91
   * unary format is a bunch of 1's (eg 5 = 11111)
92
   *
93
   * Bytes  Max Value            Representation
94
   * -----  ---------            ---------
95
   * 1      2^7-1 (127)          0[v]
96
   * 2      2^14-1 (16383)       10[v]
97
   * 3      2^21-1               110[v]
98
   * ...
99
   * 8      2^56-1               11111110[v]
100
   * 9      2^63-1               111111110[v]
101
   * ...
102
   *
103
   * [v] denotes the binary representation of v.
104
   *
105
   * (2) Encoding of Signed VarInt:
106
   * ---------------------------------------------------------------------------
107
   *
108
   * - First bit is sign bit (0 for negative, 1 for positive)
109
   * - The rest is the unsigned representation of the absolute value, except for 2 differences:
110
   *    > The size prefix is (|v|+1)/7 instead of |v|/7 (Where |v| is the bit length of v)
111
   *    > For negative numbers, we have to complement everything (Note that we don't add 1, this
112
   *        is one's complement, not two's complement. So comp(v) = 2^|v|-1-v, not 2^|v|-v)
113
   *
114
   *    Bytes  Max Magnitude        Positives       Negatives
115
   *    -----  ---------            ---------        --------
116
   *    1      2^6-1 (63)           10[v]            01{-v}
117
   *    2      2^13-1 (8191)        110[v]           001{-v}
118
   *    3      2^20-1               1110[v]          0001{-v}
119
   *    ...
120
   *
121
   * {v} denotes the bitwise negation of the binary representation of v.
122
   *
123
   * - Note: Negative zero technically has a different encoding than positive zero:
124
   *         01111111 vs 10000000, But we forcibly canonicalize it to 10000000.
125
   *
126
   * (3) Encoding with reserved bits:
127
   * ---------------------------------------------------------------------------
128
   *
129
   * Above specs assume that the encoded form is integer number of bytes long, but for
130
   * decimal implementation, we only have 7+8k bits for the exponent varint since the first bit
131
   * is used up by the global sign bit. Hence we need a way to reserve some bits in the beginning
132
   * of the encoding,
133
   *
134
   * This is done by computing the size prefix as (|v|+1 + reserved_bits)/7. This way, we would have
135
   * some guaranteed bits in the beginning of [v], we can move those to the beginning of the
136
   * encoding without harming comparison.
137
   *
138
   * (*) Anatomy of the final encoding (before complement)
139
   *
140
   * [reserved bits] [sign bit] [unary size prefix (bunch of ones)] [delimeter (one zero)] [binary]
141
   *
142
   * Example -1000 with 3 reserved bits:
143
   * Before complement:
144
   * [000][0][11][0][00000001111101000]
145
   * After complement:
146
   * [000][0][00][1][11111110000010111]
147
   *
148
   * In practice, we first encode as [111][0][11][0][00000001111101000]
149
   */
150
151
152
  // is_signed and reserved_bits are parameters, Decode and Encode are inverse operations
153
  // if and only if parameters are the same.
154
  // num_reserved_bits = 5 ensures that first 5 bits of the encoded form are zero.
155
  // This is a comparable encoding.
156
  //
157
  // In the above description of the encode algorithms,
158
  // section 1 corresponds to is_signed = false, num_reserved_bits = 0, section 2 corresponds to
159
  // is_Signed = true, num_reserved_bits = 0, and section 3 addresses the general case with
160
  // num_reserved_bits > 0.
161
  // Note that the first <num_reserved_bits> bits of the encoding is guaranteed to be zero.
162
  std::string EncodeToComparable(size_t num_reserved_bits = 0) const;
163
164
  // Convert the number to base 256 and encode each digit as a byte from high order to low order.
165
  // If negative x, encode 2^(8t) + x for the smallest value of t that ensures first bit is one.
166
  // If num_bytes is -1 then choose it based on number of digits in base 256
167
  CHECKED_STATUS DecodeFromComparable(
168
      const Slice& slice, size_t *num_decoded_bytes, size_t num_reserved_bits = 0);
169
170
  CHECKED_STATUS DecodeFromComparable(const Slice& string);
171
172
  // Each byte in the encoding encodes two digits, and a continuation bit in the beginning.
173
  // The continuation bit is zero if and only if this is the last byte of the encoding.
174
  std::string EncodeToTwosComplement() const;
175
176
  CHECKED_STATUS DecodeFromTwosComplement(const std::string& string);
177
178
  const VarInt& Negate();
179
180
  int Sign() const;
181
182
 private:
183
23.3k
  explicit VarInt(BigNumPtr&& rhs) : impl_(std::move(rhs)) {}
184
185
  BigNumPtr impl_;
186
187
  friend VarInt operator+(const VarInt& lhs, const VarInt& rhs);
188
  friend VarInt operator-(const VarInt& lhs, const VarInt& rhs);
189
};
190
191
std::ostream& operator<<(std::ostream& os, const VarInt& v);
192
193
194
} // namespace util
195
} // namespace yb
196
197
198
#endif // YB_UTIL_VARINT_H