YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/util/fast_varint.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
#include "yb/util/fast_varint.h"
14
15
#include <array>
16
17
#include "yb/util/cast.h"
18
#include "yb/util/result.h"
19
#include "yb/util/status_format.h"
20
21
using std::string;
22
23
namespace yb {
24
namespace util {
25
26
namespace {
27
28
32.7k
std::array<uint8_t, 0x100> MakeUnsignedVarIntSize() {
29
32.7k
  std::array<uint8_t, 0x100> result;
30
8.40M
  for (int i = 0; i < 0x100; 
++i8.37M
) {
31
    // clz does not work when argument is zero, so we shift value and put 1 at the end.
32
8.37M
    result[i] = __builtin_clz((i << 1) ^ 0x1ff) - 23 + 1;
33
8.37M
  }
34
32.7k
  return result;
35
32.7k
}
36
37
auto kUnsignedVarIntSize = MakeUnsignedVarIntSize();
38
39
211
Status NotEnoughEncodedBytes(size_t decoded_varint_size, size_t bytes_provided) {
40
211
  return STATUS_SUBSTITUTE(
41
211
      Corruption,
42
211
      "Decoded VarInt size as $0 but only $1 bytes provided",
43
211
      decoded_varint_size, bytes_provided);
44
211
}
45
46
}  // anonymous namespace
47
48
2.65G
int SignedPositiveVarIntLength(uint64_t v) {
49
  // Compute the number of bytes needed to represent this number.
50
2.65G
  v >>= 6;
51
2.65G
  int n = 1;
52
6.57G
  while (v != 0) {
53
3.91G
    v >>= 7;
54
3.91G
    n += 1;
55
3.91G
  }
56
2.65G
  return n;
57
2.65G
}
58
59
/*
60
 * - First bit is sign bit (0 for negative, 1 for positive)
61
 * - The rest is the unsigned representation of the absolute value, except for 2 differences:
62
 *    > The size prefix is (|v|+1)/7 instead of |v|/7 (Where |v| is the bit length of v)
63
 *    > For negative numbers, we have to complement everything (Note that we don't add 1, this
64
 *        is one's complement, not two's complement. So comp(v) = 2^|v|-1-v, not 2^|v|-v)
65
 *
66
 *    Bytes  Max Magnitude        Positives         Negatives
67
 *    -----  ---------            ---------         --------
68
 *    1      2^6-1 (63)           10[v]             01{-v}
69
 *    2      2^13-1 (8191)        110[v]            001{-v}
70
 *    3      2^20-1               1110[v]           0001{-v}
71
 *    4      2^27-1               11110[v]          00001{-v}
72
 *    5      2^34-1               111110[v]         000001{-v}
73
 *    6      2^41-1               1111110[v]        0000001{-v}
74
 *    7      2^48-1               11111110 [v]      00000001 {-v}
75
 *    8      2^55-1               11111111 0[v]     00000000 1{-v}
76
 *    9      2^62-1               11111111 10[v]    00000000 01{-v}
77
 *   10      2^69-1               11111111 110[v]   00000000 001{-v}
78
 */
79
2.65G
void FastEncodeSignedVarInt(int64_t v, uint8_t *dest, size_t *size) {
80
2.65G
  bool negative = v < 0;
81
82
2.65G
  uint64_t uv = static_cast<uint64_t>(v);
83
2.65G
  if (negative) {
84
850M
    uv = 1 + ~uv;
85
850M
  }
86
87
2.65G
  const int n = SignedPositiveVarIntLength(uv);
88
2.65G
  *size = n;
89
90
  // For n = 1 we should get 128 (10000000) as the first byte.
91
  //   In this case we have 6 available bits to use in the first byte.
92
  //
93
  // For n = 2, we'll get ~63 = 192 (11000000) as the first byte.
94
  //   In this case we have 5 available bits in the first byte, and 8 in the second byte.
95
  //
96
  // For n = 3, we'll get ~31 = 224 (11100000) as the first byte.
97
  //   In this case we have 4 + 8 + 8 = 20 bits to use.
98
  // . . .
99
  //
100
  // For n = 8, we'll get 255 (11111111). Also, the next byte's highest bit is zero.
101
  //   In this case we have 7 available bits in the second byte and 8 in every byte afterwards,
102
  //   so 55 total available bits.
103
  //
104
  // For n = 9, we'll get 255 (11111111). Also, the next byte's two highest bits are 10.
105
  //   In this case we have 62 more bits to use.
106
  //
107
  // For n = 10, we'll get 255 (11111111). Also, the next byte's three highest bits are 110.
108
  //   In this case we have 69 (5 + 8 * 8) bits to use.
109
110
2.65G
  int i;
111
2.65G
  if (n == 10) {
112
1.50M
    dest[0] = 0xff;
113
    // With a 10-byte encoding we are using 8 whole bytes to represent the number, which takes 63
114
    // bits or fewer, so no bits from the second byte are used, and it is always 11000000 (binary).
115
1.50M
    dest[1] = 0xc0;
116
1.50M
    i = 2;
117
2.65G
  } else if (n == 9) {
118
10.5M
    dest[0] = 0xff;
119
    // With 9-byte encoding bits 56-61 (6 bits) of the number go into the lower 6 bits of the second
120
    // byte, so it becomes 10xxxxxx where xxxxxx are the said 6 bits.
121
10.5M
    dest[1] = 0x80 | (uv >> 56);
122
10.5M
    i = 2;
123
2.64G
  } else {
124
    // In these cases
125
2.64G
    dest[0] = ~((1 << (8 - n)) - 1) | (uv >> (8 * (n - 1)));
126
2.64G
    i = 1;
127
2.64G
  }
128
6.56G
  for (; i < n; 
++i3.90G
) {
129
3.90G
    dest[i] = uv >> (8 * (n - 1 - i));
130
3.90G
  }
131
2.65G
  if (negative) {
132
4.75G
    for (i = 0; i < n; 
++i3.90G
) {
133
3.90G
      dest[i] = ~dest[i];
134
3.90G
    }
135
850M
  }
136
2.65G
}
137
138
5
std::string FastEncodeSignedVarIntToStr(int64_t v) {
139
5
  string s;
140
5
  FastAppendSignedVarIntToBuffer(v, &s);
141
5
  return s;
142
5
}
143
144
// Array of mask for decoding signed var int. Used to extract valuable bits from source.
145
// It is faster than calculating mask on the fly.
146
const uint64_t kVarIntMasks[] = {
147
                        0, // unused
148
                  0x3fULL,
149
                0x1fffULL,
150
               0xfffffULL,
151
             0x7ffffffULL,
152
           0x3ffffffffULL,
153
         0x1ffffffffffULL,
154
        0xffffffffffffULL,
155
      0x7fffffffffffffULL,
156
    0x3fffffffffffffffULL,
157
    0xffffffffffffffffULL,
158
};
159
160
inline Result<std::pair<int64_t, size_t>> FastDecodeSignedVarInt(
161
6.67G
    const uint8_t* const src, size_t src_size, const uint8_t* read_allowed_from) {
162
6.67G
  typedef std::pair<int64_t, size_t> ResultType;
163
6.67G
  if (src_size == 0) {
164
2
    return STATUS(Corruption, "Cannot decode a variable-length integer of zero size");
165
2
  }
166
167
6.67G
  uint16_t header = src[0] << 8 | (src_size > 1 ? 
src[1]6.22G
:
0445M
);
168
  // When value is positive then negative = 0, otherwise it is 0xffffffffffffffff.
169
6.67G
  uint64_t negative = -static_cast<uint64_t>((header & 0x8000) == 0);
170
6.67G
  header ^= negative;
171
  // We need to count ones, so invert the header. 0x7fff - mask when we count them.
172
  // 0x20 used to put one after end of range where we are interested in them.
173
  //
174
  // For example:
175
  // A positive number with a three-byte representation.
176
  // header                    : 1110???? ????????  (binary)
177
  // (~header & 0x7fff) & 0x20 : 0001???? ??1?????
178
  // Number of leading zeros   : 19 (considering the __builtin_clz argument is a 32-bit integer).
179
  // n_bytes                   : 3
180
  //
181
  // A negative number with a 10-byte representation.
182
  // header                    : 01111111 11??????  (binary)
183
  // (~header & 0x7fff) & 0x20 : 00000000 001?????
184
  // Number of leading zeros   : 26
185
  // n_bytes                   : 10
186
  //
187
  // Argument of __builtin_clz is always unsigned int.
188
6.67G
  size_t n_bytes = __builtin_clz((~header & 0x7fff) | 0x20) - 16;
189
6.67G
  if (src_size < n_bytes) {
190
142
    return NotEnoughEncodedBytes(n_bytes, src_size);
191
142
  }
192
6.67G
  auto mask = kVarIntMasks[n_bytes];
193
194
6.67G
  const auto* const read_start = src - 8 + n_bytes;
195
196
#if defined(THREAD_SANITIZER) || defined(ADDRESS_SANITIZER)
197
  if (false) {
198
#else
199
6.68G
  if (
PREDICT_TRUE6.67G
(read_start >= read_allowed_from)) {
200
6.68G
#endif
201
202
    // We are interested in range [src, src+n_bytes), so we use 64bit number that ends at
203
    // src+n_bytes.
204
    // Then we use mask to drop header and bytes out of range.
205
    // Suppose we have negative number -a (where a > 0). Then at this point we would have ~a & mask.
206
    // To get -a, we should fill it with zeros to 64bits: "| (~mask & negative)".
207
    // And add one: "- negative".
208
    // In case of non negative number, negative == 0.
209
    // So number will be unchanged by those manipulations.
210
6.68G
    return ResultType(
211
6.68G
          ((__builtin_bswap64(*reinterpret_cast<const uint64_t*>(read_start)) & mask) |
212
6.68G
              (~mask & negative)) - negative,
213
6.68G
          n_bytes);
214
18.4E
  } else {
215
18.4E
    uint64_t temp = 0;
216
18.4E
    const auto* end = src + n_bytes;
217
18.4E
    for (const uint8_t* i = std::max(read_start, src); i != end; 
++i4.00k
) {
218
4.00k
      temp = (temp << 8) | *i;
219
4.00k
    }
220
18.4E
    return ResultType(((temp & mask) | (~mask & negative)) - negative, n_bytes);
221
18.4E
  }
222
6.67G
}
223
224
Status FastDecodeSignedVarInt(
225
    const uint8_t* src, size_t src_size, const uint8_t* read_allowed_from, int64_t* v,
226
370k
    size_t* decoded_size) {
227
370k
  auto temp = VERIFY_RESULT(FastDecodeSignedVarInt(src, src_size, read_allowed_from));
228
0
  *v = temp.first;
229
370k
  *decoded_size = temp.second;
230
370k
  return Status::OK();
231
370k
}
232
233
inline Result<std::pair<int64_t, size_t>> FastDecodeSignedVarIntUnsafe(
234
6.67G
    const uint8_t* const src, size_t src_size) {
235
6.67G
  return FastDecodeSignedVarInt(src, src_size, nullptr);
236
6.67G
}
237
238
Status FastDecodeSignedVarIntUnsafe(
239
22.3k
    const uint8_t* src, size_t src_size, int64_t* v, size_t* decoded_size) {
240
22.3k
  auto temp = 
VERIFY_RESULT22.1k
(FastDecodeSignedVarIntUnsafe(src, src_size));22.1k
241
0
  *v = temp.first;
242
22.1k
  *decoded_size = temp.second;
243
22.1k
  return Status::OK();
244
22.3k
}
245
246
487M
Result<int64_t> FastDecodeSignedVarIntUnsafe(Slice* slice) {
247
487M
  auto temp = 
VERIFY_RESULT487M
(FastDecodeSignedVarIntUnsafe(slice->data(), slice->size()));487M
248
0
  slice->remove_prefix(temp.second);
249
487M
  return temp.first;
250
487M
}
251
252
22.3k
Status FastDecodeSignedVarIntUnsafe(const std::string& encoded, int64_t* v, size_t* decoded_size) {
253
22.3k
  return FastDecodeSignedVarIntUnsafe(
254
22.3k
      to_uchar_ptr(encoded.c_str()), encoded.size(), v, decoded_size);
255
22.3k
}
256
257
787M
Status FastDecodeDescendingSignedVarIntUnsafe(yb::Slice *slice, int64_t *dest) {
258
787M
  auto temp = VERIFY_RESULT(FastDecodeSignedVarIntUnsafe(slice->data(), slice->size()));
259
0
  *dest = -temp.first;
260
787M
  slice->remove_prefix(temp.second);
261
787M
  return Status::OK();
262
787M
}
263
264
5.40G
Result<int64_t> FastDecodeDescendingSignedVarIntUnsafe(Slice* slice) {
265
5.40G
  auto temp = VERIFY_RESULT(FastDecodeSignedVarIntUnsafe(slice->data(), slice->size()));
266
0
  slice->remove_prefix(temp.second);
267
5.40G
  return -temp.first;
268
5.40G
}
269
270
32.5M
size_t UnsignedVarIntLength(uint64_t v) {
271
32.5M
  size_t result = 1;
272
32.5M
  v >>= 7;
273
162M
  while (v != 0) {
274
130M
    v >>= 7;
275
130M
    ++result;
276
130M
  }
277
32.5M
  return result;
278
32.5M
}
279
280
92
void FastAppendUnsignedVarIntToStr(uint64_t v, std::string* dest) {
281
92
  char buf[kMaxVarIntBufferSize];
282
92
  size_t len = 0;
283
92
  FastEncodeUnsignedVarInt(v, to_uchar_ptr(buf), &len);
284
92
  DCHECK_LE(len, 10);
285
92
  dest->append(buf, len);
286
92
}
287
288
32.5M
void FastEncodeUnsignedVarInt(uint64_t v, uint8_t *dest, size_t *size) {
289
32.5M
  const size_t n = UnsignedVarIntLength(v);
290
32.5M
  if (size)
291
32.5M
    *size = n;
292
293
32.5M
  size_t i;
294
32.5M
  if (n == 10) {
295
50.0k
    dest[0] = 0xff;
296
    // With a 10-byte encoding we are using 8 whole bytes to represent the number, which takes 63
297
    // bits or fewer, so no bits from the second byte are used, and it is always 11000000 (binary).
298
50.0k
    dest[1] = 0x80;
299
50.0k
    i = 2;
300
32.4M
  } else if (n == 9) {
301
3.64M
    dest[0] = 0xff;
302
    // With 9-byte encoding bits 56-61 (6 bits) of the number go into the lower 6 bits of the second
303
    // byte, so it becomes 10xxxxxx where xxxxxx are the said 6 bits.
304
3.64M
    dest[1] = (v >> 56);
305
3.64M
    i = 2;
306
28.8M
  } else {
307
    // In these cases
308
28.8M
    dest[0] = ~((1 << (9 - n)) - 1) | (v >> (8 * (n - 1)));
309
28.8M
    i = 1;
310
28.8M
  }
311
159M
  for (; i < n; 
++i126M
) {
312
126M
    dest[i] = v >> (8 * (n - 1 - i));
313
126M
  }
314
32.5M
}
315
316
CHECKED_STATUS FastDecodeUnsignedVarInt(
317
2.00M
    const uint8_t* src, size_t src_size, uint64_t* v, size_t* decoded_size) {
318
2.00M
  if (src_size == 0) {
319
1
    return STATUS(Corruption, "Cannot decode a variable-length integer of zero size");
320
1
  }
321
322
2.00M
  uint8_t first_byte = src[0];
323
2.00M
  size_t n_bytes = kUnsignedVarIntSize[first_byte];
324
2.00M
  if (src_size < n_bytes) {
325
69
    return NotEnoughEncodedBytes(n_bytes, src_size);
326
69
  }
327
2.00M
  if (n_bytes == 1) {
328
    // Fast path for single-byte decoding.
329
201k
    *v = first_byte & 0x7f;
330
201k
    *decoded_size = 1;
331
201k
    return Status::OK();
332
201k
  }
333
334
1.79M
  uint64_t result = 0;
335
1.79M
  size_t i = 0;
336
1.79M
  if (n_bytes == 9) {
337
397k
    if (src[1] & 0x80) {
338
100k
      n_bytes = 10;
339
100k
      result = src[1] & 0x3f;
340
100k
      i = 2;
341
100k
    }
342
397k
    if (src_size < n_bytes) {
343
0
      return NotEnoughEncodedBytes(n_bytes, src_size);
344
0
    }
345
1.40M
  } else {
346
1.40M
    result = src[0] & ((1 << (8 - n_bytes)) - 1);
347
1.40M
    i = 1;
348
1.40M
  }
349
350
10.8M
  
for (; 1.79M
i < n_bytes;
++i9.08M
) {
351
9.08M
    result = (result << 8) | static_cast<uint8_t>(src[i]);
352
9.08M
  }
353
354
1.79M
  *v = result;
355
1.79M
  *decoded_size = n_bytes;
356
1.79M
  return Status::OK();
357
1.79M
}
358
359
1.00M
Result<uint64_t> FastDecodeUnsignedVarInt(Slice* slice) {
360
1.00M
  size_t size = 0;
361
1.00M
  uint64_t value = 0;
362
1.00M
  auto status = FastDecodeUnsignedVarInt(slice->data(), slice->size(), &value, &size);
363
1.00M
  if (!status.ok()) {
364
70
    return status;
365
70
  }
366
1.00M
  slice->remove_prefix(size);
367
1.00M
  return value;
368
1.00M
}
369
370
79
Result<uint64_t> FastDecodeUnsignedVarInt(const Slice& slice) {
371
79
  Slice s(slice);
372
79
  const auto result = 
VERIFY_RESULT9
(FastDecodeUnsignedVarInt(&s));9
373
9
  if (s.size() != 0)
374
9
    return STATUS(Corruption, "Slice not fully decoded.");
375
0
  return result;
376
9
}
377
378
}  // namespace util
379
}  // namespace yb