YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/rocksdb/table/block_builder_internal.h
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (c) YugaByte, Inc.
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5
// in compliance with the License.  You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software distributed under the License
10
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11
// or implied.  See the License for the specific language governing permissions and limitations
12
// under the License.
13
//
14
//
15
16
#ifndef YB_ROCKSDB_TABLE_BLOCK_BUILDER_INTERNAL_H
17
#define YB_ROCKSDB_TABLE_BLOCK_BUILDER_INTERNAL_H
18
19
#include "yb/rocksdb/db/dbformat.h"
20
#include "yb/rocksdb/status.h"
21
22
#include "yb/util/logging.h"
23
#include "yb/util/slice.h"
24
#include "yb/util/tostring.h"
25
#include "yb/util/result.h"
26
#include "yb/util/status_format.h"
27
28
namespace rocksdb {
29
30
// Stores component sizes based on pattern described in FindComponents inside block_builder.cc.
31
struct ComponentSizes {
32
  size_t prev_key_non_shared_1_size = 0;
33
  size_t non_shared_1_size = 0;
34
  size_t shared_middle_size = 0;
35
  size_t prev_key_non_shared_2_size = 0;
36
  size_t non_shared_2_size = 0;
37
38
652k
  static inline yb::Result<size_t> SafeAdd(size_t a, size_t b) {
39
652k
    size_t sum = a + b;
40
652k
    SCHECK_GE(sum, a, InternalError, yb::Format("Overflow: $0 + $1", a, b));
41
652k
    return sum;
42
652k
  }
43
44
163k
  inline yb::Result<size_t> DebugGetPrevKeyPartSize() const {
45
163k
    return SafeAdd(
46
163k
        VERIFY_RESULT(SafeAdd(prev_key_non_shared_1_size, shared_middle_size)),
47
163k
        prev_key_non_shared_2_size);
48
163k
  }
49
50
163k
  inline yb::Result<size_t> DebugGetCurKeyPartSize() const {
51
163k
    return SafeAdd(
52
163k
        VERIFY_RESULT(SafeAdd(non_shared_1_size, shared_middle_size)), non_shared_2_size);
53
163k
  }
54
55
163k
  inline CHECKED_STATUS DebugVerify(const Slice& prev_key_part, const Slice& cur_key_part) const {
56
163k
    SCHECK_EQ(
57
163k
        VERIFY_RESULT(DebugGetPrevKeyPartSize()), prev_key_part.size(), InternalError,
58
163k
        "Prev key part size doesn't match");
59
163k
    SCHECK_EQ(
60
163k
        VERIFY_RESULT(DebugGetCurKeyPartSize()), cur_key_part.size(), InternalError,
61
163k
        "Current key part size doesn't match");
62
163k
    SCHECK(
63
163k
        shared_middle_size > 0 || non_shared_2_size == 0, InternalError,
64
163k
        "No shared middle, but non-empty non_shared_2");
65
163k
    return Status::OK();
66
163k
  }
67
68
0
  std::string ToString() const {
69
0
    return YB_STRUCT_TO_STRING(
70
0
        non_shared_1_size, prev_key_non_shared_1_size, shared_middle_size, non_shared_2_size,
71
0
        prev_key_non_shared_2_size);
72
0
  }
73
};
74
75
inline void ValidateThreeSharedPartsSizes(
76
    size_t shared_prefix_size, size_t last_internal_component_reuse_size,
77
    bool is_last_internal_component_inc, const ComponentSizes& rest_sizes,
78
580k
    const size_t prev_key_size, const size_t key_size) {
79
580k
  DCHECK(
80
580k
      (last_internal_component_reuse_size == 0 && !is_last_internal_component_inc) ||
81
580k
      (last_internal_component_reuse_size == kLastInternalComponentSize));
82
0
  DCHECK_EQ(
83
0
      shared_prefix_size + rest_sizes.prev_key_non_shared_1_size + rest_sizes.shared_middle_size +
84
0
          rest_sizes.prev_key_non_shared_2_size + last_internal_component_reuse_size,
85
0
      prev_key_size)
86
0
      << "shared_prefix_size: " << shared_prefix_size << " " << rest_sizes.ToString()
87
0
      << " last_internal_component_reuse_size: " << last_internal_component_reuse_size
88
0
      << " prev_key_size: " << prev_key_size;
89
0
  DCHECK_EQ(
90
0
      shared_prefix_size + rest_sizes.non_shared_1_size + rest_sizes.shared_middle_size +
91
0
          rest_sizes.non_shared_2_size + last_internal_component_reuse_size,
92
0
      key_size)
93
0
      << "shared_prefix_size: " << shared_prefix_size << " " << rest_sizes.ToString()
94
0
      << " last_internal_component_reuse_size: " << last_internal_component_reuse_size
95
0
      << " cur_key_size: " << key_size;
96
580k
}
97
98
// Takes the components sizes computed by ThreeSharedPartsEncoder::Calculate and encodes into
99
// buffer.
100
// Sizes encoding format is described inside the function body.
101
inline void EncodeThreeSharedPartsSizes(
102
    const size_t shared_prefix_size,
103
    const size_t last_internal_component_reuse_size,
104
    const bool is_last_internal_component_inc,
105
    const ComponentSizes& rest_sizes,
106
    const size_t prev_key_size,
107
    const size_t key_size,
108
    const size_t value_size,
109
580k
    std::string* buffer) {
110
580k
  ValidateThreeSharedPartsSizes(
111
580k
      shared_prefix_size, last_internal_component_reuse_size, is_last_internal_component_inc,
112
580k
      rest_sizes, prev_key_size, key_size);
113
114
580k
  const int64_t non_shared_1_size_delta =
115
580k
      static_cast<int64_t>(rest_sizes.non_shared_1_size) -
116
580k
      static_cast<int64_t>(rest_sizes.prev_key_non_shared_1_size);
117
580k
  const int64_t non_shared_2_size_delta =
118
580k
      static_cast<int64_t>(rest_sizes.non_shared_2_size) -
119
580k
      static_cast<int64_t>(rest_sizes.prev_key_non_shared_2_size);
120
121
0
  DVLOG_WITH_FUNC(4) << "shared_prefix_size: " << shared_prefix_size
122
0
            << " other_components_sizes: " << rest_sizes.ToString()
123
0
            << " non_shared_1_size_delta: " << non_shared_1_size_delta
124
0
            << " non_shared_2_size_delta: " << non_shared_2_size_delta
125
0
            << " last_internal_component_reuse_size: " << last_internal_component_reuse_size
126
0
            << " key_size_delta: "
127
0
            << static_cast<int64_t>(key_size) - static_cast<int64_t>(prev_key_size);
128
129
  // This is the most frequent case we are optimizing for based on our custom docdb encoding format.
130
580k
  const bool is_frequent_case_1 =
131
580k
      (last_internal_component_reuse_size > 0) &&
132
68.4k
      (rest_sizes.non_shared_1_size == 1) && (rest_sizes.non_shared_2_size == 1) &&
133
5
      (non_shared_1_size_delta == 0) && (non_shared_2_size_delta == 0);
134
135
  // We have different cases encoded differently into the buffer:
136
  // 1) is_frequent_case_1 == true (most of non-restart keys):
137
  //   <encoded_1><shared_prefix_size>
138
  // encoded_1 is: <value_size, is_last_internal_component_inc, is_frequent_case_1>
139
  // We encode these two flags together with the value_size, because if value_size is less than 32
140
  // bytes, then encoded_1 will still be encoded as 1 byte. Otherwise one more byte is negligible
141
  // comparing to size of the value that is not delta compressed.
142
  //
143
  // 2) is_frequent_case_1 == false: see below inside the code.
144
580k
  const auto encoded_1 =
145
580k
      (value_size << 2) | (is_last_internal_component_inc << 1) | is_frequent_case_1;
146
580k
  FastPutVarint64(buffer, encoded_1);
147
580k
  if (is_frequent_case_1) {
148
0
    PutVarint32(buffer, static_cast<uint32_t>(shared_prefix_size));
149
580k
  } else {
150
580k
    const bool is_something_reused = rest_sizes.non_shared_1_size < key_size;
151
580k
#ifndef NDEBUG
152
580k
    const auto reused_size =
153
580k
        shared_prefix_size + rest_sizes.shared_middle_size + last_internal_component_reuse_size;
154
580k
    if (is_something_reused) {
155
194k
      DCHECK_GT(reused_size, 0);
156
385k
    } else {
157
385k
      DCHECK_EQ(reused_size, 0);
158
385k
      DCHECK_EQ(rest_sizes.non_shared_1_size, key_size);
159
385k
    }
160
580k
#endif // NDEBUG
161
580k
    if (is_something_reused) {
162
      // 2.1) something is reused from the prev key:
163
194k
      if (last_internal_component_reuse_size > 0
164
68.4k
          && non_shared_1_size_delta == 0
165
18.9k
          && (non_shared_2_size_delta == 0 || non_shared_2_size_delta == 1)
166
11.7k
          && rest_sizes.non_shared_1_size < 8
167
11.6k
          && rest_sizes.non_shared_2_size < 4) {
168
        // 2.1.1) last internal component is reused, non_shared_1_size_delta == 0,
169
        //   non_shared_1_size_delta == 0 or 1, non_shared_1_size and non_shared_2_size fits within
170
        //   encoded_2 (one byte) in a following way:
171
        //     bit 0: 1
172
        //     bit 1: 0
173
        //     bit 2: non_shared_2_size_delta == 1
174
        //     bits 3-5: non_shared_1_size
175
        //     bits 6-7: non_shared_2_size
176
        // All sizes are encoded as:
177
        // <encoded_1><encoded_2><shared_prefix_size>
178
11.5k
        const auto encoded_2 = 0b01 | ((non_shared_2_size_delta == 1) << 2) |
179
11.5k
                               (rest_sizes.non_shared_1_size << 3) |
180
11.5k
                               (rest_sizes.non_shared_2_size << 6);
181
11.5k
        DCHECK_LE(encoded_2, 0xff);
182
0
        DVLOG_WITH_FUNC(4) << "encoded_2: " << encoded_2;
183
11.5k
        PutFixed8(buffer, encoded_2);
184
182k
      } else {
185
        // 2.1.2) rest of the cases when we reuse something from prev key are encoded as:
186
        //   <encoded_1><encoded_2><non_shared_1_size>[<non_shared_1_size_delta>]
187
        //     [<non_shared_2_size>][<non_shared_2_size_delta>]<shared_prefix_size>
188
        //
189
        //   encoded_2 (one byte) is:
190
        //     bit 0: 1
191
        //     bit 1: 1
192
        //     bit 2: whether we reuse last internal component
193
        //     bit 3: non_shared_1_size_delta != 0
194
        //     bit 4: non_shared_2_size != 0
195
        //     bit 5: non_shared_2_size_delta != 0
196
        //     bits 6-7: not used
197
182k
        const auto encoded_2 = 0b11 | ((last_internal_component_reuse_size > 0) << 2) |
198
182k
                               ((non_shared_1_size_delta != 0) << 3) |
199
182k
                               ((rest_sizes.non_shared_2_size != 0) << 4) |
200
182k
                               ((non_shared_2_size_delta != 0) << 5);
201
182k
        DCHECK_LE(encoded_2, 0xff);
202
0
        DVLOG_WITH_FUNC(4) << "encoded_2: " << encoded_2;
203
182k
        PutFixed8(buffer, encoded_2);
204
205
182k
        PutVarint32(buffer, static_cast<uint32_t>(rest_sizes.non_shared_1_size));
206
182k
        if (non_shared_1_size_delta != 0) {
207
69.6k
          PutSignedVarint(buffer, non_shared_1_size_delta);
208
69.6k
        }
209
182k
        if (rest_sizes.non_shared_2_size != 0) {
210
30.9k
          PutVarint32(buffer, static_cast<uint32_t>(rest_sizes.non_shared_2_size));
211
30.9k
        }
212
182k
        if (non_shared_2_size_delta != 0) {
213
41.5k
          PutSignedVarint(buffer, non_shared_2_size_delta);
214
41.5k
        }
215
182k
      }
216
194k
      PutVarint32(buffer, static_cast<uint32_t>(shared_prefix_size));
217
385k
    } else {
218
      // 2.2) nothing is reused from the prev key (mostly restart keys):
219
385k
      if (key_size < (1 << 7) && key_size > 0) {
220
        // 0 < key_size < 128
221
        // <encoded_1><encoded_2>
222
        //   encoded_2 (one byte) is:
223
        //     bit 0: 0
224
        //     bits 1-7: key_size
225
368k
        const auto encoded_2 = 0b0 | (key_size << 1);
226
368k
        DCHECK_LE(encoded_2, 0xff);
227
0
        DVLOG_WITH_FUNC(4) << "encoded_2: " << encoded_2;
228
368k
        PutFixed8(buffer, encoded_2);
229
17.2k
      } else {
230
        // key_size == 0 || key_size > 128
231
        // <encoded_1><encoded_2 = 0><key_size>
232
17.2k
        const auto encoded_2 = 0;
233
0
        DVLOG_WITH_FUNC(4) << "encoded_2: " << encoded_2;
234
17.2k
        PutFixed8(buffer, encoded_2);
235
17.2k
        PutVarint32(buffer, static_cast<uint32_t>(key_size));
236
17.2k
      }
237
385k
    }
238
580k
  }
239
580k
}
240
241
}  // namespace rocksdb
242
243
#endif // YB_ROCKSDB_TABLE_BLOCK_BUILDER_INTERNAL_H