YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_key.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_DOCDB_DOC_KEY_H_
15
#define YB_DOCDB_DOC_KEY_H_
16
17
#include <ostream>
18
#include <vector>
19
20
#include <boost/container/small_vector.hpp>
21
#include <boost/optional/optional.hpp>
22
23
#include "yb/common/constants.h"
24
25
#include "yb/docdb/docdb_fwd.h"
26
#include "yb/docdb/key_bytes.h"
27
#include "yb/docdb/primitive_value.h"
28
29
#include "yb/rocksdb/env.h"
30
#include "yb/rocksdb/filter_policy.h"
31
32
#include "yb/util/ref_cnt_buffer.h"
33
#include "yb/util/slice.h"
34
#include "yb/util/strongly_typed_bool.h"
35
#include "yb/util/uuid.h"
36
37
namespace yb {
38
namespace docdb {
39
40
// ------------------------------------------------------------------------------------------------
41
// DocKey
42
// ------------------------------------------------------------------------------------------------
43
44
// A key that allows us to locate a document. This is the prefix of all RocksDB keys of records
45
// inside this document. A document key contains:
46
//   - An optional ID (cotable id or colocation id).
47
//   - An optional fixed-width hash prefix.
48
//   - A group of primitive values representing "hashed" components (this is what the hash is
49
//     computed based on, so this group is present/absent together with the hash).
50
//   - A group of "range" components suitable for doing ordered scans.
51
//
52
// The encoded representation of the key is as follows:
53
//   - Optional ID:
54
//     * For cotable id, the byte ValueType::kTableId followed by a sixteen byte UUID.
55
//     * For colocation id, the byte ValueType::kColocationId followed by a four byte
56
//       ColocationId.
57
//   - Optional fixed-width hash prefix, followed by hashed components:
58
//     * The byte ValueType::kUInt16Hash, followed by two bytes of the hash prefix.
59
//     * Hashed components:
60
//       1. Each hash component consists of a type byte (ValueType) followed by the encoded
61
//          representation of the respective type (see PrimitiveValue's key encoding).
62
//       2. ValueType::kGroupEnd terminates the sequence.
63
//   - Range components are stored similarly to the hashed components:
64
//     1. Each range component consists of a type byte (ValueType) followed by the encoded
65
//        representation of the respective type (see PrimitiveValue's key encoding).
66
//     2. ValueType::kGroupEnd terminates the sequence.
67
YB_DEFINE_ENUM(
68
    DocKeyPart,
69
    (kUpToHashCode)
70
    (kUpToHash)
71
    (kUpToId)
72
    // Includes all doc key components up to hashed ones. If there are no hashed components -
73
    // includes the first range component.
74
    (kUpToHashOrFirstRange)
75
    (kWholeDocKey));
76
77
class DocKeyDecoder;
78
79
YB_STRONGLY_TYPED_BOOL(HybridTimeRequired)
80
81
// Whether to allow parts of the range component of a doc key that should not be present in stored
82
// doc key, but could be used during read, for instance kLowest and kHighest.
83
YB_STRONGLY_TYPED_BOOL(AllowSpecial)
84
85
class DocKey {
86
 public:
87
  // Constructs an empty document key with no hash component.
88
  DocKey();
89
90
  // Construct a document key with only a range component, but no hashed component.
91
  explicit DocKey(std::vector<PrimitiveValue> range_components);
92
93
  // Construct a document key including a hashed component and a range component. The hash value has
94
  // to be calculated outside of the constructor, and we're not assuming any specific hash function
95
  // here.
96
  // @param hash A hash value calculated using the appropriate hash function based on
97
  //             hashed_components.
98
  // @param hashed_components Components of the key that go into computing the hash prefix.
99
  // @param range_components Components of the key that we want to be able to do range scans on.
100
  DocKey(DocKeyHash hash,
101
         std::vector<PrimitiveValue> hashed_components,
102
         std::vector<PrimitiveValue> range_components = std::vector<PrimitiveValue>());
103
104
  DocKey(const Uuid& cotable_id,
105
         DocKeyHash hash,
106
         std::vector<PrimitiveValue> hashed_components,
107
         std::vector<PrimitiveValue> range_components = std::vector<PrimitiveValue>());
108
109
  DocKey(ColocationId colocation_id,
110
         DocKeyHash hash,
111
         std::vector<PrimitiveValue> hashed_components,
112
         std::vector<PrimitiveValue> range_components = std::vector<PrimitiveValue>());
113
114
  explicit DocKey(const Uuid& cotable_id);
115
116
  explicit DocKey(ColocationId colocation_id);
117
118
  // Constructors to create a DocKey for the given schema to support co-located tables.
119
  explicit DocKey(const Schema& schema);
120
  DocKey(const Schema& schema, DocKeyHash hash);
121
  DocKey(const Schema& schema, std::vector<PrimitiveValue> range_components);
122
  DocKey(const Schema& schema, DocKeyHash hash,
123
         std::vector<PrimitiveValue> hashed_components,
124
         std::vector<PrimitiveValue> range_components = std::vector<PrimitiveValue>());
125
126
  KeyBytes Encode() const;
127
  void AppendTo(KeyBytes* out) const;
128
129
  // Encodes DocKey to binary representation returning result as RefCntPrefix.
130
  RefCntPrefix EncodeAsRefCntPrefix() const;
131
132
  // Resets the state to an empty document key.
133
  void Clear();
134
135
  // Clear the range components of the document key only.
136
  void ClearRangeComponents();
137
138
  // Resize the range components:
139
  //  - drop elements (primitive values) from the end if new_size is smaller than the old size.
140
  //  - append default primitive values (kNullLow) if new_size is bigger than the old size.
141
  void ResizeRangeComponents(int new_size);
142
143
  const Uuid& cotable_id() const {
144
    return cotable_id_;
145
  }
146
147
99.2M
  bool has_cotable_id() const {
148
99.2M
    return !cotable_id_.IsNil();
149
99.2M
  }
150
151
  ColocationId colocation_id() const {
152
    return colocation_id_;
153
  }
154
155
  bool has_colocation_id() const {
156
    return colocation_id_ != kColocationIdNotSet;
157
  }
158
159
16.9M
  DocKeyHash hash() const {
160
16.9M
    return hash_;
161
16.9M
  }
162
163
2.78k
  const std::vector<PrimitiveValue>& hashed_group() const {
164
2.78k
    return hashed_group_;
165
2.78k
  }
166
167
3.21k
  const std::vector<PrimitiveValue>& range_group() const {
168
3.21k
    return range_group_;
169
3.21k
  }
170
171
891k
  std::vector<PrimitiveValue>& hashed_group() {
172
891k
    return hashed_group_;
173
891k
  }
174
175
95.3M
  std::vector<PrimitiveValue>& range_group() {
176
95.3M
    return range_group_;
177
95.3M
  }
178
179
  // Decodes a document key from the given RocksDB key.
180
  // slice (in/out) - a slice corresponding to a RocksDB key. Any consumed bytes are removed.
181
  // part_to_decode specifies which part of key to decode.
182
  CHECKED_STATUS DecodeFrom(
183
      Slice* slice,
184
      DocKeyPart part_to_decode = DocKeyPart::kWholeDocKey,
185
      AllowSpecial allow_special = AllowSpecial::kFalse);
186
187
  // Decodes a document key from the given RocksDB key similar to the above but return the number
188
  // of bytes decoded from the input slice.
189
  Result<size_t> DecodeFrom(
190
      const Slice& slice,
191
      DocKeyPart part_to_decode = DocKeyPart::kWholeDocKey,
192
      AllowSpecial allow_special = AllowSpecial::kFalse);
193
194
  // Splits given RocksDB key into vector of slices that forms range_group of document key.
195
  static CHECKED_STATUS PartiallyDecode(Slice* slice,
196
                                        boost::container::small_vector_base<Slice>* out);
197
198
  // Decode just the hash code of a DocKey.
199
  static Result<DocKeyHash> DecodeHash(const Slice& slice);
200
201
  static Result<size_t> EncodedSize(
202
      Slice slice, DocKeyPart part, AllowSpecial allow_special = AllowSpecial::kFalse);
203
204
  // Returns size of the encoded `part` of DocKey and whether it has hash code present.
205
  static Result<std::pair<size_t, bool>> EncodedSizeAndHashPresent(Slice slice, DocKeyPart part);
206
207
  // Returns size of encoded hash part and whole part of DocKey.
208
  static Result<std::pair<size_t, size_t>> EncodedHashPartAndDocKeySizes(
209
      Slice slice, AllowSpecial allow_special = AllowSpecial::kFalse);
210
211
  // Decode the current document key from the given slice, but expect all bytes to be consumed, and
212
  // return an error status if that is not the case.
213
  CHECKED_STATUS FullyDecodeFrom(const rocksdb::Slice& slice);
214
215
  // Converts the document key to a human-readable representation.
216
  std::string ToString(AutoDecodeKeys auto_decode_keys = AutoDecodeKeys::kFalse) const;
217
  static std::string DebugSliceToString(Slice slice);
218
219
  // Check if it is an empty key.
220
22.5M
  bool empty() const {
221
22.5M
    return !hash_present_ && 
range_group_.empty()22.4M
;
222
22.5M
  }
223
224
  bool operator ==(const DocKey& other) const;
225
226
85.2k
  bool operator !=(const DocKey& other) const {
227
85.2k
    return !(*this == other);
228
85.2k
  }
229
230
  bool HashedComponentsEqual(const DocKey& other) const;
231
232
  void AddRangeComponent(const PrimitiveValue& val);
233
234
  void SetRangeComponent(const PrimitiveValue& val, int idx);
235
236
  int CompareTo(const DocKey& other) const;
237
238
16
  bool operator <(const DocKey& other) const {
239
16
    return CompareTo(other) < 0;
240
16
  }
241
242
0
  bool operator <=(const DocKey& other) const {
243
0
    return CompareTo(other) <= 0;
244
0
  }
245
246
16
  bool operator >(const DocKey& other) const {
247
16
    return CompareTo(other) > 0;
248
16
  }
249
250
0
  bool operator >=(const DocKey& other) const {
251
0
    return CompareTo(other) >= 0;
252
0
  }
253
254
  bool BelongsTo(const Schema& schema) const;
255
256
  void set_cotable_id(const Uuid& cotable_id) {
257
    if (!cotable_id.IsNil()) {
258
      DCHECK_EQ(colocation_id_, kColocationIdNotSet);
259
    }
260
    cotable_id_ = cotable_id;
261
  }
262
263
  void set_colocation_id(const ColocationId colocation_id) {
264
    if (colocation_id != kColocationIdNotSet) {
265
      DCHECK(cotable_id_.IsNil());
266
    }
267
    colocation_id_ = colocation_id;
268
  }
269
270
704k
  void set_hash(DocKeyHash hash) {
271
704k
    hash_ = hash;
272
704k
    hash_present_ = true;
273
704k
  }
274
275
21.8M
  bool has_hash() const {
276
21.8M
    return hash_present_;
277
21.8M
  }
278
279
  // Converts a redis string key to a doc key
280
  static DocKey FromRedisKey(uint16_t hash, const std::string& key);
281
  static KeyBytes EncodedFromRedisKey(uint16_t hash, const std::string &key);
282
283
 private:
284
  class DecodeFromCallback;
285
  friend class DecodeFromCallback;
286
287
  template<class Callback>
288
  static CHECKED_STATUS DoDecode(DocKeyDecoder* decoder,
289
                                 DocKeyPart part_to_decode,
290
                                 AllowSpecial allow_special,
291
                                 const Callback& callback);
292
293
  // Uuid of the non-primary table this DocKey belongs to co-located in a tablet. Nil for the
294
  // primary or single-tenant table.
295
  Uuid cotable_id_;
296
297
  // Colocation ID used to distinguish a table within a colocation group.
298
  // kColocationIdNotSet for a primary or single-tenant table.
299
  ColocationId colocation_id_;
300
301
  // TODO: can we get rid of this field and just use !hashed_group_.empty() instead?
302
  bool hash_present_;
303
304
  DocKeyHash hash_;
305
  std::vector<PrimitiveValue> hashed_group_;
306
  std::vector<PrimitiveValue> range_group_;
307
};
308
309
template <class Collection>
310
117M
void AppendDocKeyItems(const Collection& doc_key_items, KeyBytes* result) {
311
117M
  for (const auto& item : doc_key_items) {
312
110M
    item.AppendToKey(result);
313
110M
  }
314
117M
  result->AppendGroupEnd();
315
117M
}
void yb::docdb::AppendDocKeyItems<std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > >(std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > const&, yb::docdb::KeyBytes*)
Line
Count
Source
310
117M
void AppendDocKeyItems(const Collection& doc_key_items, KeyBytes* result) {
311
117M
  for (const auto& item : doc_key_items) {
312
110M
    item.AppendToKey(result);
313
110M
  }
314
117M
  result->AppendGroupEnd();
315
117M
}
void yb::docdb::AppendDocKeyItems<std::initializer_list<yb::docdb::PrimitiveValue> >(std::initializer_list<yb::docdb::PrimitiveValue> const&, yb::docdb::KeyBytes*)
Line
Count
Source
310
88.7k
void AppendDocKeyItems(const Collection& doc_key_items, KeyBytes* result) {
311
88.7k
  for (const auto& item : doc_key_items) {
312
53.8k
    item.AppendToKey(result);
313
53.8k
  }
314
88.7k
  result->AppendGroupEnd();
315
88.7k
}
316
317
class DocKeyEncoderAfterHashStep {
318
 public:
319
70.5M
  explicit DocKeyEncoderAfterHashStep(KeyBytes* out) : out_(out) {}
320
321
  template <class Collection>
322
70.6M
  void Range(const Collection& range_group) {
323
70.6M
    AppendDocKeyItems(range_group, out_);
324
70.6M
  }
void yb::docdb::DocKeyEncoderAfterHashStep::Range<std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > >(std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > const&)
Line
Count
Source
322
70.5M
  void Range(const Collection& range_group) {
323
70.5M
    AppendDocKeyItems(range_group, out_);
324
70.5M
  }
void yb::docdb::DocKeyEncoderAfterHashStep::Range<std::initializer_list<yb::docdb::PrimitiveValue> >(std::initializer_list<yb::docdb::PrimitiveValue> const&)
Line
Count
Source
322
44.3k
  void Range(const Collection& range_group) {
323
44.3k
    AppendDocKeyItems(range_group, out_);
324
44.3k
  }
325
326
 private:
327
  KeyBytes* out_;
328
};
329
330
class DocKeyEncoderAfterTableIdStep {
331
 public:
332
71.1M
  explicit DocKeyEncoderAfterTableIdStep(KeyBytes* out) : out_(out) {
333
71.1M
  }
334
335
  template <class Collection>
336
  DocKeyEncoderAfterHashStep Hash(
337
52.4M
      bool hash_present, uint16_t hash, const Collection& hashed_group) {
338
52.4M
    if (!hash_present) {
339
23.4M
      return DocKeyEncoderAfterHashStep(out_);
340
23.4M
    }
341
342
29.0M
    return Hash(hash, hashed_group);
343
52.4M
  }
344
345
  template <class Collection>
346
47.2M
  DocKeyEncoderAfterHashStep Hash(uint16_t hash, const Collection& hashed_group) {
347
    // We are not setting the "more items in group" bit on the hash field because it is not part
348
    // of "hashed" or "range" groups.
349
47.2M
    AppendHash(hash, out_);
350
47.2M
    AppendDocKeyItems(hashed_group, out_);
351
352
47.2M
    return DocKeyEncoderAfterHashStep(out_);
353
47.2M
  }
yb::docdb::DocKeyEncoderAfterHashStep yb::docdb::DocKeyEncoderAfterTableIdStep::Hash<std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > >(unsigned short, std::__1::vector<yb::docdb::PrimitiveValue, std::__1::allocator<yb::docdb::PrimitiveValue> > const&)
Line
Count
Source
346
47.1M
  DocKeyEncoderAfterHashStep Hash(uint16_t hash, const Collection& hashed_group) {
347
    // We are not setting the "more items in group" bit on the hash field because it is not part
348
    // of "hashed" or "range" groups.
349
47.1M
    AppendHash(hash, out_);
350
47.1M
    AppendDocKeyItems(hashed_group, out_);
351
352
47.1M
    return DocKeyEncoderAfterHashStep(out_);
353
47.1M
  }
yb::docdb::DocKeyEncoderAfterHashStep yb::docdb::DocKeyEncoderAfterTableIdStep::Hash<std::initializer_list<yb::docdb::PrimitiveValue> >(unsigned short, std::initializer_list<yb::docdb::PrimitiveValue> const&)
Line
Count
Source
346
44.3k
  DocKeyEncoderAfterHashStep Hash(uint16_t hash, const Collection& hashed_group) {
347
    // We are not setting the "more items in group" bit on the hash field because it is not part
348
    // of "hashed" or "range" groups.
349
44.3k
    AppendHash(hash, out_);
350
44.3k
    AppendDocKeyItems(hashed_group, out_);
351
352
44.3k
    return DocKeyEncoderAfterHashStep(out_);
353
44.3k
  }
354
355
  template <class HashCollection, class RangeCollection>
356
  void HashAndRange(uint16_t hash, const HashCollection& hashed_group,
357
18.1M
                    const RangeCollection& range_collection) {
358
18.1M
    Hash(hash, hashed_group).Range(range_collection);
359
18.1M
  }
360
361
  void HashAndRange(uint16_t hash, const std::initializer_list<PrimitiveValue>& hashed_group,
362
44.3k
                    const std::initializer_list<PrimitiveValue>& range_collection) {
363
44.3k
    Hash(hash, hashed_group).Range(range_collection);
364
44.3k
  }
365
366
 private:
367
  KeyBytes* out_;
368
};
369
370
class DocKeyEncoder {
371
 public:
372
71.1M
  explicit DocKeyEncoder(KeyBytes* out) : out_(out) {}
373
374
  DocKeyEncoderAfterTableIdStep CotableId(const Uuid& cotable_id);
375
376
  DocKeyEncoderAfterTableIdStep ColocationId(const ColocationId colocation_id);
377
378
  DocKeyEncoderAfterTableIdStep Schema(const Schema& schema);
379
380
 private:
381
  KeyBytes* out_;
382
};
383
384
class DocKeyDecoder {
385
 public:
386
1.07G
  explicit DocKeyDecoder(const Slice& input) : input_(input) {}
387
388
  Result<bool> DecodeCotableId(Uuid* uuid = nullptr);
389
  Result<bool> DecodeColocationId(ColocationId* colocation_id = nullptr);
390
391
  Result<bool> HasPrimitiveValue();
392
393
  Result<bool> DecodeHashCode(
394
      uint16_t* out = nullptr, AllowSpecial allow_special = AllowSpecial::kFalse);
395
396
  Result<bool> DecodeHashCode(AllowSpecial allow_special);
397
398
  CHECKED_STATUS DecodePrimitiveValue(
399
      PrimitiveValue* out = nullptr, AllowSpecial allow_special = AllowSpecial::kFalse);
400
401
  CHECKED_STATUS DecodePrimitiveValue(AllowSpecial allow_special);
402
403
  CHECKED_STATUS ConsumeGroupEnd();
404
405
  bool GroupEnded() const;
406
407
1.99G
  const Slice& left_input() const {
408
1.99G
    return input_;
409
1.99G
  }
410
411
74.4M
  size_t ConsumedSizeFrom(const uint8_t* start) const {
412
74.4M
    return input_.data() - start;
413
74.4M
  }
414
415
1.18G
  Slice* mutable_input() {
416
1.18G
    return &input_;
417
1.18G
  }
418
419
  CHECKED_STATUS DecodeToRangeGroup();
420
421
 private:
422
  Slice input_;
423
};
424
425
// Clears range components from provided key. Returns true if they were exists.
426
Result<bool> ClearRangeComponents(KeyBytes* out, AllowSpecial allow_special = AllowSpecial::kFalse);
427
428
// Returns true if both keys have hashed components and them are equal or both keys don't have
429
// hashed components and first range components are equal and false otherwise.
430
Result<bool> HashedOrFirstRangeComponentsEqual(const Slice& lhs, const Slice& rhs);
431
432
bool DocKeyBelongsTo(Slice doc_key, const Schema& schema);
433
434
// Consumes single primitive value from start of slice.
435
// Returns true when value was consumed, false when group end is found. The group end byte is
436
// consumed in the latter case.
437
Result<bool> ConsumePrimitiveValueFromKey(Slice* slice);
438
439
// Consume a group of document key components, ending with ValueType::kGroupEnd.
440
// @param slice - the current point at which we are decoding a key
441
// @param result - vector to append decoded values to.
442
Status ConsumePrimitiveValuesFromKey(rocksdb::Slice* slice,
443
                                     std::vector<PrimitiveValue>* result);
444
445
Result<boost::optional<DocKeyHash>> DecodeDocKeyHash(const Slice& encoded_key);
446
447
0
inline std::ostream& operator <<(std::ostream& out, const DocKey& doc_key) {
448
0
  out << doc_key.ToString();
449
0
  return out;
450
0
}
451
452
// ------------------------------------------------------------------------------------------------
453
// SubDocKey
454
// ------------------------------------------------------------------------------------------------
455
456
// A key pointing to a subdocument. Consists of a DocKey identifying the document, a list of
457
// primitive values leading to the subdocument in question, from the outermost to innermost order,
458
// and an optional hybrid_time of when the subdocument (which may itself be a primitive value) was
459
// last fully overwritten or deleted.
460
//
461
// Keys stored in RocksDB should always have the hybrid_time field set. However, it is useful to
462
// make the hybrid_time field optional while a SubDocKey is being constructed. If the hybrid_time
463
// is not set, it is omitted from the encoded representation of a SubDocKey.
464
//
465
// Implementation note: we use HybridTime::kInvalid to represent an omitted hybrid_time.
466
// We rely on that being the default-constructed value of a HybridTime.
467
//
468
// TODO: this should be renamed to something more generic, e.g. Key or LogicalKey, to reflect that
469
// this is actually the logical representation of keys that we store in the RocksDB key-value store.
470
class SubDocKey {
471
 public:
472
423M
  SubDocKey() {}
473
1.30M
  explicit SubDocKey(const DocKey& doc_key) : doc_key_(doc_key) {}
474
131k
  explicit SubDocKey(DocKey&& doc_key) : doc_key_(std::move(doc_key)) {}
475
476
  SubDocKey(const DocKey& doc_key, HybridTime hybrid_time)
477
      : doc_key_(doc_key),
478
3.85M
        doc_ht_(DocHybridTime(hybrid_time)) {
479
3.85M
  }
480
481
  SubDocKey(DocKey&& doc_key,
482
            HybridTime hybrid_time)
483
      : doc_key_(std::move(doc_key)),
484
6
        doc_ht_(DocHybridTime(hybrid_time)) {
485
6
  }
486
487
  SubDocKey(const DocKey& doc_key, const DocHybridTime& hybrid_time)
488
      : doc_key_(doc_key),
489
        doc_ht_(std::move(hybrid_time)) {
490
  }
491
492
  SubDocKey(const DocKey& doc_key,
493
            DocHybridTime doc_hybrid_time,
494
            const std::vector<PrimitiveValue>& subkeys)
495
      : doc_key_(doc_key),
496
        doc_ht_(doc_hybrid_time),
497
0
        subkeys_(subkeys) {
498
0
  }
499
500
  SubDocKey(const DocKey& doc_key,
501
            HybridTime hybrid_time,
502
            const std::vector<PrimitiveValue>& subkeys)
503
      : doc_key_(doc_key),
504
        doc_ht_(DocHybridTime(hybrid_time)),
505
0
        subkeys_(subkeys) {
506
0
  }
507
508
  template <class ...T>
509
  SubDocKey(const DocKey& doc_key, T... subkeys_and_maybe_hybrid_time)
510
      : doc_key_(doc_key),
511
2.94M
        doc_ht_(DocHybridTime::kInvalid) {
512
2.94M
    AppendSubKeysAndMaybeHybridTime(subkeys_and_maybe_hybrid_time...);
513
2.94M
  }
yb::docdb::SubDocKey::SubDocKey<yb::docdb::PrimitiveValue>(yb::docdb::DocKey const&, yb::docdb::PrimitiveValue)
Line
Count
Source
511
2.92M
        doc_ht_(DocHybridTime::kInvalid) {
512
2.92M
    AppendSubKeysAndMaybeHybridTime(subkeys_and_maybe_hybrid_time...);
513
2.92M
  }
yb::docdb::SubDocKey::SubDocKey<yb::docdb::PrimitiveValue, yb::docdb::PrimitiveValue>(yb::docdb::DocKey const&, yb::docdb::PrimitiveValue, yb::docdb::PrimitiveValue)
Line
Count
Source
511
21.2k
        doc_ht_(DocHybridTime::kInvalid) {
512
21.2k
    AppendSubKeysAndMaybeHybridTime(subkeys_and_maybe_hybrid_time...);
513
21.2k
  }
514
515
  CHECKED_STATUS FromDocPath(const DocPath& doc_path);
516
517
  // Return the subkeys within this SubDocKey
518
1.47M
  const std::vector<PrimitiveValue>& subkeys() const {
519
1.47M
    return subkeys_;
520
1.47M
  }
521
522
3.64k
  std::vector<PrimitiveValue>& subkeys() {
523
3.64k
    return subkeys_;
524
3.64k
  }
525
526
  // Append a sequence of sub-keys to this key.
527
  template<class ...T>
528
  void AppendSubKeysAndMaybeHybridTime(PrimitiveValue subdoc_key,
529
21.2k
                                       T... subkeys_and_maybe_hybrid_time) {
530
21.2k
    subkeys_.push_back(std::move(subdoc_key));
531
21.2k
    AppendSubKeysAndMaybeHybridTime(subkeys_and_maybe_hybrid_time...);
532
21.2k
  }
533
534
  template<class ...T>
535
2.96M
  void AppendSubKeysAndMaybeHybridTime(PrimitiveValue subdoc_key) {
536
2.96M
    subkeys_.emplace_back(std::move(subdoc_key));
537
2.96M
  }
538
539
  template<class ...T>
540
  void AppendSubKeysAndMaybeHybridTime(PrimitiveValue subdoc_key, HybridTime hybrid_time) {
541
    DCHECK(!has_hybrid_time());
542
    subkeys_.emplace_back(subdoc_key);
543
    DCHECK(hybrid_time.is_valid());
544
    doc_ht_ = DocHybridTime(hybrid_time);
545
  }
546
547
  void AppendSubKey(PrimitiveValue subkey);
548
549
  void RemoveLastSubKey();
550
551
  void KeepPrefix(size_t num_sub_keys_to_keep);
552
553
288k
  void remove_hybrid_time() {
554
288k
    doc_ht_ = DocHybridTime::kInvalid;
555
288k
  }
556
557
  void Clear();
558
559
0
  bool IsValid() const {
560
0
    return !doc_key_.empty();
561
0
  }
562
563
2.36M
  KeyBytes Encode() const { return DoEncode(true /* include_hybrid_time */); }
564
1.22M
  KeyBytes EncodeWithoutHt() const { return DoEncode(false /* include_hybrid_time */); }
565
566
  // Decodes a SubDocKey from the given slice, typically retrieved from a RocksDB key.
567
  // @param slice
568
  //     A pointer to the slice containing the bytes to decode the SubDocKey from. This slice is
569
  //     modified, with consumed bytes being removed.
570
  // @param require_hybrid_time
571
  //     Whether a hybrid_time is required in the end of the SubDocKey. If this is true, we require
572
  //     a ValueType::kHybridTime byte followed by a hybrid_time to be present in the input slice.
573
  //     Otherwise, we allow decoding an incomplete SubDocKey without a hybrid_time in the end. Note
574
  //     that we also allow input that has a few bytes in the end but not enough to represent a
575
  //     hybrid_time.
576
  // @param allow_special
577
  //     Whether it is allowed to have special value types in slice, that are used during seek.
578
  //     If such value type is found, decoding is stopped w/o error.
579
  CHECKED_STATUS DecodeFrom(rocksdb::Slice* slice,
580
                            HybridTimeRequired require_hybrid_time = HybridTimeRequired::kTrue,
581
                            AllowSpecial allow_special = AllowSpecial::kFalse);
582
583
  // Similar to DecodeFrom, but requires that the entire slice is decoded, and thus takes a const
584
  // reference to a slice. This still respects the require_hybrid_time parameter, but in case a
585
  // hybrid_time is omitted, we don't allow any extra bytes to be present in the slice.
586
  CHECKED_STATUS FullyDecodeFrom(
587
      const rocksdb::Slice& slice,
588
      HybridTimeRequired hybrid_time_required = HybridTimeRequired::kTrue);
589
590
  // Splits given RocksDB key into vector of slices that forms range_group of document key and
591
  // hybrid_time.
592
  static CHECKED_STATUS PartiallyDecode(Slice* slice,
593
                                        boost::container::small_vector_base<Slice>* out);
594
595
  // Splits the given RocksDB sub key into a vector of slices that forms the range group of document
596
  // key and sub keys.
597
  //
598
  // We don't use Result<...> to be able to reuse memory allocated by out.
599
  //
600
  // When key does not start with a hash component, the returned prefix would start with the first
601
  // range component.
602
  //
603
  // For instance, for a (hash_value, h1, h2, r1, r2, s1) doc key the following values will be
604
  // returned:
605
  // encoded_length(hash_value, h1, h2) <-------------- (includes the kGroupEnd of the hashed part)
606
  // encoded_length(hash_value, h1, h2, r1)
607
  // encoded_length(hash_value, h1, h2, r1, r2)
608
  // encoded_length(hash_value, h1, h2, r1, r2, s1) <------- (includes kGroupEnd of the range part).
609
  static CHECKED_STATUS DecodePrefixLengths(
610
      Slice slice, boost::container::small_vector_base<size_t>* out);
611
612
  // Fills out with ends of SubDocKey components.  First item in out will be size of ID part
613
  // (cotable id or colocation id) of DocKey (0 if ID is not present), second size of whole DocKey,
614
  // third size of DocKey + size of first subkey, and so on.
615
  //
616
  // To illustrate,
617
  // * for key
618
  //     SubDocKey(DocKey(0xfca0, [3], []), [SystemColumnId(0); HT{ physical: 1581475435181551 }])
619
  //   aka
620
  //     47FCA0488000000321214A80238001B5E605A0CA10804A
621
  //   (and with spaces to make it clearer)
622
  //     47FCA0 4880000003 21 21 4A80 238001B5E605A0CA10804A
623
  //   the ends will be
624
  //     {0, 10, 12}
625
  // * for key
626
  //     SubDocKey(DocKey(ColocationId=16385, [], [5]), [SystemColumnId(0); HT{ physical: ... }])
627
  //   aka
628
  //     30000040014880000005214A80238001B5E700309553804A
629
  //   (and with spaces to make it clearer)
630
  //     3000004001 4880000005 21 4A80 238001B5E700309553804A
631
  //   the ends will be
632
  //     {5, 11, 13}
633
  // * for key
634
  //     SubDocKey(DocKey(ColocationId=16385, [], []), [HT{ physical: 1581471227403848 }])
635
  //   aka
636
  //     300000400121238001B5E7006E61B7804A
637
  //   (and with spaces to make it clearer)
638
  //     3000004001 21 238001B5E7006E61B7804A
639
  //   the ends will be
640
  //     {5}
641
  //
642
  // If out is not empty, then it will be interpreted as partial result for this decoding operation
643
  // and the appropriate prefix will be skipped.
644
  static CHECKED_STATUS DecodeDocKeyAndSubKeyEnds(
645
      Slice slice, boost::container::small_vector_base<size_t>* out);
646
647
  // Attempts to decode a subkey at the beginning of the given slice, consuming the corresponding
648
  // prefix of the slice. Returns false if there is no next subkey, as indicated by the slice being
649
  // empty or encountering an encoded hybrid time.
650
  static Result<bool> DecodeSubkey(Slice* slice);
651
652
  CHECKED_STATUS FullyDecodeFromKeyWithOptionalHybridTime(const rocksdb::Slice& slice);
653
654
  std::string ToString(AutoDecodeKeys auto_decode_keys = AutoDecodeKeys::kFalse) const;
655
  static std::string DebugSliceToString(Slice slice);
656
  static Result<std::string> DebugSliceToStringAsResult(Slice slice);
657
658
1.48M
  const DocKey& doc_key() const {
659
1.48M
    return doc_key_;
660
1.48M
  }
661
662
206M
  DocKey& doc_key() {
663
206M
    return doc_key_;
664
206M
  }
665
666
290k
  size_t num_subkeys() const {
667
290k
    return subkeys_.size();
668
290k
  }
669
670
  bool StartsWith(const SubDocKey& prefix) const;
671
672
  bool operator ==(const SubDocKey& other) const;
673
674
  bool operator !=(const SubDocKey& other) const {
675
    return !(*this == other);
676
  }
677
678
0
  const PrimitiveValue& last_subkey() const {
679
0
    assert(!subkeys_.empty());
680
0
    return subkeys_.back();
681
0
  }
682
683
  int CompareTo(const SubDocKey& other) const;
684
  int CompareToIgnoreHt(const SubDocKey& other) const;
685
686
  bool operator <(const SubDocKey& other) const {
687
    return CompareTo(other) < 0;
688
  }
689
690
0
  bool operator <=(const SubDocKey& other) const {
691
0
    return CompareTo(other) <= 0;
692
0
  }
693
694
0
  bool operator >(const SubDocKey& other) const {
695
0
    return CompareTo(other) > 0;
696
0
  }
697
698
0
  bool operator >=(const SubDocKey& other) const {
699
0
    return CompareTo(other) >= 0;
700
0
  }
701
702
1.83k
  HybridTime hybrid_time() const {
703
1.83k
    DCHECK(has_hybrid_time());
704
1.83k
    return doc_ht_.hybrid_time();
705
1.83k
  }
706
707
93.0M
  const DocHybridTime& doc_hybrid_time() const {
708
93.0M
    DCHECK(has_hybrid_time());
709
93.0M
    return doc_ht_;
710
93.0M
  }
711
712
6.48k
  void set_hybrid_time(const DocHybridTime& hybrid_time) {
713
6.48k
    DCHECK(hybrid_time.is_valid());
714
6.48k
    doc_ht_ = hybrid_time;
715
6.48k
  }
716
717
181M
  bool has_hybrid_time() const {
718
181M
    return doc_ht_.is_valid();
719
181M
  }
720
721
  // Generate a RocksDB key that would allow us to seek to the smallest SubDocKey that has a
722
  // lexicographically higher sequence of subkeys than this one, but is not an extension of this
723
  // sequence of subkeys.  In other words, ensure we advance to the next field (subkey) either
724
  // within the object (subdocument) we are currently scanning, or at any higher level, including
725
  // advancing to the next document key.
726
  //
727
  // E.g. assuming the SubDocKey this is being called on is #2 from the following example,
728
  // performing a RocksDB seek on the return value of this takes us to #7.
729
  //
730
  // 1. SubDocKey(DocKey([], ["a"]), [HT(1)]) -> {}
731
  // 2. SubDocKey(DocKey([], ["a"]), ["x", HT(1)]) -> {} ---------------------------.
732
  // 3. SubDocKey(DocKey([], ["a"]), ["x", "x", HT(2)]) -> null                     |
733
  // 4. SubDocKey(DocKey([], ["a"]), ["x", "x", HT(1)]) -> {}                       |
734
  // 5. SubDocKey(DocKey([], ["a"]), ["x", "x", "y", HT(1)]) -> {}                  |
735
  // 6. SubDocKey(DocKey([], ["a"]), ["x", "x", "y", "x", HT(1)]) -> true           |
736
  // 7. SubDocKey(DocKey([], ["a"]), ["y", HT(3)]) -> {}                  <---------
737
  // 8. SubDocKey(DocKey([], ["a"]), ["y", "y", HT(3)]) -> {}
738
  // 9. SubDocKey(DocKey([], ["a"]), ["y", "y", "x", HT(3)]) ->
739
  //
740
  // This is achieved by simply appending a byte that is higher than any ValueType in an encoded
741
  // representation of a SubDocKey that extends the vector of subkeys present in the current one,
742
  // or has the same vector of subkeys, i.e. key/value pairs #3-6 in the above example. HybridTime
743
  // is omitted from the resulting encoded representation.
744
  KeyBytes AdvanceOutOfSubDoc() const;
745
746
  // Similar to AdvanceOutOfSubDoc, but seek to the smallest key that skips documents with this
747
  // DocKey and DocKeys that have the same hash components but add more range components to it.
748
  //
749
  // E.g. assuming the SubDocKey this is being called on is #2 from the following example:
750
  //
751
  //  1. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), [HT(1)]) -> {}
752
  //  2. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["x", HT(1)]) -> {} <----------------.
753
  //  3. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["x", "x", HT(2)]) -> null           |
754
  //  4. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["x", "x", HT(1)]) -> {}             |
755
  //  5. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["x", "x", "y", HT(1)]) -> {}        |
756
  //  6. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["x", "x", "y", "x", HT(1)]) -> true |
757
  //  7. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["y", HT(3)]) -> {}                  |
758
  //  8. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["y", "y", HT(3)]) -> {}             |
759
  //  9. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"]), ["y", "y", "x", HT(3)]) -> {}        |
760
  // ...                                                                                        |
761
  // 20. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d", "e"]), ["y", HT(3)]) -> {}             |
762
  // 21. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d", "e"]), ["z", HT(3)]) -> {}             |
763
  // 22. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "f"]), [HT(1)]) -> {}      <--- (*** 1 ***)-|
764
  // 23. SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "f"]), ["x", HT(1)]) -> {}                  |
765
  // ...                                                                                        |
766
  // 30. SubDocKey(DocKey(0x2345, ["a", "c"], ["c", "f"]), [HT(1)]) -> {}      <--- (*** 2 ***)-
767
  // 31. SubDocKey(DocKey(0x2345, ["a", "c"], ["c", "f"]), ["x", HT(1)]) -> {}
768
  //
769
  // SubDocKey(DocKey(0x1234, ["a", "b"], ["c", "d"])).AdvanceOutOfDocKeyPrefix() will seek to #22
770
  // (*** 1 ***), pass doc keys with additional range components when they are present.
771
  //
772
  // And when given a doc key without range component like below, it can help seek pass all doc
773
  // keys with the same hash components, e.g.
774
  // SubDocKey(DocKey(0x1234, ["a", "b"], [])).AdvanceOutOfDocKeyPrefix() will seek to #30
775
  // (*** 2 ***).
776
777
  KeyBytes AdvanceOutOfDocKeyPrefix() const;
778
779
 private:
780
  class DecodeCallback;
781
  friend class DecodeCallback;
782
783
  // Attempts to decode and consume a subkey from the beginning of the given slice.
784
  // A non-error false result means e.g. that the slice is empty or if the next thing is an encoded
785
  // hybrid time.
786
  template<class Callback>
787
  static Result<bool> DecodeSubkey(Slice* slice, const Callback& callback);
788
789
  template<class Callback>
790
  static Status DoDecode(rocksdb::Slice* slice,
791
                         HybridTimeRequired require_hybrid_time,
792
                         AllowSpecial allow_special,
793
                         const Callback& callback);
794
795
  KeyBytes DoEncode(bool include_hybrid_time) const;
796
797
  DocKey doc_key_;
798
  DocHybridTime doc_ht_;
799
800
  // TODO: make this a small_vector.
801
  std::vector<PrimitiveValue> subkeys_;
802
};
803
804
0
inline std::ostream& operator <<(std::ostream& out, const SubDocKey& subdoc_key) {
805
0
  out << subdoc_key.ToString();
806
0
  return out;
807
0
}
808
809
// A best-effort to decode the given sequence of key bytes as either a DocKey or a SubDocKey.
810
// If not possible to decode, return the key_bytes directly as a readable string.
811
std::string BestEffortDocDBKeyToStr(const KeyBytes &key_bytes);
812
std::string BestEffortDocDBKeyToStr(const rocksdb::Slice &slice);
813
814
class DocDbAwareFilterPolicyBase : public rocksdb::FilterPolicy {
815
 public:
816
1.54M
  explicit DocDbAwareFilterPolicyBase(size_t filter_block_size_bits, rocksdb::Logger* logger) {
817
1.54M
    builtin_policy_.reset(rocksdb::NewFixedSizeFilterPolicy(
818
1.54M
        filter_block_size_bits, rocksdb::FilterPolicy::kDefaultFixedSizeFilterErrorRate, logger));
819
1.54M
  }
820
821
  void CreateFilter(const rocksdb::Slice* keys, int n, std::string* dst) const override;
822
823
  bool KeyMayMatch(const rocksdb::Slice& key, const rocksdb::Slice& filter) const override;
824
825
  rocksdb::FilterBitsBuilder* GetFilterBitsBuilder() const override;
826
827
  rocksdb::FilterBitsReader* GetFilterBitsReader(const rocksdb::Slice& contents) const override;
828
829
  FilterType GetFilterType() const override;
830
831
 private:
832
  std::unique_ptr<const rocksdb::FilterPolicy> builtin_policy_;
833
};
834
835
// This filter policy only takes into account hashed components of keys for filtering.
836
class DocDbAwareHashedComponentsFilterPolicy : public DocDbAwareFilterPolicyBase {
837
 public:
838
  DocDbAwareHashedComponentsFilterPolicy(size_t filter_block_size_bits, rocksdb::Logger* logger)
839
515k
      : DocDbAwareFilterPolicyBase(filter_block_size_bits, logger) {}
840
841
515k
  const char* Name() const override { return "DocKeyHashedComponentsFilter"; }
842
843
  const KeyTransformer* GetKeyTransformer() const override;
844
};
845
846
// Together with the fix for BlockBasedTableBuild::Add
847
// (https://github.com/yugabyte/yugabyte-db/issues/6435) we also disable DocKeyV2Filter
848
// for range-partitioned tablets. For hash-partitioned tablets it will be supported during read
849
// path and will work the same way as DocDbAwareV3FilterPolicy.
850
class DocDbAwareV2FilterPolicy : public DocDbAwareFilterPolicyBase {
851
 public:
852
  DocDbAwareV2FilterPolicy(size_t filter_block_size_bits, rocksdb::Logger* logger)
853
515k
      : DocDbAwareFilterPolicyBase(filter_block_size_bits, logger) {}
854
855
515k
  const char* Name() const override { return "DocKeyV2Filter"; }
856
857
  const KeyTransformer* GetKeyTransformer() const override;
858
};
859
860
// This filter policy takes into account following parts of keys for filtering:
861
// - For range-based partitioned tables (such tables have 0 hashed components):
862
// use all hash components of the doc key.
863
// - For hash-based partitioned tables (such tables have >0 hashed components):
864
// use first range component of the doc key.
865
class DocDbAwareV3FilterPolicy : public DocDbAwareFilterPolicyBase {
866
 public:
867
  DocDbAwareV3FilterPolicy(size_t filter_block_size_bits, rocksdb::Logger* logger)
868
515k
      : DocDbAwareFilterPolicyBase(filter_block_size_bits, logger) {}
869
870
2.47M
  const char* Name() const override { return "DocKeyV3Filter"; }
871
872
  const KeyTransformer* GetKeyTransformer() const override;
873
};
874
875
}  // namespace docdb
876
}  // namespace yb
877
878
#endif  // YB_DOCDB_DOC_KEY_H_