YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/subdoc_reader.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/subdoc_reader.h"
15
16
#include <cstddef>
17
#include <memory>
18
#include <string>
19
#include <vector>
20
21
#include <glog/logging.h>
22
23
#include "yb/common/doc_hybrid_time.h"
24
#include "yb/common/hybrid_time.h"
25
#include "yb/common/transaction.h"
26
#include "yb/common/typedefs.h"
27
28
#include "yb/docdb/deadline_info.h"
29
#include "yb/docdb/doc_key.h"
30
#include "yb/docdb/doc_ttl_util.h"
31
#include "yb/docdb/expiration.h"
32
#include "yb/docdb/intent_aware_iterator.h"
33
#include "yb/docdb/key_bytes.h"
34
#include "yb/docdb/primitive_value.h"
35
#include "yb/docdb/subdocument.h"
36
#include "yb/docdb/value.h"
37
#include "yb/docdb/value_type.h"
38
39
#include "yb/gutil/integral_types.h"
40
#include "yb/gutil/macros.h"
41
42
#include "yb/server/hybrid_clock.h"
43
44
#include "yb/util/monotime.h"
45
#include "yb/util/result.h"
46
#include "yb/util/status.h"
47
#include "yb/util/status_format.h"
48
49
using std::vector;
50
51
using yb::HybridTime;
52
53
namespace yb {
54
namespace docdb {
55
56
namespace {
57
58
Expiration GetNewExpiration(
59
    const Expiration& parent_exp, const MonoDelta& ttl,
60
27.0M
    const DocHybridTime& new_write_time) {
61
27.0M
  Expiration new_exp = parent_exp;
62
  // We may need to update the TTL in individual columns.
63
27.0M
  if (new_write_time.hybrid_time() >= new_exp.write_ht) {
64
    // We want to keep the default TTL otherwise.
65
27.0M
    if (ttl != Value::kMaxTtl) {
66
587
      new_exp.write_ht = new_write_time.hybrid_time();
67
587
      new_exp.ttl = ttl;
68
27.0M
    } else if (new_exp.ttl.IsNegative()) {
69
0
      new_exp.ttl = -new_exp.ttl;
70
0
    }
71
27.0M
  }
72
73
  // If the hybrid time is kMin, then we must be using default TTL.
74
27.0M
  if (new_exp.write_ht == HybridTime::kMin) {
75
26.7M
    new_exp.write_ht = new_write_time.hybrid_time();
76
26.7M
  }
77
78
27.0M
  return new_exp;
79
27.0M
}
80
81
} // namespace
82
83
ObsolescenceTracker::ObsolescenceTracker(DocHybridTime write_time_watermark):
84
245M
    write_time_watermark_(write_time_watermark) {}
85
86
ObsolescenceTracker::ObsolescenceTracker(
87
    const ReadHybridTime& read_time, DocHybridTime write_time_watermark, Expiration expiration):
88
40.8M
    write_time_watermark_(write_time_watermark), read_time_(read_time), expiration_(expiration) {}
89
90
37.9M
const DocHybridTime& ObsolescenceTracker::GetHighWriteTime() { return write_time_watermark_; }
91
92
225M
bool ObsolescenceTracker::IsObsolete(const DocHybridTime& write_time) const {
93
225M
  if (expiration_.has_value()) {
94
26.1M
    DCHECK(read_time_.has_value());
95
26.1M
    if (HasExpiredTTL(expiration_.value().write_ht, expiration_.value().ttl,
96
9.02k
                      read_time_.value().read)) {
97
9.02k
      return true;
98
9.02k
    }
99
225M
  }
100
225M
  return write_time < write_time_watermark_;
101
225M
}
102
103
ObsolescenceTracker ObsolescenceTracker::Child(
104
248M
    const DocHybridTime& write_time, const MonoDelta& ttl) const {
105
248M
  auto new_write_time_watermark = std::max(write_time, write_time_watermark_);
106
248M
  if (expiration_.has_value()) {
107
26.9M
    DCHECK(read_time_.has_value());
108
26.9M
    return ObsolescenceTracker(
109
26.9M
        read_time_.value(), new_write_time_watermark,
110
26.9M
        GetNewExpiration(expiration_.value(), ttl, write_time));
111
26.9M
  }
112
221M
  return ObsolescenceTracker(new_write_time_watermark);
113
221M
}
114
115
34.1M
ObsolescenceTracker ObsolescenceTracker::Child(const DocHybridTime& write_time) const {
116
34.1M
  auto new_write_time_watermark = std::max(write_time, write_time_watermark_);
117
34.1M
  if (expiration_.has_value()) {
118
10.1M
    return ObsolescenceTracker(read_time_.value(), new_write_time_watermark, expiration_.get());
119
10.1M
  }
120
23.9M
  return ObsolescenceTracker(new_write_time_watermark);
121
23.9M
}
122
123
boost::optional<uint64_t> ObsolescenceTracker::GetTtlRemainingSeconds(
124
215M
    const HybridTime& ttl_write_time) const {
125
215M
  if (!expiration_.has_value()) {
126
189M
    return boost::none;
127
189M
  }
128
129
25.8M
  DCHECK(read_time_.has_value());
130
131
25.8M
  auto ttl_value = expiration_.value().ttl;
132
25.8M
  auto doc_read_time = read_time_.value();
133
134
25.8M
  if (ttl_value == Value::kMaxTtl) {
135
12.8M
    return -1;
136
12.8M
  }
137
13.0M
  int64_t time_since_ttl_write_seconds = (
138
13.0M
      server::HybridClock::GetPhysicalValueMicros(doc_read_time.read) -
139
13.0M
      server::HybridClock::GetPhysicalValueMicros(ttl_write_time)) /
140
13.0M
      MonoTime::kMicrosecondsPerSecond;
141
13.0M
  int64_t ttl_value_seconds = ttl_value.ToMilliseconds() / MonoTime::kMillisecondsPerSecond;
142
13.0M
  int64_t ttl_remaining_seconds = ttl_value_seconds - time_since_ttl_write_seconds;
143
13.0M
  return std::max(static_cast<int64_t>(0), ttl_remaining_seconds);
144
13.0M
}
145
146
namespace {
147
148
// This class wraps access to a SubDocument instance specified in one of two ways:
149
// (1) -- From a pointer to an already existing instance of a SubDocument
150
// (2) -- From a pointer to an existing instance of LazySubDocumentHolder which represents the
151
//        "parent" of the SubDocument accessed via this LazySubDocumentHolder.
152
//
153
// Using this class, a SubDocument which is *specified* via (2) above will not be constructed unless
154
// it is accessed, either directly or via a child LazySubDocumentHolder instance. This class allows
155
// us to provide a unified interface to the reading/constructing code which may determine if a
156
// particular SubDocument is live based on deferred knowledge of whether any of its children are
157
// live.
158
class LazySubDocumentHolder {
159
 public:
160
  // Specifies the SubDocument "target" provided. The pointer must be valid for the lifetime of this
161
  // instance. The provided key Slice must be valid and the underlying data should not change during
162
  // the lifetime of this instance.
163
242M
  LazySubDocumentHolder(SubDocument* target, Slice key) : target_(target), key_(key) {
164
242M
    DCHECK(target_);
165
242M
  }
166
167
  // Specifies a SubDocument with key "key" lazily constructed from "parent". Provided parent must
168
  // be valid and remain at the same address for the lifetime of this instance. The provided key
169
  // Slice must be valid and the underlying data should not change during the lifetime of this
170
  // instance.
171
6.20M
  LazySubDocumentHolder(LazySubDocumentHolder* parent, Slice key) : parent_(parent), key_(key) {
172
6.20M
    DCHECK(parent_);
173
6.20M
  }
174
175
  // Returns true if the SubDocument specified by this instance exists.
176
38.5M
  bool IsConstructed() const { return target_; }
177
178
  // Get a pointer to the SubDocument specified by this instance. Construct that SubDocument first
179
  // if it has not yet been constructed.
180
  Result<SubDocument*> Get();
181
182
 private:
183
  // The constructed SubDocument specified by this instance.
184
  SubDocument* target_ = nullptr;
185
186
  // A pointer to the parent_ of the specified SubDocument. This must be non-null unless this
187
  // instance was constructed via a concrete SubDocument*.
188
  LazySubDocumentHolder* const parent_ = nullptr;
189
190
  // The key of the specified SubDocument.
191
  Slice key_;
192
};
193
194
241M
Result<SubDocument*> LazySubDocumentHolder::Get() {
195
  // If target_ is not null, just return it. Otherwise, we must construct the SubDocument from its
196
  // parent.
197
241M
  if (target_) {
198
241M
    return target_;
199
241M
  }
200
201
  // Presumably, the parent_ key is a prefix of key_, otherwise it's not a valid parent.
202
18.4E
  DCHECK(key_.starts_with(parent_->key_))
203
18.4E
      << "Attempting to construct SubDocument for key: " << SubDocKey::DebugSliceToString(key_)
204
18.4E
      << " from parent whose key: " << SubDocKey::DebugSliceToString(parent_->key_)
205
18.4E
      << " is not a prefix of child key";
206
  // This code takes each subdoc key part after parent_ in key_ and traverses from the parent_
207
  // SubDocument to a SubDocument created via successive calls to GetOrAddChild. The SubDocument
208
  // returned is owned by and borrowed from parent_.
209
1.43k
  SubDocument* current = VERIFY_RESULT(parent_->Get());
210
1.43k
  Slice temp = key_;
211
1.43k
  temp.remove_prefix(parent_->key_.size());
212
1.56k
  for (;;) {
213
1.56k
    PrimitiveValue child_key_part;
214
1.56k
    RETURN_NOT_OK(child_key_part.DecodeFromKey(&temp));
215
1.56k
    current = current->GetOrAddChild(child_key_part).first;
216
1.56k
    if (temp.empty()) {
217
1.56k
      target_ = current;
218
1.56k
      return target_;
219
1.56k
    }
220
1.56k
  }
221
18.4E
  return STATUS(
222
1.43k
      InternalError,
223
1.43k
      "We return this status at the end of a function with a terminal infinite loop. We should "
224
1.43k
      "never get here.");
225
1.43k
}
226
227
// This class provides a wrapper to access data corresponding to a RocksDB row.
228
class DocDbRowData {
229
 public:
230
  DocDbRowData(const Slice& key, const DocHybridTime& write_time, Value&& value);
231
232
  static Result<std::unique_ptr<DocDbRowData>> CurrentRow(IntentAwareIterator* iter);
233
234
502M
  const KeyBytes& key() const { return target_key_; }
235
236
464M
  const Value& value() const { return value_; }
237
238
900M
  const DocHybridTime& write_time() const { return write_time_; }
239
240
248M
  bool IsTombstone() const { return value_.value_type() == ValueType::kTombstone; }
241
242
215M
  bool IsCollection() const { return IsCollectionType(value_.value_type()); }
243
244
247M
  bool IsPrimitiveValue() const { return IsPrimitiveValueType(value_.value_type()); }
245
246
241M
  PrimitiveValue* mutable_primitive_value() { return value_.mutable_primitive_value(); }
247
248
 private:
249
  const KeyBytes target_key_;
250
  const DocHybridTime write_time_;
251
  Value value_;
252
253
  DISALLOW_COPY_AND_ASSIGN(DocDbRowData);
254
};
255
256
DocDbRowData::DocDbRowData(
257
    const Slice& key, const DocHybridTime& write_time, Value&& value):
258
247M
    target_key_(std::move(key)), write_time_(std::move(write_time)), value_(std::move(value)) {}
259
260
248M
Result<std::unique_ptr<DocDbRowData>> DocDbRowData::CurrentRow(IntentAwareIterator* iter) {
261
248M
  auto key_data = VERIFY_RESULT(iter->FetchKey());
262
2.21k
  DCHECK(key_data.same_transaction ||
263
2.21k
      iter->read_time().global_limit >= key_data.write_time.hybrid_time())
264
2.21k
      << "Bad key: " << SubDocKey::DebugSliceToString(key_data.key)
265
2.21k
      << ", global limit: " << iter->read_time().global_limit
266
2.21k
      << ", write time: " << key_data.write_time.hybrid_time();
267
248M
  Value value;
268
  // TODO -- we could optimize by decoding directly into a SubDocument instance on the heap which
269
  // could be later bound to our result SubDocument. This could work if e.g. Value could be
270
  // initialized with a PrimitiveValue*.
271
248M
  RETURN_NOT_OK(value.Decode(iter->value()));
272
273
248M
  if (key_data.write_time == DocHybridTime::kMin) {
274
0
    return STATUS(Corruption, "No hybrid timestamp found on entry");
275
0
  }
276
277
248M
  return std::make_unique<DocDbRowData>(key_data.key, key_data.write_time, std::move(value));
278
248M
}
279
280
// This class provides a convenience handle for modifying a SubDocument specified by a provided
281
// LazySubDocumentHolder. Importantly, it is responsible for the semantics of when a
282
// LazySubDocumentHolder should be realized. Notably, it does *not* construct the specified
283
// SubDocument if it is instructed to store a tombstone, and it will only modify the SubDocument
284
// value if it already has been constructed.
285
class DocDbRowAssembler {
286
 public:
287
242M
  DocDbRowAssembler(SubDocument* target, Slice key) : root_(target, key) {}
288
289
  DocDbRowAssembler(DocDbRowAssembler* parent_assembler, Slice key):
290
6.20M
      root_(&parent_assembler->root_, key) {}
291
292
  CHECKED_STATUS SetEmptyCollection();
293
294
  CHECKED_STATUS SetTombstone();
295
296
  CHECKED_STATUS SetPrimitiveValue(DocDbRowData* row);
297
298
  Result<bool> HasStoredValue();
299
300
 private:
301
  LazySubDocumentHolder root_;
302
  bool is_tombstoned_ = false;
303
304
  DISALLOW_COPY_AND_ASSIGN(DocDbRowAssembler);
305
};
306
307
712
Status DocDbRowAssembler::SetEmptyCollection() {
308
712
  auto* subdoc = VERIFY_RESULT(root_.Get());
309
712
  *subdoc = SubDocument();
310
712
  return Status::OK();
311
712
}
312
313
32.4M
Status DocDbRowAssembler::SetTombstone() {
314
32.4M
  if (!root_.IsConstructed()) {
315
    // Do not construct a child subdocument from the parent if it is not constructed, since this is
316
    // a tombstone.
317
6.20M
    return Status::OK();
318
6.20M
  }
319
26.2M
  auto* subdoc = VERIFY_RESULT(root_.Get());
320
26.2M
  *subdoc = SubDocument(ValueType::kTombstone);
321
26.2M
  is_tombstoned_ = true;
322
26.2M
  return Status::OK();
323
26.2M
}
324
325
215M
Status DocDbRowAssembler::SetPrimitiveValue(DocDbRowData* row) {
326
  // TODO -- this interface with a non-const row pointer is not ideal. It's awkward to allow the
327
  // DocDbRowAssembler to modify the DocDbRowData's state. In the future, it might make more
328
  // sense to have ScopedDocDbRowContext orchestrate coordination between these classes.
329
  // TODO -- we currently modify the DocDbRowData's mutable primitive_value, and then make
330
  // a copy here to store onto the SubDocument. This should be made more efficient, especially
331
  // since it's on the critical path of all reads.
332
215M
  auto* subdoc = VERIFY_RESULT(root_.Get());
333
334
215M
  auto* mutable_primitive_value = row->mutable_primitive_value();
335
336
215M
  if (row->value().has_user_timestamp()) {
337
87
    mutable_primitive_value->SetWriteTime(row->value().user_timestamp());
338
215M
  } else {
339
215M
    mutable_primitive_value->SetWriteTime(row->write_time().hybrid_time().GetPhysicalValueMicros());
340
215M
  }
341
215M
  *subdoc = SubDocument(*mutable_primitive_value);
342
215M
  return Status::OK();
343
215M
}
344
345
6.20M
Result<bool> DocDbRowAssembler::HasStoredValue() {
346
6.20M
  if (!root_.IsConstructed()) {
347
6.20M
    return false;
348
6.20M
  }
349
1.62k
  auto* subdoc = VERIFY_RESULT(root_.Get());
350
1.62k
  return subdoc->value_type() != ValueType::kInvalid
351
1.56k
      && subdoc->value_type() != ValueType::kTombstone;
352
1.62k
}
353
354
class ScopedDocDbRowContext;
355
class ScopedDocDbRowContextWithData;
356
357
// This class represents a collection of ScopedDocDbRowContext instances corresponding to a DocDB
358
// collection. The user of this class can optionally call SetFirstChild on an owned
359
// ScopedDocDbRowContext in case it has already read the first row of this collection, and that will
360
// be returned before reading subsequent rows in the collection. Note this may only be done before
361
// the ScopedDocDbCollectionContext instance has read any other rows.
362
class ScopedDocDbCollectionContext {
363
 public:
364
  explicit ScopedDocDbCollectionContext(ScopedDocDbRowContext* parent);
365
366
  CHECKED_STATUS SetFirstChild(std::unique_ptr<DocDbRowData> first_row);
367
368
  Result<ScopedDocDbRowContextWithData*> GetNextChild();
369
370
 private:
371
  void SetNextChild(std::unique_ptr<DocDbRowData> child_row);
372
373
  ScopedDocDbRowContext* const parent_ = nullptr;
374
  std::unique_ptr<ScopedDocDbRowContextWithData> current_child_ = nullptr;
375
  ScopedDocDbRowContextWithData* next_child_ = nullptr;
376
};
377
378
// This class encapsulates all relevant context for reading the RocksDB state corresponding to a
379
// particular key and constructing a SubDocument instance which reflects that state. This context is
380
// used by control-flow functions at the bottom of this file. The context for a key also
381
// encapsulates a mechanism to collect the context for any children of the same key via
382
// ScopedDocDbCollectionContext, which is similarly used by control-flow functions at the bottom of
383
// this file.
384
class ScopedDocDbRowContext {
385
 public:
386
  ScopedDocDbRowContext(
387
      IntentAwareIterator* iter,
388
      DeadlineInfo* deadline_info,
389
      Slice key,
390
      SubDocument* assembly_target,
391
      ObsolescenceTracker obsolescence_tracker);
392
393
  ScopedDocDbRowContext(
394
      IntentAwareIterator* iter,
395
      DeadlineInfo* deadline_info,
396
      Slice key,
397
      DocDbRowAssembler* ancestor_assembler,
398
      ObsolescenceTracker obsolescence_tracker);
399
400
283M
  DocDbRowAssembler* mutable_assembler() { return &assembler_; }
401
402
248M
  const ObsolescenceTracker* obsolescence_tracker() const { return &obsolescence_tracker_; }
403
404
  ScopedDocDbCollectionContext* collection();
405
406
  CHECKED_STATUS CheckDeadline();
407
408
 protected:
409
  IntentAwareIterator* const iter_;
410
  DeadlineInfo* const deadline_info_;
411
  const Slice key_;
412
  const IntentAwareIteratorPrefixScope prefix_scope_;
413
  DocDbRowAssembler assembler_;
414
  ObsolescenceTracker obsolescence_tracker_;
415
  boost::optional<ScopedDocDbCollectionContext> collection_ = boost::none;
416
417
 private:
418
  friend class ScopedDocDbCollectionContext;
419
420
  DISALLOW_COPY_AND_ASSIGN(ScopedDocDbRowContext);
421
};
422
423
ScopedDocDbRowContext::ScopedDocDbRowContext(
424
      IntentAwareIterator* iter,
425
      DeadlineInfo* deadline_info,
426
      Slice key,
427
      SubDocument* assembly_target,
428
      ObsolescenceTracker obsolescence_tracker):
429
    iter_(iter),
430
    deadline_info_(deadline_info),
431
    key_(key),
432
    prefix_scope_(key_, iter_),
433
    assembler_(assembly_target, key_),
434
242M
    obsolescence_tracker_(obsolescence_tracker) {}
435
436
ScopedDocDbRowContext::ScopedDocDbRowContext(
437
      IntentAwareIterator* iter,
438
      DeadlineInfo* deadline_info,
439
      Slice key,
440
      DocDbRowAssembler* ancestor_assembler,
441
      ObsolescenceTracker obsolescence_tracker):
442
    iter_(iter),
443
    deadline_info_(deadline_info),
444
    key_(key),
445
    prefix_scope_(key_, iter_),
446
    assembler_(ancestor_assembler, key_),
447
6.20M
    obsolescence_tracker_(obsolescence_tracker) {}
448
449
22.9M
ScopedDocDbCollectionContext* ScopedDocDbRowContext::collection() {
450
23.0M
  if (collection_ == boost::none) {
451
23.0M
    iter_->SeekPastSubKey(key_);
452
23.0M
    collection_.emplace(this);
453
23.0M
  }
454
22.9M
  return &*collection_;
455
22.9M
}
456
457
248M
Status ScopedDocDbRowContext::CheckDeadline() {
458
249M
  if (deadline_info_ && deadline_info_->CheckAndSetDeadlinePassed()) {
459
0
    return STATUS(Expired, "Deadline for query passed.");
460
0
  }
461
248M
  return Status::OK();
462
248M
}
463
464
class ScopedDocDbRowContextWithData : public ScopedDocDbRowContext {
465
 public:
466
  ScopedDocDbRowContextWithData(
467
      std::unique_ptr<DocDbRowData> row,
468
      IntentAwareIterator* iter,
469
      DeadlineInfo* deadline_info,
470
      SubDocument* assembly_target,
471
      ObsolescenceTracker ancestor_obsolescence_tracker);
472
473
  ScopedDocDbRowContextWithData(
474
      std::unique_ptr<DocDbRowData> row,
475
      IntentAwareIterator* iter,
476
      DeadlineInfo* deadline_info,
477
      DocDbRowAssembler* ancestor_assembler,
478
      ObsolescenceTracker ancestor_obsolescence_tracker);
479
480
253M
  DocDbRowData* data() const { return data_.get(); }
481
482
  void SeekOutOfPrefix();
483
484
 private:
485
  std::unique_ptr<DocDbRowData> data_;
486
487
  DISALLOW_COPY_AND_ASSIGN(ScopedDocDbRowContextWithData);
488
};
489
490
ScopedDocDbRowContextWithData::ScopedDocDbRowContextWithData(
491
    std::unique_ptr<DocDbRowData> row,
492
    IntentAwareIterator* iter,
493
    DeadlineInfo* deadline_info,
494
    SubDocument* assembly_target,
495
    ObsolescenceTracker ancestor_obsolescence_tracker):
496
    ScopedDocDbRowContext(
497
        iter, deadline_info, row->key(), assembly_target,
498
        ancestor_obsolescence_tracker.Child(row->write_time(), row->value().ttl())),
499
242M
    data_(std::move(row)) {}
500
501
ScopedDocDbRowContextWithData::ScopedDocDbRowContextWithData(
502
    std::unique_ptr<DocDbRowData> row,
503
    IntentAwareIterator* iter,
504
    DeadlineInfo* deadline_info,
505
    DocDbRowAssembler* ancestor_assembler,
506
    ObsolescenceTracker ancestor_obsolescence_tracker):
507
    ScopedDocDbRowContext(
508
        iter, deadline_info, row->key(), ancestor_assembler,
509
        ancestor_obsolescence_tracker.Child(row->write_time(), row->value().ttl())),
510
6.20M
    data_(std::move(row)) {}
511
512
6.20M
void ScopedDocDbRowContextWithData::SeekOutOfPrefix() {
513
6.20M
  iter_->SeekOutOfSubDoc(data_->key());
514
6.20M
}
515
516
ScopedDocDbCollectionContext::ScopedDocDbCollectionContext(ScopedDocDbRowContext* parent):
517
23.0M
    parent_(parent) {}
518
519
933
Status ScopedDocDbCollectionContext::SetFirstChild(std::unique_ptr<DocDbRowData> first_row) {
520
933
  if (next_child_) {
521
0
    return STATUS(IllegalState, "Cannot set first_child if already set.");
522
0
  }
523
933
  if (current_child_) {
524
0
    return STATUS(IllegalState, "Cannot set first_child if a child has already been read.");
525
0
  }
526
933
  SetNextChild(std::move(first_row));
527
933
  next_child_ = current_child_.get();
528
933
  return Status::OK();
529
933
}
530
531
29.2M
Result<ScopedDocDbRowContextWithData*> ScopedDocDbCollectionContext::GetNextChild() {
532
29.2M
  if (next_child_) {
533
    // If there is a next_child_, then we've already stored a row which we read before. Serve that,
534
    // and reset it to resume normal operation next time.
535
933
    next_child_ = nullptr;
536
29.2M
  } else {
537
29.2M
    if (current_child_) {
538
      // current_child_ points to the row we served last, which we are now done with, so we should
539
      // seek out of the scope of it and reset its state before proceeding to read the next row.
540
      // Note -- we currently seek away from the previous row only when a new row is requested by
541
      // the caller. It might be better from a perf perspective to do this before instead.
542
6.20M
      current_child_->SeekOutOfPrefix();
543
544
      // Reset current child to eliminate the IntentAwareIteratorPrefixScope it was holding.
545
6.20M
      current_child_.reset();
546
6.20M
    }
547
29.2M
    if (parent_->iter_->valid()) {
548
6.20M
      SetNextChild(VERIFY_RESULT(DocDbRowData::CurrentRow(parent_->iter_)));
549
6.20M
    }
550
29.2M
  }
551
5.31k
  DCHECK(
552
5.31k
      !current_child_ ||
553
5.31k
      !parent_ ||
554
5.31k
      current_child_->data()->key().AsSlice().starts_with(parent_->key_))
555
5.31k
      << "Child key " << SubDocKey::DebugSliceToString(current_child_->data()->key().AsSlice())
556
5.31k
      << " does not include parent key " << SubDocKey::DebugSliceToString(parent_->key_)
557
5.31k
      << " as prefix.";
558
29.2M
  return current_child_.get();
559
29.2M
}
560
561
6.20M
void ScopedDocDbCollectionContext::SetNextChild(std::unique_ptr<DocDbRowData> child_row) {
562
6.20M
  current_child_ = std::make_unique<ScopedDocDbRowContextWithData>(
563
6.20M
      std::move(child_row), parent_->iter_, parent_->deadline_info_, parent_->mutable_assembler(),
564
6.20M
      parent_->obsolescence_tracker_);
565
6.20M
}
566
567
CHECKED_STATUS ProcessSubDocument(ScopedDocDbRowContextWithData* scope);
568
569
23.0M
Result<uint32_t> ProcessChildren(ScopedDocDbCollectionContext* collection) {
570
23.0M
  uint32_t num_children = 0;
571
23.0M
  if (collection) {
572
29.2M
    while (ScopedDocDbRowContextWithData* child = VERIFY_RESULT(collection->GetNextChild())) {
573
6.20M
      RETURN_NOT_OK(ProcessSubDocument(child));
574
6.20M
      if (VERIFY_RESULT(child->mutable_assembler()->HasStoredValue())) {
575
1.56k
        ++num_children;
576
1.56k
      }
577
6.20M
    }
578
23.0M
  }
579
23.0M
  return num_children;
580
23.0M
}
581
582
712
Status ProcessCollection(ScopedDocDbRowContextWithData* scope) {
583
  // Set this row to an empty collection since it is alive/valid before processing its children.
584
712
  RETURN_NOT_OK(scope->mutable_assembler()->SetEmptyCollection());
585
712
  RETURN_NOT_OK(ProcessChildren(scope->collection()));
586
712
  return Status::OK();
587
712
}
588
589
22.9M
Status MaybeReviveCollection(ScopedDocDbRowContextWithData* scope) {
590
22.9M
  auto num_children = VERIFY_RESULT(ProcessChildren(scope->collection()));
591
23.0M
  if (num_children == 0) {
592
23.0M
    return scope->mutable_assembler()->SetTombstone();
593
23.0M
  }
594
18.4E
  return Status::OK();
595
18.4E
}
596
597
248M
Status ProcessSubDocument(ScopedDocDbRowContextWithData* scope) {
598
248M
  RETURN_NOT_OK(scope->CheckDeadline());
599
600
248M
  auto data = scope->data();
601
248M
  auto assembler = scope->mutable_assembler();
602
248M
  auto obsolescence_tracker = scope->obsolescence_tracker();
603
604
248M
  if (data->IsTombstone() || obsolescence_tracker->IsObsolete(data->write_time())) {
605
32.3M
    if (data->IsPrimitiveValue()) {
606
18.4E
      VLOG(4) << "Discarding overwritten or expired primitive value";
607
9.36M
      return assembler->SetTombstone();
608
9.36M
    }
609
    // If the latest written value is a tombstone or the record is expired at a top level, only
610
    // surface a subdocument if it has a valid (unexpired, non-tombstoned) child which overwrites
611
    // this record. Note: these semantics are only relevant to CQL reads.
612
23.0M
    return MaybeReviveCollection(scope);
613
23.0M
  }
614
615
216M
  if (data->IsCollection()) {
616
712
    return ProcessCollection(scope);
617
712
  }
618
619
216M
  if (data->IsPrimitiveValue()) {
620
215M
    auto ttl_opt = obsolescence_tracker->GetTtlRemainingSeconds(data->write_time().hybrid_time());
621
215M
    if (ttl_opt) {
622
25.9M
      data->mutable_primitive_value()->SetTtl(*ttl_opt);
623
25.9M
    }
624
215M
    return assembler->SetPrimitiveValue(data);
625
215M
  }
626
627
931k
  return STATUS_FORMAT(
628
931k
      Corruption,
629
931k
      "Expected primitive value type, collection, or tobmstone. Got $0",
630
931k
      data->value().value_type());
631
931k
}
632
633
}  // namespace
634
635
SubDocumentReader::SubDocumentReader(
636
    const KeyBytes& target_subdocument_key,
637
    IntentAwareIterator* iter,
638
    DeadlineInfo* deadline_info,
639
    const ObsolescenceTracker& ancestor_obsolescence_tracker):
640
    target_subdocument_key_(target_subdocument_key), iter_(iter), deadline_info_(deadline_info),
641
256M
    ancestor_obsolescence_tracker_(ancestor_obsolescence_tracker) {}
642
643
256M
Status SubDocumentReader::Get(SubDocument* result) {
644
256M
  IntentAwareIteratorPrefixScope target_scope(target_subdocument_key_, iter_);
645
256M
  if (!iter_->valid()) {
646
14.6M
    *result = SubDocument(ValueType::kInvalid);
647
14.6M
    return Status::OK();
648
14.6M
  }
649
242M
  auto first_row = VERIFY_RESULT(DocDbRowData::CurrentRow(iter_));
650
242M
  auto current_key = first_row->key();
651
652
242M
  if (current_key == target_subdocument_key_) {
653
242M
    ScopedDocDbRowContextWithData context(
654
242M
        std::move(first_row), iter_, deadline_info_, result, ancestor_obsolescence_tracker_);
655
242M
    return ProcessSubDocument(&context);
656
242M
  }
657
  // If the currently-pointed-to key is not equal to our target, but we are still in a valid state,
658
  // then that key must have the target key as a prefix, meaning we are pointing to a child of our
659
  // target. We should therefore process the rows as if we're already in a collection, rooted at the
660
  // target key.
661
18.4E
  ScopedDocDbRowContext context(
662
18.4E
      iter_, deadline_info_, target_subdocument_key_, result, ancestor_obsolescence_tracker_);
663
18.4E
  ScopedDocDbCollectionContext collection(&context);
664
18.4E
  RETURN_NOT_OK(collection.SetFirstChild(std::move(first_row)));
665
18.4E
  auto num_children = VERIFY_RESULT(ProcessChildren(&collection));
666
18.4E
  if (num_children == 0) {
667
703
    *result = SubDocument(ValueType::kTombstone);
668
703
  }
669
18.4E
  return Status::OK();
670
18.4E
}
671
672
SubDocumentReaderBuilder::SubDocumentReaderBuilder(
673
    IntentAwareIterator* iter, DeadlineInfo* deadline_info)
674
6.01M
    : iter_(iter), deadline_info_(deadline_info) {}
675
676
Result<std::unique_ptr<SubDocumentReader>> SubDocumentReaderBuilder::Build(
677
257M
    const KeyBytes& sub_doc_key) {
678
257M
  return std::make_unique<SubDocumentReader>(
679
257M
      sub_doc_key, iter_, deadline_info_, parent_obsolescence_tracker_);
680
257M
}
681
682
Status SubDocumentReaderBuilder::InitObsolescenceInfo(
683
    const ObsolescenceTracker& table_obsolescence_tracker,
684
34.1M
    const Slice& root_doc_key, const Slice& target_subdocument_key) {
685
34.1M
  parent_obsolescence_tracker_ = table_obsolescence_tracker;
686
687
  // Look at ancestors to collect ttl/write-time metadata.
688
34.1M
  IntentAwareIteratorPrefixScope prefix_scope(root_doc_key, iter_);
689
34.1M
  Slice temp_key = target_subdocument_key;
690
34.1M
  Slice prev_iter_key = temp_key.Prefix(root_doc_key.size());
691
34.1M
  temp_key.remove_prefix(root_doc_key.size());
692
34.2M
  for (;;) {
693
    // for each iteration of this loop, we consume another piece of the subdoc key path
694
34.2M
    auto decode_result = VERIFY_RESULT(SubDocKey::DecodeSubkey(&temp_key));
695
34.2M
    if (!decode_result) {
696
      // Stop once temp_key has consumed all subdoc keys and UpdateWithParentWriteInfo has been
697
      // called with all but the last subdoc key
698
34.1M
      break;
699
34.1M
    }
700
56.7k
    RETURN_NOT_OK(UpdateWithParentWriteInfo(prev_iter_key));
701
56.7k
    prev_iter_key = Slice(prev_iter_key.data(), temp_key.data() - prev_iter_key.data());
702
56.7k
  }
703
34.1M
  DCHECK_EQ(prev_iter_key, target_subdocument_key);
704
34.1M
  return UpdateWithParentWriteInfo(target_subdocument_key);
705
34.1M
}
706
707
Status SubDocumentReaderBuilder::UpdateWithParentWriteInfo(
708
34.1M
    const Slice& parent_key_without_ht) {
709
34.1M
  Slice value;
710
34.1M
  DocHybridTime doc_ht = parent_obsolescence_tracker_.GetHighWriteTime();
711
34.1M
  RETURN_NOT_OK(iter_->FindLatestRecord(parent_key_without_ht, &doc_ht, &value));
712
713
34.1M
  if (!iter_->valid()) {
714
0
    return Status::OK();
715
0
  }
716
717
34.1M
  parent_obsolescence_tracker_ = parent_obsolescence_tracker_.Child(doc_ht);
718
34.1M
  return Status::OK();
719
34.1M
}
720
721
}  // namespace docdb
722
}  // namespace yb