YugabyteDB (2.13.0.0-b42, bfc6a6643e7399ac8a0e81d06a3ee6d6571b33ab)

Coverage Report

Created: 2022-03-09 17:30

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_reader.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_reader.h"
15
16
#include <string>
17
#include <vector>
18
19
#include "yb/common/doc_hybrid_time.h"
20
#include "yb/common/hybrid_time.h"
21
#include "yb/common/transaction.h"
22
23
#include "yb/docdb/docdb_fwd.h"
24
#include "yb/docdb/shared_lock_manager_fwd.h"
25
#include "yb/docdb/doc_key.h"
26
#include "yb/docdb/doc_ttl_util.h"
27
#include "yb/docdb/docdb-internal.h"
28
#include "yb/docdb/docdb_rocksdb_util.h"
29
#include "yb/docdb/intent_aware_iterator.h"
30
#include "yb/docdb/subdoc_reader.h"
31
#include "yb/docdb/subdocument.h"
32
#include "yb/docdb/value.h"
33
#include "yb/docdb/value_type.h"
34
35
#include "yb/util/monotime.h"
36
#include "yb/util/result.h"
37
#include "yb/util/status.h"
38
39
using std::vector;
40
41
using yb::HybridTime;
42
43
namespace yb {
44
namespace docdb {
45
46
47
  // TODO(dtxn) scan through all involved transactions first to cache statuses in a batch,
48
  // so during building subdocument we don't need to request them one by one.
49
  // TODO(dtxn) we need to restart read with scan_ht = commit_ht if some transaction was committed
50
  // at time commit_ht within [scan_ht; read_request_time + max_clock_skew). Also we need
51
  // to wait until time scan_ht = commit_ht passed.
52
  // TODO(dtxn) for each scanned key (and its subkeys) we need to avoid *new* values committed at
53
  // ht <= scan_ht (or just ht < scan_ht?)
54
  // Question: what will break if we allow later commit at ht <= scan_ht ? Need to write down
55
  // detailed example.
56
57
// Reads the subdocument stored under sub_doc_key and returns it, or boost::none when
// DocDBTableReader::Get() reports that nothing was found.
//
// An intent-aware iterator is created over doc_db (with the bloom filter enabled) for the given
// query, transaction context, deadline and read time, and a DocDBTableReader is layered on top of
// it. When projection is non-null it is forwarded to DocDBTableReader::Get() unchanged.
//
// Any non-OK status produced while updating the table tombstone time or while reading is returned
// to the caller.
// NOTE(review): the TEST_ prefix suggests this entry point is meant for tests only -- confirm.
Result<boost::optional<SubDocument>> TEST_GetSubDocument(
    const Slice& sub_doc_key,
    const DocDB& doc_db,
    const rocksdb::QueryId query_id,
    const TransactionOperationContext& txn_op_context,
    CoarseTimePoint deadline,
    const ReadHybridTime& read_time,
    const std::vector<PrimitiveValue>* projection) {
  auto intent_iter = CreateIntentAwareIterator(
      doc_db, BloomFilterMode::USE_BLOOM_FILTER, sub_doc_key, query_id,
      txn_op_context, deadline, read_time);
  DOCDB_DEBUG_LOG("GetSubDocument for key $0 @ $1", sub_doc_key.ToDebugHexString(),
                  intent_iter->read_time().ToString());
  intent_iter->SeekToLastDocKey();

  // The reader borrows the iterator; intent_iter keeps ownership for the rest of this function.
  DocDBTableReader table_reader(intent_iter.get(), deadline);
  RETURN_NOT_OK(table_reader.UpdateTableTombstoneTime(sub_doc_key));

  SubDocument sub_doc;
  const bool found = VERIFY_RESULT(table_reader.Get(sub_doc_key, projection, &sub_doc));
  if (!found) {
    return boost::none;
  }
  return sub_doc;
}
80
81
// Builds a reader on top of an existing iterator.
//
// iter is stored as a raw pointer and is also handed to the subdocument reader builder, together
// with the address of this object's deadline_info_ member (which is initialized from deadline).
// NOTE(review): the iterator does not appear to be owned by this object, so it presumably has to
// outlive the reader -- confirm against the class declaration.
DocDBTableReader::DocDBTableReader(IntentAwareIterator* iter, CoarseTimePoint deadline)
    : iter_(iter),
      deadline_info_(deadline),
      // Uses the constructor argument directly; it is the same pointer that iter_ now holds.
      subdoc_reader_builder_(iter, &deadline_info_) {}
85
86
3.75M
void DocDBTableReader::SetTableTtl(const Schema& table_schema) {
87
3.75M
  Expiration table_ttl(TableTTL(table_schema));
88
3.75M
  table_obsolescence_tracker_ = ObsolescenceTracker(
89
3.75M
      iter_->read_time(), table_obsolescence_tracker_.GetHighWriteTime(), table_ttl);
90
3.75M
}
91
92
6.02M
// Refreshes table_obsolescence_tracker_ from a table-level tombstone, when one applies.
//
// Only keys whose first byte is ValueTypeAsChar::kPgTableOid are inspected; for every other key,
// including an empty one, this is a no-op that returns OK. For an inspected key, the prefix of
// the key up to the table id is decoded, the latest record stored under that prefix is looked up
// through iter_, and if that record is a tombstone the tracker is replaced by a child tracker
// created from the tombstone's hybrid time.
//
// Returns a non-OK status if decoding the key, looking up the record, or decoding the record's
// value type fails, and Corruption if a tombstone is found with an invalid hybrid time.
Status DocDBTableReader::UpdateTableTombstoneTime(const Slice& root_doc_key) {
  // Check for an empty key before reading its first byte, so the index below is never out of
  // range.
  if (root_doc_key.empty() || root_doc_key[0] != ValueTypeAsChar::kPgTableOid) {
    return Status::OK();
  }

  // Update table_tombstone_time based on what is written to RocksDB if it's not already set.
  // Otherwise, just accept its value.
  // TODO -- this is a bit of a hack to allow DocRowwiseIterator to pass along the table tombstone
  // time read at a previous invocation of this same code. If instead the DocRowwiseIterator owned
  // an instance of SubDocumentReaderBuilder, and this method call was hoisted up to that level,
  // passing around this table_tombstone_time would no longer be necessary.
  DocKey table_id;
  RETURN_NOT_OK(table_id.DecodeFrom(root_doc_key, DocKeyPart::kUpToId));
  iter_->Seek(table_id);

  Slice value;
  auto table_id_encoded = table_id.Encode();
  DocHybridTime doc_ht = DocHybridTime::kMin;

  RETURN_NOT_OK(iter_->FindLatestRecord(table_id_encoded, &doc_ht, &value));
  ValueType value_type;
  RETURN_NOT_OK(Value::DecodePrimitiveValueType(value, &value_type));
  if (value_type == ValueType::kTombstone) {
    SCHECK_NE(doc_ht, DocHybridTime::kInvalid, Corruption,
              "Invalid hybrid time for table tombstone");
    table_obsolescence_tracker_ = table_obsolescence_tracker_.Child(doc_ht, MonoDelta::kMax);
  }
  return Status::OK();
}
119
120
34.1M
// Positions the iterator for sub_doc_key and prepares obsolescence info for reading it.
//
// The leading part of sub_doc_key that makes up the whole doc key is measured with
// DocKey::EncodedSize, the iterator is moved forward to that doc key, and the subdocument reader
// builder is initialized with the table-level obsolescence tracker plus both keys.
//
// Returns a non-OK status if the doc key size cannot be determined or if initializing the
// obsolescence info fails.
CHECKED_STATUS DocDBTableReader::InitForKey(const Slice& sub_doc_key) {
  const auto doc_key_length =
      VERIFY_RESULT(DocKey::EncodedSize(sub_doc_key, DocKeyPart::kWholeDocKey));

  // A view over the first doc_key_length bytes of sub_doc_key; no bytes are copied.
  const Slice doc_key_prefix(sub_doc_key.data(), doc_key_length);
  iter_->SeekForward(doc_key_prefix);

  RETURN_NOT_OK(subdoc_reader_builder_.InitObsolescenceInfo(
      table_obsolescence_tracker_, doc_key_prefix, sub_doc_key));
  return Status::OK();
}
129
130
// Reads the document stored under root_doc_key into *result.
//
// When projection is non-null, each listed subkey is read individually and stored as a child of
// *result. If at least one of them yields a value that is neither kInvalid nor kTombstone, the
// iterator is moved out of the document and true is returned. Otherwise (or when projection is
// null) the whole document is read into *result, and the return value says whether the value read
// is neither kInvalid nor kTombstone.
//
// Any non-OK status from initialization, from building a subdocument reader, or from reading is
// returned to the caller.
Result<bool> DocDBTableReader::Get(
    const Slice& root_doc_key, const vector<PrimitiveValue>* projection, SubDocument* result) {
  RETURN_NOT_OK(InitForKey(root_doc_key));

  // A subdocument counts as present only when its value type is neither kInvalid nor kTombstone.
  const auto holds_live_value = [](const SubDocument& doc) {
    const auto type = doc.value_type();
    return !(type == ValueType::kInvalid || type == ValueType::kTombstone);
  };

  // Seed the working key with the subdocument key. For each subkey in the projection, the subkey
  // is appended, the subdocument is built, and the working key is cut back for reuse.
  KeyBytes working_key;
  // Preallocate some extra space to avoid allocation for small subkeys.
  working_key.Reserve(root_doc_key.size() + kMaxBytesPerEncodedHybridTime + 32);
  working_key.AppendRawBytes(root_doc_key);

  if (projection != nullptr) {
    const size_t base_key_size = working_key.size();
    bool any_column_found = false;
    for (const PrimitiveValue& column : *projection) {
      // Append the subkey to the subdocument key. Reserve extra kMaxBytesPerEncodedHybridTime + 1
      // bytes to avoid the internal buffer from getting reallocated and moved by SeekForward()
      // appending the hybrid time, thereby invalidating the buffer pointer saved by prefix_scope.
      column.AppendToKey(&working_key);
      working_key.Reserve(working_key.size() + kMaxBytesPerEncodedHybridTime + 1);
      // This seek is to initialize the iterator for BuildSubDocument call.
      iter_->SeekForward(&working_key);

      SubDocument column_doc;
      auto column_reader = VERIFY_RESULT(subdoc_reader_builder_.Build(working_key));
      RETURN_NOT_OK(column_reader->Get(&column_doc));

      // Must be evaluated before column_doc is moved into *result below.
      if (!any_column_found && holds_live_value(column_doc)) {
        any_column_found = true;
      }
      result->SetChild(column, std::move(column_doc));

      // Restore the subdocument key by truncating the appended subkey.
      working_key.Truncate(base_key_size);
    }
    if (any_column_found) {
      iter_->SeekOutOfSubDoc(root_doc_key);
      return true;
    }
  }

  // If doc is not found, decide if some non-projection column exists.
  // Currently we read the whole doc here,
  // may be optimized by exiting on the first column in future.
  // TODO -- is resetting *result = SubDocument() needed here?
  // TODO -- Add some metrics to understand:
  // (a) how often we scan back
  // (b) how often it's useful
  // Also maybe in debug mode add some every-n logging of the rocksdb values for which it is
  // useful
  iter_->Seek(working_key);
  auto whole_doc_reader = VERIFY_RESULT(subdoc_reader_builder_.Build(working_key));
  RETURN_NOT_OK(whole_doc_reader->Get(result));
  return holds_live_value(*result);
}
182
183
}  // namespace docdb
184
}  // namespace yb