YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_reader.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_reader.h"
15
16
#include <string>
17
#include <vector>
18
19
#include "yb/common/doc_hybrid_time.h"
20
#include "yb/common/hybrid_time.h"
21
#include "yb/common/transaction.h"
22
23
#include "yb/docdb/docdb_fwd.h"
24
#include "yb/docdb/shared_lock_manager_fwd.h"
25
#include "yb/docdb/doc_key.h"
26
#include "yb/docdb/doc_ttl_util.h"
27
#include "yb/docdb/docdb-internal.h"
28
#include "yb/docdb/docdb_rocksdb_util.h"
29
#include "yb/docdb/intent_aware_iterator.h"
30
#include "yb/docdb/subdoc_reader.h"
31
#include "yb/docdb/subdocument.h"
32
#include "yb/docdb/value.h"
33
#include "yb/docdb/value_type.h"
34
35
#include "yb/util/monotime.h"
36
#include "yb/util/result.h"
37
#include "yb/util/status.h"
38
39
using std::vector;
40
41
using yb::HybridTime;
42
43
namespace yb {
44
namespace docdb {
45
46
47
  // TODO(dtxn) scan through all involved transactions first to cache statuses in a batch,
48
  // so during building subdocument we don't need to request them one by one.
49
  // TODO(dtxn) we need to restart read with scan_ht = commit_ht if some transaction was committed
50
  // at time commit_ht within [scan_ht; read_request_time + max_clock_skew). Also we need
51
  // to wait until time scan_ht = commit_ht passed.
52
  // TODO(dtxn) for each scanned key (and its subkeys) we need to avoid *new* values committed at
53
  // ht <= scan_ht (or just ht < scan_ht?)
54
  // Question: what will break if we allow later commit at ht <= scan_ht ? Need to write down
55
  // detailed example.
56
57
// Reads a single subdocument and returns it as an optional.
// (Presumably a test-only entry point, judging by the TEST_ prefix -- confirm against callers.)
//
// Steps, in order:
//   1. Creates an intent-aware iterator over doc_db for sub_doc_key with
//      BloomFilterMode::USE_BLOOM_FILTER, passing query_id, txn_op_context, deadline and read_time.
//   2. Calls SeekToLastDocKey() on the iterator.
//   3. Wraps the iterator in a DocDBTableReader and calls UpdateTableTombstoneTime(sub_doc_key).
//   4. Calls DocDBTableReader::Get(sub_doc_key, projection, &result).
//
// Returns the filled SubDocument when Get() yields true and boost::none when it yields false.
// A non-OK status from UpdateTableTombstoneTime() or Get() is returned to the caller through
// RETURN_NOT_OK / VERIFY_RESULT.
//
// projection is forwarded to Get() unchanged; Get() checks it against nullptr, so it may be null.
Result<boost::optional<SubDocument>> TEST_GetSubDocument(
58
    const Slice& sub_doc_key,
59
    const DocDB& doc_db,
60
    const rocksdb::QueryId query_id,
61
    const TransactionOperationContext& txn_op_context,
62
    CoarseTimePoint deadline,
63
    const ReadHybridTime& read_time,
64
716k
    const std::vector<PrimitiveValue>* projection) {
65
716k
  auto iter = CreateIntentAwareIterator(
66
716k
      doc_db, BloomFilterMode::USE_BLOOM_FILTER, sub_doc_key, query_id,
67
716k
      txn_op_context, deadline, read_time);
68
716k
  DOCDB_DEBUG_LOG("GetSubDocument for key $0 @ $1", sub_doc_key.ToDebugHexString(),
69
716k
                  iter->read_time().ToString());
70
716k
  iter->SeekToLastDocKey();
71
716k
  // The reader borrows the raw iterator pointer; `iter` stays alive until this function returns.
  DocDBTableReader doc_reader(iter.get(), deadline);
72
716k
  RETURN_NOT_OK(doc_reader.UpdateTableTombstoneTime(sub_doc_key));
73
74
716k
  SubDocument result;
75
716k
  if (VERIFY_RESULT(doc_reader.Get(sub_doc_key, projection, &result))) {
76
652k
    return result;
77
652k
  }
78
64.3k
  // Get() reported false: nothing to return for this key.
  return boost::none;
79
716k
}
80
81
// Constructs a reader around an existing iterator.
//
// iter is stored as a raw pointer in iter_ (nothing in this file deletes it, so it appears to be
// borrowed -- confirm in the header). deadline is used to initialize deadline_info_.
//
// subdoc_reader_builder_ is initialized from the members iter_ and &deadline_info_, not from the
// constructor arguments. Because C++ initializes members in declaration order, this relies on
// iter_ and deadline_info_ being declared before subdoc_reader_builder_ in the class
// -- NOTE(review): confirm against doc_reader.h.
DocDBTableReader::DocDBTableReader(IntentAwareIterator* iter, CoarseTimePoint deadline)
82
    : iter_(iter),
83
      deadline_info_(deadline),
84
14.2M
      subdoc_reader_builder_(iter_, &deadline_info_) {}
85
86
7.56M
// Replaces table_obsolescence_tracker_ with a new ObsolescenceTracker built from:
//   - the iterator's read time,
//   - the high write time of the tracker being replaced (so that value is carried over), and
//   - an Expiration constructed from TableTTL(table_schema).
void DocDBTableReader::SetTableTtl(const Schema& table_schema) {
87
7.56M
  Expiration table_ttl(TableTTL(table_schema));
88
7.56M
  table_obsolescence_tracker_ = ObsolescenceTracker(
89
7.56M
      iter_->read_time(), table_obsolescence_tracker_.GetHighWriteTime(), table_ttl);
90
7.56M
}
91
92
14.2M
// Checks whether the table that root_doc_key belongs to has a table-level tombstone and, if so,
// folds it into table_obsolescence_tracker_.
//
// Only keys whose first byte equals ValueTypeAsChar::kColocationId are examined; for any other
// key the tracker is left unchanged and OK is returned.
//
// For a matching key: the doc key is decoded up to the id part (DocKeyPart::kUpToId), the
// iterator is positioned there with Seek(), and FindLatestRecord() fetches the latest record
// stored under that encoded prefix. If that record is a tombstone, the tracker is replaced by
// table_obsolescence_tracker_.Child(doc_ht, MonoDelta::kMax).
//
// Returns a non-OK status if decoding, FindLatestRecord() or Value::IsTombstoned() fails, and
// a Corruption status if a tombstone is found with DocHybridTime::kInvalid.
//
// Side effect: moves iter_ (via Seek) when the key carries the colocation-id marker.
Status DocDBTableReader::UpdateTableTombstoneTime(const Slice& root_doc_key) {
93
14.2M
  // NOTE(review): byte 0 is read with no emptiness check visible here -- confirm that callers
  // never pass an empty key.
  if (root_doc_key[0] == ValueTypeAsChar::kColocationId) {
94
    // Update table_tombstone_time based on what is written to RocksDB if it's not already set.
95
    // Otherwise, just accept its value.
96
    // TODO -- this is a bit of a hack to allow DocRowwiseIterator to pass along the table tombstone
97
    // time read at a previous invocation of this same code. If instead the DocRowwiseIterator owned
98
    // an instance of SubDocumentReaderBuilder, and this method call was hoisted up to that level,
99
    // passing around this table_tombstone_time would no longer be necessary.
100
173
    DocKey table_id;
101
173
    RETURN_NOT_OK(table_id.DecodeFrom(root_doc_key, DocKeyPart::kUpToId));
102
173
    iter_->Seek(table_id);
103
104
173
    Slice value;
105
173
    auto table_id_encoded = table_id.Encode();
106
173
    DocHybridTime doc_ht = DocHybridTime::kMin;
107
108
173
    // Outputs: doc_ht and value are filled in by FindLatestRecord().
    RETURN_NOT_OK(iter_->FindLatestRecord(table_id_encoded, &doc_ht, &value));
109
173
    if (VERIFY_RESULT(Value::IsTombstoned(value))) {
110
75
      SCHECK_NE(doc_ht, DocHybridTime::kInvalid, Corruption,
111
75
                "Invalid hybrid time for table tombstone");
112
75
      table_obsolescence_tracker_ = table_obsolescence_tracker_.Child(doc_ht, MonoDelta::kMax);
113
75
    }
114
173
  }
115
14.2M
  // NOTE(review): the second ';' below is a stray empty statement; it is harmless but could be
  // dropped.
  return Status::OK();;
116
0
}
117
118
83.5M
// Prepares the iterator and the subdocument reader builder for reading sub_doc_key.
//
// The length of the whole encoded doc key at the front of sub_doc_key is computed with
// DocKey::EncodedSize(..., DocKeyPart::kWholeDocKey); the first dockey_size bytes are then viewed
// as root_doc_key (a Slice over the same memory, no copy). The iterator is moved forward to that
// root key, and the builder's obsolescence info is initialized from the current table-level
// tracker together with both the root key and the full sub_doc_key.
//
// Returns a non-OK status if the key size cannot be determined or InitObsolescenceInfo() fails.
CHECKED_STATUS DocDBTableReader::InitForKey(const Slice& sub_doc_key) {
119
83.5M
  auto dockey_size =
120
83.5M
      VERIFY_RESULT(DocKey::EncodedSize(sub_doc_key, DocKeyPart::kWholeDocKey));
121
0
  const Slice root_doc_key(sub_doc_key.data(), dockey_size);
122
83.5M
  iter_->SeekForward(root_doc_key);
123
83.5M
  RETURN_NOT_OK(subdoc_reader_builder_.InitObsolescenceInfo(
124
83.5M
      table_obsolescence_tracker_, root_doc_key, sub_doc_key));
125
83.5M
  return Status::OK();
126
83.5M
}
127
128
// Reads the document at root_doc_key into *result and reports whether it exists.
//
// Parameters:
//   root_doc_key - key of the document to read. It is passed to InitForKey() as the
//                  sub-document key, and its raw bytes seed the working key buffer.
//   projection   - optional list of subkeys to read; may be nullptr.
//   result       - output document. With a projection, one child is set per projected subkey
//                  (even when that child turned out invalid or tombstoned).
//
// Behavior:
//   - With a projection: each subkey is appended to the key buffer, the iterator is moved
//     forward to it, and a reader built for that key fills a descendant that is stored in
//     *result. If at least one descendant is neither kInvalid nor kTombstone, the iterator is
//     moved out of the subdocument and true is returned.
//   - Without a projection, or when no projected subkey was found: the iterator is re-seeked to
//     the document key and the whole document is read into *result. The return value is
//     whether *result is neither kInvalid nor kTombstone.
//
// Returns a non-OK status if InitForKey(), building a reader, or a reader's Get() fails.
Result<bool> DocDBTableReader::Get(
129
83.6M
    const Slice& root_doc_key, const vector<PrimitiveValue>* projection, SubDocument* result) {
130
83.6M
  RETURN_NOT_OK(InitForKey(root_doc_key));
131
  // Seed key_bytes with the subdocument key. For each subkey in the projection, build subdocument
132
  // and reuse key_bytes while appending the subkey.
133
83.6M
  KeyBytes key_bytes;
134
  // Preallocate some extra space to avoid allocation for small subkeys.
135
83.6M
  key_bytes.Reserve(root_doc_key.size() + kMaxBytesPerEncodedHybridTime + 32);
136
83.6M
  key_bytes.AppendRawBytes(root_doc_key);
137
83.6M
  if (projection != nullptr) {
138
82.8M
    bool doc_found = false;
139
82.8M
    // Length of the bare document key, used to strip each appended subkey again.
    const size_t subdocument_key_size = key_bytes.size();
140
718M
    for (const PrimitiveValue& subkey : *projection) {
141
      // Append subkey to subdocument key. Reserve extra kMaxBytesPerEncodedHybridTime + 1 bytes in
142
      // key_bytes to avoid the internal buffer from getting reallocated and moved by SeekForward()
143
      // appending the hybrid time, thereby invalidating the buffer pointer saved by prefix_scope.
      // NOTE(review): no prefix_scope is visible in this function -- the sentence above may be
      // stale; confirm.
144
718M
      subkey.AppendToKey(&key_bytes);
145
718M
      key_bytes.Reserve(key_bytes.size() + kMaxBytesPerEncodedHybridTime + 1);
146
      // This seek is to initialize the iterator for BuildSubDocument call.
147
718M
      iter_->SeekForward(&key_bytes);
148
718M
      SubDocument descendant;
149
718M
      auto reader = VERIFY_RESULT(subdoc_reader_builder_.Build(key_bytes));
150
718M
      RETURN_NOT_OK(reader->Get(&descendant));
151
718M
      // A subkey counts as found when its value is neither invalid nor a tombstone.
      // NOTE(review): the "95.6M" fused onto kTombstone below appears to be a coverage region
      // count from the report extraction, not part of the identifier.
      doc_found = doc_found || (
152
96.1M
          descendant.value_type() != ValueType::kInvalid
153
96.1M
          && 
descendant.value_type() != ValueType::kTombstone95.6M
);
154
718M
      result->SetChild(subkey, std::move(descendant));
155
156
      // Restore subdocument key by truncating the appended subkey.
157
718M
      key_bytes.Truncate(subdocument_key_size);
158
718M
    }
159
82.8M
    if (doc_found) {
160
82.0M
      iter_->SeekOutOfSubDoc(root_doc_key);
161
82.0M
      return true;
162
82.0M
    }
163
82.8M
  }
164
165
  // If doc is not found, decide if some non-projection column exists.
166
  // Currently we read the whole doc here,
167
  // may be optimized by exiting on the first column in future.
168
  // TODO -- is resetting *result = SubDocument() needed here?
169
  // TODO -- Add some metrics to understand:
170
  // (a) how often we scan back
171
  // (b) how often it's useful
172
  // Also maybe in debug mode add some every-n logging of the rocksdb values for which it is
173
  // useful
174
1.52M
  // At this point key_bytes holds only the document key (subkeys were truncated off above).
  iter_->Seek(key_bytes);
175
1.52M
  auto reader = VERIFY_RESULT(subdoc_reader_builder_.Build(key_bytes));
176
1.52M
  RETURN_NOT_OK(reader->Get(result));
177
1.52M
  // NOTE(review): the "1.50M" fused onto kTombstone below appears to be a coverage region count
  // from the report extraction, not part of the identifier.
  return result->value_type() != ValueType::kInvalid
178
1.52M
      && 
result->value_type() != ValueType::kTombstone1.50M
;
179
1.52M
}
180
181
}  // namespace docdb
182
}  // namespace yb