/Users/deen/code/yugabyte-db/src/yb/docdb/doc_reader.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/docdb/doc_reader.h" |
15 | | |
16 | | #include <string> |
17 | | #include <vector> |
18 | | |
19 | | #include "yb/common/doc_hybrid_time.h" |
20 | | #include "yb/common/hybrid_time.h" |
21 | | #include "yb/common/transaction.h" |
22 | | |
23 | | #include "yb/docdb/docdb_fwd.h" |
24 | | #include "yb/docdb/shared_lock_manager_fwd.h" |
25 | | #include "yb/docdb/doc_key.h" |
26 | | #include "yb/docdb/doc_ttl_util.h" |
27 | | #include "yb/docdb/docdb-internal.h" |
28 | | #include "yb/docdb/docdb_rocksdb_util.h" |
29 | | #include "yb/docdb/intent_aware_iterator.h" |
30 | | #include "yb/docdb/subdoc_reader.h" |
31 | | #include "yb/docdb/subdocument.h" |
32 | | #include "yb/docdb/value.h" |
33 | | #include "yb/docdb/value_type.h" |
34 | | |
35 | | #include "yb/util/monotime.h" |
36 | | #include "yb/util/result.h" |
37 | | #include "yb/util/status.h" |
38 | | |
39 | | using std::vector; |
40 | | |
41 | | using yb::HybridTime; |
42 | | |
43 | | namespace yb { |
44 | | namespace docdb { |
45 | | |
46 | | |
47 | | // TODO(dtxn) scan through all involved transactions first to cache statuses in a batch, |
48 | | // so during building subdocument we don't need to request them one by one. |
49 | | // TODO(dtxn) we need to restart read with scan_ht = commit_ht if some transaction was committed |
50 | | // at time commit_ht within [scan_ht; read_request_time + max_clock_skew). Also we need |
51 | | // to wait until time scan_ht = commit_ht has passed. |
52 | | // TODO(dtxn) for each scanned key (and its subkeys) we need to avoid *new* values committed at |
53 | | // ht <= scan_ht (or just ht < scan_ht?) |
54 | | // Question: what will break if we allow later commit at ht <= scan_ht ? Need to write down |
55 | | // detailed example. |
56 | | |
57 | | Result<boost::optional<SubDocument>> TEST_GetSubDocument( |
58 | | const Slice& sub_doc_key, |
59 | | const DocDB& doc_db, |
60 | | const rocksdb::QueryId query_id, |
61 | | const TransactionOperationContext& txn_op_context, |
62 | | CoarseTimePoint deadline, |
63 | | const ReadHybridTime& read_time, |
64 | 716k | const std::vector<PrimitiveValue>* projection) { |
65 | 716k | auto iter = CreateIntentAwareIterator( |
66 | 716k | doc_db, BloomFilterMode::USE_BLOOM_FILTER, sub_doc_key, query_id, |
67 | 716k | txn_op_context, deadline, read_time); |
68 | 716k | DOCDB_DEBUG_LOG("GetSubDocument for key $0 @ $1", sub_doc_key.ToDebugHexString(), |
69 | 716k | iter->read_time().ToString()); |
70 | 716k | iter->SeekToLastDocKey(); |
71 | 716k | DocDBTableReader doc_reader(iter.get(), deadline); |
72 | 716k | RETURN_NOT_OK(doc_reader.UpdateTableTombstoneTime(sub_doc_key)); |
73 | | |
74 | 716k | SubDocument result; |
75 | 716k | if (VERIFY_RESULT(doc_reader.Get(sub_doc_key, projection, &result))) { |
76 | 652k | return result; |
77 | 652k | } |
78 | 64.3k | return boost::none; |
79 | 716k | } |
80 | | |
81 | | DocDBTableReader::DocDBTableReader(IntentAwareIterator* iter, CoarseTimePoint deadline) |
82 | | : iter_(iter), |
83 | | deadline_info_(deadline), |
84 | 14.2M | subdoc_reader_builder_(iter_, &deadline_info_) {} |
85 | | |
86 | 7.56M | void DocDBTableReader::SetTableTtl(const Schema& table_schema) { |
87 | 7.56M | Expiration table_ttl(TableTTL(table_schema)); |
88 | 7.56M | table_obsolescence_tracker_ = ObsolescenceTracker( |
89 | 7.56M | iter_->read_time(), table_obsolescence_tracker_.GetHighWriteTime(), table_ttl); |
90 | 7.56M | } |
91 | | |
92 | 14.2M | Status DocDBTableReader::UpdateTableTombstoneTime(const Slice& root_doc_key) { |
93 | 14.2M | if (root_doc_key[0] == ValueTypeAsChar::kColocationId) { |
94 | | // Update table_tombstone_time based on what is written to RocksDB if its not already set. |
95 | | // Otherwise, just accept its value. |
96 | | // TODO -- this is a bit of a hack to allow DocRowwiseIterator to pass along the table tombstone |
97 | | // time read at a previous invocation of this same code. If instead the DocRowwiseIterator owned |
98 | | // an instance of SubDocumentReaderBuilder, and this method call was hoisted up to that level, |
99 | | // passing around this table_tombstone_time would no longer be necessary. |
100 | 173 | DocKey table_id; |
101 | 173 | RETURN_NOT_OK(table_id.DecodeFrom(root_doc_key, DocKeyPart::kUpToId)); |
102 | 173 | iter_->Seek(table_id); |
103 | | |
104 | 173 | Slice value; |
105 | 173 | auto table_id_encoded = table_id.Encode(); |
106 | 173 | DocHybridTime doc_ht = DocHybridTime::kMin; |
107 | | |
108 | 173 | RETURN_NOT_OK(iter_->FindLatestRecord(table_id_encoded, &doc_ht, &value)); |
109 | 173 | if (VERIFY_RESULT(Value::IsTombstoned(value))) { |
110 | 75 | SCHECK_NE(doc_ht, DocHybridTime::kInvalid, Corruption, |
111 | 75 | "Invalid hybrid time for table tombstone"); |
112 | 75 | table_obsolescence_tracker_ = table_obsolescence_tracker_.Child(doc_ht, MonoDelta::kMax); |
113 | 75 | } |
114 | 173 | } |
115 | 14.2M | return Status::OK();; |
116 | 0 | } |
117 | | |
118 | 83.5M | CHECKED_STATUS DocDBTableReader::InitForKey(const Slice& sub_doc_key) { |
119 | 83.5M | auto dockey_size = |
120 | 83.5M | VERIFY_RESULT(DocKey::EncodedSize(sub_doc_key, DocKeyPart::kWholeDocKey)); |
121 | 0 | const Slice root_doc_key(sub_doc_key.data(), dockey_size); |
122 | 83.5M | iter_->SeekForward(root_doc_key); |
123 | 83.5M | RETURN_NOT_OK(subdoc_reader_builder_.InitObsolescenceInfo( |
124 | 83.5M | table_obsolescence_tracker_, root_doc_key, sub_doc_key)); |
125 | 83.5M | return Status::OK(); |
126 | 83.5M | } |
127 | | |
128 | | Result<bool> DocDBTableReader::Get( |
129 | 83.6M | const Slice& root_doc_key, const vector<PrimitiveValue>* projection, SubDocument* result) { |
130 | 83.6M | RETURN_NOT_OK(InitForKey(root_doc_key)); |
131 | | // Seed key_bytes with the subdocument key. For each subkey in the projection, build subdocument |
132 | | // and reuse key_bytes while appending the subkey. |
133 | 83.6M | KeyBytes key_bytes; |
134 | | // Preallocate some extra space to avoid allocation for small subkeys. |
135 | 83.6M | key_bytes.Reserve(root_doc_key.size() + kMaxBytesPerEncodedHybridTime + 32); |
136 | 83.6M | key_bytes.AppendRawBytes(root_doc_key); |
137 | 83.6M | if (projection != nullptr) { |
138 | 82.8M | bool doc_found = false; |
139 | 82.8M | const size_t subdocument_key_size = key_bytes.size(); |
140 | 718M | for (const PrimitiveValue& subkey : *projection) { |
141 | | // Append subkey to subdocument key. Reserve extra kMaxBytesPerEncodedHybridTime + 1 bytes in |
142 | | // key_bytes to avoid the internal buffer from getting reallocated and moved by SeekForward() |
143 | | // appending the hybrid time, thereby invalidating the buffer pointer saved by prefix_scope. |
144 | 718M | subkey.AppendToKey(&key_bytes); |
145 | 718M | key_bytes.Reserve(key_bytes.size() + kMaxBytesPerEncodedHybridTime + 1); |
146 | | // This seek is to initialize the iterator for BuildSubDocument call. |
147 | 718M | iter_->SeekForward(&key_bytes); |
148 | 718M | SubDocument descendant; |
149 | 718M | auto reader = VERIFY_RESULT(subdoc_reader_builder_.Build(key_bytes)); |
150 | 718M | RETURN_NOT_OK(reader->Get(&descendant)); |
151 | 718M | doc_found = doc_found || ( |
152 | 96.1M | descendant.value_type() != ValueType::kInvalid |
153 | 96.1M | && descendant.value_type() != ValueType::kTombstone95.6M ); |
154 | 718M | result->SetChild(subkey, std::move(descendant)); |
155 | | |
156 | | // Restore subdocument key by truncating the appended subkey. |
157 | 718M | key_bytes.Truncate(subdocument_key_size); |
158 | 718M | } |
159 | 82.8M | if (doc_found) { |
160 | 82.0M | iter_->SeekOutOfSubDoc(root_doc_key); |
161 | 82.0M | return true; |
162 | 82.0M | } |
163 | 82.8M | } |
164 | | |
165 | | // If doc is not found, decide if some non-projection column exists. |
166 | | // Currently we read the whole doc here, |
167 | | // may be optimized by exiting on the first column in future. |
168 | | // TODO -- is resetting *result = SubDocument() needed here? |
169 | | // TODO -- Add some metrics to understand: |
170 | | // (a) how often we scan back |
171 | | // (b) how often it's useful |
172 | | // Also maybe in debug mode add some every-n logging of the rocksdb values for which it is |
173 | | // useful |
174 | 1.52M | iter_->Seek(key_bytes); |
175 | 1.52M | auto reader = VERIFY_RESULT(subdoc_reader_builder_.Build(key_bytes)); |
176 | 1.52M | RETURN_NOT_OK(reader->Get(result)); |
177 | 1.52M | return result->value_type() != ValueType::kInvalid |
178 | 1.52M | && result->value_type() != ValueType::kTombstone1.50M ; |
179 | 1.52M | } |
180 | | |
181 | | } // namespace docdb |
182 | | } // namespace yb |