/Users/deen/code/yugabyte-db/src/yb/common/doc_hybrid_time.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/common/doc_hybrid_time.h" |
15 | | |
16 | | #include "yb/gutil/casts.h" |
17 | | |
18 | | #include "yb/util/bytes_formatter.h" |
19 | | #include "yb/util/cast.h" |
20 | | #include "yb/util/debug-util.h" |
21 | | #include "yb/util/fast_varint.h" |
22 | | #include "yb/util/result.h" |
23 | | #include "yb/util/status.h" |
24 | | #include "yb/util/status_format.h" |
25 | | #include "yb/util/varint.h" |
26 | | |
27 | | using yb::util::VarInt; |
28 | | using yb::util::FastEncodeDescendingSignedVarInt; |
29 | | using yb::util::FastDecodeDescendingSignedVarIntUnsafe; |
30 | | using yb::FormatBytesAsStr; |
31 | | using yb::FormatSliceAsStr; |
32 | | using yb::QuotesType; |
33 | | |
34 | | using strings::Substitute; |
35 | | using strings::SubstituteAndAppend; |
36 | | |
37 | | namespace yb { |
38 | | |
39 | | // It does not really matter what write id we use here. We determine DocHybridTime validity based |
40 | | // on its HybridTime component's validity. However, given that HybridTime::kInvalid is close to the |
41 | | // highest possible value of the underlying in-memory representation of HybridTime, we use |
42 | | // kMaxWriteId for the write id portion of this constant for consistency. |
43 | | const DocHybridTime DocHybridTime::kInvalid = DocHybridTime(HybridTime::kInvalid, kMaxWriteId); |
44 | | |
45 | | const DocHybridTime DocHybridTime::kMin = DocHybridTime(HybridTime::kMin, 0); |
46 | | const DocHybridTime DocHybridTime::kMax = DocHybridTime(HybridTime::kMax, kMaxWriteId); |
47 | | |
48 | | constexpr int kNumBitsForHybridTimeSize = 5; |
49 | | constexpr int kHybridTimeSizeMask = (1 << kNumBitsForHybridTimeSize) - 1; |
50 | | |
51 | 134M | char* DocHybridTime::EncodedInDocDbFormat(char* dest) const { |
52 | | // We compute the difference between the physical time as microseconds since the UNIX epoch and |
53 | | // the "YugaByte epoch" as a signed operation, so that we can still represent hybrid times earlier |
54 | | // than the YugaByte epoch. |
55 | 134M | char* out = dest; |
56 | | |
57 | | // Hybrid time generation number. This is currently always 0. In the future this can be used to |
58 | | // reset hybrid time throughout the entire cluster back to a lower value if it gets stuck at some |
59 | | // far-in-the-future point due to a temporary clock issue. |
60 | 134M | out = FastEncodeDescendingSignedVarInt(0, out); |
61 | | |
62 | 134M | out = FastEncodeDescendingSignedVarInt( |
63 | 134M | static_cast<int64_t>(hybrid_time_.GetPhysicalValueMicros() - kYugaByteMicrosecondEpoch), |
64 | 134M | out); |
65 | 134M | out = FastEncodeDescendingSignedVarInt(hybrid_time_.GetLogicalValue(), out); |
66 | | |
67 | | // We add one to write_id to ensure the negated value used in the encoding is always negative |
68 | | // (i.e. is never zero). Then we shift it left by kNumBitsForHybridTimeSize bits so that we |
69 | | // always have kNumBitsForHybridTimeSize lowest bits to store the encoded size. This way we can |
70 | | // also decode the VarInt, negate it, obtain an always-positive value, and look at the lowest |
71 | | // kNumBitsForHybridTimeSize bits to get the encoded size of the entire DocHybridTime. |
72 | | // |
73 | | // It is important that we cast to int64_t before adding 1, otherwise WriteId might overflow. |
74 | | // (As of 04/17/2017 we're using a 32-bit unsigned int for WriteId). |
75 | 134M | out = FastEncodeDescendingSignedVarInt( |
76 | 134M | (static_cast<int64_t>(write_id_) + 1) << kNumBitsForHybridTimeSize, out); |
77 | | |
78 | | // Store the encoded DocHybridTime size in the last kNumBitsForHybridTimeSize bits so we |
79 | | // can decode the hybrid time from the end of an encoded DocKey efficiently. |
80 | 134M | const uint8_t last_byte = static_cast<uint8_t>(out[-1]); |
81 | | |
82 | 134M | const uint8_t encoded_size = static_cast<uint8_t>(out - dest); |
83 | 134M | DCHECK_LE(1, encoded_size); |
84 | 134M | DCHECK_LE(encoded_size, kMaxBytesPerEncodedHybridTime); |
85 | 134M | out[-1] = static_cast<char>((last_byte & ~kHybridTimeSizeMask) | encoded_size); |
86 | 134M | return out; |
87 | 134M | } |
88 | | |
89 | 459M | Status DocHybridTime::DecodeFrom(Slice *slice) { |
90 | 459M | const size_t previous_size = slice->size(); |
91 | 459M | { |
92 | | // Currently we just ignore the generation number as it should always be 0. |
93 | 459M | RETURN_NOT_OK(FastDecodeDescendingSignedVarIntUnsafe(slice)); |
94 | 459M | int64_t decoded_micros = |
95 | 459M | kYugaByteMicrosecondEpoch + VERIFY_RESULT(FastDecodeDescendingSignedVarIntUnsafe(slice)); |
96 | | |
97 | 459M | auto decoded_logical = narrow_cast<LogicalTimeComponent>( |
98 | 459M | VERIFY_RESULT(FastDecodeDescendingSignedVarIntUnsafe(slice))); |
99 | | |
100 | 459M | hybrid_time_ = HybridTime::FromMicrosecondsAndLogicalValue(decoded_micros, decoded_logical); |
101 | 459M | } |
102 | | |
103 | 459M | const auto ptr_before_decoding_write_id = slice->data(); |
104 | 459M | int64_t decoded_shifted_write_id = VERIFY_RESULT(FastDecodeDescendingSignedVarIntUnsafe(slice)); |
105 | | |
106 | 459M | if (decoded_shifted_write_id < 0) { |
107 | 0 | return STATUS_SUBSTITUTE( |
108 | 0 | Corruption, |
109 | 0 | "Negative decoded_shifted_write_id: $0. Was trying to decode from: $1", |
110 | 0 | decoded_shifted_write_id, |
111 | 0 | Slice(ptr_before_decoding_write_id, |
112 | 0 | slice->data() + slice->size() - ptr_before_decoding_write_id).ToDebugHexString()); |
113 | 0 | } |
114 | 459M | write_id_ = narrow_cast<IntraTxnWriteId>( |
115 | 459M | (decoded_shifted_write_id >> kNumBitsForHybridTimeSize) - 1); |
116 | | |
117 | 459M | const size_t bytes_decoded = previous_size - slice->size(); |
118 | 459M | const size_t size_at_the_end = (*(slice->data() - 1)) & kHybridTimeSizeMask; |
119 | 459M | if (size_at_the_end != bytes_decoded) { |
120 | 0 | return STATUS_SUBSTITUTE( |
121 | 0 | Corruption, |
122 | 0 | "Wrong encoded DocHybridTime size at the end: $0. Expected: $1. " |
123 | 0 | "Encoded timestamp: $2.", |
124 | 0 | size_at_the_end, |
125 | 0 | bytes_decoded, |
126 | 0 | Slice(to_char_ptr(slice->data() - bytes_decoded), bytes_decoded).ToDebugHexString()); |
127 | 0 | } |
128 | | |
129 | 459M | return Status::OK(); |
130 | 459M | } |
131 | | |
132 | 335M | Status DocHybridTime::FullyDecodeFrom(const Slice& encoded) { |
133 | 335M | Slice s = encoded; |
134 | 335M | RETURN_NOT_OK(DecodeFrom(&s)); |
135 | 335M | if (!s.empty()) { |
136 | 0 | return STATUS_SUBSTITUTE( |
137 | 0 | Corruption, |
138 | 0 | "$0 extra bytes left when decoding a DocHybridTime $1", |
139 | 0 | s.size(), FormatSliceAsStr(encoded, QuotesType::kDoubleQuotes, /* max_length = */ 32)); |
140 | 0 | } |
141 | 335M | return Status::OK(); |
142 | 335M | } |
143 | | |
144 | 305M | Result<DocHybridTime> DocHybridTime::DecodeFromEnd(Slice* encoded_key_with_ht_at_end) { |
145 | 305M | size_t encoded_size = 0; |
146 | 305M | RETURN_NOT_OK(CheckAndGetEncodedSize(*encoded_key_with_ht_at_end, &encoded_size)); |
147 | 305M | Slice s(encoded_key_with_ht_at_end->end() - encoded_size, encoded_size); |
148 | 305M | DocHybridTime result; |
149 | 305M | RETURN_NOT_OK(result.FullyDecodeFrom(s)); |
150 | 305M | encoded_key_with_ht_at_end->remove_suffix(encoded_size); |
151 | 305M | return result; |
152 | 305M | } |
153 | | |
154 | 10.8M | Status DocHybridTime::DecodeFromEnd(Slice encoded_key_with_ht_at_end) { |
155 | 10.8M | *this = VERIFY_RESULT(DecodeFromEnd(&encoded_key_with_ht_at_end)); |
156 | 10.8M | return Status::OK(); |
157 | 10.8M | } |
158 | | |
159 | 180k | string DocHybridTime::ToString() const { |
160 | 180k | if (write_id_ == 0) { |
161 | 87 | return hybrid_time_.ToDebugString(); |
162 | 87 | } |
163 | | |
164 | 179k | string s = hybrid_time_.ToDebugString(); |
165 | 179k | if (s[s.length() - 1] == '}') { |
166 | 179k | s.resize(s.length() - 2); |
167 | 0 | } else { |
168 | 0 | s.insert(2, "{ "); |
169 | 0 | } |
170 | 179k | if (write_id_ == kMaxWriteId) { |
171 | 3 | s += " w: Max }"; |
172 | 179k | } else { |
173 | 179k | SubstituteAndAppend(&s, " w: $0 }", write_id_); |
174 | 179k | } |
175 | 179k | return s; |
176 | 179k | } |
177 | | |
178 | 783M | Status DocHybridTime::CheckEncodedSize(size_t encoded_ht_size, size_t encoded_key_size) { |
179 | 783M | if (encoded_key_size == 0) { |
180 | 0 | return STATUS(RuntimeError, |
181 | 0 | "Got an empty encoded key when looking for a DocHybridTime at the end."); |
182 | 0 | } |
183 | | |
184 | 783M | SCHECK_GE(encoded_ht_size, |
185 | 783M | 1U, |
186 | 783M | Corruption, |
187 | 783M | Substitute("Encoded HybridTime must be at least one byte, found $0.", encoded_ht_size)); |
188 | | |
189 | 783M | SCHECK_LE(encoded_ht_size, |
190 | 783M | kMaxBytesPerEncodedHybridTime, |
191 | 783M | Corruption, |
192 | 783M | Substitute("Encoded HybridTime can't be more than $0 bytes, found $1.", |
193 | 783M | kMaxBytesPerEncodedHybridTime, encoded_ht_size)); |
194 | | |
195 | | |
196 | 783M | SCHECK_LT(encoded_ht_size, |
197 | 783M | encoded_key_size, |
198 | 783M | Corruption, |
199 | 783M | Substitute( |
200 | 783M | "Trying to extract an encoded HybridTime with a size of $0 bytes from " |
201 | 783M | "an encoded key of length $1 bytes (must be strictly less -- one byte is " |
202 | 783M | "used for value type).", |
203 | 783M | encoded_ht_size, encoded_key_size)); |
204 | | |
205 | 783M | return Status::OK(); |
206 | 783M | } |
207 | | |
208 | 783M | int DocHybridTime::GetEncodedSize(const Slice& encoded_key) { |
209 | | // We are not checking for errors here -- see CheckEncodedSize for that. We return something |
210 | | // even for a zero-size slice. |
211 | 0 | return encoded_key.empty() ? 0 |
212 | 783M | : static_cast<uint8_t>(encoded_key.end()[-1]) & kHybridTimeSizeMask; |
213 | 783M | } |
214 | | |
215 | | CHECKED_STATUS DocHybridTime::CheckAndGetEncodedSize( |
216 | 783M | const Slice& encoded_key, size_t* encoded_ht_size) { |
217 | 783M | *encoded_ht_size = GetEncodedSize(encoded_key); |
218 | 783M | return CheckEncodedSize(*encoded_ht_size, encoded_key.size()); |
219 | 783M | } |
220 | | |
221 | 1 | std::string DocHybridTime::DebugSliceToString(Slice input) { |
222 | 1 | DocHybridTime temp; |
223 | 1 | auto status = temp.FullyDecodeFrom(input); |
224 | 1 | if (!status.ok()) { |
225 | 0 | LOG(WARNING) << "Failed to decode DocHybridTime: " << status; |
226 | 0 | return input.ToDebugHexString(); |
227 | 0 | } |
228 | 1 | return temp.ToString(); |
229 | 1 | } |
230 | | |
231 | | } // namespace yb |