/Users/deen/code/yugabyte-db/src/yb/docdb/doc_kv_util.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/docdb/doc_kv_util.h" |
15 | | |
16 | | #include "yb/docdb/docdb_fwd.h" |
17 | | #include "yb/docdb/docdb.h" |
18 | | #include "yb/docdb/value_type.h" |
19 | | |
20 | | #include "yb/util/bytes_formatter.h" |
21 | | #include "yb/util/result.h" |
22 | | #include "yb/util/status_format.h" |
23 | | |
24 | | using std::string; |
25 | | |
26 | | using strings::Substitute; |
27 | | using yb::HybridTime; |
28 | | using yb::FormatBytesAsStr; |
29 | | |
30 | | namespace yb { |
31 | | namespace docdb { |
32 | | |
33 | 0 | bool KeyBelongsToDocKeyInTest(const rocksdb::Slice &key, const string &encoded_doc_key) { |
34 | 0 | if (key.starts_with(encoded_doc_key)) { |
35 | 0 | const auto encoded_doc_key_size = encoded_doc_key.size(); |
36 | 0 | const char* key_data = key.cdata(); |
37 | 0 | return key.size() >= encoded_doc_key_size + 2 && |
38 | 0 | key_data[encoded_doc_key_size] == '\0' && |
39 | 0 | key_data[encoded_doc_key_size + 1] == '\0'; |
40 | 0 | } else { |
41 | 0 | return false; |
42 | 0 | } |
43 | 0 | } |
44 | | |
45 | 54.3M | Status ConsumeHybridTimeFromKey(rocksdb::Slice* slice, DocHybridTime* hybrid_time) { |
46 | 54.3M | return hybrid_time->DecodeFrom(slice); |
47 | 54.3M | } |
48 | | |
49 | | Status DecodeHybridTimeFromEndOfKey( |
50 | | const rocksdb::Slice &key, |
51 | 563k | DocHybridTime *dest) { |
52 | 563k | return dest->DecodeFromEnd(key); |
53 | 563k | } |
54 | | |
55 | | // Given a DocDB key stored in RocksDB, validate the DocHybridTime size stored as the |
56 | | // last few bits of the final byte of the key, and ensure that the ValueType byte preceding that |
57 | | // encoded DocHybridTime is ValueType::kHybridTime. |
58 | 34.2M | Status CheckHybridTimeSizeAndValueType(const rocksdb::Slice& key, size_t* ht_byte_size_dest) { |
59 | 34.2M | RETURN_NOT_OK( |
60 | 34.2M | DocHybridTime::CheckAndGetEncodedSize(key, ht_byte_size_dest)); |
61 | 34.2M | const size_t hybrid_time_value_type_offset = key.size() - *ht_byte_size_dest - 1; |
62 | 34.2M | const ValueType value_type = DecodeValueType(key[hybrid_time_value_type_offset]); |
63 | 34.2M | if (value_type != ValueType::kHybridTime) { |
64 | 0 | return STATUS_FORMAT( |
65 | 0 | Corruption, |
66 | 0 | "Expected to find value type kHybridTime preceding the HybridTime component of the " |
67 | 0 | "encoded key, found $0. DocHybridTime bytes: $1", |
68 | 0 | value_type, |
69 | 0 | ToShortDebugStr(rocksdb::Slice(key.data() + hybrid_time_value_type_offset, |
70 | 0 | key.size() - hybrid_time_value_type_offset))); |
71 | 0 | } |
72 | | |
73 | 34.2M | return Status::OK(); |
74 | 34.2M | } |
75 | | |
76 | | template <char END_OF_STRING> |
77 | 13.6M | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { |
78 | 13.6M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', |
79 | 13.6M | "Only characters '\0' and '\xff' allowed as a template parameter"); |
80 | 13.6M | if (END_OF_STRING == '\0' && s.find('\0') == string::npos) { |
81 | | // Fast path: no zero characters, nothing to encode. |
82 | 12.2M | dest->append(s); |
83 | 1.39M | } else { |
84 | 24.3M | for (char c : s) { |
85 | 24.3M | if (c == '\0') { |
86 | 4.03M | dest->push_back(END_OF_STRING); |
87 | 4.03M | dest->push_back(END_OF_STRING ^ 1); |
88 | 20.3M | } else { |
89 | 20.3M | dest->push_back(END_OF_STRING ^ c); |
90 | 20.3M | } |
91 | 24.3M | } |
92 | 1.39M | } |
93 | 13.6M | } _ZN2yb5docdb21AppendEncodedStrToKeyILc0EEEvRKNSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEEPNS_10ByteBufferILm64EEE Line | Count | Source | 77 | 13.5M | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { | 78 | 13.5M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 79 | 13.5M | "Only characters '\0' and '\xff' allowed as a template parameter"); | 80 | 13.5M | if (END_OF_STRING == '\0' && s.find('\0') == string::npos) { | 81 | | // Fast path: no zero characters, nothing to encode. | 82 | 12.2M | dest->append(s); | 83 | 1.31M | } else { | 84 | 23.6M | for (char c : s) { | 85 | 23.6M | if (c == '\0') { | 86 | 4.03M | dest->push_back(END_OF_STRING); | 87 | 4.03M | dest->push_back(END_OF_STRING ^ 1); | 88 | 19.6M | } else { | 89 | 19.6M | dest->push_back(END_OF_STRING ^ c); | 90 | 19.6M | } | 91 | 23.6M | } | 92 | 1.31M | } | 93 | 13.5M | } |
_ZN2yb5docdb21AppendEncodedStrToKeyILcn1EEEvRKNSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEEPNS_10ByteBufferILm64EEE Line | Count | Source | 77 | 78.5k | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { | 78 | 78.5k | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 79 | 78.5k | "Only characters '\0' and '\xff' allowed as a template parameter"); | 80 | 78.5k | if (END_OF_STRING == '\0' && s.find('\0') == string::npos) { | 81 | | // Fast path: no zero characters, nothing to encode. | 82 | 0 | dest->append(s); | 83 | 78.5k | } else { | 84 | 698k | for (char c : s) { | 85 | 698k | if (c == '\0') { | 86 | 694 | dest->push_back(END_OF_STRING); | 87 | 694 | dest->push_back(END_OF_STRING ^ 1); | 88 | 697k | } else { | 89 | 697k | dest->push_back(END_OF_STRING ^ c); | 90 | 697k | } | 91 | 698k | } | 92 | 78.5k | } | 93 | 78.5k | } |
|
94 | | |
95 | 13.5M | void AppendZeroEncodedStrToKey(const string &s, KeyBuffer *dest) { |
96 | 13.5M | AppendEncodedStrToKey<'\0'>(s, dest); |
97 | 13.5M | } |
98 | | |
99 | 78.5k | void AppendComplementZeroEncodedStrToKey(const string &s, KeyBuffer *dest) { |
100 | 78.5k | AppendEncodedStrToKey<'\xff'>(s, dest); |
101 | 78.5k | } |
102 | | |
103 | | template <char A> |
104 | 13.6M | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { |
105 | 13.6M | dest->push_back(A); |
106 | 13.6M | dest->push_back(A); |
107 | 13.6M | } _ZN2yb5docdb22TerminateEncodedKeyStrILc0EEEvPNS_10ByteBufferILm64EEE Line | Count | Source | 104 | 13.5M | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { | 105 | 13.5M | dest->push_back(A); | 106 | 13.5M | dest->push_back(A); | 107 | 13.5M | } |
_ZN2yb5docdb22TerminateEncodedKeyStrILcn1EEEvPNS_10ByteBufferILm64EEE Line | Count | Source | 104 | 78.5k | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { | 105 | 78.5k | dest->push_back(A); | 106 | 78.5k | dest->push_back(A); | 107 | 78.5k | } |
|
108 | | |
109 | 13.5M | void TerminateZeroEncodedKeyStr(KeyBuffer *dest) { |
110 | 13.5M | TerminateEncodedKeyStr<'\0'>(dest); |
111 | 13.5M | } |
112 | | |
113 | 78.5k | void TerminateComplementZeroEncodedKeyStr(KeyBuffer *dest) { |
114 | 78.5k | TerminateEncodedKeyStr<'\xff'>(dest); |
115 | 78.5k | } |
116 | | |
117 | | template<char END_OF_STRING> |
118 | 196M | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { |
119 | 196M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', |
120 | 196M | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); |
121 | 196M | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; |
122 | 196M | const char* p = slice->cdata(); |
123 | 196M | const char* end = p + slice->size(); |
124 | | |
125 | 16.5G | while (p != end) { |
126 | 16.5G | if (*p == END_OF_STRING) { |
127 | 255M | ++p; |
128 | 255M | if (p == end) { |
129 | 2 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", |
130 | 2 | END_OF_STRING)); |
131 | 2 | } |
132 | 255M | if (*p == END_OF_STRING) { |
133 | | // Found two END_OF_STRING characters, this is the end of the encoded string. |
134 | 196M | ++p; |
135 | 196M | break; |
136 | 196M | } |
137 | 58.7M | if (*p == END_OF_STRING_ESCAPE) { |
138 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. |
139 | 58.7M | if (result != nullptr) { |
140 | 37.5M | result->push_back(0); |
141 | 37.5M | } |
142 | 58.7M | ++p; |
143 | 18.4E | } else { |
144 | 18.4E | return STATUS(Corruption, StringPrintf( |
145 | 18.4E | "Invalid sequence in encoded string: " |
146 | 18.4E | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", |
147 | 18.4E | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); |
148 | 18.4E | } |
149 | 16.3G | } else { |
150 | 16.3G | if (result != nullptr) { |
151 | 8.43G | result->push_back((*p) ^ END_OF_STRING); |
152 | 8.43G | } |
153 | 16.3G | ++p; |
154 | 16.3G | } |
155 | 16.5G | } |
156 | 196M | if (result != nullptr) { |
157 | 87.3M | result->shrink_to_fit(); |
158 | 87.3M | } |
159 | 196M | slice->remove_prefix(p - slice->cdata()); |
160 | 196M | return Status::OK(); |
161 | 196M | } _ZN2yb5docdb16DecodeEncodedStrILcn1EEENS_6StatusEPNS_5SliceEPNSt3__112basic_stringIcNS5_11char_traitsIcEENS5_9allocatorIcEEEE Line | Count | Source | 118 | 549k | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { | 119 | 549k | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 120 | 549k | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); | 121 | 549k | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; | 122 | 549k | const char* p = slice->cdata(); | 123 | 549k | const char* end = p + slice->size(); | 124 | | | 125 | 5.47M | while (p != end) { | 126 | 5.47M | if (*p == END_OF_STRING) { | 127 | 554k | ++p; | 128 | 554k | if (p == end) { | 129 | 0 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", | 130 | 0 | END_OF_STRING)); | 131 | 0 | } | 132 | 554k | if (*p == END_OF_STRING) { | 133 | | // Found two END_OF_STRING characters, this is the end of the encoded string. | 134 | 549k | ++p; | 135 | 549k | break; | 136 | 549k | } | 137 | 5.73k | if (*p == END_OF_STRING_ESCAPE) { | 138 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. | 139 | 5.73k | if (result != nullptr) { | 140 | 4.23k | result->push_back(0); | 141 | 4.23k | } | 142 | 5.73k | ++p; | 143 | 0 | } else { | 144 | 0 | return STATUS(Corruption, StringPrintf( | 145 | 0 | "Invalid sequence in encoded string: " | 146 | 0 | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", | 147 | 0 | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); | 148 | 0 | } | 149 | 4.92M | } else { | 150 | 4.92M | if (result != nullptr) { | 151 | 3.03M | result->push_back((*p) ^ END_OF_STRING); | 152 | 3.03M | } | 153 | 4.92M | ++p; | 154 | 4.92M | } | 155 | 5.47M | } | 156 | 549k | if (result != nullptr) { | 157 | 337k | result->shrink_to_fit(); | 158 | 337k | } | 159 | 549k | slice->remove_prefix(p - slice->cdata()); | 160 | 549k | return Status::OK(); | 161 | 549k | } |
_ZN2yb5docdb16DecodeEncodedStrILc0EEENS_6StatusEPNS_5SliceEPNSt3__112basic_stringIcNS5_11char_traitsIcEENS5_9allocatorIcEEEE Line | Count | Source | 118 | 195M | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { | 119 | 195M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 120 | 195M | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); | 121 | 195M | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; | 122 | 195M | const char* p = slice->cdata(); | 123 | 195M | const char* end = p + slice->size(); | 124 | | | 125 | 16.5G | while (p != end) { | 126 | 16.5G | if (*p == END_OF_STRING) { | 127 | 254M | ++p; | 128 | 254M | if (p == end) { | 129 | 2 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", | 130 | 2 | END_OF_STRING)); | 131 | 2 | } | 132 | 254M | if (*p == END_OF_STRING) { | 133 | | // Found two END_OF_STRING characters, this is the end of the encoded string. | 134 | 196M | ++p; | 135 | 196M | break; | 136 | 196M | } | 137 | 58.7M | if (*p == END_OF_STRING_ESCAPE) { | 138 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. | 139 | 58.7M | if (result != nullptr) { | 140 | 37.5M | result->push_back(0); | 141 | 37.5M | } | 142 | 58.7M | ++p; | 143 | 18.4E | } else { | 144 | 18.4E | return STATUS(Corruption, StringPrintf( | 145 | 18.4E | "Invalid sequence in encoded string: " | 146 | 18.4E | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", | 147 | 18.4E | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); | 148 | 18.4E | } | 149 | 16.2G | } else { | 150 | 16.2G | if (result != nullptr) { | 151 | 8.43G | result->push_back((*p) ^ END_OF_STRING); | 152 | 8.43G | } | 153 | 16.2G | ++p; | 154 | 16.2G | } | 155 | 16.5G | } | 156 | 196M | if (result != nullptr) { | 157 | 87.0M | result->shrink_to_fit(); | 158 | 87.0M | } | 159 | 196M | slice->remove_prefix(p - slice->cdata()); | 160 | 196M | return Status::OK(); | 161 | 195M | } |
|
162 | | |
163 | 549k | Status DecodeComplementZeroEncodedStr(rocksdb::Slice* slice, std::string* result) { |
164 | 549k | return DecodeEncodedStr<'\xff'>(slice, result); |
165 | 549k | } |
166 | | |
167 | 195M | Status DecodeZeroEncodedStr(rocksdb::Slice* slice, string* result) { |
168 | 195M | return DecodeEncodedStr<'\0'>(slice, result); |
169 | 195M | } |
170 | | |
171 | 0 | string DecodeZeroEncodedStr(string encoded_str) { |
172 | 0 | string result; |
173 | 0 | rocksdb::Slice slice(encoded_str); |
174 | 0 | Status status = DecodeZeroEncodedStr(&slice, &result); |
175 | 0 | if (!status.ok()) { |
176 | 0 | LOG(FATAL) << "Failed to decode zero-encoded string " << FormatBytesAsStr(encoded_str) << ": " |
177 | 0 | << status.ToString(); |
178 | 0 | } |
179 | 0 | if (!slice.empty()) { |
180 | 0 | LOG(FATAL) << "Did not consume all characters from a zero-encoded string " |
181 | 0 | << FormatBytesAsStr(encoded_str) << ": " |
182 | 0 | << "bytes left: " << slice.size() << ", " |
183 | 0 | << "encoded_str.size(): " << encoded_str.size(); |
184 | 0 | } |
185 | 0 | return result; |
186 | 0 | } |
187 | | |
188 | 0 | std::string ToShortDebugStr(rocksdb::Slice slice) { |
189 | 0 | return FormatSliceAsStr(slice, QuotesType::kDoubleQuotes, kShortDebugStringLength); |
190 | 0 | } |
191 | | |
192 | 0 | Result<DocHybridTime> DecodeInvertedDocHt(Slice key_slice) { |
193 | 0 | if (key_slice.empty() || key_slice.size() > kMaxBytesPerEncodedHybridTime + 1) { |
194 | 0 | return STATUS_FORMAT( |
195 | 0 | Corruption, |
196 | 0 | "Invalid doc hybrid time in reverse intent record suffix: $0", |
197 | 0 | key_slice.ToDebugHexString()); |
198 | 0 | } |
199 | | |
200 | 0 | DocHybridTimeWordBuffer doc_ht_buffer; |
201 | 0 | key_slice = InvertEncodedDocHT(key_slice, &doc_ht_buffer); |
202 | |
|
203 | 0 | if (static_cast<ValueType>(key_slice[0]) != ValueType::kHybridTime) { |
204 | 0 | return STATUS_FORMAT( |
205 | 0 | Corruption, |
206 | 0 | "Invalid prefix of doc hybrid time in reverse intent record decoded suffix: $0", |
207 | 0 | key_slice.ToDebugHexString()); |
208 | 0 | } |
209 | 0 | key_slice.consume_byte(); |
210 | 0 | DocHybridTime doc_ht; |
211 | 0 | RETURN_NOT_OK(doc_ht.DecodeFrom(&key_slice)); |
212 | 0 | return doc_ht; |
213 | 0 | } |
214 | | |
215 | 27.9M | Slice InvertEncodedDocHT(const Slice& input, DocHybridTimeWordBuffer* buffer) { |
216 | 27.9M | memcpy(buffer->data(), input.data(), input.size()); |
217 | 139M | for (size_t i = 0; i != kMaxWordsPerEncodedHybridTimeWithValueType; ++i) { |
218 | 111M | (*buffer)[i] = ~(*buffer)[i]; |
219 | 111M | } |
220 | 27.9M | return {pointer_cast<char*>(buffer->data()), input.size()}; |
221 | 27.9M | } |
222 | | |
223 | | } // namespace docdb |
224 | | } // namespace yb |