/Users/deen/code/yugabyte-db/src/yb/docdb/doc_kv_util.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #include "yb/docdb/doc_kv_util.h" |
15 | | |
16 | | #include "yb/docdb/docdb_fwd.h" |
17 | | #include "yb/docdb/docdb.h" |
18 | | #include "yb/docdb/value_type.h" |
19 | | |
20 | | #include "yb/util/bytes_formatter.h" |
21 | | #include "yb/util/result.h" |
22 | | #include "yb/util/status_format.h" |
23 | | |
24 | | using std::string; |
25 | | |
26 | | using strings::Substitute; |
27 | | using yb::HybridTime; |
28 | | using yb::FormatBytesAsStr; |
29 | | |
30 | | namespace yb { |
31 | | namespace docdb { |
32 | | |
33 | 6 | bool KeyBelongsToDocKeyInTest(const rocksdb::Slice &key, const string &encoded_doc_key) { |
34 | 6 | if (key.starts_with(encoded_doc_key)) { |
35 | 6 | const auto encoded_doc_key_size = encoded_doc_key.size(); |
36 | 6 | const char* key_data = key.cdata(); |
37 | 6 | return key.size() >= encoded_doc_key_size + 2 && |
38 | 6 | key_data[encoded_doc_key_size] == '\0'2 && |
39 | 6 | key_data[encoded_doc_key_size + 1] == '\0'2 ; |
40 | 6 | } else { |
41 | 0 | return false; |
42 | 0 | } |
43 | 6 | } |
44 | | |
45 | | // Given a DocDB key stored in RocksDB, validate the DocHybridTime size stored as the |
46 | | // last few bits of the final byte of the key, and ensure that the ValueType byte preceding that |
47 | | // encoded DocHybridTime is ValueType::kHybridTime. |
48 | 83.3M | Status CheckHybridTimeSizeAndValueType(const rocksdb::Slice& key, size_t* ht_byte_size_dest) { |
49 | 83.3M | RETURN_NOT_OK( |
50 | 83.3M | DocHybridTime::CheckAndGetEncodedSize(key, ht_byte_size_dest)); |
51 | 83.3M | const size_t hybrid_time_value_type_offset = key.size() - *ht_byte_size_dest - 1; |
52 | 83.3M | const ValueType value_type = DecodeValueType(key[hybrid_time_value_type_offset]); |
53 | 83.3M | if (value_type != ValueType::kHybridTime) { |
54 | 0 | return STATUS_FORMAT( |
55 | 0 | Corruption, |
56 | 0 | "Expected to find value type kHybridTime preceding the HybridTime component of the " |
57 | 0 | "encoded key, found $0. DocHybridTime bytes: $1", |
58 | 0 | value_type, |
59 | 0 | ToShortDebugStr(rocksdb::Slice(key.data() + hybrid_time_value_type_offset, |
60 | 0 | key.size() - hybrid_time_value_type_offset))); |
61 | 0 | } |
62 | | |
63 | 83.3M | return Status::OK(); |
64 | 83.3M | } |
65 | | |
66 | | template <char END_OF_STRING> |
67 | 39.4M | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { |
68 | 39.4M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', |
69 | 39.4M | "Only characters '\0' and '\xff' allowed as a template parameter"); |
70 | 39.4M | if (END_OF_STRING == '\0' && s.find('\0') == string::npos39.3M ) { |
71 | | // Fast path: no zero characters, nothing to encode. |
72 | 33.3M | dest->append(s); |
73 | 33.3M | } else { |
74 | 117M | for (char c : s) { |
75 | 117M | if (c == '\0') { |
76 | 20.9M | dest->push_back(END_OF_STRING); |
77 | 20.9M | dest->push_back(END_OF_STRING ^ 1); |
78 | 96.1M | } else { |
79 | 96.1M | dest->push_back(END_OF_STRING ^ c); |
80 | 96.1M | } |
81 | 117M | } |
82 | 6.10M | } |
83 | 39.4M | } void yb::docdb::AppendEncodedStrToKey<(char)0>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, yb::ByteBuffer<64ul>*) Line | Count | Source | 67 | 39.3M | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { | 68 | 39.3M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 69 | 39.3M | "Only characters '\0' and '\xff' allowed as a template parameter"); | 70 | 39.3M | if (END_OF_STRING == '\0' && s.find('\0') == string::npos39.3M ) { | 71 | | // Fast path: no zero characters, nothing to encode. | 72 | 33.3M | dest->append(s); | 73 | 33.3M | } else { | 74 | 115M | for (char c : s) { | 75 | 115M | if (c == '\0') { | 76 | 20.9M | dest->push_back(END_OF_STRING); | 77 | 20.9M | dest->push_back(END_OF_STRING ^ 1); | 78 | 95.0M | } else { | 79 | 95.0M | dest->push_back(END_OF_STRING ^ c); | 80 | 95.0M | } | 81 | 115M | } | 82 | 6.00M | } | 83 | 39.3M | } |
void yb::docdb::AppendEncodedStrToKey<(char)-1>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, yb::ByteBuffer<64ul>*) Line | Count | Source | 67 | 99.5k | void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) { | 68 | 99.5k | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 69 | 99.5k | "Only characters '\0' and '\xff' allowed as a template parameter"); | 70 | 99.5k | if (END_OF_STRING == '\0' && s.find('\0') == string::npos0 ) { | 71 | | // Fast path: no zero characters, nothing to encode. | 72 | 0 | dest->append(s); | 73 | 99.5k | } else { | 74 | 1.14M | for (char c : s) { | 75 | 1.14M | if (c == '\0') { | 76 | 612 | dest->push_back(END_OF_STRING); | 77 | 612 | dest->push_back(END_OF_STRING ^ 1); | 78 | 1.14M | } else { | 79 | 1.14M | dest->push_back(END_OF_STRING ^ c); | 80 | 1.14M | } | 81 | 1.14M | } | 82 | 99.5k | } | 83 | 99.5k | } |
|
84 | | |
85 | 39.3M | void AppendZeroEncodedStrToKey(const string &s, KeyBuffer *dest) { |
86 | 39.3M | AppendEncodedStrToKey<'\0'>(s, dest); |
87 | 39.3M | } |
88 | | |
89 | 99.5k | void AppendComplementZeroEncodedStrToKey(const string &s, KeyBuffer *dest) { |
90 | 99.5k | AppendEncodedStrToKey<'\xff'>(s, dest); |
91 | 99.5k | } |
92 | | |
93 | | template <char A> |
94 | 39.3M | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { |
95 | 39.3M | dest->push_back(A); |
96 | 39.3M | dest->push_back(A); |
97 | 39.3M | } void yb::docdb::TerminateEncodedKeyStr<(char)0>(yb::ByteBuffer<64ul>*) Line | Count | Source | 94 | 39.2M | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { | 95 | 39.2M | dest->push_back(A); | 96 | 39.2M | dest->push_back(A); | 97 | 39.2M | } |
void yb::docdb::TerminateEncodedKeyStr<(char)-1>(yb::ByteBuffer<64ul>*) Line | Count | Source | 94 | 99.5k | inline void TerminateEncodedKeyStr(KeyBuffer *dest) { | 95 | 99.5k | dest->push_back(A); | 96 | 99.5k | dest->push_back(A); | 97 | 99.5k | } |
|
98 | | |
99 | 39.2M | void TerminateZeroEncodedKeyStr(KeyBuffer *dest) { |
100 | 39.2M | TerminateEncodedKeyStr<'\0'>(dest); |
101 | 39.2M | } |
102 | | |
103 | 99.5k | void TerminateComplementZeroEncodedKeyStr(KeyBuffer *dest) { |
104 | 99.5k | TerminateEncodedKeyStr<'\xff'>(dest); |
105 | 99.5k | } |
106 | | |
107 | | template<char END_OF_STRING> |
108 | 565M | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { |
109 | 565M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', |
110 | 565M | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); |
111 | 565M | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; |
112 | 565M | const char* p = slice->cdata(); |
113 | 565M | const char* end = p + slice->size(); |
114 | | |
115 | 34.3G | while (p != end) { |
116 | 34.2G | if (*p == END_OF_STRING) { |
117 | 812M | ++p; |
118 | 812M | if (p == end) { |
119 | 1 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", |
120 | 1 | END_OF_STRING)); |
121 | 1 | } |
122 | 812M | if (*p == END_OF_STRING) { |
123 | | // Found two END_OF_STRING characters, this is the end of the encoded string. |
124 | 543M | ++p; |
125 | 543M | break; |
126 | 543M | } |
127 | 268M | if (*p == END_OF_STRING_ESCAPE) { |
128 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. |
129 | 268M | if (result != nullptr) { |
130 | 182M | result->push_back(0); |
131 | 182M | } |
132 | 268M | ++p; |
133 | 268M | } else { |
134 | 728k | return STATUS(Corruption, StringPrintf( |
135 | 728k | "Invalid sequence in encoded string: " |
136 | 728k | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", |
137 | 728k | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); |
138 | 728k | } |
139 | 33.4G | } else { |
140 | 33.4G | if (result != nullptr) { |
141 | 17.1G | result->push_back((*p) ^ END_OF_STRING); |
142 | 17.1G | } |
143 | 33.4G | ++p; |
144 | 33.4G | } |
145 | 34.2G | } |
146 | 564M | if (result != nullptr) { |
147 | 272M | result->shrink_to_fit(); |
148 | 272M | } |
149 | 564M | slice->remove_prefix(p - slice->cdata()); |
150 | 564M | return Status::OK(); |
151 | 565M | } yb::Status yb::docdb::DecodeEncodedStr<(char)-1>(yb::Slice*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*) Line | Count | Source | 108 | 602k | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { | 109 | 602k | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 110 | 602k | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); | 111 | 602k | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; | 112 | 602k | const char* p = slice->cdata(); | 113 | 602k | const char* end = p + slice->size(); | 114 | | | 115 | 6.63M | while (p != end) { | 116 | 6.63M | if (*p == END_OF_STRING) { | 117 | 607k | ++p; | 118 | 607k | if (p == end) { | 119 | 0 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", | 120 | 0 | END_OF_STRING)); | 121 | 0 | } | 122 | 607k | if (*p == END_OF_STRING) { | 123 | | // Found two END_OF_STRING characters, this is the end of the encoded string. | 124 | 602k | ++p; | 125 | 602k | break; | 126 | 602k | } | 127 | 5.09k | if (5.09k *p == END_OF_STRING_ESCAPE5.09k ) { | 128 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. | 129 | 5.09k | if (result != nullptr) { | 130 | 3.76k | result->push_back(0); | 131 | 3.76k | } | 132 | 5.09k | ++p; | 133 | 18.4E | } else { | 134 | 18.4E | return STATUS(Corruption, StringPrintf( | 135 | 18.4E | "Invalid sequence in encoded string: " | 136 | 18.4E | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", | 137 | 18.4E | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); | 138 | 18.4E | } | 139 | 6.02M | } else { | 140 | 6.02M | if (result != nullptr) { | 141 | 3.02M | result->push_back((*p) ^ END_OF_STRING); | 142 | 3.02M | } | 143 | 6.02M | ++p; | 144 | 6.02M | } | 145 | 6.63M | } | 146 | 602k | if (result != nullptr) { | 147 | 339k | result->shrink_to_fit(); | 148 | 339k | } | 149 | 602k | slice->remove_prefix(p - slice->cdata()); | 150 | 602k | return Status::OK(); | 151 | 602k | } |
yb::Status yb::docdb::DecodeEncodedStr<(char)0>(yb::Slice*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*) Line | Count | Source | 108 | 564M | Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) { | 109 | 564M | static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff', | 110 | 564M | "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted"); | 111 | 564M | constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1; | 112 | 564M | const char* p = slice->cdata(); | 113 | 564M | const char* end = p + slice->size(); | 114 | | | 115 | 34.3G | while (p != end) { | 116 | 34.2G | if (*p == END_OF_STRING) { | 117 | 812M | ++p; | 118 | 812M | if (p == end) { | 119 | 1 | return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ", | 120 | 1 | END_OF_STRING)); | 121 | 1 | } | 122 | 812M | if (*p == END_OF_STRING) { | 123 | | // Found two END_OF_STRING characters, this is the end of the encoded string. | 124 | 543M | ++p; | 125 | 543M | break; | 126 | 543M | } | 127 | 268M | if (*p == END_OF_STRING_ESCAPE) { | 128 | | // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding. | 129 | 268M | if (result != nullptr) { | 130 | 182M | result->push_back(0); | 131 | 182M | } | 132 | 268M | ++p; | 133 | 268M | } else { | 134 | 728k | return STATUS(Corruption, StringPrintf( | 135 | 728k | "Invalid sequence in encoded string: " | 136 | 728k | R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#", | 137 | 728k | END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE)); | 138 | 728k | } | 139 | 33.4G | } else { | 140 | 33.4G | if (result != nullptr) { | 141 | 17.1G | result->push_back((*p) ^ END_OF_STRING); | 142 | 17.1G | } | 143 | 33.4G | ++p; | 144 | 33.4G | } | 145 | 34.2G | } | 146 | 564M | if (result != nullptr) { | 147 | 272M | result->shrink_to_fit(); | 148 | 272M | } | 149 | 564M | slice->remove_prefix(p - slice->cdata()); | 150 | 564M | return Status::OK(); | 151 | 564M | } |
|
152 | | |
153 | 602k | Status DecodeComplementZeroEncodedStr(rocksdb::Slice* slice, std::string* result) { |
154 | 602k | return DecodeEncodedStr<'\xff'>(slice, result); |
155 | 602k | } |
156 | | |
157 | 564M | Status DecodeZeroEncodedStr(rocksdb::Slice* slice, string* result) { |
158 | 564M | return DecodeEncodedStr<'\0'>(slice, result); |
159 | 564M | } |
160 | | |
161 | 1.00k | string DecodeZeroEncodedStr(string encoded_str) { |
162 | 1.00k | string result; |
163 | 1.00k | rocksdb::Slice slice(encoded_str); |
164 | 1.00k | Status status = DecodeZeroEncodedStr(&slice, &result); |
165 | 1.00k | if (!status.ok()) { |
166 | 0 | LOG(FATAL) << "Failed to decode zero-encoded string " << FormatBytesAsStr(encoded_str) << ": " |
167 | 0 | << status.ToString(); |
168 | 0 | } |
169 | 1.00k | if (!slice.empty()) { |
170 | 0 | LOG(FATAL) << "Did not consume all characters from a zero-encoded string " |
171 | 0 | << FormatBytesAsStr(encoded_str) << ": " |
172 | 0 | << "bytes left: " << slice.size() << ", " |
173 | 0 | << "encoded_str.size(): " << encoded_str.size(); |
174 | 0 | } |
175 | 1.00k | return result; |
176 | 1.00k | } |
177 | | |
178 | 3.59k | std::string ToShortDebugStr(rocksdb::Slice slice) { |
179 | 3.59k | return FormatSliceAsStr(slice, QuotesType::kDoubleQuotes, kShortDebugStringLength); |
180 | 3.59k | } |
181 | | |
182 | 1.23k | Result<DocHybridTime> DecodeInvertedDocHt(Slice key_slice) { |
183 | 1.23k | if (key_slice.empty() || key_slice.size() > kMaxBytesPerEncodedHybridTime + 1) { |
184 | 0 | return STATUS_FORMAT( |
185 | 0 | Corruption, |
186 | 0 | "Invalid doc hybrid time in reverse intent record suffix: $0", |
187 | 0 | key_slice.ToDebugHexString()); |
188 | 0 | } |
189 | | |
190 | 1.23k | DocHybridTimeWordBuffer doc_ht_buffer; |
191 | 1.23k | key_slice = InvertEncodedDocHT(key_slice, &doc_ht_buffer); |
192 | | |
193 | 1.23k | if (static_cast<ValueType>(key_slice[0]) != ValueType::kHybridTime) { |
194 | 0 | return STATUS_FORMAT( |
195 | 0 | Corruption, |
196 | 0 | "Invalid prefix of doc hybrid time in reverse intent record decoded suffix: $0", |
197 | 0 | key_slice.ToDebugHexString()); |
198 | 0 | } |
199 | 1.23k | key_slice.consume_byte(); |
200 | 1.23k | return DocHybridTime::DecodeFrom(&key_slice); |
201 | 1.23k | } |
202 | | |
203 | 86.6M | Slice InvertEncodedDocHT(const Slice& input, DocHybridTimeWordBuffer* buffer) { |
204 | 86.6M | memcpy(buffer->data(), input.data(), input.size()); |
205 | 433M | for (size_t i = 0; i != kMaxWordsPerEncodedHybridTimeWithValueType; ++i346M ) { |
206 | 346M | (*buffer)[i] = ~(*buffer)[i]; |
207 | 346M | } |
208 | 86.6M | return {pointer_cast<char*>(buffer->data()), input.size()}; |
209 | 86.6M | } |
210 | | |
211 | | } // namespace docdb |
212 | | } // namespace yb |