YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/docdb/doc_kv_util.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#include "yb/docdb/doc_kv_util.h"
15
16
#include "yb/docdb/docdb_fwd.h"
17
#include "yb/docdb/docdb.h"
18
#include "yb/docdb/value_type.h"
19
20
#include "yb/util/bytes_formatter.h"
21
#include "yb/util/result.h"
22
#include "yb/util/status_format.h"
23
24
using std::string;
25
26
using strings::Substitute;
27
using yb::HybridTime;
28
using yb::FormatBytesAsStr;
29
30
namespace yb {
31
namespace docdb {
32
33
6
bool KeyBelongsToDocKeyInTest(const rocksdb::Slice &key, const string &encoded_doc_key) {
34
6
  if (key.starts_with(encoded_doc_key)) {
35
6
    const auto encoded_doc_key_size = encoded_doc_key.size();
36
6
    const char* key_data = key.cdata();
37
6
    return key.size() >= encoded_doc_key_size + 2 &&
38
6
           
key_data[encoded_doc_key_size] == '\0'2
&&
39
6
           
key_data[encoded_doc_key_size + 1] == '\0'2
;
40
6
  } else {
41
0
    return false;
42
0
  }
43
6
}
44
45
// Given a DocDB key stored in RocksDB, validate the DocHybridTime size stored as the
46
// last few bits of the final byte of the key, and ensure that the ValueType byte preceding that
47
// encoded DocHybridTime is ValueType::kHybridTime.
48
83.3M
Status CheckHybridTimeSizeAndValueType(const rocksdb::Slice& key, size_t* ht_byte_size_dest) {
49
83.3M
  RETURN_NOT_OK(
50
83.3M
      DocHybridTime::CheckAndGetEncodedSize(key, ht_byte_size_dest));
51
83.3M
  const size_t hybrid_time_value_type_offset = key.size() - *ht_byte_size_dest - 1;
52
83.3M
  const ValueType value_type = DecodeValueType(key[hybrid_time_value_type_offset]);
53
83.3M
  if (value_type != ValueType::kHybridTime) {
54
0
    return STATUS_FORMAT(
55
0
        Corruption,
56
0
        "Expected to find value type kHybridTime preceding the HybridTime component of the "
57
0
            "encoded key, found $0. DocHybridTime bytes: $1",
58
0
        value_type,
59
0
        ToShortDebugStr(rocksdb::Slice(key.data() + hybrid_time_value_type_offset,
60
0
                                       key.size() - hybrid_time_value_type_offset)));
61
0
  }
62
63
83.3M
  return Status::OK();
64
83.3M
}
65
66
template <char END_OF_STRING>
67
39.4M
void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) {
68
39.4M
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
69
39.4M
                "Only characters '\0' and '\xff' allowed as a template parameter");
70
39.4M
  if (END_OF_STRING == '\0' && 
s.find('\0') == string::npos39.3M
) {
71
    // Fast path: no zero characters, nothing to encode.
72
33.3M
    dest->append(s);
73
33.3M
  } else {
74
117M
    for (char c : s) {
75
117M
      if (c == '\0') {
76
20.9M
        dest->push_back(END_OF_STRING);
77
20.9M
        dest->push_back(END_OF_STRING ^ 1);
78
96.1M
      } else {
79
96.1M
        dest->push_back(END_OF_STRING ^ c);
80
96.1M
      }
81
117M
    }
82
6.10M
  }
83
39.4M
}
void yb::docdb::AppendEncodedStrToKey<(char)0>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, yb::ByteBuffer<64ul>*)
Line
Count
Source
67
39.3M
void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) {
68
39.3M
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
69
39.3M
                "Only characters '\0' and '\xff' allowed as a template parameter");
70
39.3M
  if (END_OF_STRING == '\0' && 
s.find('\0') == string::npos39.3M
) {
71
    // Fast path: no zero characters, nothing to encode.
72
33.3M
    dest->append(s);
73
33.3M
  } else {
74
115M
    for (char c : s) {
75
115M
      if (c == '\0') {
76
20.9M
        dest->push_back(END_OF_STRING);
77
20.9M
        dest->push_back(END_OF_STRING ^ 1);
78
95.0M
      } else {
79
95.0M
        dest->push_back(END_OF_STRING ^ c);
80
95.0M
      }
81
115M
    }
82
6.00M
  }
83
39.3M
}
void yb::docdb::AppendEncodedStrToKey<(char)-1>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, yb::ByteBuffer<64ul>*)
Line
Count
Source
67
99.5k
void AppendEncodedStrToKey(const string &s, KeyBuffer *dest) {
68
99.5k
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
69
99.5k
                "Only characters '\0' and '\xff' allowed as a template parameter");
70
99.5k
  if (END_OF_STRING == '\0' && 
s.find('\0') == string::npos0
) {
71
    // Fast path: no zero characters, nothing to encode.
72
0
    dest->append(s);
73
99.5k
  } else {
74
1.14M
    for (char c : s) {
75
1.14M
      if (c == '\0') {
76
612
        dest->push_back(END_OF_STRING);
77
612
        dest->push_back(END_OF_STRING ^ 1);
78
1.14M
      } else {
79
1.14M
        dest->push_back(END_OF_STRING ^ c);
80
1.14M
      }
81
1.14M
    }
82
99.5k
  }
83
99.5k
}
84
85
39.3M
void AppendZeroEncodedStrToKey(const string &s, KeyBuffer *dest) {
86
39.3M
  AppendEncodedStrToKey<'\0'>(s, dest);
87
39.3M
}
88
89
99.5k
void AppendComplementZeroEncodedStrToKey(const string &s, KeyBuffer *dest) {
90
99.5k
  AppendEncodedStrToKey<'\xff'>(s, dest);
91
99.5k
}
92
93
template <char A>
94
39.3M
inline void TerminateEncodedKeyStr(KeyBuffer *dest) {
95
39.3M
  dest->push_back(A);
96
39.3M
  dest->push_back(A);
97
39.3M
}
void yb::docdb::TerminateEncodedKeyStr<(char)0>(yb::ByteBuffer<64ul>*)
Line
Count
Source
94
39.2M
inline void TerminateEncodedKeyStr(KeyBuffer *dest) {
95
39.2M
  dest->push_back(A);
96
39.2M
  dest->push_back(A);
97
39.2M
}
void yb::docdb::TerminateEncodedKeyStr<(char)-1>(yb::ByteBuffer<64ul>*)
Line
Count
Source
94
99.5k
inline void TerminateEncodedKeyStr(KeyBuffer *dest) {
95
99.5k
  dest->push_back(A);
96
99.5k
  dest->push_back(A);
97
99.5k
}
98
99
39.2M
void TerminateZeroEncodedKeyStr(KeyBuffer *dest) {
100
39.2M
  TerminateEncodedKeyStr<'\0'>(dest);
101
39.2M
}
102
103
99.5k
void TerminateComplementZeroEncodedKeyStr(KeyBuffer *dest) {
104
99.5k
  TerminateEncodedKeyStr<'\xff'>(dest);
105
99.5k
}
106
107
template<char END_OF_STRING>
108
565M
Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) {
109
565M
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
110
565M
                "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted");
111
565M
  constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1;
112
565M
  const char* p = slice->cdata();
113
565M
  const char* end = p + slice->size();
114
115
34.3G
  while (p != end) {
116
34.2G
    if (*p == END_OF_STRING) {
117
812M
      ++p;
118
812M
      if (p == end) {
119
1
        return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ",
120
1
                                               END_OF_STRING));
121
1
      }
122
812M
      if (*p == END_OF_STRING) {
123
        // Found two END_OF_STRING characters, this is the end of the encoded string.
124
543M
        ++p;
125
543M
        break;
126
543M
      }
127
268M
      if (*p == END_OF_STRING_ESCAPE) {
128
        // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding.
129
268M
        if (result != nullptr) {
130
182M
          result->push_back(0);
131
182M
        }
132
268M
        ++p;
133
268M
      } else {
134
728k
        return STATUS(Corruption, StringPrintf(
135
728k
            "Invalid sequence in encoded string: "
136
728k
            R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#",
137
728k
            END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE));
138
728k
      }
139
33.4G
    } else {
140
33.4G
      if (result != nullptr) {
141
17.1G
        result->push_back((*p) ^ END_OF_STRING);
142
17.1G
      }
143
33.4G
      ++p;
144
33.4G
    }
145
34.2G
  }
146
564M
  if (result != nullptr) {
147
272M
    result->shrink_to_fit();
148
272M
  }
149
564M
  slice->remove_prefix(p - slice->cdata());
150
564M
  return Status::OK();
151
565M
}
yb::Status yb::docdb::DecodeEncodedStr<(char)-1>(yb::Slice*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*)
Line
Count
Source
108
602k
Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) {
109
602k
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
110
602k
                "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted");
111
602k
  constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1;
112
602k
  const char* p = slice->cdata();
113
602k
  const char* end = p + slice->size();
114
115
6.63M
  while (p != end) {
116
6.63M
    if (*p == END_OF_STRING) {
117
607k
      ++p;
118
607k
      if (p == end) {
119
0
        return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ",
120
0
                                               END_OF_STRING));
121
0
      }
122
607k
      if (*p == END_OF_STRING) {
123
        // Found two END_OF_STRING characters, this is the end of the encoded string.
124
602k
        ++p;
125
602k
        break;
126
602k
      }
127
5.09k
      
if (5.09k
*p == END_OF_STRING_ESCAPE5.09k
) {
128
        // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding.
129
5.09k
        if (result != nullptr) {
130
3.76k
          result->push_back(0);
131
3.76k
        }
132
5.09k
        ++p;
133
18.4E
      } else {
134
18.4E
        return STATUS(Corruption, StringPrintf(
135
18.4E
            "Invalid sequence in encoded string: "
136
18.4E
            R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#",
137
18.4E
            END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE));
138
18.4E
      }
139
6.02M
    } else {
140
6.02M
      if (result != nullptr) {
141
3.02M
        result->push_back((*p) ^ END_OF_STRING);
142
3.02M
      }
143
6.02M
      ++p;
144
6.02M
    }
145
6.63M
  }
146
602k
  if (result != nullptr) {
147
339k
    result->shrink_to_fit();
148
339k
  }
149
602k
  slice->remove_prefix(p - slice->cdata());
150
602k
  return Status::OK();
151
602k
}
yb::Status yb::docdb::DecodeEncodedStr<(char)0>(yb::Slice*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*)
Line
Count
Source
108
564M
Status DecodeEncodedStr(rocksdb::Slice* slice, string* result) {
109
564M
  static_assert(END_OF_STRING == '\0' || END_OF_STRING == '\xff',
110
564M
                "Invalid END_OF_STRING character. Only '\0' and '\xff' accepted");
111
564M
  constexpr char END_OF_STRING_ESCAPE = END_OF_STRING ^ 1;
112
564M
  const char* p = slice->cdata();
113
564M
  const char* end = p + slice->size();
114
115
34.3G
  while (p != end) {
116
34.2G
    if (*p == END_OF_STRING) {
117
812M
      ++p;
118
812M
      if (p == end) {
119
1
        return STATUS(Corruption, StringPrintf("Encoded string ends with only one \\0x%02x ",
120
1
                                               END_OF_STRING));
121
1
      }
122
812M
      if (*p == END_OF_STRING) {
123
        // Found two END_OF_STRING characters, this is the end of the encoded string.
124
543M
        ++p;
125
543M
        break;
126
543M
      }
127
268M
      if (*p == END_OF_STRING_ESCAPE) {
128
        // 0 is encoded as 00 01 in ascending encoding and FF FE in descending encoding.
129
268M
        if (result != nullptr) {
130
182M
          result->push_back(0);
131
182M
        }
132
268M
        ++p;
133
268M
      } else {
134
728k
        return STATUS(Corruption, StringPrintf(
135
728k
            "Invalid sequence in encoded string: "
136
728k
            R"#(\0x%02x\0x%02x (must be either \0x%02x\0x%02x or \0x%02x\0x%02x))#",
137
728k
            END_OF_STRING, *p, END_OF_STRING, END_OF_STRING, END_OF_STRING, END_OF_STRING_ESCAPE));
138
728k
      }
139
33.4G
    } else {
140
33.4G
      if (result != nullptr) {
141
17.1G
        result->push_back((*p) ^ END_OF_STRING);
142
17.1G
      }
143
33.4G
      ++p;
144
33.4G
    }
145
34.2G
  }
146
564M
  if (result != nullptr) {
147
272M
    result->shrink_to_fit();
148
272M
  }
149
564M
  slice->remove_prefix(p - slice->cdata());
150
564M
  return Status::OK();
151
564M
}
152
153
602k
Status DecodeComplementZeroEncodedStr(rocksdb::Slice* slice, std::string* result) {
154
602k
  return DecodeEncodedStr<'\xff'>(slice, result);
155
602k
}
156
157
564M
Status DecodeZeroEncodedStr(rocksdb::Slice* slice, string* result) {
158
564M
  return DecodeEncodedStr<'\0'>(slice, result);
159
564M
}
160
161
1.00k
string DecodeZeroEncodedStr(string encoded_str) {
162
1.00k
  string result;
163
1.00k
  rocksdb::Slice slice(encoded_str);
164
1.00k
  Status status = DecodeZeroEncodedStr(&slice, &result);
165
1.00k
  if (!status.ok()) {
166
0
    LOG(FATAL) << "Failed to decode zero-encoded string " << FormatBytesAsStr(encoded_str) << ": "
167
0
               << status.ToString();
168
0
  }
169
1.00k
  if (!slice.empty()) {
170
0
    LOG(FATAL) << "Did not consume all characters from a zero-encoded string "
171
0
               << FormatBytesAsStr(encoded_str) << ": "
172
0
               << "bytes left: " << slice.size() << ", "
173
0
               << "encoded_str.size(): " << encoded_str.size();
174
0
  }
175
1.00k
  return result;
176
1.00k
}
177
178
3.59k
std::string ToShortDebugStr(rocksdb::Slice slice) {
179
3.59k
  return FormatSliceAsStr(slice, QuotesType::kDoubleQuotes, kShortDebugStringLength);
180
3.59k
}
181
182
1.23k
Result<DocHybridTime> DecodeInvertedDocHt(Slice key_slice) {
183
1.23k
  if (key_slice.empty() || key_slice.size() > kMaxBytesPerEncodedHybridTime + 1) {
184
0
    return STATUS_FORMAT(
185
0
        Corruption,
186
0
        "Invalid doc hybrid time in reverse intent record suffix: $0",
187
0
        key_slice.ToDebugHexString());
188
0
  }
189
190
1.23k
  DocHybridTimeWordBuffer doc_ht_buffer;
191
1.23k
  key_slice = InvertEncodedDocHT(key_slice, &doc_ht_buffer);
192
193
1.23k
  if (static_cast<ValueType>(key_slice[0]) != ValueType::kHybridTime) {
194
0
    return STATUS_FORMAT(
195
0
        Corruption,
196
0
        "Invalid prefix of doc hybrid time in reverse intent record decoded suffix: $0",
197
0
        key_slice.ToDebugHexString());
198
0
  }
199
1.23k
  key_slice.consume_byte();
200
1.23k
  return DocHybridTime::DecodeFrom(&key_slice);
201
1.23k
}
202
203
86.6M
Slice InvertEncodedDocHT(const Slice& input, DocHybridTimeWordBuffer* buffer) {
204
86.6M
  memcpy(buffer->data(), input.data(), input.size());
205
433M
  for (size_t i = 0; i != kMaxWordsPerEncodedHybridTimeWithValueType; 
++i346M
) {
206
346M
    (*buffer)[i] = ~(*buffer)[i];
207
346M
  }
208
86.6M
  return {pointer_cast<char*>(buffer->data()), input.size()};
209
86.6M
}
210
211
}  // namespace docdb
212
}  // namespace yb