YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/util/pb_util.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// Licensed to the Apache Software Foundation (ASF) under one
3
// or more contributor license agreements.  See the NOTICE file
4
// distributed with this work for additional information
5
// regarding copyright ownership.  The ASF licenses this file
6
// to you under the Apache License, Version 2.0 (the
7
// "License"); you may not use this file except in compliance
8
// with the License.  You may obtain a copy of the License at
9
//
10
//   http://www.apache.org/licenses/LICENSE-2.0
11
//
12
// Unless required by applicable law or agreed to in writing,
13
// software distributed under the License is distributed on an
14
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
// KIND, either express or implied.  See the License for the
16
// specific language governing permissions and limitations
17
// under the License.
18
//
19
// The following only applies to changes made to this file as part of YugaByte development.
20
//
21
// Portions Copyright (c) YugaByte, Inc.
22
//
23
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
24
// in compliance with the License.  You may obtain a copy of the License at
25
//
26
// http://www.apache.org/licenses/LICENSE-2.0
27
//
28
// Unless required by applicable law or agreed to in writing, software distributed under the License
29
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
30
// or implied.  See the License for the specific language governing permissions and limitations
31
// under the License.
32
//
33
// Utilities for dealing with protocol buffers.
34
// These are mostly just functions similar to what are found in the protobuf
35
// library itself, but using yb::faststring instances instead of STL strings.
36
#ifndef YB_UTIL_PB_UTIL_H
37
#define YB_UTIL_PB_UTIL_H
38
39
#include <string>
40
41
#include <gtest/gtest_prod.h>
42
43
#include "yb/util/faststring.h"
44
#include "yb/util/slice.h"
45
#include "yb/util/status_fwd.h"
46
47
namespace google {
48
namespace protobuf {
49
50
class FileDescriptor;
51
class FileDescriptorSet;
52
class MessageLite;
53
class Message;
54
55
template <class T>
56
class RepeatedPtrField;
57
58
} // namespace protobuf
59
} // namespace google
60
61
namespace yb {
62
63
class Env;
64
class RandomAccessFile;
65
class SequentialFile;
66
class Slice;
67
class WritableFile;
68
69
namespace pb_util {
70
71
using google::protobuf::MessageLite;
72
73
// Whether a write helper must make its output durable before returning.
enum SyncMode {
74
  SYNC,  // The changes are made durable (the written file is fsynced).
75
  NO_SYNC  // NOTE(review): presumably skips the fsync step - confirm in pb_util.cc.
76
};
77
78
// Controls what a write helper does when the destination path already exists.
enum CreateMode {
79
  OVERWRITE,  // NOTE(review): presumably replaces an existing file - confirm in pb_util.cc.
80
  NO_OVERWRITE  // The write fails if the path already exists.
81
};
82
83
// Counterpart of MessageLite::AppendToString that appends to a yb::faststring
// instead of an STL string. See MessageLite::AppendToString for the semantics.
84
void AppendToString(const MessageLite &msg, faststring *output);
85
86
// See MessageLite::AppendPartialToString. Overloads are provided for both
// yb::faststring and std::string outputs.
87
void AppendPartialToString(const MessageLite &msg, faststring *output);
88
void AppendPartialToString(const MessageLite &msg, std::string *output);
89
90
// Counterpart of MessageLite::SerializeToString that writes into a yb::faststring
// instead of an STL string. See MessageLite::SerializeToString for the semantics.
91
void SerializeToString(const MessageLite &msg, faststring *output);
92
93
// Parses 'msg' from 'rfile'. See MessageLite::ParseFromZeroCopyStream.
// NOTE(review): presumably returns true on success - confirm in pb_util.cc.
// The bool result cannot tell an I/O error apart from a malformed message
// (see the TODO below).
94
// TODO: change this to return Status - differentiate IO error from bad PB
95
bool ParseFromSequentialFile(MessageLite *msg, SequentialFile *rfile);
96
97
// Parses the 'length' bytes starting at 'data' into 'msg'.
// Similar to MessageLite::ParseFromArray, with the difference that it returns
98
// Status::kCorruption if the message could not be parsed.
99
Status ParseFromArray(MessageLite* msg, const uint8_t* data, size_t length);
100
101
// Parses the bytes referenced by 'slice' into a default-constructed message of
// type T by calling ParseFromArray(). Returns the parsed message on success; if
// ParseFromArray() reports an error, RETURN_NOT_OK hands that status back to
// the caller instead.
template<class T>
102
9.14k
Result<T> ParseFromSlice(const Slice& slice) {
103
9.14k
  T result;
104
9.14k
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
9.14k
  return result;
106
9.14k
}
Unexecuted instantiation: yb::Result<yb::docdb::ApplyTransactionStatePB> yb::pb_util::ParseFromSlice<yb::docdb::ApplyTransactionStatePB>(yb::Slice const&)
yb::Result<yb::encryption::UniverseKeyRegistryPB> yb::pb_util::ParseFromSlice<yb::encryption::UniverseKeyRegistryPB>(yb::Slice const&)
Line
Count
Source
102
80
Result<T> ParseFromSlice(const Slice& slice) {
103
80
  T result;
104
80
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
80
  return result;
106
80
}
yb::Result<yb::master::SysSnapshotEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysSnapshotEntryPB>(yb::Slice const&)
Line
Count
Source
102
94
Result<T> ParseFromSlice(const Slice& slice) {
103
94
  T result;
104
94
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
94
  return result;
106
94
}
yb::Result<yb::master::SnapshotScheduleOptionsPB> yb::pb_util::ParseFromSlice<yb::master::SnapshotScheduleOptionsPB>(yb::Slice const&)
Line
Count
Source
102
33
Result<T> ParseFromSlice(const Slice& slice) {
103
33
  T result;
104
33
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
33
  return result;
106
33
}
yb::Result<yb::master::DdlLogEntryPB> yb::pb_util::ParseFromSlice<yb::master::DdlLogEntryPB>(yb::Slice const&)
Line
Count
Source
102
3
Result<T> ParseFromSlice(const Slice& slice) {
103
3
  T result;
104
3
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
3
  return result;
106
3
}
yb::Result<yb::master::SysTablesEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysTablesEntryPB>(yb::Slice const&)
Line
Count
Source
102
8.36k
Result<T> ParseFromSlice(const Slice& slice) {
103
8.36k
  T result;
104
8.36k
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
8.36k
  return result;
106
8.36k
}
yb::Result<yb::master::SysNamespaceEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysNamespaceEntryPB>(yb::Slice const&)
Line
Count
Source
102
133
Result<T> ParseFromSlice(const Slice& slice) {
103
133
  T result;
104
133
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
133
  return result;
106
133
}
yb::Result<yb::master::SysTabletsEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysTabletsEntryPB>(yb::Slice const&)
Line
Count
Source
102
402
Result<T> ParseFromSlice(const Slice& slice) {
103
402
  T result;
104
402
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
402
  return result;
106
402
}
yb::Result<yb::encryption::EncryptionParamsPB> yb::pb_util::ParseFromSlice<yb::encryption::EncryptionParamsPB>(yb::Slice const&)
Line
Count
Source
102
35
Result<T> ParseFromSlice(const Slice& slice) {
103
35
  T result;
104
35
  RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size()));
105
35
  return result;
106
35
}
107
108
// Load a protobuf from the file at 'path' into 'msg'.
109
Status ReadPBFromPath(Env* env, const std::string& path, MessageLite* msg);
110
111
// Serialize the protobuf 'msg' to the file at 'path'.
112
//
113
// If 'sync' is SYNC, ensures the changes are made durable.
114
Status WritePBToPath(Env* env, const std::string& path, const MessageLite& msg, SyncMode sync);
115
116
// Truncate any 'bytes' or 'string' fields of 'message' to 'max_len'.
117
// The text "<truncated>" is appended to any such truncated fields.
118
void TruncateFields(google::protobuf::Message* message, int max_len);
119
120
// A protobuf "container" has the following format (all integers in
121
// little-endian byte order):
122
//
123
//
124
//
125
// magic number: 8 byte string identifying the file format.
126
//
127
//               Included so that we have a minimal guarantee that this file is
128
//               of the type we expect and that we are not just reading garbage.
129
//
130
// container_version: 4 byte unsigned integer indicating the "version" of the
131
//                    container format. Must be set to 1 at this time.
132
//
133
//                    Included so that this file format may be extended at some
134
//                    later date while maintaining backwards compatibility.
135
//
136
//
137
// The remaining container fields are repeated (in a group) for each protobuf message.
138
//
139
//
140
// data size: 4 byte unsigned integer indicating the size of the encoded data.
141
//
142
//            Included because PB messages aren't self-delimiting, and thus
143
//            writing a stream of messages to the same file requires
144
//            delimiting each with its size.
145
//
146
//            See https://developers.google.com/protocol-buffers/docs/techniques#streaming
147
//            for more details.
148
//
149
// data: "size" bytes of protobuf data encoded according to the schema.
150
//
151
//       Our payload.
152
//
153
// checksum: 4 byte unsigned integer containing the CRC32C checksum of "data".
154
//
155
//           Included to ensure validity of the data on-disk.
156
//
157
// Every container must have at least one protobuf message: the
158
// supplemental header. It includes additional container-level information.
159
// See pb_util.proto for details. As a containerized PB message, the header
160
// is protected by a CRC32C checksum like any other message.
161
//
162
//
163
// It is worth describing the kinds of errors that can be detected by the
164
// protobuf container and the kinds that cannot.
165
//
166
// The checksums in the container are independent, not rolling. As such,
167
// they won't detect the disappearance or reordering of entire protobuf
168
// messages, which can happen if a range of the file is collapsed (see
169
// man fallocate(2)) or if the file is otherwise manually manipulated.
170
// Moreover, the checksums do not protect against corruption in the data
171
// size fields, though that is mitigated by validating each data size
172
// against the remaining number of bytes in the container.
173
//
174
// Additionally, the container does not include footers or periodic
175
// checkpoints. As such, it will not detect if entire protobuf messages
176
// are truncated.
177
//
178
// That said, all corruption or truncation of the magic number or the
179
// container version will be detected, as will most corruption/truncation
180
// of the data size, data, and checksum (subject to CRC32C limitations).
181
//
182
// These tradeoffs in error detection are reasonable given the failure
183
// environment that YB operates within. We tolerate failures such as
184
// "kill -9" of the YB process, machine power loss, or fsync/fdatasync
185
// failure, but not failures like runaway processes mangling data files
186
// in arbitrary ways or attackers crafting malicious data files.
187
//
188
// The one kind of failure that clients must handle is truncation of entire
189
// protobuf messages (see above). The protobuf container will not detect
190
// these failures, so clients must tolerate them in some way.
191
//
192
// For further reading on what files might look like following a normal
193
// filesystem failure, see:
194
//
195
// https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-pillai.pdf
196
197
// Protobuf container file opened for writing.
198
//
199
// Can be built around an existing file or a completely new file.
200
//
201
// Not thread-safe.
202
class WritablePBContainerFile {
203
 public:
204
205
  // Initializes the class instance, taking ownership of 'writer'; writer must be open.
206
  explicit WritablePBContainerFile(std::unique_ptr<WritableFile> writer);
207
208
  // Closes the container if not already closed.
209
  ~WritablePBContainerFile();
210
211
  // Writes the header information to the container.
212
  //
213
  // 'msg' need not be populated; its type is used to "lock" the container
214
  // to a particular protobuf message type in Append().
215
  CHECKED_STATUS Init(const google::protobuf::Message& msg);
216
217
  // Writes a protobuf message to the container, beginning with its size
218
  // and ending with its CRC32C checksum.
219
  CHECKED_STATUS Append(const google::protobuf::Message& msg);
220
221
  // Asynchronously flushes all dirty container data to the filesystem.
222
  CHECKED_STATUS Flush();
223
224
  // Synchronizes all dirty container data to the filesystem.
225
  //
226
  // Note: the parent directory is _not_ synchronized. Because the
227
  // container file was provided during construction, we don't know whether
228
  // it was created or reopened, and parent directory synchronization is
229
  // only needed in the former case.
230
  CHECKED_STATUS Sync();
231
232
  // Closes the container.
233
  CHECKED_STATUS Close();
234
235
 private:
236
  FRIEND_TEST(TestPBUtil, TestPopulateDescriptorSet);  // Gives this test access to private members.
237
238
  // Write the protobuf schemas belonging to 'desc' and all of its
239
  // dependencies to 'output'.
240
  //
241
  // Schemas are written in dependency order (i.e. if A depends on B which
242
  // depends on C, the order is C, B, A).
243
  static void PopulateDescriptorSet(const google::protobuf::FileDescriptor* desc,
244
                                    google::protobuf::FileDescriptorSet* output);
245
246
  // Serialize the contents of 'msg' into 'buf' along with additional metadata
247
  // to aid in deserialization.
248
  CHECKED_STATUS AppendMsgToBuffer(const google::protobuf::Message& msg, faststring* buf);
249
250
  bool closed_;  // NOTE(review): presumably true once the container has been closed - confirm.
251
252
  std::unique_ptr<WritableFile> writer_;  // NOTE(review): presumably the constructor's 'writer'.
253
};
254
255
// Protobuf container file opened for reading.
256
//
257
// Can be built around a file with existing contents or an empty file (in
258
// which case it's safe to interleave with WritablePBContainerFile).
259
class ReadablePBContainerFile {
260
 public:
261
262
  // Initializes the class instance, taking ownership of 'reader'; reader must be open.
263
  explicit ReadablePBContainerFile(std::unique_ptr<RandomAccessFile> reader);
264
265
  // Closes the file if not already closed.
266
  ~ReadablePBContainerFile();
267
268
  // Reads the header information from the container and validates it.
269
  CHECKED_STATUS Init();
270
271
  // Reads a protobuf message from the container, validating its size and
272
  // data using a CRC32C checksum.
273
  CHECKED_STATUS ReadNextPB(google::protobuf::Message* msg);
274
275
  // Dumps any unread protobuf messages in the container to 'os'. Each
276
  // message's DebugString() method is invoked to produce its textual form.
277
  //
278
  // If 'oneline' is true, prints each message on a single line.
279
  CHECKED_STATUS Dump(std::ostream* os, bool oneline);
280
281
  // Closes the container.
282
  CHECKED_STATUS Close();
283
284
  // Expected PB type and schema for each message to be read.
285
  // protos() returns a pointer that remains owned by this object.
  //
286
  // Only valid after a successful call to Init().
287
2
  const std::string& pb_type() const { return pb_type_; }
288
14
  const google::protobuf::FileDescriptorSet* protos() const {
289
14
    return protos_.get();
290
14
  }
291
292
 private:
293
  enum EofOK {  // Tells ValidateAndRead() whether hitting EOF is acceptable.
294
    EOF_OK,  // An EOF is returned to the caller as-is.
295
    EOF_NOT_OK  // An EOF is treated as an invalid short read and reported as an error.
296
  };
297
298
  // Reads exactly 'length' bytes from the container file into 'scratch',
299
  // validating the correctness of the read both before and after and
300
  // returning a slice of the bytes in 'result'.
301
  //
302
  // If 'eofOK' is EOF_OK, an EOF is returned as-is. Otherwise, it is
303
  // considered to be an invalid short read and returned as an error.
304
  CHECKED_STATUS ValidateAndRead(size_t length, EofOK eofOK,
305
                                 Slice* result, std::unique_ptr<uint8_t[]>* scratch);
306
307
  size_t offset_;  // NOTE(review): presumably the current read position in the file - confirm.
308
309
  // The fully-qualified PB type name of the messages in the container.
310
  std::string pb_type_;
311
312
  // Schema exposed through protos().
  // Wrapped in a std::unique_ptr so that clients need not include PB headers.
313
  std::unique_ptr<google::protobuf::FileDescriptorSet> protos_;
314
315
  std::unique_ptr<RandomAccessFile> reader_;  // NOTE(review): presumably the constructor's 'reader'.
316
};
317
318
// Convenience functions for protobuf containers holding just one record.
319
320
// Load a "containerized" protobuf from the given path into 'msg'.
321
// If the file does not exist, returns STATUS(NotFound, ""). Otherwise, may
322
// return other Status error codes such as Status::IOError.
323
Status ReadPBContainerFromPath(Env* env, const std::string& path,
324
                               google::protobuf::Message* msg);
325
326
// Overload that additionally takes 'pb_type_name'.
// NOTE(review): presumably the PB type recorded in the container is checked
// against 'pb_type_name' before 'msg' is filled in - confirm in pb_util.cc.
Status ReadPBContainerFromPath(Env* env, const std::string& path, const std::string& pb_type_name,
327
                               google::protobuf::Message* msg);
328
329
// Serialize 'msg' as a "containerized" protobuf to the given path.
330
//
331
// If create == NO_OVERWRITE and 'path' already exists, the function will fail.
332
// If sync == SYNC, the newly created file will be fsynced before returning.
333
Status WritePBContainerToPath(Env* env, const std::string& path,
334
                              const google::protobuf::Message& msg,
335
                              CreateMode create,
336
                              SyncMode sync);
337
338
// Return true if 'prev_pb' and 'new_pb' are equal.
339
//
340
// If 'diff_str' is not null, stores a textual description of the
341
// difference in '*diff_str'.
342
bool ArePBsEqual(const google::protobuf::Message& prev_pb,
343
                 const google::protobuf::Message& new_pb,
344
                 std::string* diff_str);
345
346
} // namespace pb_util
347
348
// Shorthand for a protobuf repeated field whose elements are std::string.
using RepeatedBytes = google::protobuf::RepeatedPtrField<std::string>;
349
350
} // namespace yb
351
352
#endif // YB_UTIL_PB_UTIL_H