/Users/deen/code/yugabyte-db/src/yb/util/pb_util.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // Licensed to the Apache Software Foundation (ASF) under one |
3 | | // or more contributor license agreements. See the NOTICE file |
4 | | // distributed with this work for additional information |
5 | | // regarding copyright ownership. The ASF licenses this file |
6 | | // to you under the Apache License, Version 2.0 (the |
7 | | // "License"); you may not use this file except in compliance |
8 | | // with the License. You may obtain a copy of the License at |
9 | | // |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // |
12 | | // Unless required by applicable law or agreed to in writing, |
13 | | // software distributed under the License is distributed on an |
14 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | | // KIND, either express or implied. See the License for the |
16 | | // specific language governing permissions and limitations |
17 | | // under the License. |
18 | | // |
19 | | // The following only applies to changes made to this file as part of YugaByte development. |
20 | | // |
21 | | // Portions Copyright (c) YugaByte, Inc. |
22 | | // |
23 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
24 | | // in compliance with the License. You may obtain a copy of the License at |
25 | | // |
26 | | // http://www.apache.org/licenses/LICENSE-2.0 |
27 | | // |
28 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
29 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
30 | | // or implied. See the License for the specific language governing permissions and limitations |
31 | | // under the License. |
32 | | // |
33 | | // Utilities for dealing with protocol buffers. |
34 | | // These are mostly just functions similar to those found in the protobuf |
35 | | // library itself, but using yb::faststring instances instead of STL strings. |
36 | | #ifndef YB_UTIL_PB_UTIL_H |
37 | | #define YB_UTIL_PB_UTIL_H |
38 | | |
39 | | #include <string> |
40 | | |
41 | | #include <gtest/gtest_prod.h> |
42 | | |
43 | | #include "yb/util/faststring.h" |
44 | | #include "yb/util/slice.h" |
45 | | #include "yb/util/status_fwd.h" |
46 | | |
47 | | namespace google { |
48 | | namespace protobuf { |
49 | | |
50 | | class FileDescriptor; |
51 | | class FileDescriptorSet; |
52 | | class MessageLite; |
53 | | class Message; |
54 | | |
55 | | template <class T> |
56 | | class RepeatedPtrField; |
57 | | |
58 | | } // namespace protobuf |
59 | | } // namespace google |
60 | | |
61 | | namespace yb { |
62 | | |
63 | | class Env; |
64 | | class RandomAccessFile; |
65 | | class SequentialFile; |
66 | | class Slice; |
67 | | class WritableFile; |
68 | | |
69 | | namespace pb_util { |
70 | | |
71 | | using google::protobuf::MessageLite; |
72 | | |
73 | | enum SyncMode { |
74 | | SYNC, |
75 | | NO_SYNC |
76 | | }; |
77 | | |
78 | | enum CreateMode { |
79 | | OVERWRITE, |
80 | | NO_OVERWRITE |
81 | | }; |
82 | | |
83 | | // See MessageLite::AppendToString |
84 | | void AppendToString(const MessageLite &msg, faststring *output); |
85 | | |
86 | | // See MessageLite::AppendPartialToString |
87 | | void AppendPartialToString(const MessageLite &msg, faststring *output); |
88 | | void AppendPartialToString(const MessageLite &msg, std::string *output); |
89 | | |
90 | | // See MessageLite::SerializeToString. |
91 | | void SerializeToString(const MessageLite &msg, faststring *output); |
92 | | |
93 | | // See MessageLite::ParseFromZeroCopyStream |
94 | | // TODO: change this to return Status - differentiate IO error from bad PB |
95 | | bool ParseFromSequentialFile(MessageLite *msg, SequentialFile *rfile); |
96 | | |
97 | | // Similar to MessageLite::ParseFromArray, with the difference that it returns |
98 | | // Status::kCorruption if the message could not be parsed. |
99 | | Status ParseFromArray(MessageLite* msg, const uint8_t* data, size_t length); |
100 | | |
101 | | template<class T> |
102 | 9.14k | Result<T> ParseFromSlice(const Slice& slice) { |
103 | 9.14k | T result; |
104 | 9.14k | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); |
105 | 9.14k | return result; |
106 | 9.14k | } Unexecuted instantiation: yb::Result<yb::docdb::ApplyTransactionStatePB> yb::pb_util::ParseFromSlice<yb::docdb::ApplyTransactionStatePB>(yb::Slice const&) yb::Result<yb::encryption::UniverseKeyRegistryPB> yb::pb_util::ParseFromSlice<yb::encryption::UniverseKeyRegistryPB>(yb::Slice const&) Line | Count | Source | 102 | 80 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 80 | T result; | 104 | 80 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 80 | return result; | 106 | 80 | } |
yb::Result<yb::master::SysSnapshotEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysSnapshotEntryPB>(yb::Slice const&) Line | Count | Source | 102 | 94 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 94 | T result; | 104 | 94 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 94 | return result; | 106 | 94 | } |
yb::Result<yb::master::SnapshotScheduleOptionsPB> yb::pb_util::ParseFromSlice<yb::master::SnapshotScheduleOptionsPB>(yb::Slice const&) Line | Count | Source | 102 | 33 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 33 | T result; | 104 | 33 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 33 | return result; | 106 | 33 | } |
yb::Result<yb::master::DdlLogEntryPB> yb::pb_util::ParseFromSlice<yb::master::DdlLogEntryPB>(yb::Slice const&) Line | Count | Source | 102 | 3 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 3 | T result; | 104 | 3 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 3 | return result; | 106 | 3 | } |
yb::Result<yb::master::SysTablesEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysTablesEntryPB>(yb::Slice const&) Line | Count | Source | 102 | 8.36k | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 8.36k | T result; | 104 | 8.36k | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 8.36k | return result; | 106 | 8.36k | } |
yb::Result<yb::master::SysNamespaceEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysNamespaceEntryPB>(yb::Slice const&) Line | Count | Source | 102 | 133 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 133 | T result; | 104 | 133 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 133 | return result; | 106 | 133 | } |
yb::Result<yb::master::SysTabletsEntryPB> yb::pb_util::ParseFromSlice<yb::master::SysTabletsEntryPB>(yb::Slice const&) Line | Count | Source | 102 | 402 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 402 | T result; | 104 | 402 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 402 | return result; | 106 | 402 | } |
yb::Result<yb::encryption::EncryptionParamsPB> yb::pb_util::ParseFromSlice<yb::encryption::EncryptionParamsPB>(yb::Slice const&) Line | Count | Source | 102 | 35 | Result<T> ParseFromSlice(const Slice& slice) { | 103 | 35 | T result; | 104 | 35 | RETURN_NOT_OK(ParseFromArray(&result, slice.data(), slice.size())); | 105 | 35 | return result; | 106 | 35 | } |
|
107 | | |
108 | | // Load a protobuf from the given path. |
109 | | Status ReadPBFromPath(Env* env, const std::string& path, MessageLite* msg); |
110 | | |
111 | | // Serialize a protobuf to the given path. |
112 | | // |
113 | | // If SyncMode SYNC is provided, ensures the changes are made durable. |
114 | | Status WritePBToPath(Env* env, const std::string& path, const MessageLite& msg, SyncMode sync); |
115 | | |
116 | | // Truncate any 'bytes' or 'string' fields of this message to max_len. |
117 | | // The text "<truncated>" is appended to any such truncated fields. |
118 | | void TruncateFields(google::protobuf::Message* message, int max_len); |
119 | | |
120 | | // A protobuf "container" has the following format (all integers in |
121 | | // little-endian byte order): |
122 | | // |
123 | | // |
124 | | // |
125 | | // magic number: 8 byte string identifying the file format. |
126 | | // |
127 | | // Included so that we have a minimal guarantee that this file is |
128 | | // of the type we expect and that we are not just reading garbage. |
129 | | // |
130 | | // container_version: 4 byte unsigned integer indicating the "version" of the |
131 | | // container format. Must be set to 1 at this time. |
132 | | // |
133 | | // Included so that this file format may be extended at some |
134 | | // later date while maintaining backwards compatibility. |
135 | | // |
136 | | // |
137 | | // The remaining container fields are repeated (in a group) for each protobuf message. |
138 | | // |
139 | | // |
140 | | // data size: 4 byte unsigned integer indicating the size of the encoded data. |
141 | | // |
142 | | // Included because PB messages aren't self-delimiting, and thus |
143 | | // writing a stream of messages to the same file requires |
144 | | // delimiting each with its size. |
145 | | // |
146 | | // See https://developers.google.com/protocol-buffers/docs/techniques#streaming |
147 | | // for more details. |
148 | | // |
149 | | // data: "size" bytes of protobuf data encoded according to the schema. |
150 | | // |
151 | | // Our payload. |
152 | | // |
153 | | // checksum: 4 byte unsigned integer containing the CRC32C checksum of "data". |
154 | | // |
155 | | // Included to ensure validity of the data on-disk. |
156 | | // |
157 | | // Every container must have at least one protobuf message: the |
158 | | // supplemental header. It includes additional container-level information. |
159 | | // See pb_util.proto for details. As a containerized PB message, the header |
160 | | // is protected by a CRC32C checksum like any other message. |
161 | | // |
162 | | // |
163 | | // It is worth describing the kinds of errors that can be detected by the |
164 | | // protobuf container and the kinds that cannot. |
165 | | // |
166 | | // The checksums in the container are independent, not rolling. As such, |
167 | | // they won't detect the disappearance or reordering of entire protobuf |
168 | | // messages, which can happen if a range of the file is collapsed (see |
169 | | // man fallocate(2)) or if the file is otherwise manually manipulated. |
170 | | // Moreover, the checksums do not protect against corruption in the data |
171 | | // size fields, though that is mitigated by validating each data size |
172 | | // against the remaining number of bytes in the container. |
173 | | // |
174 | | // Additionally, the container does not include footers or periodic |
175 | | // checkpoints. As such, it will not detect if entire protobuf messages |
176 | | // are truncated. |
177 | | // |
178 | | // That said, all corruption or truncation of the magic number or the |
179 | | // container version will be detected, as will most corruption/truncation |
180 | | // of the data size, data, and checksum (subject to CRC32C limitations). |
181 | | // |
182 | | // These tradeoffs in error detection are reasonable given the failure |
183 | | // environment that YB operates within. We tolerate failures such as |
184 | | // "kill -9" of the YB process, machine power loss, or fsync/fdatasync |
185 | | // failure, but not failures like runaway processes mangling data files |
186 | | // in arbitrary ways or attackers crafting malicious data files. |
187 | | // |
188 | | // The one kind of failure that clients must handle is truncation of entire |
189 | | // protobuf messages (see above). The protobuf container will not detect |
190 | | // these failures, so clients must tolerate them in some way. |
191 | | // |
192 | | // For further reading on what files might look like following a normal |
193 | | // filesystem failure, see: |
194 | | // |
195 | | // https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-pillai.pdf |
196 | | |
197 | | // Protobuf container file opened for writing. |
198 | | // |
199 | | // Can be built around an existing file or a completely new file. |
200 | | // |
201 | | // Not thread-safe. |
202 | | class WritablePBContainerFile { |
203 | | public: |
204 | | |
205 | | // Initializes the class instance; writer must be open. |
206 | | explicit WritablePBContainerFile(std::unique_ptr<WritableFile> writer); |
207 | | |
208 | | // Closes the container if not already closed. |
209 | | ~WritablePBContainerFile(); |
210 | | |
211 | | // Writes the header information to the container. |
212 | | // |
213 | | // 'msg' need not be populated; its type is used to "lock" the container |
214 | | // to a particular protobuf message type in Append(). |
215 | | CHECKED_STATUS Init(const google::protobuf::Message& msg); |
216 | | |
217 | | // Writes a protobuf message to the container, beginning with its size |
218 | | // and ending with its CRC32C checksum. |
219 | | CHECKED_STATUS Append(const google::protobuf::Message& msg); |
220 | | |
221 | | // Asynchronously flushes all dirty container data to the filesystem. |
222 | | CHECKED_STATUS Flush(); |
223 | | |
224 | | // Synchronizes all dirty container data to the filesystem. |
225 | | // |
226 | | // Note: the parent directory is _not_ synchronized. Because the |
227 | | // container file was provided during construction, we don't know whether |
228 | | // it was created or reopened, and parent directory synchronization is |
229 | | // only needed in the former case. |
230 | | CHECKED_STATUS Sync(); |
231 | | |
232 | | // Closes the container. |
233 | | CHECKED_STATUS Close(); |
234 | | |
235 | | private: |
236 | | FRIEND_TEST(TestPBUtil, TestPopulateDescriptorSet); |
237 | | |
238 | | // Write the protobuf schemas belonging to 'desc' and all of its |
239 | | // dependencies to 'output'. |
240 | | // |
241 | | // Schemas are written in dependency order (i.e. if A depends on B which |
242 | | // depends on C, the order is C, B, A). |
243 | | static void PopulateDescriptorSet(const google::protobuf::FileDescriptor* desc, |
244 | | google::protobuf::FileDescriptorSet* output); |
245 | | |
246 | | // Serialize the contents of 'msg' into 'buf' along with additional metadata |
247 | | // to aid in deserialization. |
248 | | CHECKED_STATUS AppendMsgToBuffer(const google::protobuf::Message& msg, faststring* buf); |
249 | | |
250 | | bool closed_; |
251 | | |
252 | | std::unique_ptr<WritableFile> writer_; |
253 | | }; |
254 | | |
255 | | // Protobuf container file opened for reading. |
256 | | // |
257 | | // Can be built around a file with existing contents or an empty file (in |
258 | | // which case it's safe to interleave with WritablePBContainerFile). |
259 | | class ReadablePBContainerFile { |
260 | | public: |
261 | | |
262 | | // Initializes the class instance; reader must be open. |
263 | | explicit ReadablePBContainerFile(std::unique_ptr<RandomAccessFile> reader); |
264 | | |
265 | | // Closes the file if not already closed. |
266 | | ~ReadablePBContainerFile(); |
267 | | |
268 | | // Reads the header information from the container and validates it. |
269 | | CHECKED_STATUS Init(); |
270 | | |
271 | | // Reads a protobuf message from the container, validating its size and |
272 | | // data using a CRC32C checksum. |
273 | | CHECKED_STATUS ReadNextPB(google::protobuf::Message* msg); |
274 | | |
275 | | // Dumps any unread protobuf messages in the container to 'os'. Each |
276 | | // message's DebugString() method is invoked to produce its textual form. |
277 | | // |
278 | | // If 'oneline' is true, prints each message on a single line. |
279 | | CHECKED_STATUS Dump(std::ostream* os, bool oneline); |
280 | | |
281 | | // Closes the container. |
282 | | CHECKED_STATUS Close(); |
283 | | |
284 | | // Expected PB type and schema for each message to be read. |
285 | | // |
286 | | // Only valid after a successful call to Init(). |
287 | 2 | const std::string& pb_type() const { return pb_type_; } |
288 | 14 | const google::protobuf::FileDescriptorSet* protos() const { |
289 | 14 | return protos_.get(); |
290 | 14 | } |
291 | | |
292 | | private: |
293 | | enum EofOK { |
294 | | EOF_OK, |
295 | | EOF_NOT_OK |
296 | | }; |
297 | | |
298 | | // Reads exactly 'length' bytes from the container file into 'scratch', |
299 | | // validating the correctness of the read both before and after and |
300 | | // returning a slice of the bytes in 'result'. |
301 | | // |
302 | | // If 'eofOK' is EOF_OK, an EOF is returned as-is. Otherwise, it is |
303 | | // considered to be an invalid short read and returned as an error. |
304 | | CHECKED_STATUS ValidateAndRead(size_t length, EofOK eofOK, |
305 | | Slice* result, std::unique_ptr<uint8_t[]>* scratch); |
306 | | |
307 | | size_t offset_; |
308 | | |
309 | | // The fully-qualified PB type name of the messages in the container. |
310 | | std::string pb_type_; |
311 | | |
312 | | // Wrapped in a std::unique_ptr so that clients need not include PB headers. |
313 | | std::unique_ptr<google::protobuf::FileDescriptorSet> protos_; |
314 | | |
315 | | std::unique_ptr<RandomAccessFile> reader_; |
316 | | }; |
317 | | |
318 | | // Convenience functions for protobuf containers holding just one record. |
319 | | |
320 | | // Load a "containerized" protobuf from the given path. |
321 | | // If the file does not exist, returns STATUS(NotFound, ""). Otherwise, may |
322 | | // return other Status error codes such as Status::IOError. |
323 | | Status ReadPBContainerFromPath(Env* env, const std::string& path, |
324 | | google::protobuf::Message* msg); |
325 | | |
326 | | Status ReadPBContainerFromPath(Env* env, const std::string& path, const std::string& pb_type_name, |
327 | | google::protobuf::Message* msg); |
328 | | |
329 | | // Serialize a "containerized" protobuf to the given path. |
330 | | // |
331 | | // If create == NO_OVERWRITE and 'path' already exists, the function will fail. |
332 | | // If sync == SYNC, the newly created file will be fsynced before returning. |
333 | | Status WritePBContainerToPath(Env* env, const std::string& path, |
334 | | const google::protobuf::Message& msg, |
335 | | CreateMode create, |
336 | | SyncMode sync); |
337 | | |
338 | | // Return true if the two PBs are equal. |
339 | | // |
340 | | // If 'diff_str' is not null, stores a textual description of the |
341 | | // difference. |
342 | | bool ArePBsEqual(const google::protobuf::Message& prev_pb, |
343 | | const google::protobuf::Message& new_pb, |
344 | | std::string* diff_str); |
345 | | |
346 | | } // namespace pb_util |
347 | | |
348 | | using RepeatedBytes = google::protobuf::RepeatedPtrField<std::string>; |
349 | | |
350 | | } // namespace yb |
351 | | |
352 | | #endif // YB_UTIL_PB_UTIL_H |