YugabyteDB (2.13.1.0-b60, 21121d69985fbf76aa6958d8f04a9bfa936293b5)

Coverage Report

Created: 2022-03-22 16:43

/Users/deen/code/yugabyte-db/src/yb/common/jsonb.h
Line
Count
Source
1
// Copyright (c) YugaByte, Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
4
// in compliance with the License.  You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software distributed under the License
9
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
10
// or implied.  See the License for the specific language governing permissions and limitations
11
// under the License.
12
//
13
14
#ifndef YB_COMMON_JSONB_H
15
#define YB_COMMON_JSONB_H
16
17
#include <string>
18
19
#include <rapidjson/document.h>
20
21
#include "yb/common/common_fwd.h"
22
23
#include "yb/util/slice.h"
24
#include "yb/util/status_fwd.h"
25
26
namespace yb {
27
namespace common {
28
29
using JsonbMetadata = uint32_t;
30
using JsonbHeader = JsonbMetadata;
31
using JEntry = JsonbMetadata;
32
33
// Jsonb is a serialization format for json that is used in postgresql. This implementation of jsonb
34
// is similar to the jsonb format, although not exactly the same (details regarding differences
35
// follow). The jsonb format first includes a 32 bit header, whose lower 28 bits store the total
36
// number of key-value pairs in the json object. The upper four bits are used to indicate whether
37
// this is a json object, json array or just a scalar value.
38
//
39
// Next, we store the metadata for all the keys and values in the json object. The key-value
40
// pairs are sorted based on keys before serialization and hence the original order is lost.
41
// However, the sorting of key-value pairs would make it easier to search for a particular key in
42
// jsonb. After the 32 bit jsonb header, we store 32 bit metadata for each key, followed by a
43
// 32 bit metadata for each value. Next, we store all the keys followed by all the values.
44
//
45
// In case of arrays, we store the metadata for all the array elements first and then store the
46
// data for the corresponding array elements after that. The original order of the array elements
47
// is maintained.
48
//
49
// The 32 bit metadata is called a JEntry and its lower 28 bits store the ending offset of the
50
// data. The upper 4 bits indicate the type of the data (e.g. string, numeric, bool, array, object
51
// or null).
52
//
53
// The following are some of the differences from postgresql's jsonb implementation:
54
// 1. In the JEntry, postgresql sometimes stores offsets and sometimes stores the length. This is
55
// done for better compressibility in their case. Although, for us this doesn't make much of a
56
// difference and hence it's simpler to just use offsets.
57
// 2. In our serialization format, we just use the BigEndian format used in docdb to store
58
// serialized integers.
59
// 3. We store the data type for ints, uints, floats and doubles in the JEntry.
60
// 4. We store information about whether a container is an array or an object in the JEntry.
61
class Jsonb {
62
 public:
63
  Jsonb();
64
65
  // Creates an object from a serialized jsonb payload.
66
  explicit Jsonb(const std::string& jsonb);
67
68
  explicit Jsonb(std::string&& jsonb);
69
70
  void Assign(const std::string& jsonb);
71
  void Assign(std::string&& jsonb);
72
73
  // Creates a serialized jsonb string from plaintext json.
74
  CHECKED_STATUS FromString(const std::string& json);
75
76
  // Creates a serialized jsonb string from rapidjson document or value.
77
  CHECKED_STATUS FromRapidJson(const rapidjson::Document& document);
78
  CHECKED_STATUS FromRapidJson(const rapidjson::Value& value);
79
80
  // Creates a serialized jsonb string from QLValuePB.
81
  CHECKED_STATUS FromQLValuePB(const QLValuePB& value_pb);
82
83
  // Builds a json document from serialized jsonb.
84
  CHECKED_STATUS ToRapidJson(rapidjson::Document* document) const;
85
86
  // Returns a json string for serialized jsonb
87
  CHECKED_STATUS ToJsonString(std::string* json) const;
88
89
  CHECKED_STATUS ApplyJsonbOperators(const QLJsonColumnOperationsPB& json_ops,
90
                                     QLValue* result) const;
91
92
  const std::string& SerializedJsonb() const;
93
94
  // Use with extreme care since this destroys the internal state of the object. The only purpose
95
  // for this method is to allow for efficiently moving the serialized jsonb.
96
  std::string&& MoveSerializedJsonb();
97
98
  bool operator==(const Jsonb& other) const;
99
100
 private:
101
  std::string serialized_jsonb_;
102
103
  // Given a jsonb slice, it applies the given operator to the slice and returns the result as a
104
  // Slice and the element's metadata.
105
  static CHECKED_STATUS ApplyJsonbOperator(const Slice& jsonb, const QLJsonOperationPB& json_op,
106
                                           Slice* result, JEntry* element_metadata);
107
108
  static bool IsScalar(const JEntry& jentry);
109
110
  // Given a scalar value retrieved from a serialized jsonb, this method creates a jsonb scalar
111
  // (which is a single element within an array). This is required for comparison purposes.
112
  static CHECKED_STATUS CreateScalar(const Slice& scalar, const JEntry& original_jentry,
113
                                     std::string* scalar_jsonb);
114
115
  // Given a serialized json scalar and its metadata, return a string representation of it.
116
  static CHECKED_STATUS ScalarToString(const JEntry& element_metadata, const Slice& json_value,
117
                                       std::string* result);
118
119
  static CHECKED_STATUS ToJsonStringInternal(const Slice& jsonb, std::string* json);
120
  static size_t ComputeDataOffset(const size_t num_entries, const uint32_t container_type);
121
  static CHECKED_STATUS ToJsonbInternal(const rapidjson::Value& document, std::string* jsonb);
122
  static CHECKED_STATUS ToJsonbProcessObject(const rapidjson::Value& document,
123
                                             std::string* jsonb);
124
  static CHECKED_STATUS ToJsonbProcessArray(const rapidjson::Value& document,
125
                                            bool is_scalar,
126
                                            std::string* jsonb);
127
  static CHECKED_STATUS ProcessJsonValueAndMetadata(const rapidjson::Value& value,
128
                                                    const size_t data_begin_offset,
129
                                                    std::string* jsonb,
130
                                                    size_t* metadata_offset);
131
132
  // Method to recursively build the json object from serialized jsonb. The offset denotes the
133
  // starting position in the jsonb from which we need to start processing.
  // NOTE(review): the declaration below takes no offset parameter; this sentence looks stale.
134
  static CHECKED_STATUS FromJsonbInternal(const Slice& jsonb, rapidjson::Document* document);
135
  static CHECKED_STATUS FromJsonbProcessObject(const Slice& jsonb,
136
                                               const JsonbHeader& jsonb_header,
137
                                               rapidjson::Document* document);
138
  static CHECKED_STATUS FromJsonbProcessArray(const Slice& jsonb,
139
                                              const JsonbHeader& jsonb_header,
140
                                              rapidjson::Document* document);
141
142
  static std::pair<size_t, size_t> ComputeOffsetsAndJsonbHeader(size_t num_entries,
143
                                                                uint32_t container_type,
144
                                                                std::string* jsonb);
145
  // Retrieves an element in serialized jsonb array with the provided index. The result is a
146
  // slice pointing to a section of the serialized jsonb string provided. The parameters
147
  // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and
148
  // data in the serialized jsonb. The method also returns a JEntry for the specified element, if
149
  // metadata information for that element is required.
150
  static CHECKED_STATUS GetArrayElement(size_t index, const Slice& jsonb,
151
                                        size_t metadata_begin_offset, size_t data_begin_offset,
152
                                        Slice* result, JEntry* element_metadata);
153
154
  // Retrieves the key from a serialized jsonb object at the given index. The result is a
155
  // slice pointing to a section of the serialized jsonb string provided. The parameters
156
  // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and
157
  // data in the serialized jsonb.
158
  static CHECKED_STATUS GetObjectKey(size_t index, const Slice& jsonb, size_t metadata_begin_offset,
159
                                     size_t data_begin_offset, Slice *result);
160
161
  // Retrieves the value from a serialized jsonb object at the given index. The result is a
162
  // slice pointing to a section of the serialized jsonb string provided. The parameters
163
  // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and
164
  // data in the serialized jsonb. The parameter num_kv_pairs indicates the total number of kv
165
  // pairs in the json object. The method also returns a JEntry for the specified element, if
166
  // metadata information for that element is required.
167
  static CHECKED_STATUS GetObjectValue(size_t index, const Slice& jsonb,
168
                                       size_t metadata_begin_offset, size_t data_begin_offset,
169
                                       size_t num_kv_pairs, Slice *result, JEntry* value_metadata);
170
171
  // Helper method to retrieve the (offset, length) of a key/value serialized in jsonb format.
172
  // element_metadata_offset denotes the offset for the JEntry of the key/value,
173
  // element_end_offset denotes the end of data portion of the key/value, data_begin_offset
174
  // denotes the offset from which the data portion of jsonb starts, metadata_begin_offset is the
175
  // offset from which all the JEntry fields begin.
176
  static std::pair<size_t, size_t> GetOffsetAndLength(size_t element_metadata_offset,
177
                                                      const Slice& jsonb,
178
                                                      size_t element_end_offset,
179
                                                      size_t data_begin_offset,
180
                                                      size_t metadata_begin_offset);
181
182
  static CHECKED_STATUS ApplyJsonbOperatorToArray(const Slice& jsonb,
183
                                                  const QLJsonOperationPB& json_op,
184
                                                  const JsonbHeader& jsonb_header,
185
                                                  Slice* result,
186
                                                  JEntry* element_metadata);
187
188
  static CHECKED_STATUS ApplyJsonbOperatorToObject(const Slice& jsonb,
189
                                                   const QLJsonOperationPB& json_op,
190
                                                   const JsonbHeader& jsonb_header,
191
                                                   Slice* result,
192
                                                   JEntry* element_metadata);
193
194
2.20M
  // Returns the offset portion of a JEntry, i.e. the bits selected by kJEOffsetMask.
  static inline uint32_t GetOffset(JEntry metadata) {
    const uint32_t offset_bits = metadata & kJEOffsetMask;
    return offset_bits;
  }
195
196
70.0k
  // Returns the type portion of a JEntry, i.e. the bits selected by kJETypeMask. The value is
  // left unshifted, which is the same form in which the kJEIs* constants are defined.
  static inline uint32_t GetJEType(JEntry metadata) {
    const uint32_t type_bits = metadata & kJETypeMask;
    return type_bits;
  }
197
198
2.08M
  // Returns the count portion of a jsonb header, i.e. the bits selected by kJBCountMask.
  static inline uint32_t GetCount(JsonbHeader jsonb_header) {
    const uint32_t entry_count = jsonb_header & kJBCountMask;
    return entry_count;
  }
199
200
  // Bit masks for jsonb header fields.
201
  static constexpr uint32_t kJBCountMask = 0x0FFFFFFF; // mask for number of kv pairs.
202
  static constexpr uint32_t kJBScalar = 0x10000000; // indicates whether we have a scalar value.
203
  static constexpr uint32_t kJBObject = 0x20000000; // indicates whether we have a json object.
204
  static constexpr uint32_t kJBArray = 0x40000000; // indicates whether we have a json array.
205
206
  // Bit masks for JEntry fields.
207
  static constexpr uint32_t kJEOffsetMask = 0x0FFFFFFF;
208
  static constexpr uint32_t kJETypeMask = 0xF0000000;
209
210
  // Values stored in the type bits.
211
  static constexpr uint32_t kJEIsString = 0x00000000;
212
  static constexpr uint32_t kJEIsObject = 0x10000000;
213
  static constexpr uint32_t kJEIsBoolFalse = 0x20000000;
214
  static constexpr uint32_t kJEIsBoolTrue = 0x30000000;
215
  static constexpr uint32_t kJEIsNull = 0x40000000;
216
  static constexpr uint32_t kJEIsArray = 0x50000000;
217
  static constexpr uint32_t kJEIsInt = 0x60000000;
218
  static constexpr uint32_t kJEIsUInt = 0x70000000;
219
  static constexpr uint32_t kJEIsInt64 = 0x80000000;
220
  static constexpr uint32_t kJEIsUInt64 = 0x90000000;
221
  static constexpr uint32_t kJEIsFloat = 0xA0000000;
222
  static constexpr uint32_t kJEIsDouble = 0xB0000000;
223
};
224
225
} // namespace common
226
} // namespace yb
227
228
#endif // YB_COMMON_JSONB_H