/Users/deen/code/yugabyte-db/src/yb/common/jsonb.h
Line | Count | Source |
1 | | // Copyright (c) YugaByte, Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
4 | | // in compliance with the License. You may obtain a copy of the License at |
5 | | // |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // |
8 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
9 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
10 | | // or implied. See the License for the specific language governing permissions and limitations |
11 | | // under the License. |
12 | | // |
13 | | |
14 | | #ifndef YB_COMMON_JSONB_H |
15 | | #define YB_COMMON_JSONB_H |
16 | | |
17 | | #include <string> |
18 | | |
19 | | #include <rapidjson/document.h> |
20 | | |
21 | | #include "yb/common/common_fwd.h" |
22 | | |
23 | | #include "yb/util/slice.h" |
24 | | #include "yb/util/status_fwd.h" |
25 | | |
26 | | namespace yb { |
27 | | namespace common { |
28 | | |
29 | | using JsonbMetadata = uint32_t; |
30 | | using JsonbHeader = JsonbMetadata; |
31 | | using JEntry = JsonbMetadata; |
32 | | |
33 | | // Jsonb is a serialization format for json that is used in postgresql. This implementation of |
34 | | // jsonb is similar to postgresql's format, although not exactly the same (details regarding |
35 | | // differences follow). The jsonb format first includes a 32 bit header, whose lower 28 bits store |
36 | | // the total number of key-value pairs in the json object. The upper four bits are used to indicate |
37 | | // whether this is a json object, json array or just a scalar value. |
38 | | // |
39 | | // Next, we store the metadata for all the keys and values in the json object. The key-value |
40 | | // pairs are sorted based on keys before serialization and hence the original order is lost. |
41 | | // However, the sorting of key-value pairs would make it easier to search for a particular key in |
42 | | // jsonb. After the 32 bit jsonb header, we store 32 bit metadata for each key, followed by a |
43 | | // 32 bit metadata for each value. Next, we store all the keys followed by all the values. |
44 | | // |
45 | | // In case of arrays, we store the metadata for all the array elements first and then store the |
46 | | // data for the corresponding array elements after that. The original order of the array elements |
47 | | // is maintained. |
48 | | // |
49 | | // The 32 bit metadata is called a JEntry and its lower 28 bits store the ending offset of the |
50 | | // data. The upper 4 bits indicate the type of the data (e.g. string, numeric, bool, array, object |
51 | | // or null). |
52 | | // |
53 | | // The following are some of the differences from postgresql's jsonb implementation: |
54 | | // 1. In the JEntry, postgresql sometimes stores offsets and sometimes stores the length. This is |
55 | | // done for better compressibility in their case. For us, however, this doesn't make much of a |
56 | | // difference and hence it's simpler to just use offsets. |
57 | | // 2. In our serialization format, we just use the BigEndian format used in docdb to store |
58 | | // serialized integers. |
59 | | // 3. We store the data type for ints, uints, floats and doubles in the JEntry. |
60 | | // 4. We store information about whether a container is an array or an object in the JEntry. |
61 | | class Jsonb { |
62 | | public: |
63 | | Jsonb(); |
64 | | |
65 | | // Creates an object from a serialized jsonb payload. |
66 | | explicit Jsonb(const std::string& jsonb); |
67 | | |
68 | | explicit Jsonb(std::string&& jsonb); |
69 | | |
70 | | void Assign(const std::string& jsonb); |
71 | | void Assign(std::string&& jsonb); |
72 | | |
73 | | // Creates a serialized jsonb string from plaintext json. |
74 | | CHECKED_STATUS FromString(const std::string& json); |
75 | | |
76 | | // Creates a serialized jsonb string from rapidjson document or value. |
77 | | CHECKED_STATUS FromRapidJson(const rapidjson::Document& document); |
78 | | CHECKED_STATUS FromRapidJson(const rapidjson::Value& value); |
79 | | |
80 | | // Creates a serialized jsonb string from QLValuePB. |
81 | | CHECKED_STATUS FromQLValuePB(const QLValuePB& value_pb); |
82 | | |
83 | | // Builds a json document from serialized jsonb. |
84 | | CHECKED_STATUS ToRapidJson(rapidjson::Document* document) const; |
85 | | |
86 | | // Returns a json string for serialized jsonb. |
87 | | CHECKED_STATUS ToJsonString(std::string* json) const; |
88 | | |
89 | | CHECKED_STATUS ApplyJsonbOperators(const QLJsonColumnOperationsPB& json_ops, |
90 | | QLValue* result) const; |
91 | | |
92 | | const std::string& SerializedJsonb() const; |
93 | | |
94 | | // Use with extreme care since this destroys the internal state of the object. The only purpose |
95 | | // for this method is to allow for efficiently moving the serialized jsonb. |
96 | | std::string&& MoveSerializedJsonb(); |
97 | | |
98 | | bool operator==(const Jsonb& other) const; |
99 | | |
100 | | private: |
101 | | std::string serialized_jsonb_; |
102 | | |
103 | | // Given a jsonb slice, it applies the given operator to the slice and returns the result as a |
104 | | // Slice and the element's metadata. |
105 | | static CHECKED_STATUS ApplyJsonbOperator(const Slice& jsonb, const QLJsonOperationPB& json_op, |
106 | | Slice* result, JEntry* element_metadata); |
107 | | |
108 | | static bool IsScalar(const JEntry& jentry); |
109 | | |
110 | | // Given a scalar value retrieved from a serialized jsonb, this method creates a jsonb scalar |
111 | | // (which is a single element within an array). This is required for comparison purposes. |
112 | | static CHECKED_STATUS CreateScalar(const Slice& scalar, const JEntry& original_jentry, |
113 | | std::string* scalar_jsonb); |
114 | | |
115 | | // Given a serialized json scalar and its metadata, return a string representation of it. |
116 | | static CHECKED_STATUS ScalarToString(const JEntry& element_metadata, const Slice& json_value, |
117 | | std::string* result); |
118 | | |
119 | | static CHECKED_STATUS ToJsonStringInternal(const Slice& jsonb, std::string* json); |
120 | | static size_t ComputeDataOffset(const size_t num_entries, const uint32_t container_type); |
121 | | static CHECKED_STATUS ToJsonbInternal(const rapidjson::Value& document, std::string* jsonb); |
122 | | static CHECKED_STATUS ToJsonbProcessObject(const rapidjson::Value& document, |
123 | | std::string* jsonb); |
124 | | static CHECKED_STATUS ToJsonbProcessArray(const rapidjson::Value& document, |
125 | | bool is_scalar, |
126 | | std::string* jsonb); |
127 | | static CHECKED_STATUS ProcessJsonValueAndMetadata(const rapidjson::Value& value, |
128 | | const size_t data_begin_offset, |
129 | | std::string* jsonb, |
130 | | size_t* metadata_offset); |
131 | | |
132 | | // Methods to recursively build the json document from the serialized jsonb slice provided. |
133 | | // No separate starting offset is passed; presumably the slice begins at the data to process. |
134 | | static CHECKED_STATUS FromJsonbInternal(const Slice& jsonb, rapidjson::Document* document); |
135 | | static CHECKED_STATUS FromJsonbProcessObject(const Slice& jsonb, |
136 | | const JsonbHeader& jsonb_header, |
137 | | rapidjson::Document* document); |
138 | | static CHECKED_STATUS FromJsonbProcessArray(const Slice& jsonb, |
139 | | const JsonbHeader& jsonb_header, |
140 | | rapidjson::Document* document); |
141 | | |
142 | | static std::pair<size_t, size_t> ComputeOffsetsAndJsonbHeader(size_t num_entries, |
143 | | uint32_t container_type, |
144 | | std::string* jsonb); |
145 | | // Retrieves an element in serialized jsonb array with the provided index. The result is a |
146 | | // slice pointing to a section of the serialized jsonb string provided. The parameters |
147 | | // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and |
148 | | // data in the serialized jsonb. The method also returns a JEntry for the specified element, if |
149 | | // metadata information for that element is required. |
150 | | static CHECKED_STATUS GetArrayElement(size_t index, const Slice& jsonb, |
151 | | size_t metadata_begin_offset, size_t data_begin_offset, |
152 | | Slice* result, JEntry* element_metadata); |
153 | | |
154 | | // Retrieves the key from a serialized jsonb object at the given index. The result is a |
155 | | // slice pointing to a section of the serialized jsonb string provided. The parameters |
156 | | // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and |
157 | | // data in the serialized jsonb. |
158 | | static CHECKED_STATUS GetObjectKey(size_t index, const Slice& jsonb, size_t metadata_begin_offset, |
159 | | size_t data_begin_offset, Slice *result); |
160 | | |
161 | | // Retrieves the value from a serialized jsonb object at the given index. The result is a |
162 | | // slice pointing to a section of the serialized jsonb string provided. The parameters |
163 | | // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and |
164 | | // data in the serialized jsonb. The parameter num_kv_pairs indicates the total number of kv |
165 | | // pairs in the json object. The method also returns a JEntry for the specified element, if |
166 | | // metadata information for that element is required. |
167 | | static CHECKED_STATUS GetObjectValue(size_t index, const Slice& jsonb, |
168 | | size_t metadata_begin_offset, size_t data_begin_offset, |
169 | | size_t num_kv_pairs, Slice *result, JEntry* value_metadata); |
170 | | |
171 | | // Helper method to retrieve the (offset, length) of a key/value serialized in jsonb format. |
172 | | // element_metadata_offset denotes the offset for the JEntry of the key/value, |
173 | | // element_end_offset denotes the end of data portion of the key/value, data_begin_offset |
174 | | // denotes the offset from which the data portion of jsonb starts, metadata_begin_offset is the |
175 | | // offset from which all the JEntry fields begin. |
176 | | static std::pair<size_t, size_t> GetOffsetAndLength(size_t element_metadata_offset, |
177 | | const Slice& jsonb, |
178 | | size_t element_end_offset, |
179 | | size_t data_begin_offset, |
180 | | size_t metadata_begin_offset); |
181 | | |
182 | | static CHECKED_STATUS ApplyJsonbOperatorToArray(const Slice& jsonb, |
183 | | const QLJsonOperationPB& json_op, |
184 | | const JsonbHeader& jsonb_header, |
185 | | Slice* result, |
186 | | JEntry* element_metadata); |
187 | | |
188 | | static CHECKED_STATUS ApplyJsonbOperatorToObject(const Slice& jsonb, |
189 | | const QLJsonOperationPB& json_op, |
190 | | const JsonbHeader& jsonb_header, |
191 | | Slice* result, |
192 | | JEntry* element_metadata); |
193 | | |
194 | 2.20M | static inline uint32_t GetOffset(JEntry metadata) { return metadata & kJEOffsetMask; } |
195 | | |
196 | 70.0k | static inline uint32_t GetJEType(JEntry metadata) { return metadata & kJETypeMask; } |
197 | | |
198 | 2.08M | static inline uint32_t GetCount(JsonbHeader jsonb_header) { return jsonb_header & kJBCountMask; } |
199 | | |
200 | | // Bit masks for jsonb header fields. |
201 | | static constexpr uint32_t kJBCountMask = 0x0FFFFFFF; // mask for number of kv pairs. |
202 | | static constexpr uint32_t kJBScalar = 0x10000000; // indicates whether we have a scalar value. |
203 | | static constexpr uint32_t kJBObject = 0x20000000; // indicates whether we have a json object. |
204 | | static constexpr uint32_t kJBArray = 0x40000000; // indicates whether we have a json array. |
205 | | |
206 | | // Bit masks for JEntry fields (the masks applied by GetOffset() and GetJEType()). |
207 | | static constexpr uint32_t kJEOffsetMask = 0x0FFFFFFF; |
208 | | static constexpr uint32_t kJETypeMask = 0xF0000000; |
209 | | |
210 | | // Values stored in the JEntry type bits (the bits selected by kJETypeMask). |
211 | | static constexpr uint32_t kJEIsString = 0x00000000; |
212 | | static constexpr uint32_t kJEIsObject = 0x10000000; |
213 | | static constexpr uint32_t kJEIsBoolFalse = 0x20000000; |
214 | | static constexpr uint32_t kJEIsBoolTrue = 0x30000000; |
215 | | static constexpr uint32_t kJEIsNull = 0x40000000; |
216 | | static constexpr uint32_t kJEIsArray = 0x50000000; |
217 | | static constexpr uint32_t kJEIsInt = 0x60000000; |
218 | | static constexpr uint32_t kJEIsUInt = 0x70000000; |
219 | | static constexpr uint32_t kJEIsInt64 = 0x80000000; |
220 | | static constexpr uint32_t kJEIsUInt64 = 0x90000000; |
221 | | static constexpr uint32_t kJEIsFloat = 0xA0000000; |
222 | | static constexpr uint32_t kJEIsDouble = 0xB0000000; |
223 | | }; |
224 | | |
225 | | } // namespace common |
226 | | } // namespace yb |
227 | | |
228 | | #endif // YB_COMMON_JSONB_H |