/Users/deen/code/yugabyte-db/src/yb/common/jsonb.h
| Line | Count | Source | 
| 1 |  | // Copyright (c) YugaByte, Inc. | 
| 2 |  | // | 
| 3 |  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except | 
| 4 |  | // in compliance with the License.  You may obtain a copy of the License at | 
| 5 |  | // | 
| 6 |  | // http://www.apache.org/licenses/LICENSE-2.0 | 
| 7 |  | // | 
| 8 |  | // Unless required by applicable law or agreed to in writing, software distributed under the License | 
| 9 |  | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express | 
| 10 |  | // or implied.  See the License for the specific language governing permissions and limitations | 
| 11 |  | // under the License. | 
| 12 |  | // | 
| 13 |  |  | 
| 14 |  | #ifndef YB_COMMON_JSONB_H | 
| 15 |  | #define YB_COMMON_JSONB_H | 
| 16 |  |  | 
| 17 |  | #include <string> | 
| 18 |  |  | 
| 19 |  | #include <rapidjson/document.h> | 
| 20 |  |  | 
| 21 |  | #include "yb/common/common_fwd.h" | 
| 22 |  |  | 
| 23 |  | #include "yb/util/slice.h" | 
| 24 |  | #include "yb/util/status_fwd.h" | 
| 25 |  |  | 
| 26 |  | namespace yb { | 
| 27 |  | namespace common { | 
| 28 |  |  | 
| 29 |  | using JsonbMetadata = uint32_t; | 
| 30 |  | using JsonbHeader = JsonbMetadata; | 
| 31 |  | using JEntry = JsonbMetadata; | 
| 32 |  |  | 
| 33 |  | // Jsonb is a serialization format for json that is used in postgresql. This implementation of | 
| 34 |  | // jsonb is similar to the jsonb format, although not exactly the same (details regarding | 
| 35 |  | // differences follow). The jsonb format first includes a 32 bit header, whose lower 28 bits store | 
| 36 |  | // the total number of key-value pairs in the json object. The upper four bits are used to indicate | 
| 37 |  | // whether this is a json object, json array or just a scalar value. | 
| 38 |  | // | 
| 39 |  | // Next, we store the metadata for all the keys and values in the json object. The key-value | 
| 40 |  | // pairs are sorted based on keys before serialization and hence the original order is lost. | 
| 41 |  | // However, the sorting of key-value pairs would make it easier to search for a particular key in | 
| 42 |  | // jsonb. After the 32 bit jsonb header, we store 32 bit metadata for each key, followed by a | 
| 43 |  | // 32 bit metadata for each value. Next, we store all the keys followed by all the values. | 
| 44 |  | // | 
| 45 |  | // In case of arrays, we store the metadata for all the array elements first and then store the | 
| 46 |  | // data for the corresponding array elements after that. The original order of the array elements | 
| 47 |  | // is maintained. | 
| 48 |  | // | 
| 49 |  | // The 32 bit metadata is called a JEntry and the lower 28 bits store the ending offset of the | 
| 50 |  | // data. The upper 4 bits indicate the type of the data (ex: string, numeric, bool, array, object | 
| 51 |  | // or null). | 
| 52 |  | // | 
| 53 |  | // The following are some of the differences from postgresql's jsonb implementation: | 
| 54 |  | // 1. In the JEntry, postgresql sometimes stores offsets and sometimes stores the length. This is | 
| 55 |  | // done for better compressibility in their case. For us, however, this doesn't make much of a | 
| 56 |  | // difference, and hence it's simpler to just use offsets. | 
| 57 |  | // 2. In our serialization format, we just use the BigEndian format used in docdb to store | 
| 58 |  | // serialized integers. | 
| 59 |  | // 3. We store the data type for ints, uints, floats and doubles in the JEntry. | 
| 60 |  | // 4. We store information about whether a container is an array or an object in the JEntry. | 
| 61 |  | class Jsonb { | 
| 62 |  |  public: | 
| 63 |  |   Jsonb(); | 
| 64 |  |  | 
| 65 |  |   // Creates an object from a serialized jsonb payload. | 
| 66 |  |   explicit Jsonb(const std::string& jsonb); | 
| 67 |  |  | 
| 68 |  |   explicit Jsonb(std::string&& jsonb); | 
| 69 |  |  | 
| 70 |  |   void Assign(const std::string& jsonb); | 
| 71 |  |   void Assign(std::string&& jsonb); | 
| 72 |  |  | 
| 73 |  |   // Creates a serialized jsonb string from plaintext json. | 
| 74 |  |   CHECKED_STATUS FromString(const std::string& json); | 
| 75 |  |  | 
| 76 |  |   // Creates a serialized jsonb string from rapidjson document or value. | 
| 77 |  |   CHECKED_STATUS FromRapidJson(const rapidjson::Document& document); | 
| 78 |  |   CHECKED_STATUS FromRapidJson(const rapidjson::Value& value); | 
| 79 |  |  | 
| 80 |  |   // Creates a serialized jsonb string from QLValuePB. | 
| 81 |  |   CHECKED_STATUS FromQLValuePB(const QLValuePB& value_pb); | 
| 82 |  |  | 
| 83 |  |   // Builds a json document from serialized jsonb. | 
| 84 |  |   CHECKED_STATUS ToRapidJson(rapidjson::Document* document) const; | 
| 85 |  |  | 
| 86 |  |   // Returns a json string for serialized jsonb. | 
| 87 |  |   CHECKED_STATUS ToJsonString(std::string* json) const; | 
| 88 |  |  | 
| 89 |  |   CHECKED_STATUS ApplyJsonbOperators(const QLJsonColumnOperationsPB& json_ops, | 
| 90 |  |                                      QLValue* result) const; | 
| 91 |  |  | 
| 92 |  |   const std::string& SerializedJsonb() const; | 
| 93 |  |  | 
| 94 |  |   // Use with extreme care since this destroys the internal state of the object. The only purpose | 
| 95 |  |   // for this method is to allow for efficiently moving the serialized jsonb. | 
| 96 |  |   std::string&& MoveSerializedJsonb(); | 
| 97 |  |  | 
| 98 |  |   bool operator==(const Jsonb& other) const; | 
| 99 |  |  | 
| 100 |  |  private: | 
| 101 |  |   std::string serialized_jsonb_; | 
| 102 |  |  | 
| 103 |  |   // Applies the given operator to the given jsonb slice and returns the result as a Slice, | 
| 104 |  |   // along with the metadata (JEntry) of the resulting element. | 
| 105 |  |   static CHECKED_STATUS ApplyJsonbOperator(const Slice& jsonb, const QLJsonOperationPB& json_op, | 
| 106 |  |                                            Slice* result, JEntry* element_metadata); | 
| 107 |  |  | 
| 108 |  |   static bool IsScalar(const JEntry& jentry); | 
| 109 |  |  | 
| 110 |  |   // Given a scalar value retrieved from a serialized jsonb, this method creates a jsonb scalar | 
| 111 |  |   // (which is a single element within an array). This is required for comparison purposes. | 
| 112 |  |   static CHECKED_STATUS CreateScalar(const Slice& scalar, const JEntry& original_jentry, | 
| 113 |  |                                      std::string* scalar_jsonb); | 
| 114 |  |  | 
| 115 |  |   // Given a serialized json scalar and its metadata, return a string representation of it. | 
| 116 |  |   static CHECKED_STATUS ScalarToString(const JEntry& element_metadata, const Slice& json_value, | 
| 117 |  |                                        std::string* result); | 
| 118 |  |  | 
| 119 |  |   static CHECKED_STATUS ToJsonStringInternal(const Slice& jsonb, std::string* json); | 
| 120 |  |   static size_t ComputeDataOffset(const size_t num_entries, const uint32_t container_type); | 
| 121 |  |   static CHECKED_STATUS ToJsonbInternal(const rapidjson::Value& document, std::string* jsonb); | 
| 122 |  |   static CHECKED_STATUS ToJsonbProcessObject(const rapidjson::Value& document, | 
| 123 |  |                                              std::string* jsonb); | 
| 124 |  |   static CHECKED_STATUS ToJsonbProcessArray(const rapidjson::Value& document, | 
| 125 |  |                                             bool is_scalar, | 
| 126 |  |                                             std::string* jsonb); | 
| 127 |  |   static CHECKED_STATUS ProcessJsonValueAndMetadata(const rapidjson::Value& value, | 
| 128 |  |                                                     const size_t data_begin_offset, | 
| 129 |  |                                                     std::string* jsonb, | 
| 130 |  |                                                     size_t* metadata_offset); | 
| 131 |  |  | 
| 132 |  |   // Methods to recursively build the json document from serialized jsonb. The serialized data to | 
| 133 |  |   // process is supplied through the jsonb slice; these methods take no separate offset argument. | 
| 134 |  |   static CHECKED_STATUS FromJsonbInternal(const Slice& jsonb, rapidjson::Document* document); | 
| 135 |  |   static CHECKED_STATUS FromJsonbProcessObject(const Slice& jsonb, | 
| 136 |  |                                                const JsonbHeader& jsonb_header, | 
| 137 |  |                                                rapidjson::Document* document); | 
| 138 |  |   static CHECKED_STATUS FromJsonbProcessArray(const Slice& jsonb, | 
| 139 |  |                                               const JsonbHeader& jsonb_header, | 
| 140 |  |                                               rapidjson::Document* document); | 
| 141 |  |  | 
| 142 |  |   static std::pair<size_t, size_t> ComputeOffsetsAndJsonbHeader(size_t num_entries, | 
| 143 |  |                                                                 uint32_t container_type, | 
| 144 |  |                                                                 std::string* jsonb); | 
| 145 |  |   // Retrieves an element in serialized jsonb array with the provided index. The result is a | 
| 146 |  |   // slice pointing to a section of the serialized jsonb string provided. The parameters | 
| 147 |  |   // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and | 
| 148 |  |   // data in the serialized jsonb. The method also returns a JEntry for the specified element, if | 
| 149 |  |   // metadata information for that element is required. | 
| 150 |  |   static CHECKED_STATUS GetArrayElement(size_t index, const Slice& jsonb, | 
| 151 |  |                                         size_t metadata_begin_offset, size_t data_begin_offset, | 
| 152 |  |                                         Slice* result, JEntry* element_metadata); | 
| 153 |  |  | 
| 154 |  |   // Retrieves the key from a serialized jsonb object at the given index. The result is a | 
| 155 |  |   // slice pointing to a section of the serialized jsonb string provided. The parameters | 
| 156 |  |   // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and | 
| 157 |  |   // data in the serialized jsonb. | 
| 158 |  |   static CHECKED_STATUS GetObjectKey(size_t index, const Slice& jsonb, size_t metadata_begin_offset, | 
| 159 |  |                                      size_t data_begin_offset, Slice *result); | 
| 160 |  |  | 
| 161 |  |   // Retrieves the value from a serialized jsonb object at the given index. The result is a | 
| 162 |  |   // slice pointing to a section of the serialized jsonb string provided. The parameters | 
| 163 |  |   // metadata_begin_offset and data_begin_offset indicate the starting positions of metadata and | 
| 164 |  |   // data in the serialized jsonb. The parameter num_kv_pairs indicates the total number of kv | 
| 165 |  |   // pairs in the json object. The method also returns a JEntry for the specified element, if | 
| 166 |  |   // metadata information for that element is required. | 
| 167 |  |   static CHECKED_STATUS GetObjectValue(size_t index, const Slice& jsonb, | 
| 168 |  |                                        size_t metadata_begin_offset, size_t data_begin_offset, | 
| 169 |  |                                        size_t num_kv_pairs, Slice *result, JEntry* value_metadata); | 
| 170 |  |  | 
| 171 |  |   // Helper method to retrieve the (offset, length) of a key/value serialized in jsonb format. | 
| 172 |  |   // element_metadata_offset denotes the offset for the JEntry of the key/value, | 
| 173 |  |   // element_end_offset denotes the end of data portion of the key/value, data_begin_offset | 
| 174 |  |   // denotes the offset from which the data portion of jsonb starts, metadata_begin_offset is the | 
| 175 |  |   // offset from which all the JEntry fields begin. | 
| 176 |  |   static std::pair<size_t, size_t> GetOffsetAndLength(size_t element_metadata_offset, | 
| 177 |  |                                                       const Slice& jsonb, | 
| 178 |  |                                                       size_t element_end_offset, | 
| 179 |  |                                                       size_t data_begin_offset, | 
| 180 |  |                                                       size_t metadata_begin_offset); | 
| 181 |  |  | 
| 182 |  |   static CHECKED_STATUS ApplyJsonbOperatorToArray(const Slice& jsonb, | 
| 183 |  |                                                   const QLJsonOperationPB& json_op, | 
| 184 |  |                                                   const JsonbHeader& jsonb_header, | 
| 185 |  |                                                   Slice* result, | 
| 186 |  |                                                   JEntry* element_metadata); | 
| 187 |  |  | 
| 188 |  |   static CHECKED_STATUS ApplyJsonbOperatorToObject(const Slice& jsonb, | 
| 189 |  |                                                    const QLJsonOperationPB& json_op, | 
| 190 |  |                                                    const JsonbHeader& jsonb_header, | 
| 191 |  |                                                    Slice* result, | 
| 192 |  |                                                    JEntry* element_metadata); | 
| 193 |  |  | 
| 194 | 1.44M |   static inline uint32_t GetOffset(JEntry metadata) { return metadata & kJEOffsetMask; } | 
| 195 |  |  | 
| 196 | 63.0k |   static inline uint32_t GetJEType(JEntry metadata) { return metadata & kJETypeMask; } | 
| 197 |  |  | 
| 198 | 1.33M |   static inline uint32_t GetCount(JsonbHeader jsonb_header) { return jsonb_header & kJBCountMask; } | 
| 199 |  |  | 
| 200 |  |   // Bit masks for jsonb header fields. | 
| 201 |  |   static constexpr uint32_t kJBCountMask = 0x0FFFFFFF; // mask for number of kv pairs. | 
| 202 |  |   static constexpr uint32_t kJBScalar = 0x10000000; // indicates whether we have a scalar value. | 
| 203 |  |   static constexpr uint32_t kJBObject = 0x20000000; // indicates whether we have a json object. | 
| 204 |  |   static constexpr uint32_t kJBArray = 0x40000000; // indicates whether we have a json array. | 
| 205 |  |  | 
| 206 |  |   // Bit masks for JEntry fields: the ending offset (lower 28 bits) and the type (upper 4 bits). | 
| 207 |  |   static constexpr uint32_t kJEOffsetMask = 0x0FFFFFFF; | 
| 208 |  |   static constexpr uint32_t kJETypeMask = 0xF0000000; | 
| 209 |  |  | 
| 210 |  |   // Values stored in the type bits of a JEntry (the bits selected by kJETypeMask). | 
| 211 |  |   static constexpr uint32_t kJEIsString = 0x00000000; | 
| 212 |  |   static constexpr uint32_t kJEIsObject = 0x10000000; | 
| 213 |  |   static constexpr uint32_t kJEIsBoolFalse = 0x20000000; | 
| 214 |  |   static constexpr uint32_t kJEIsBoolTrue = 0x30000000; | 
| 215 |  |   static constexpr uint32_t kJEIsNull = 0x40000000; | 
| 216 |  |   static constexpr uint32_t kJEIsArray = 0x50000000; | 
| 217 |  |   static constexpr uint32_t kJEIsInt = 0x60000000; | 
| 218 |  |   static constexpr uint32_t kJEIsUInt = 0x70000000; | 
| 219 |  |   static constexpr uint32_t kJEIsInt64 = 0x80000000; | 
| 220 |  |   static constexpr uint32_t kJEIsUInt64 = 0x90000000; | 
| 221 |  |   static constexpr uint32_t kJEIsFloat = 0xA0000000; | 
| 222 |  |   static constexpr uint32_t kJEIsDouble = 0xB0000000; | 
| 223 |  | }; | 
| 224 |  |  | 
| 225 |  | } // namespace common | 
| 226 |  | } // namespace yb | 
| 227 |  |  | 
| 228 |  | #endif // YB_COMMON_JSONB_H |