/Users/deen/code/yugabyte-db/src/yb/rocksdb/third-party/fbson/FbsonJsonParser.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2011-present, Facebook, Inc. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under the BSD-style license found in the |
6 | | * LICENSE file in the root directory of this source tree. An additional grant |
7 | | * of patent rights can be found in the PATENTS file in the same directory. |
8 | | * |
9 | | * The following only applies to changes made to this file as part of YugaByte development. |
10 | | * |
11 | | * Portions Copyright (c) YugaByte, Inc. |
12 | | * |
13 | | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
14 | | * in compliance with the License. You may obtain a copy of the License at |
15 | | * |
16 | | * http://www.apache.org/licenses/LICENSE-2.0 |
17 | | * |
18 | | * Unless required by applicable law or agreed to in writing, software distributed under the License |
19 | | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
20 | | * or implied. See the License for the specific language governing permissions and limitations |
21 | | * under the License. |
22 | | * |
23 | | * |
24 | | */ |
25 | | |
26 | | /* |
27 | | * This file defines FbsonJsonParserT (template) and FbsonJsonParser. |
28 | | * |
29 | | * FbsonJsonParserT is a template class which implements a JSON parser. |
30 | | * FbsonJsonParserT parses JSON text, and serializes it to FBSON binary format |
31 | | * by using FbsonWriterT object. By default, FbsonJsonParserT creates a new |
32 | | * FbsonWriterT object with an output stream object. However, you can also |
33 | | * pass in your FbsonWriterT or any stream object that implements some basic |
34 | | * interface of std::ostream (see FbsonStream.h). |
35 | | * |
36 | | * FbsonJsonParser specializes FbsonJsonParserT with FbsonOutStream type (see |
37 | | * FbsonStream.h). So unless you want to provide your own output stream |
38 | | * type, use the FbsonJsonParser object. |
39 | | * |
40 | | * ** Parsing JSON ** |
41 | | * FbsonJsonParserT parses JSON string, and directly serializes into FBSON |
42 | | * packed bytes. There are three ways to parse a JSON string: (1) using |
43 | | * c-string, (2) using string with len, (3) using std::istream object. You can |
44 | | * use a custom streambuf to redirect output. FbsonInBuffer is the streambuf used |
45 | | * internally if the input is a raw character buffer. |
46 | | * |
47 | | * You can reuse an FbsonJsonParserT object to parse/serialize multiple JSON |
48 | | * strings, and the previous FBSON will be overwritten. |
49 | | * |
50 | | * If parsing fails (returned false), the error code will be set to one of |
51 | | * FbsonErrType, and can be retrieved by calling getErrorCode(). |
52 | | * |
53 | | * ** External dictionary ** |
54 | | * During parsing a JSON string, you can pass a call-back function to map a key |
55 | | * string to an id, and store the dictionary id in FBSON to save space. The |
56 | | * purpose of using an external dictionary is more towards a collection of |
57 | | * documents (which has common keys) rather than a single document, so that |
58 | | * space saving will be significant. |
59 | | * |
60 | | * ** Endianness ** |
61 | | * Note: FBSON serialization doesn't assume endianness of the server. However |
62 | | * you will need to ensure that the endianness at the reader side is the same |
63 | | * as that at the writer side (if they are on different machines). Otherwise, |
64 | | * proper conversion is needed when a number value is returned to the |
65 | | * caller/writer. |
66 | | * |
67 | | * @author Tian Xia <tianx@fb.com> |
68 | | */ |
69 | | |
70 | | #ifndef FBSON_FBSONPARSER_H |
71 | | #define FBSON_FBSONPARSER_H |
72 | | |
73 | | #include <cmath> |
74 | | #include <limits> |
75 | | #include "FbsonDocument.h" |
76 | | #include "FbsonWriter.h" |
77 | | |
78 | | #include "yb/gutil/macros.h" |
79 | | |
80 | | namespace fbson { |
81 | | |
82 | | const char* const kJsonDelim = " ,]}\t\r\n"; |
83 | | const char* const kWhiteSpace = " \t\n\r"; |
84 | | |
85 | | /* |
86 | | * Error codes |
87 | | */ |
88 | | enum class FbsonErrType { |
89 | | E_NONE = 0, |
90 | | E_INVALID_VER, |
91 | | E_EMPTY_STR, |
92 | | E_OUTPUT_FAIL, |
93 | | E_INVALID_DOCU, |
94 | | E_INVALID_VALUE, |
95 | | E_INVALID_KEY, |
96 | | E_INVALID_STR, |
97 | | E_INVALID_OBJ, |
98 | | E_INVALID_ARR, |
99 | | E_INVALID_HEX, |
100 | | E_INVALID_OCTAL, |
101 | | E_INVALID_DECIMAL, |
102 | | E_INVALID_EXPONENT, |
103 | | E_HEX_OVERFLOW, |
104 | | E_OCTAL_OVERFLOW, |
105 | | E_DECIMAL_OVERFLOW, |
106 | | E_DOUBLE_OVERFLOW, |
107 | | E_EXPONENT_OVERFLOW, |
108 | | }; |
109 | | |
110 | | /* |
111 | | * Template FbsonJsonParserT |
112 | | */ |
113 | | template <class OS_TYPE> |
114 | | class FbsonJsonParserT { |
115 | | public: |
116 | 54 | FbsonJsonParserT() : err_(FbsonErrType::E_NONE) {} |
117 | | |
118 | | explicit FbsonJsonParserT(OS_TYPE& os) |
119 | | : writer_(os), err_(FbsonErrType::E_NONE) {} |
120 | | |
121 | | // parse a UTF-8 JSON string |
122 | | bool parse(const std::string& str, hDictInsert handler = nullptr) { |
123 | | return parse(str.c_str(), (unsigned int)str.size(), handler); |
124 | | } |
125 | | |
126 | | // parse a UTF-8 JSON c-style string (NULL terminated) |
127 | 54 | bool parse(const char* c_str, hDictInsert handler = nullptr) { |
128 | 54 | return parse(c_str, (unsigned int)strlen(c_str), handler); |
129 | 54 | } |
130 | | |
131 | | // parse a UTF-8 JSON string with length |
132 | 54 | bool parse(const char* pch, unsigned int len, hDictInsert handler = nullptr) { |
133 | 54 | if (!pch || len == 0) { |
134 | 0 | err_ = FbsonErrType::E_EMPTY_STR; |
135 | 0 | return false; |
136 | 0 | } |
137 | | |
138 | 54 | FbsonInBuffer sb(pch, len); |
139 | 54 | std::istream in(&sb); |
140 | 54 | return parse(in, handler); |
141 | 54 | } |
142 | | |
143 | | // parse UTF-8 JSON text from an input stream |
144 | 54 | bool parse(std::istream& in, hDictInsert handler = nullptr) { |
145 | 54 | bool res = false; |
146 | | |
147 | | // reset output stream |
148 | 54 | writer_.reset(); |
149 | | |
150 | 54 | trim(in); |
151 | | |
152 | 54 | if (in.peek() == '{') { |
153 | 35 | in.ignore(); |
154 | 35 | res = parseObject(in, handler); |
155 | 35 | } else if (19 in.peek() == '['19 ) { |
156 | 19 | in.ignore(); |
157 | 19 | res = parseArray(in, handler); |
158 | 19 | } else { |
159 | 0 | err_ = FbsonErrType::E_INVALID_DOCU; |
160 | 0 | } |
161 | | |
162 | 54 | trim(in); |
163 | 54 | if (res && !in.eof()53 ) { |
164 | 0 | err_ = FbsonErrType::E_INVALID_DOCU; |
165 | 0 | return false; |
166 | 0 | } |
167 | | |
168 | 54 | return res; |
169 | 54 | } |
170 | | |
171 | 106 | FbsonWriterT<OS_TYPE>& getWriter() { return writer_; } |
172 | | |
173 | | FbsonErrType getErrorCode() { return err_; } |
174 | | |
175 | | // clear error code |
176 | | void clearErr() { err_ = FbsonErrType::E_NONE; } |
177 | | |
178 | | private: |
179 | | // parse a JSON object (comma-separated list of key-value pairs) |
180 | 96 | bool parseObject(std::istream& in, hDictInsert handler) { |
181 | 96 | if (!writer_.writeStartObject()) { |
182 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
183 | 0 | return false; |
184 | 0 | } |
185 | | |
186 | 96 | trim(in); |
187 | | |
188 | 96 | if (in.peek() == '}') { |
189 | 0 | in.ignore(); |
190 | | // empty object |
191 | 0 | if (!writer_.writeEndObject()) { |
192 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
193 | 0 | return false; |
194 | 0 | } |
195 | 0 | return true; |
196 | 0 | } |
197 | | |
198 | 207 | while (96 in.good()) { |
199 | 207 | if (in.get() != '"') { |
200 | 0 | err_ = FbsonErrType::E_INVALID_KEY; |
201 | 0 | return false; |
202 | 0 | } |
203 | | |
204 | 207 | if (!parseKVPair(in, handler)) { |
205 | 1 | return false; |
206 | 1 | } |
207 | | |
208 | 206 | trim(in); |
209 | | |
210 | 206 | char ch = in.get(); |
211 | 206 | if (ch == '}') { |
212 | | // end of the object |
213 | 95 | if (!writer_.writeEndObject()) { |
214 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
215 | 0 | return false; |
216 | 0 | } |
217 | 95 | return true; |
218 | 111 | } else if (ch != ',') { |
219 | 0 | err_ = FbsonErrType::E_INVALID_OBJ; |
220 | 0 | return false; |
221 | 0 | } |
222 | | |
223 | 111 | trim(in); |
224 | 111 | } |
225 | | |
226 | 0 | err_ = FbsonErrType::E_INVALID_OBJ; |
227 | 0 | return false; |
228 | 96 | } |
229 | | |
230 | | // parse a JSON array (comma-separated list of values) |
231 | 30 | bool parseArray(std::istream& in, hDictInsert handler) { |
232 | 30 | if (!writer_.writeStartArray()) { |
233 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
234 | 0 | return false; |
235 | 0 | } |
236 | | |
237 | 30 | trim(in); |
238 | | |
239 | 30 | if (in.peek() == ']') { |
240 | 1 | in.ignore(); |
241 | | // empty array |
242 | 1 | if (!writer_.writeEndArray()) { |
243 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
244 | 0 | return false; |
245 | 0 | } |
246 | 1 | return true; |
247 | 1 | } |
248 | | |
249 | 48 | while (29 in.good()) { |
250 | 48 | if (!parseValue(in, handler)) { |
251 | 0 | return false; |
252 | 0 | } |
253 | | |
254 | 48 | trim(in); |
255 | | |
256 | 48 | char ch = in.get(); |
257 | 48 | if (ch == ']') { |
258 | | // end of the array |
259 | 29 | if (!writer_.writeEndArray()) { |
260 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
261 | 0 | return false; |
262 | 0 | } |
263 | 29 | return true; |
264 | 29 | } else if (19 ch != ','19 ) { |
265 | 0 | err_ = FbsonErrType::E_INVALID_ARR; |
266 | 0 | return false; |
267 | 0 | } |
268 | | |
269 | 19 | trim(in); |
270 | 19 | } |
271 | | |
272 | 0 | err_ = FbsonErrType::E_INVALID_ARR; |
273 | 0 | return false; |
274 | 29 | } |
275 | | |
276 | | // parse a key-value pair, separated by ":" |
277 | 207 | bool parseKVPair(std::istream& in, hDictInsert handler) { |
278 | 207 | if (parseKey(in, handler) && parseValue(in, handler)206 ) { |
279 | 206 | return true; |
280 | 206 | } |
281 | | |
282 | 1 | return false; |
283 | 207 | } |
284 | | |
285 | | // parse a key (must be string) |
286 | 207 | bool parseKey(std::istream& in, hDictInsert handler) { |
287 | 207 | char key[FbsonKeyValue::sMaxKeyLen]; |
288 | 207 | int i = 0; |
289 | 1.40k | while (in.good() && in.peek() != '"'1.40k && i < FbsonKeyValue::sMaxKeyLen1.20k ) { |
290 | 1.20k | key[i++] = in.get(); |
291 | 1.20k | } |
292 | | |
293 | 207 | if (!in.good() || in.peek() != '"'206 || i == 0206 ) { |
294 | 1 | err_ = FbsonErrType::E_INVALID_KEY; |
295 | 1 | return false; |
296 | 1 | } |
297 | | |
298 | 206 | in.ignore(); // discard '"' |
299 | | |
300 | 206 | int key_id = -1; |
301 | 206 | if (handler) { |
302 | 0 | key_id = handler(key, i); |
303 | 0 | } |
304 | | |
305 | 206 | if (key_id < 0) { |
306 | 206 | writer_.writeKey(key, i); |
307 | 206 | } else { |
308 | 0 | writer_.writeKey(key_id); |
309 | 0 | } |
310 | | |
311 | 206 | trim(in); |
312 | | |
313 | 206 | if (in.get() != ':') { |
314 | 0 | err_ = FbsonErrType::E_INVALID_OBJ; |
315 | 0 | return false; |
316 | 0 | } |
317 | | |
318 | 206 | return true; |
319 | 206 | } |
320 | | |
321 | | // parse a value |
322 | 254 | bool parseValue(std::istream& in, hDictInsert handler) { |
323 | 254 | bool res = false; |
324 | | |
325 | 254 | trim(in); |
326 | | |
327 | 254 | switch (in.peek()) { |
328 | 0 | case 'N': |
329 | 4 | case 'n': { |
330 | 4 | in.ignore(); |
331 | 4 | res = parseNull(in); |
332 | 4 | break; |
333 | 0 | } |
334 | 0 | case 'T': |
335 | 4 | case 't': { |
336 | 4 | in.ignore(); |
337 | 4 | res = parseTrue(in); |
338 | 4 | break; |
339 | 0 | } |
340 | 0 | case 'F': |
341 | 0 | case 'f': { |
342 | 0 | in.ignore(); |
343 | 0 | res = parseFalse(in); |
344 | 0 | break; |
345 | 0 | } |
346 | 64 | case '"': { |
347 | 64 | in.ignore(); |
348 | 64 | res = parseString(in); |
349 | 64 | break; |
350 | 0 | } |
351 | 61 | case '{': { |
352 | 61 | in.ignore(); |
353 | 61 | res = parseObject(in, handler); |
354 | 61 | break; |
355 | 0 | } |
356 | 11 | case '[': { |
357 | 11 | in.ignore(); |
358 | 11 | res = parseArray(in, handler); |
359 | 11 | break; |
360 | 0 | } |
361 | 110 | default: { |
362 | 110 | res = parseNumber(in); |
363 | 110 | break; |
364 | 0 | } |
365 | 254 | } |
366 | | |
367 | 254 | return res; |
368 | 254 | } |
369 | | |
370 | | // parse NULL value |
371 | 4 | bool parseNull(std::istream& in) { |
372 | 4 | if (tolower(in.get()) == 'u' && tolower(in.get()) == 'l' && |
373 | 4 | tolower(in.get()) == 'l') { |
374 | 4 | writer_.writeNull(); |
375 | 4 | return true; |
376 | 4 | } |
377 | | |
378 | 0 | err_ = FbsonErrType::E_INVALID_VALUE; |
379 | 0 | return false; |
380 | 4 | } |
381 | | |
382 | | // parse TRUE value |
383 | 4 | bool parseTrue(std::istream& in) { |
384 | 4 | if (tolower(in.get()) == 'r' && tolower(in.get()) == 'u' && |
385 | 4 | tolower(in.get()) == 'e') { |
386 | 4 | writer_.writeBool(true); |
387 | 4 | return true; |
388 | 4 | } |
389 | | |
390 | 0 | err_ = FbsonErrType::E_INVALID_VALUE; |
391 | 0 | return false; |
392 | 4 | } |
393 | | |
394 | | // parse FALSE value |
395 | 0 | bool parseFalse(std::istream& in) { |
396 | 0 | if (tolower(in.get()) == 'a' && tolower(in.get()) == 'l' && |
397 | 0 | tolower(in.get()) == 's' && tolower(in.get()) == 'e') { |
398 | 0 | writer_.writeBool(false); |
399 | 0 | return true; |
400 | 0 | } |
401 | | |
402 | 0 | err_ = FbsonErrType::E_INVALID_VALUE; |
403 | 0 | return false; |
404 | 0 | } |
405 | | |
406 | | // parse a string |
407 | 64 | bool parseString(std::istream& in) { |
408 | 64 | if (!writer_.writeStartString()) { |
409 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
410 | 0 | return false; |
411 | 0 | } |
412 | | |
413 | 64 | bool escaped = false; |
414 | 64 | char buffer[4096]; // write 4KB at a time |
415 | 64 | int nread = 0; |
416 | 395 | while (in.good()) { |
417 | 395 | char ch = in.get(); |
418 | 395 | if (ch != '"' || escaped64 ) { |
419 | 331 | buffer[nread++] = ch; |
420 | 331 | if (nread == 4096) { |
421 | | // flush buffer |
422 | 0 | if (!writer_.writeString(buffer, nread)) { |
423 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
424 | 0 | return false; |
425 | 0 | } |
426 | 0 | nread = 0; |
427 | 0 | } |
428 | | // set/reset escape |
429 | 331 | if (ch == '\\' || escaped) { |
430 | 0 | escaped = !escaped; |
431 | 0 | } |
432 | 331 | } else { |
433 | | // write all remaining bytes in the buffer |
434 | 64 | if (nread > 0) { |
435 | 64 | if (!writer_.writeString(buffer, nread)) { |
436 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
437 | 0 | return false; |
438 | 0 | } |
439 | 64 | } |
440 | | // end writing string |
441 | 64 | if (!writer_.writeEndString()) { |
442 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
443 | 0 | return false; |
444 | 0 | } |
445 | 64 | return true; |
446 | 64 | } |
447 | 395 | } |
448 | | |
449 | 0 | err_ = FbsonErrType::E_INVALID_STR; |
450 | 0 | return false; |
451 | 64 | } |
452 | | |
453 | | // parse a number |
454 | | // Number format can be hex, octal, or decimal (including float). |
455 | | // Only decimal can have (+/-) sign prefix. |
456 | 110 | bool parseNumber(std::istream& in) { |
457 | 110 | bool ret = false; |
458 | 110 | switch (in.peek()) { |
459 | 3 | case '0': { |
460 | 3 | in.ignore(); |
461 | | |
462 | 3 | if (in.peek() == 'x' || in.peek() == 'X') { |
463 | 0 | in.ignore(); |
464 | 0 | ret = parseHex(in); |
465 | 3 | } else if (in.peek() == '.') { |
466 | 0 | in.ignore(); |
467 | 0 | ret = parseDouble(in, 0, 0, 1); |
468 | 3 | } else { |
469 | 3 | ret = parseOctal(in); |
470 | 3 | } |
471 | | |
472 | 3 | break; |
473 | 0 | } |
474 | 10 | case '-': { |
475 | 10 | in.ignore(); |
476 | 10 | ret = parseDecimal(in, -1); |
477 | 10 | break; |
478 | 0 | } |
479 | 0 | case '+': |
480 | 0 | in.ignore(); |
481 | 0 | FALLTHROUGH_INTENDED; |
482 | 97 | default: |
483 | 97 | ret = parseDecimal(in, 1); |
484 | 97 | break; |
485 | 110 | } |
486 | | |
487 | 110 | return ret; |
488 | 110 | } |
489 | | |
490 | | // parse a number in hex format |
491 | 0 | bool parseHex(std::istream& in) { |
492 | 0 | uint64_t val = 0; |
493 | 0 | int num_digits = 0; |
494 | 0 | char ch = tolower(in.peek()); |
495 | 0 | while (in.good() && !strchr(kJsonDelim, ch) && (++num_digits) <= 16) { |
496 | 0 | if (ch >= '0' && ch <= '9') { |
497 | 0 | val = (val << 4) + (ch - '0'); |
498 | 0 | } else if (ch >= 'a' && ch <= 'f') { |
499 | 0 | val = (val << 4) + (ch - 'a' + 10); |
500 | 0 | } else { // unrecognized hex digit |
501 | 0 | err_ = FbsonErrType::E_INVALID_HEX; |
502 | 0 | return false; |
503 | 0 | } |
504 | | |
505 | 0 | in.ignore(); |
506 | 0 | ch = tolower(in.peek()); |
507 | 0 | } |
508 | | |
509 | 0 | int size = 0; |
510 | 0 | if (num_digits <= 2) { |
511 | 0 | size = writer_.writeInt8((int8_t)val); |
512 | 0 | } else if (num_digits <= 4) { |
513 | 0 | size = writer_.writeInt16((int16_t)val); |
514 | 0 | } else if (num_digits <= 8) { |
515 | 0 | size = writer_.writeInt32((int32_t)val); |
516 | 0 | } else if (num_digits <= 16) { |
517 | 0 | size = writer_.writeInt64(val); |
518 | 0 | } else { |
519 | 0 | err_ = FbsonErrType::E_HEX_OVERFLOW; |
520 | 0 | return false; |
521 | 0 | } |
522 | | |
523 | 0 | if (size == 0) { |
524 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
525 | 0 | return false; |
526 | 0 | } |
527 | | |
528 | 0 | return true; |
529 | 0 | } |
530 | | |
531 | | // parse a number in octal format |
532 | 3 | bool parseOctal(std::istream& in) { |
533 | 3 | int64_t val = 0; |
534 | 3 | char ch = in.peek(); |
535 | 3 | while (in.good() && !strchr(kJsonDelim, ch)) { |
536 | 0 | if (ch >= '0' && ch <= '7') { |
537 | 0 | val = val * 8 + (ch - '0'); |
538 | 0 | } else { |
539 | 0 | err_ = FbsonErrType::E_INVALID_OCTAL; |
540 | 0 | return false; |
541 | 0 | } |
542 | | |
543 | | // check if the number overflows |
544 | 0 | if (val < 0) { |
545 | 0 | err_ = FbsonErrType::E_OCTAL_OVERFLOW; |
546 | 0 | return false; |
547 | 0 | } |
548 | | |
549 | 0 | in.ignore(); |
550 | 0 | ch = in.peek(); |
551 | 0 | } |
552 | | |
553 | 3 | int size = 0; |
554 | 3 | if (val <= std::numeric_limits<int8_t>::max()) { |
555 | 3 | size = writer_.writeInt8((int8_t)val); |
556 | 3 | } else if (0 val <= std::numeric_limits<int16_t>::max()0 ) { |
557 | 0 | size = writer_.writeInt16((int16_t)val); |
558 | 0 | } else if (val <= std::numeric_limits<int32_t>::max()) { |
559 | 0 | size = writer_.writeInt32((int32_t)val); |
560 | 0 | } else { // val <= INT64_MAX |
561 | 0 | size = writer_.writeInt64(val); |
562 | 0 | } |
563 | | |
564 | 3 | if (size == 0) { |
565 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
566 | 0 | return false; |
567 | 0 | } |
568 | | |
569 | 3 | return true; |
570 | 3 | } |
571 | | |
572 | | // parse a number in decimal (including float) |
573 | 107 | bool parseDecimal(std::istream& in, int sign) { |
574 | 107 | int64_t val = 0; |
575 | 107 | int precision = 0; |
576 | | |
577 | 107 | char ch = 0; |
578 | 107 | while (in.good() && (ch = in.peek()) == '0') |
579 | 0 | in.ignore(); |
580 | | |
581 | 283 | while (in.good() && !strchr(kJsonDelim, ch)) { |
582 | 210 | if (ch >= '0' && ch <= '9'176 ) { |
583 | 176 | val = val * 10 + (ch - '0'); |
584 | 176 | ++precision; |
585 | 176 | } else if (34 ch == '.'34 ) { |
586 | | // note we don't pop out '.' |
587 | 34 | return parseDouble(in, static_cast<double>(val), precision, sign); |
588 | 34 | } else { |
589 | 0 | err_ = FbsonErrType::E_INVALID_DECIMAL; |
590 | 0 | return false; |
591 | 0 | } |
592 | | |
593 | 176 | in.ignore(); |
594 | | |
595 | | // if the number overflows int64_t, first parse it as double iff we see a |
596 | | // decimal point later. Otherwise, will treat it as overflow |
597 | 176 | if (val < 0 && val > std::numeric_limits<int64_t>::min()0 ) { |
598 | 0 | return parseDouble(in, static_cast<double>(val), precision, sign); |
599 | 0 | } |
600 | | |
601 | 176 | ch = in.peek(); |
602 | 176 | } |
603 | | |
604 | 73 | if (sign < 0) { |
605 | 10 | val = -val; |
606 | 10 | } |
607 | | |
608 | 73 | int size = 0; |
609 | 73 | if (val >= std::numeric_limits<int8_t>::min() && |
610 | 73 | val <= std::numeric_limits<int8_t>::max()) { |
611 | 71 | size = writer_.writeInt8((int8_t)val); |
612 | 71 | } else if (2 val >= std::numeric_limits<int16_t>::min()2 && |
613 | 2 | val <= std::numeric_limits<int16_t>::max()) { |
614 | 1 | size = writer_.writeInt16((int16_t)val); |
615 | 1 | } else if (val >= std::numeric_limits<int32_t>::min() && |
616 | 1 | val <= std::numeric_limits<int32_t>::max()) { |
617 | 1 | size = writer_.writeInt32((int32_t)val); |
618 | 1 | } else { // val <= INT64_MAX |
619 | 0 | size = writer_.writeInt64(val); |
620 | 0 | } |
621 | | |
622 | 73 | if (size == 0) { |
623 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
624 | 0 | return false; |
625 | 0 | } |
626 | | |
627 | 73 | return true; |
628 | 73 | } |
629 | | |
630 | | // parse IEEE 754 double precision: |
631 | | // Significand precision length - 15 |
632 | | // Maximum exponent value - 308 |
633 | | // |
634 | | // "If a decimal string with at most 15 significant digits is converted to |
635 | | // IEEE 754 double precision representation and then converted back to a |
636 | | // string with the same number of significant digits, then the final string |
637 | | // should match the original" |
638 | 34 | bool parseDouble(std::istream& in, double val, int precision, int sign) { |
639 | 34 | int integ = precision; |
640 | 34 | int frac = 0; |
641 | 34 | bool is_frac = false; |
642 | | |
643 | 34 | char ch = in.peek(); |
644 | 34 | if (ch == '.') { |
645 | 34 | is_frac = true; |
646 | 34 | in.ignore(); |
647 | 34 | ch = in.peek(); |
648 | 34 | } |
649 | | |
650 | 34 | int exp = 0; |
651 | 83 | while (in.good() && !strchr(kJsonDelim, ch)) { |
652 | 54 | if (ch >= '0' && ch <= '9') { |
653 | 49 | if (precision < 15) { |
654 | 49 | val = val * 10 + (ch - '0'); |
655 | 49 | if (is_frac) { |
656 | 49 | ++frac; |
657 | 49 | } else { |
658 | 0 | ++integ; |
659 | 0 | } |
660 | 49 | ++precision; |
661 | 49 | } else if (0 !is_frac0 ) { |
662 | 0 | ++exp; |
663 | 0 | } |
664 | 49 | } else if (5 ch == 'e'5 || ch == 'E'0 ) { |
665 | 5 | in.ignore(); |
666 | 5 | int exp2; |
667 | 5 | if (!parseExponent(in, exp2)) { |
668 | 0 | return false; |
669 | 0 | } |
670 | | |
671 | 5 | exp += exp2; |
672 | | // check if exponent overflows |
673 | 5 | if (exp > 308 || exp < -308) { |
674 | 0 | err_ = FbsonErrType::E_EXPONENT_OVERFLOW; |
675 | 0 | return false; |
676 | 0 | } |
677 | | |
678 | 5 | is_frac = true; |
679 | 5 | break; |
680 | 5 | } |
681 | | |
682 | 49 | in.ignore(); |
683 | 49 | ch = in.peek(); |
684 | 49 | } |
685 | | |
686 | 34 | if (!is_frac) { |
687 | 0 | err_ = FbsonErrType::E_DECIMAL_OVERFLOW; |
688 | 0 | return false; |
689 | 0 | } |
690 | | |
691 | 34 | val *= std::pow(10, exp - frac); |
692 | 34 | if (std::isnan(val) || std::isinf(val)) { |
693 | 0 | err_ = FbsonErrType::E_DOUBLE_OVERFLOW; |
694 | 0 | return false; |
695 | 0 | } |
696 | | |
697 | 34 | if (sign < 0) { |
698 | 0 | val = -val; |
699 | 0 | } |
700 | | |
701 | 34 | if (writer_.writeDouble(val) == 0) { |
702 | 0 | err_ = FbsonErrType::E_OUTPUT_FAIL; |
703 | 0 | return false; |
704 | 0 | } |
705 | | |
706 | 34 | return true; |
707 | 34 | } |
708 | | |
709 | | // parse the exponent part of a double number |
710 | 5 | bool parseExponent(std::istream& in, int& exp) { |
711 | 5 | bool neg = false; |
712 | | |
713 | 5 | char ch = in.peek(); |
714 | 5 | if (ch == '+') { |
715 | 0 | in.ignore(); |
716 | 0 | ch = in.peek(); |
717 | 5 | } else if (ch == '-') { |
718 | 5 | neg = true; |
719 | 5 | in.ignore(); |
720 | 5 | ch = in.peek(); |
721 | 5 | } |
722 | | |
723 | 5 | exp = 0; |
724 | 10 | while (in.good() && !strchr(kJsonDelim, ch)) { |
725 | 5 | if (ch >= '0' && ch <= '9') { |
726 | 5 | exp = exp * 10 + (ch - '0'); |
727 | 5 | } else { |
728 | 0 | err_ = FbsonErrType::E_INVALID_EXPONENT; |
729 | 0 | return false; |
730 | 0 | } |
731 | | |
732 | 5 | if (exp > 308) { |
733 | 0 | err_ = FbsonErrType::E_EXPONENT_OVERFLOW; |
734 | 0 | return false; |
735 | 0 | } |
736 | | |
737 | 5 | in.ignore(); |
738 | 5 | ch = in.peek(); |
739 | 5 | } |
740 | | |
741 | 5 | if (neg) { |
742 | 5 | exp = -exp; |
743 | 5 | } |
744 | | |
745 | 5 | return true; |
746 | 5 | } |
747 | | |
748 | 1.07k | void trim(std::istream& in) { |
749 | 1.44k | while (in.good() && strchr(kWhiteSpace, in.peek())1.44k ) { |
750 | 368 | in.ignore(); |
751 | 368 | } |
752 | 1.07k | } |
753 | | |
754 | | private: |
755 | | FbsonWriterT<OS_TYPE> writer_; |
756 | | FbsonErrType err_; |
757 | | }; |
758 | | |
759 | | typedef FbsonJsonParserT<FbsonOutStream> FbsonJsonParser; |
760 | | |
761 | | } // namespace fbson |
762 | | |
763 | | #endif // FBSON_FBSONPARSER_H |