simdjson 4.0.7
Ridiculously Fast JSON
Loading...
Searching...
No Matches
serialization-inl.h
1
2#ifndef SIMDJSON_SERIALIZATION_INL_H
3#define SIMDJSON_SERIALIZATION_INL_H
4
5#include "simdjson/dom/base.h"
6#include "simdjson/dom/parser.h"
7#include "simdjson/dom/serialization.h"
8#include "simdjson/internal/tape_type.h"
9
10#include "simdjson/dom/array-inl.h"
11#include "simdjson/dom/object-inl.h"
12#include "simdjson/internal/tape_ref-inl.h"
13
14#include <cstring>
15
16namespace simdjson {
17namespace dom {
18inline bool parser::print_json(std::ostream &os) const noexcept {
19 if (!valid) {
20 return false;
21 }
22 simdjson::internal::string_builder<> sb;
23 sb.append(doc.root());
24 std::string_view answer = sb.str();
25 os << answer;
26 return true;
27}
28
29inline std::ostream &operator<<(std::ostream &out,
31 simdjson::internal::string_builder<> sb;
32 sb.append(value);
33 return (out << sb.str());
34}
35#if SIMDJSON_EXCEPTIONS
36inline std::ostream &
37operator<<(std::ostream &out,
39 if (x.error()) {
41 }
42 return (out << x.value());
43}
44#endif
45inline std::ostream &operator<<(std::ostream &out, simdjson::dom::array value) {
46 simdjson::internal::string_builder<> sb;
47 sb.append(value);
48 return (out << sb.str());
49}
50#if SIMDJSON_EXCEPTIONS
51inline std::ostream &
52operator<<(std::ostream &out,
54 if (x.error()) {
56 }
57 return (out << x.value());
58}
59#endif
60inline std::ostream &operator<<(std::ostream &out,
62 simdjson::internal::string_builder<> sb;
63 sb.append(value);
64 return (out << sb.str());
65}
66#if SIMDJSON_EXCEPTIONS
67inline std::ostream &
68operator<<(std::ostream &out,
70 if (x.error()) {
72 }
73 return (out << x.value());
74}
75#endif
76
77} // namespace dom
78
79/***
80 * Number utility functions
81 **/
82namespace {
/**
 * One precomputed JSON escape: `length` characters of `string` are emitted.
 */
struct escape_sequence {
  // Number of meaningful characters in `string` (terminator excluded).
  uint8_t length;
  // The longest escape ("\uXXXX") is 6 characters; 7 bytes leave room for
  // the terminator, and with the 1-byte length the members add up to 8 bytes.
  const char string[7];
};
/**
 * Writes the decimal representation of a signed 64-bit integer.
 *
 * No null terminator is written.
 *
 * @param output destination; needs room for up to 20 characters
 *               (a '-' sign plus at most 19 digits).
 * @param value  integer to serialize.
 * @return pointer one past the last character written.
 */
static char *fast_itoa(char *output, int64_t value) noexcept {
  // Conversion to uint64_t is defined as reduction modulo 2^64, so
  // subtracting the converted value from zero yields |value| for every
  // negative input, INT64_MIN included, without any signed overflow and
  // without depending on the object representation.
  uint64_t magnitude = static_cast<uint64_t>(value);
  if (value < 0) {
    *output++ = '-';
    magnitude = uint64_t(0) - magnitude;
  }
  // Digits are produced least-significant first, so they are staged at the
  // tail of a scratch buffer and copied out once the count is known.
  char buffer[20];
  char *const end_buffer = buffer + sizeof(buffer);
  char *write_pointer = end_buffer;
  do {
    *--write_pointer = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  const size_t len = static_cast<size_t>(end_buffer - write_pointer);
  std::memcpy(output, write_pointer, len);
  return output + len;
}
/**
 * Writes the decimal representation of an unsigned 64-bit integer.
 *
 * No null terminator is written.
 *
 * @param output destination; needs room for up to 20 characters.
 * @param value  integer to serialize.
 * @return pointer one past the last character written.
 */
static char *fast_itoa(char *output, uint64_t value) noexcept {
  // UINT64_MAX has exactly 20 decimal digits, so the scratch buffer can hold
  // any input. Digits are produced least-significant first and staged at the
  // tail of the buffer.
  char buffer[20];
  char *const end_buffer = buffer + sizeof(buffer);
  char *write_pointer = end_buffer;
  // A faster approach is possible for large integers (processing several
  // digits per step with a lookup table); this is the plain version.
  do {
    *--write_pointer = char('0' + (value % 10));
    value /= 10;
  } while (value != 0);
  const size_t len = static_cast<size_t>(end_buffer - write_pointer);
  std::memcpy(output, write_pointer, len);
  return output + len;
}
160
161} // anonymous namespace
162namespace internal {
163
164/***
165 * Minifier/formatter code.
166 **/
167
168template <class formatter>
169simdjson_inline void base_formatter<formatter>::number(uint64_t x) {
170 char number_buffer[24];
171 char *newp = fast_itoa(number_buffer, x);
172 chars(number_buffer, newp);
173}
174
175template <class formatter>
176simdjson_inline void base_formatter<formatter>::number(int64_t x) {
177 char number_buffer[24];
178 char *newp = fast_itoa(number_buffer, x);
179 chars(number_buffer, newp);
180}
181
182template <class formatter>
183simdjson_inline void base_formatter<formatter>::number(double x) {
184 char number_buffer[24];
185 // Currently, passing the nullptr to the second argument is
186 // safe because our implementation does not check the second
187 // argument.
188 char *newp = internal::to_chars(number_buffer, nullptr, x);
189 chars(number_buffer, newp);
190}
191
192template <class formatter>
193simdjson_inline void base_formatter<formatter>::start_array() {
194 one_char('[');
195}
196
197template <class formatter>
198simdjson_inline void base_formatter<formatter>::end_array() {
199 one_char(']');
200}
201
202template <class formatter>
203simdjson_inline void base_formatter<formatter>::start_object() {
204 one_char('{');
205}
206
207template <class formatter>
208simdjson_inline void base_formatter<formatter>::end_object() {
209 one_char('}');
210}
211
212template <class formatter>
213simdjson_inline void base_formatter<formatter>::comma() {
214 one_char(',');
215}
216
217template <class formatter>
218simdjson_inline void base_formatter<formatter>::true_atom() {
219 const char *s = "true";
220 chars(s, s + 4);
221}
222
223template <class formatter>
224simdjson_inline void base_formatter<formatter>::false_atom() {
225 const char *s = "false";
226 chars(s, s + 5);
227}
228
229template <class formatter>
230simdjson_inline void base_formatter<formatter>::null_atom() {
231 const char *s = "null";
232 chars(s, s + 4);
233}
234
235template <class formatter>
236simdjson_inline void base_formatter<formatter>::one_char(char c) {
237 buffer.push_back(c);
238}
239
240template <class formatter>
241simdjson_inline void base_formatter<formatter>::chars(const char *begin,
242 const char *end) {
243 buffer.append(begin, end);
244}
245
246template <class formatter>
247simdjson_inline void
248base_formatter<formatter>::key(std::string_view unescaped) {
249 string(unescaped);
250 one_char(':');
251}
252
253template <class formatter>
254simdjson_inline void
255base_formatter<formatter>::string(std::string_view unescaped) {
256 one_char('\"');
257 size_t i = 0;
258 // Fast path for the case where we have no control character, no ", and no
259 // backslash. This should include most keys.
260 //
261 // We would like to use 'bool' but some compilers take offense to bitwise
262 // operation with bool types.
263 constexpr static char needs_escaping[] = {
264 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
265 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
266 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
268 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
270 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
271 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
272 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
273 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
274 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
275 for (; i + 8 <= unescaped.length(); i += 8) {
276 // Poor's man vectorization. This could get much faster if we used SIMD.
277 //
278 // It is not the case that replacing '|' with '||' would be neutral
279 // performance-wise.
280 if (needs_escaping[uint8_t(unescaped[i])] |
281 needs_escaping[uint8_t(unescaped[i + 1])] |
282 needs_escaping[uint8_t(unescaped[i + 2])] |
283 needs_escaping[uint8_t(unescaped[i + 3])] |
284 needs_escaping[uint8_t(unescaped[i + 4])] |
285 needs_escaping[uint8_t(unescaped[i + 5])] |
286 needs_escaping[uint8_t(unescaped[i + 6])] |
287 needs_escaping[uint8_t(unescaped[i + 7])]) {
288 break;
289 }
290 }
291 for (; i < unescaped.length(); i++) {
292 if (needs_escaping[uint8_t(unescaped[i])]) {
293 break;
294 }
295 }
296 // The following is also possible and omits a 256-byte table, but it is
297 // slower: for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
298 // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
299
300 // At least for long strings, the following should be fast. We could
301 // do better by integrating the checks and the insertion.
302 chars(unescaped.data(), unescaped.data() + i);
303 // We caught a control character if we enter this loop (slow).
304 // Note that we are do not restart from the beginning, but rather we continue
305 // from the point where we encountered something that requires escaping.
306 for (; i < unescaped.length(); i++) {
307 switch (unescaped[i]) {
308 case '\"': {
309 const char *s = "\\\"";
310 chars(s, s + 2);
311 } break;
312 case '\\': {
313 const char *s = "\\\\";
314 chars(s, s + 2);
315 } break;
316 default:
317 if (uint8_t(unescaped[i]) <= 0x1F) {
318 // If packed, this uses 8 * 32 bytes.
319 // Note that we expect most compilers to embed this code in the data
320 // section.
321 constexpr static escape_sequence escaped[32] = {
322 {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
323 {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
324 {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"},
325 {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"},
326 {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
327 {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
328 {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
329 {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
330 auto u = escaped[uint8_t(unescaped[i])];
331 chars(u.string, u.string + u.length);
332 } else {
333 one_char(unescaped[i]);
334 }
335 } // switch
336 } // for
337 one_char('\"');
338}
339
340template <class formatter> inline void base_formatter<formatter>::clear() {
341 buffer.clear();
342}
343
344template <class formatter>
345simdjson_inline std::string_view base_formatter<formatter>::str() const {
346 return buffer.str();
347}
348
349simdjson_inline void mini_formatter::print_newline() { return; }
350
351simdjson_inline void mini_formatter::print_indents(size_t depth) {
352 (void)depth;
353 return;
354}
355
356simdjson_inline void mini_formatter::print_space() { return; }
357
358simdjson_inline void pretty_formatter::print_newline() { one_char('\n'); }
359
360simdjson_inline void pretty_formatter::print_indents(size_t depth) {
361 if (this->indent_step <= 0) {
362 return;
363 }
364 for (size_t i = 0; i < this->indent_step * depth; i++) {
365 one_char(' ');
366 }
367}
368
369simdjson_inline void pretty_formatter::print_space() { one_char(' '); }
370
371/***
372 * String building code.
373 **/
374
375template <class serializer>
376inline void string_builder<serializer>::append(simdjson::dom::element value) {
377 // using tape_type = simdjson::internal::tape_type;
378 size_t depth = 0;
379 constexpr size_t MAX_DEPTH = 16;
380 bool is_object[MAX_DEPTH];
381 is_object[0] = false;
382 bool after_value = false;
383
384 internal::tape_ref iter(value.tape);
385 do {
386 // print commas after each value
387 if (after_value) {
388 format.comma();
389 format.print_newline();
390 }
391
392 format.print_indents(depth);
393
394 // If we are in an object, print the next key and :, and skip to the next
395 // value.
396 if (is_object[depth]) {
397 format.key(iter.get_string_view());
398 format.print_space();
399 iter.json_index++;
400 }
401 switch (iter.tape_ref_type()) {
402
403 // Arrays
404 case tape_type::START_ARRAY: {
405 // If we're too deep, we need to recurse to go deeper.
406 depth++;
407 if (simdjson_unlikely(depth >= MAX_DEPTH)) {
408 append(simdjson::dom::array(iter));
409 iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
410 depth--;
411 break;
412 }
413
414 // Output start [
415 format.start_array();
416 iter.json_index++;
417
418 // Handle empty [] (we don't want to come back around and print commas)
419 if (iter.tape_ref_type() == tape_type::END_ARRAY) {
420 format.end_array();
421 depth--;
422 break;
423 }
424
425 is_object[depth] = false;
426 after_value = false;
427 format.print_newline();
428 continue;
429 }
430
431 // Objects
432 case tape_type::START_OBJECT: {
433 // If we're too deep, we need to recurse to go deeper.
434 depth++;
435 if (simdjson_unlikely(depth >= MAX_DEPTH)) {
436 append(simdjson::dom::object(iter));
437 iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
438 depth--;
439 break;
440 }
441
442 // Output start {
443 format.start_object();
444 iter.json_index++;
445
446 // Handle empty {} (we don't want to come back around and print commas)
447 if (iter.tape_ref_type() == tape_type::END_OBJECT) {
448 format.end_object();
449 depth--;
450 break;
451 }
452
453 is_object[depth] = true;
454 after_value = false;
455 format.print_newline();
456 continue;
457 }
458
459 // Scalars
460 case tape_type::STRING:
461 format.string(iter.get_string_view());
462 break;
463 case tape_type::INT64:
464 format.number(iter.next_tape_value<int64_t>());
465 iter.json_index++; // numbers take up 2 spots, so we need to increment
466 // extra
467 break;
468 case tape_type::UINT64:
469 format.number(iter.next_tape_value<uint64_t>());
470 iter.json_index++; // numbers take up 2 spots, so we need to increment
471 // extra
472 break;
473 case tape_type::DOUBLE:
474 format.number(iter.next_tape_value<double>());
475 iter.json_index++; // numbers take up 2 spots, so we need to increment
476 // extra
477 break;
478 case tape_type::TRUE_VALUE:
479 format.true_atom();
480 break;
481 case tape_type::FALSE_VALUE:
482 format.false_atom();
483 break;
484 case tape_type::NULL_VALUE:
485 format.null_atom();
486 break;
487
488 // These are impossible
489 case tape_type::END_ARRAY:
490 case tape_type::END_OBJECT:
491 case tape_type::ROOT:
492 SIMDJSON_UNREACHABLE();
493 }
494 iter.json_index++;
495 after_value = true;
496
497 // Handle multiple ends in a row
498 while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
499 iter.tape_ref_type() == tape_type::END_OBJECT)) {
500 format.print_newline();
501 depth--;
502 format.print_indents(depth);
503 if (iter.tape_ref_type() == tape_type::END_ARRAY) {
504 format.end_array();
505 } else {
506 format.end_object();
507 }
508 iter.json_index++;
509 }
510
511 // Stop when we're at depth 0
512 } while (depth != 0);
513
514 format.print_newline();
515}
516
517template <class serializer>
518inline void string_builder<serializer>::append(simdjson::dom::object value) {
519 format.start_object();
520 auto pair = value.begin();
521 auto end = value.end();
522 if (pair != end) {
523 append(*pair);
524 for (++pair; pair != end; ++pair) {
525 format.comma();
526 append(*pair);
527 }
528 }
529 format.end_object();
530}
531
532template <class serializer>
533inline void string_builder<serializer>::append(simdjson::dom::array value) {
534 format.start_array();
535 auto iter = value.begin();
536 auto end = value.end();
537 if (iter != end) {
538 append(*iter);
539 for (++iter; iter != end; ++iter) {
540 format.comma();
541 append(*iter);
542 }
543 }
544 format.end_array();
545}
546
547template <class serializer>
548simdjson_inline void
549string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
550 format.key(kv.key);
551 append(kv.value);
552}
553
554template <class serializer>
555simdjson_inline void string_builder<serializer>::clear() {
556 format.clear();
557}
558
559template <class serializer>
560simdjson_inline std::string_view string_builder<serializer>::str() const {
561 return format.str();
562}
563
564} // namespace internal
565} // namespace simdjson
566
567#endif
JSON array.
Definition array.h:15
A JSON element.
Definition element.h:33
Key/value pair in an object.
Definition object.h:258
std::string_view key
key in the key-value pair
Definition object.h:261
element value
value in the key-value pair
Definition object.h:263
JSON object.
Definition object.h:16
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
Exception thrown when an exception-supporting simdjson method is called.
Definition error.h:91
The result of a simdjson operation that could fail.
Definition error.h:278
simdjson_inline error_code error() const noexcept
The error.
Definition error-inl.h:168
simdjson_inline T & value() &noexcept(false)
Get the result value.
Definition error-inl.h:175