simdjson  3.11.0
Ridiculously Fast JSON
serialization-inl.h
1 
2 #ifndef SIMDJSON_SERIALIZATION_INL_H
3 #define SIMDJSON_SERIALIZATION_INL_H
4 
5 #include "simdjson/dom/base.h"
6 #include "simdjson/dom/serialization.h"
7 #include "simdjson/dom/parser.h"
8 #include "simdjson/internal/tape_type.h"
9 
10 #include "simdjson/dom/array-inl.h"
11 #include "simdjson/dom/object-inl.h"
12 #include "simdjson/internal/tape_ref-inl.h"
13 
14 #include <cstring>
15 
16 namespace simdjson {
17 namespace dom {
18 inline bool parser::print_json(std::ostream &os) const noexcept {
19  if (!valid) { return false; }
20  simdjson::internal::string_builder<> sb;
21  sb.append(doc.root());
22  std::string_view answer = sb.str();
23  os << answer;
24  return true;
25 }
26 
27 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) {
28  simdjson::internal::string_builder<> sb;
29  sb.append(value);
30  return (out << sb.str());
31 }
32 #if SIMDJSON_EXCEPTIONS
33 inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::element> x) {
34  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
35  return (out << x.value());
36 }
37 #endif
38 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) {
39  simdjson::internal::string_builder<> sb;
40  sb.append(value);
41  return (out << sb.str());
42 }
43 #if SIMDJSON_EXCEPTIONS
44 inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::array> x) {
45  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
46  return (out << x.value());
47 }
48 #endif
49 inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) {
50  simdjson::internal::string_builder<> sb;
51  sb.append(value);
52  return (out << sb.str());
53 }
54 #if SIMDJSON_EXCEPTIONS
55 inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result<simdjson::dom::object> x) {
56  if (x.error()) { throw simdjson::simdjson_error(x.error()); }
57  return (out << x.value());
58 }
59 #endif
60 
61 } // namespace dom
62 
63 /***
64  * Number utility functions
65  **/
66 namespace {
/**
 * One precomputed JSON escape: the number of characters to emit followed by
 * the characters themselves.
 *
 * Member order matters: instances are brace-initialized as {length, "text"}.
 */
struct escape_sequence {
  // Number of leading characters of `string` that make up the escape.
  uint8_t length;
  // Six characters are the most an escape ever needs; the seventh byte holds
  // the string literal's terminating NUL, and together with `length` the
  // members occupy 8 bytes.
  const char string[7];
};
/**
 * Write the decimal representation of a signed 64-bit integer.
 *
 * No terminating NUL is written. The caller must supply room for the sign
 * plus the digits (at most 20 characters for an int64_t).
 *
 * @param output Destination for the characters.
 * @param value  Integer to format.
 * @return Pointer one past the last character written.
 */
static char *fast_itoa(char *output, int64_t value) noexcept {
  // Work on the magnitude as an unsigned value: negating a signed integer
  // directly overflows for the most negative value, whereas unsigned
  // arithmetic wraps and yields the correct magnitude.
  uint64_t magnitude = static_cast<uint64_t>(value);
  if (value < 0) {
    *output++ = '-';
    magnitude = uint64_t(0) - magnitude;
  }
  // Digits are produced least-significant first, so fill a scratch buffer
  // from its end and copy the used tail out afterwards.
  char digits[20];
  char *const digits_end = digits + sizeof(digits);
  char *cursor = digits_end;
  // A faster approach is possible if large integers are expected: unroll the
  // loop (work in 100s, 1000s) and use some kind of memoization.
  do {
    *--cursor = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  const size_t count = size_t(digits_end - cursor);
  std::memcpy(output, cursor, count);
  return output + count;
}
/**
 * Write the decimal representation of an unsigned 64-bit integer.
 *
 * No terminating NUL is written. The caller must supply room for up to
 * 20 characters.
 *
 * @param output Destination for the characters.
 * @param value  Integer to format.
 * @return Pointer one past the last character written.
 */
static char *fast_itoa(char *output, uint64_t value) noexcept {
  // Digits are produced least-significant first, so fill a scratch buffer
  // from its end and copy the used tail out afterwards.
  char digits[20];
  char *const digits_end = digits + sizeof(digits);
  char *cursor = digits_end;
  // A faster approach is possible if large integers are expected: unroll the
  // loop (work in 100s, 1000s) and use some kind of memoization.
  do {
    *--cursor = char('0' + (value % 10));
    value /= 10;
  } while (value != 0);
  const size_t count = size_t(digits_end - cursor);
  std::memcpy(output, cursor, count);
  return output + count;
}
143 
144 
145 } // anonymous namespace
146 namespace internal {
147 
148 /***
149  * Minifier/formatter code.
150  **/
151 
152 template<class formatter>
153 simdjson_inline void base_formatter<formatter>::number(uint64_t x) {
154  char number_buffer[24];
155  char *newp = fast_itoa(number_buffer, x);
156  buffer.insert(buffer.end(), number_buffer, newp);
157 }
158 
159 template<class formatter>
160 simdjson_inline void base_formatter<formatter>::number(int64_t x) {
161  char number_buffer[24];
162  char *newp = fast_itoa(number_buffer, x);
163  buffer.insert(buffer.end(), number_buffer, newp);
164 }
165 
166 template<class formatter>
167 simdjson_inline void base_formatter<formatter>::number(double x) {
168  char number_buffer[24];
169  // Currently, passing the nullptr to the second argument is
170  // safe because our implementation does not check the second
171  // argument.
172  char *newp = internal::to_chars(number_buffer, nullptr, x);
173  buffer.insert(buffer.end(), number_buffer, newp);
174 }
175 
176 template<class formatter>
177 simdjson_inline void base_formatter<formatter>::start_array() { one_char('['); }
178 
179 
180 template<class formatter>
181 simdjson_inline void base_formatter<formatter>::end_array() { one_char(']'); }
182 
183 template<class formatter>
184 simdjson_inline void base_formatter<formatter>::start_object() { one_char('{'); }
185 
186 template<class formatter>
187 simdjson_inline void base_formatter<formatter>::end_object() { one_char('}'); }
188 
189 template<class formatter>
190 simdjson_inline void base_formatter<formatter>::comma() { one_char(','); }
191 
192 template<class formatter>
193 simdjson_inline void base_formatter<formatter>::true_atom() {
194  const char * s = "true";
195  buffer.insert(buffer.end(), s, s + 4);
196 }
197 
198 template<class formatter>
199 simdjson_inline void base_formatter<formatter>::false_atom() {
200  const char * s = "false";
201  buffer.insert(buffer.end(), s, s + 5);
202 }
203 
204 template<class formatter>
205 simdjson_inline void base_formatter<formatter>::null_atom() {
206  const char * s = "null";
207  buffer.insert(buffer.end(), s, s + 4);
208 }
209 
210 template<class formatter>
211 simdjson_inline void base_formatter<formatter>::one_char(char c) { buffer.push_back(c); }
212 
213 template<class formatter>
214 simdjson_inline void base_formatter<formatter>::key(std::string_view unescaped) {
215  string(unescaped);
216  one_char(':');
217 }
218 
219 template<class formatter>
220 simdjson_inline void base_formatter<formatter>::string(std::string_view unescaped) {
221  one_char('\"');
222  size_t i = 0;
223  // Fast path for the case where we have no control character, no ", and no backslash.
224  // This should include most keys.
225  //
226  // We would like to use 'bool' but some compilers take offense to bitwise operation
227  // with bool types.
228  constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
229  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
230  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
231  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
232  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
234  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
236  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
237  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
238  for(;i + 8 <= unescaped.length(); i += 8) {
239  // Poor's man vectorization. This could get much faster if we used SIMD.
240  //
241  // It is not the case that replacing '|' with '||' would be neutral performance-wise.
242  if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])]
243  | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])]
244  | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])]
245  | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])]
246  ) { break; }
247  }
248  for(;i < unescaped.length(); i++) {
249  if(needs_escaping[uint8_t(unescaped[i])]) { break; }
250  }
251  // The following is also possible and omits a 256-byte table, but it is slower:
252  // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
253  // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
254 
255  // At least for long strings, the following should be fast. We could
256  // do better by integrating the checks and the insertion.
257  buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i);
258  // We caught a control character if we enter this loop (slow).
259  // Note that we are do not restart from the beginning, but rather we continue
260  // from the point where we encountered something that requires escaping.
261  for (; i < unescaped.length(); i++) {
262  switch (unescaped[i]) {
263  case '\"':
264  {
265  const char * s = "\\\"";
266  buffer.insert(buffer.end(), s, s + 2);
267  }
268  break;
269  case '\\':
270  {
271  const char * s = "\\\\";
272  buffer.insert(buffer.end(), s, s + 2);
273  }
274  break;
275  default:
276  if (uint8_t(unescaped[i]) <= 0x1F) {
277  // If packed, this uses 8 * 32 bytes.
278  // Note that we expect most compilers to embed this code in the data
279  // section.
280  constexpr static escape_sequence escaped[32] = {
281  {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"},
282  {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"},
283  {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"},
284  {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"},
285  {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"},
286  {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"},
287  {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
288  {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
289  auto u = escaped[uint8_t(unescaped[i])];
290  buffer.insert(buffer.end(), u.string, u.string + u.length);
291  } else {
292  one_char(unescaped[i]);
293  }
294  } // switch
295  } // for
296  one_char('\"');
297 }
298 
299 
300 template<class formatter>
301 inline void base_formatter<formatter>::clear() {
302  buffer.clear();
303 }
304 
305 template<class formatter>
306 simdjson_inline std::string_view base_formatter<formatter>::str() const {
307  return std::string_view(buffer.data(), buffer.size());
308 }
309 
310 simdjson_inline void mini_formatter::print_newline() {
311  return;
312 }
313 
314 simdjson_inline void mini_formatter::print_indents(size_t depth) {
315  (void)depth;
316  return;
317 }
318 
319 simdjson_inline void mini_formatter::print_space() {
320  return;
321 }
322 
323 simdjson_inline void pretty_formatter::print_newline() {
324  one_char('\n');
325 }
326 
327 simdjson_inline void pretty_formatter::print_indents(size_t depth) {
328  if(this->indent_step <= 0) {
329  return;
330  }
331  for(size_t i = 0; i < this->indent_step * depth; i++) {
332  one_char(' ');
333  }
334 }
335 
336 simdjson_inline void pretty_formatter::print_space() {
337  one_char(' ');
338 }
339 
340 /***
341  * String building code.
342  **/
343 
344 template <class serializer>
345 inline void string_builder<serializer>::append(simdjson::dom::element value) {
346  // using tape_type = simdjson::internal::tape_type;
347  size_t depth = 0;
348  constexpr size_t MAX_DEPTH = 16;
349  bool is_object[MAX_DEPTH];
350  is_object[0] = false;
351  bool after_value = false;
352 
353  internal::tape_ref iter(value.tape);
354  do {
355  // print commas after each value
356  if (after_value) {
357  format.comma();
358  format.print_newline();
359  }
360 
361  format.print_indents(depth);
362 
363  // If we are in an object, print the next key and :, and skip to the next
364  // value.
365  if (is_object[depth]) {
366  format.key(iter.get_string_view());
367  format.print_space();
368  iter.json_index++;
369  }
370  switch (iter.tape_ref_type()) {
371 
372  // Arrays
373  case tape_type::START_ARRAY: {
374  // If we're too deep, we need to recurse to go deeper.
375  depth++;
376  if (simdjson_unlikely(depth >= MAX_DEPTH)) {
377  append(simdjson::dom::array(iter));
378  iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
379  depth--;
380  break;
381  }
382 
383  // Output start [
384  format.start_array();
385  iter.json_index++;
386 
387  // Handle empty [] (we don't want to come back around and print commas)
388  if (iter.tape_ref_type() == tape_type::END_ARRAY) {
389  format.end_array();
390  depth--;
391  break;
392  }
393 
394  is_object[depth] = false;
395  after_value = false;
396  format.print_newline();
397  continue;
398  }
399 
400  // Objects
401  case tape_type::START_OBJECT: {
402  // If we're too deep, we need to recurse to go deeper.
403  depth++;
404  if (simdjson_unlikely(depth >= MAX_DEPTH)) {
405  append(simdjson::dom::object(iter));
406  iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
407  depth--;
408  break;
409  }
410 
411  // Output start {
412  format.start_object();
413  iter.json_index++;
414 
415  // Handle empty {} (we don't want to come back around and print commas)
416  if (iter.tape_ref_type() == tape_type::END_OBJECT) {
417  format.end_object();
418  depth--;
419  break;
420  }
421 
422  is_object[depth] = true;
423  after_value = false;
424  format.print_newline();
425  continue;
426  }
427 
428  // Scalars
429  case tape_type::STRING:
430  format.string(iter.get_string_view());
431  break;
432  case tape_type::INT64:
433  format.number(iter.next_tape_value<int64_t>());
434  iter.json_index++; // numbers take up 2 spots, so we need to increment
435  // extra
436  break;
437  case tape_type::UINT64:
438  format.number(iter.next_tape_value<uint64_t>());
439  iter.json_index++; // numbers take up 2 spots, so we need to increment
440  // extra
441  break;
442  case tape_type::DOUBLE:
443  format.number(iter.next_tape_value<double>());
444  iter.json_index++; // numbers take up 2 spots, so we need to increment
445  // extra
446  break;
447  case tape_type::TRUE_VALUE:
448  format.true_atom();
449  break;
450  case tape_type::FALSE_VALUE:
451  format.false_atom();
452  break;
453  case tape_type::NULL_VALUE:
454  format.null_atom();
455  break;
456 
457  // These are impossible
458  case tape_type::END_ARRAY:
459  case tape_type::END_OBJECT:
460  case tape_type::ROOT:
461  SIMDJSON_UNREACHABLE();
462  }
463  iter.json_index++;
464  after_value = true;
465 
466  // Handle multiple ends in a row
467  while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
468  iter.tape_ref_type() == tape_type::END_OBJECT)) {
469  format.print_newline();
470  depth--;
471  format.print_indents(depth);
472  if (iter.tape_ref_type() == tape_type::END_ARRAY) {
473  format.end_array();
474  } else {
475  format.end_object();
476  }
477  iter.json_index++;
478  }
479 
480  // Stop when we're at depth 0
481  } while (depth != 0);
482 
483  format.print_newline();
484 }
485 
486 template <class serializer>
487 inline void string_builder<serializer>::append(simdjson::dom::object value) {
488  format.start_object();
489  auto pair = value.begin();
490  auto end = value.end();
491  if (pair != end) {
492  append(*pair);
493  for (++pair; pair != end; ++pair) {
494  format.comma();
495  append(*pair);
496  }
497  }
498  format.end_object();
499 }
500 
501 template <class serializer>
502 inline void string_builder<serializer>::append(simdjson::dom::array value) {
503  format.start_array();
504  auto iter = value.begin();
505  auto end = value.end();
506  if (iter != end) {
507  append(*iter);
508  for (++iter; iter != end; ++iter) {
509  format.comma();
510  append(*iter);
511  }
512  }
513  format.end_array();
514 }
515 
516 template <class serializer>
517 simdjson_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
518  format.key(kv.key);
519  append(kv.value);
520 }
521 
522 template <class serializer>
523 simdjson_inline void string_builder<serializer>::clear() {
524  format.clear();
525 }
526 
527 template <class serializer>
528 simdjson_inline std::string_view string_builder<serializer>::str() const {
529  return format.str();
530 }
531 
532 
533 } // namespace internal
534 } // namespace simdjson
535 
536 #endif
JSON array.
Definition: array.h:13
A JSON element.
Definition: element.h:31
Key/value pair in an object.
Definition: object.h:238
std::string_view key
key in the key-value pair
Definition: object.h:241
element value
value in the key-value pair
Definition: object.h:243
JSON object.
Definition: object.h:14
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8
Exception thrown when an exception-supporting simdjson method is called.
Definition: error.h:82
The result of a simdjson operation that could fail.
Definition: error.h:215
simdjson_inline error_code error() const noexcept
The error.
Definition: error-inl.h:131
simdjson_inline T & value() &noexcept(false)
Get the result value.