simdjson  3.11.0
Ridiculously Fast JSON
json_iterator-inl.h
1 #ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
2 
3 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
4 #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
5 #include "simdjson/internal/dom_parser_implementation.h"
6 #include "simdjson/generic/ondemand/base.h"
7 #include "simdjson/generic/ondemand/json_iterator.h"
8 #include "simdjson/generic/ondemand/parser.h"
9 #include "simdjson/generic/ondemand/raw_json_string.h"
10 #include "simdjson/generic/ondemand/logger-inl.h"
11 #include "simdjson/generic/ondemand/parser-inl.h"
12 #include "simdjson/generic/ondemand/token_iterator-inl.h"
13 #endif // SIMDJSON_CONDITIONAL_INCLUDE
14 
15 namespace simdjson {
16 namespace SIMDJSON_IMPLEMENTATION {
17 namespace ondemand {
18 
19 simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
20  : token(std::forward<token_iterator>(other.token)),
21  parser{other.parser},
22  _string_buf_loc{other._string_buf_loc},
23  error{other.error},
24  _depth{other._depth},
25  _root{other._root},
26  _streaming{other._streaming}
27 {
28  other.parser = nullptr;
29 }
30 simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
31  token = other.token;
32  parser = other.parser;
33  _string_buf_loc = other._string_buf_loc;
34  error = other.error;
35  _depth = other._depth;
36  _root = other._root;
37  _streaming = other._streaming;
38  other.parser = nullptr;
39  return *this;
40 }
41 
42 simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
43  : token(buf, &_parser->implementation->structural_indexes[0]),
44  parser{_parser},
45  _string_buf_loc{parser->string_buf.get()},
46  _depth{1},
47  _root{parser->implementation->structural_indexes.get()},
48  _streaming{false}
49 
50 {
51  logger::log_headers();
52 #if SIMDJSON_CHECK_EOF
53  assert_more_tokens();
54 #endif
55 }
56 
#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
/**
 * Same as the (buf, parser) constructor, but lets the caller choose the
 * streaming flag. Only compiled when SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
 * is defined.
 *
 * @param buf        The JSON bytes.
 * @param _parser    The parser whose implementation holds the structural indexes.
 * @param streaming  Value stored in `_streaming`.
 */
simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser{_parser},
    _string_buf_loc{_parser->string_buf.get()},
    _depth{1},
    _root{_parser->implementation->structural_indexes.get()},
    _streaming{streaming}
{
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  // With EOF checking on, verify that at least one token is available.
  assert_more_tokens();
#endif
}
#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
73 
74 inline void json_iterator::rewind() noexcept {
75  token.set_position( root_position() );
76  logger::log_headers(); // We start again
77  _string_buf_loc = parser->string_buf.get();
78  _depth = 1;
79 }
80 
81 inline bool json_iterator::balanced() const noexcept {
82  token_iterator ti(token);
83  int32_t count{0};
84  ti.set_position( root_position() );
85  while(ti.peek() <= peek_last()) {
86  switch (*ti.return_current_and_advance())
87  {
88  case '[': case '{':
89  count++;
90  break;
91  case ']': case '}':
92  count--;
93  break;
94  default:
95  break;
96  }
97  }
98  return count == 0;
99 }
100 
101 
102 // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
103 // relating depth and parent_depth, which is a desired effect. The warning does not show up if the
104 // skip_child() function is not marked inline).
105 SIMDJSON_PUSH_DISABLE_WARNINGS
106 SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
107 simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
108  if (depth() <= parent_depth) { return SUCCESS; }
109  switch (*return_current_and_advance()) {
110  // TODO consider whether matching braces is a requirement: if non-matching braces indicates
111  // *missing* braces, then future lookups are not in the object/arrays they think they are,
112  // violating the rule "validate enough structure that the user can be confident they are
113  // looking at the right values."
114  // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
115 
116  // For the first open array/object in a value, we've already incremented depth, so keep it the same
117  // We never stop at colon, but if we did, it wouldn't affect depth
118  case '[': case '{': case ':':
119  logger::log_start_value(*this, "skip");
120  break;
121  // If there is a comma, we have just finished a value in an array/object, and need to get back in
122  case ',':
123  logger::log_value(*this, "skip");
124  break;
125  // ] or } means we just finished a value and need to jump out of the array/object
126  case ']': case '}':
127  logger::log_end_value(*this, "skip");
128  _depth--;
129  if (depth() <= parent_depth) { return SUCCESS; }
130 #if SIMDJSON_CHECK_EOF
131  // If there are no more tokens, the parent is incomplete.
132  if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
133 #endif // SIMDJSON_CHECK_EOF
134  break;
135  case '"':
136  if(*peek() == ':') {
137  // We are at a key!!!
138  // This might happen if you just started an object and you skip it immediately.
139  // Performance note: it would be nice to get rid of this check as it is somewhat
140  // expensive.
141  // https://github.com/simdjson/simdjson/issues/1742
142  logger::log_value(*this, "key");
143  return_current_and_advance(); // eat up the ':'
144  break; // important!!!
145  }
146  simdjson_fallthrough;
147  // Anything else must be a scalar value
148  default:
149  // For the first scalar, we will have incremented depth already, so we decrement it here.
150  logger::log_value(*this, "skip");
151  _depth--;
152  if (depth() <= parent_depth) { return SUCCESS; }
153  break;
154  }
155 
156  // Now that we've considered the first value, we only increment/decrement for arrays/objects
157  while (position() < end_position()) {
158  switch (*return_current_and_advance()) {
159  case '[': case '{':
160  logger::log_start_value(*this, "skip");
161  _depth++;
162  break;
163  // TODO consider whether matching braces is a requirement: if non-matching braces indicates
164  // *missing* braces, then future lookups are not in the object/arrays they think they are,
165  // violating the rule "validate enough structure that the user can be confident they are
166  // looking at the right values."
167  // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
168  case ']': case '}':
169  logger::log_end_value(*this, "skip");
170  _depth--;
171  if (depth() <= parent_depth) { return SUCCESS; }
172  break;
173  default:
174  logger::log_value(*this, "skip", "");
175  break;
176  }
177  }
178 
179  return report_error(TAPE_ERROR, "not enough close braces");
180 }
181 
182 SIMDJSON_POP_DISABLE_WARNINGS
183 
184 simdjson_inline bool json_iterator::at_root() const noexcept {
185  return position() == root_position();
186 }
187 
188 simdjson_inline bool json_iterator::is_single_token() const noexcept {
189  return parser->implementation->n_structural_indexes == 1;
190 }
191 
192 simdjson_inline bool json_iterator::streaming() const noexcept {
193  return _streaming;
194 }
195 
196 simdjson_inline token_position json_iterator::root_position() const noexcept {
197  return _root;
198 }
199 
200 simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
201  SIMDJSON_ASSUME( _depth == 1 );
202 }
203 
204 simdjson_inline void json_iterator::assert_at_root() const noexcept {
205  SIMDJSON_ASSUME( _depth == 1 );
206 #ifndef SIMDJSON_CLANG_VISUAL_STUDIO
207  // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
208  // has side effects that will be discarded.
209  SIMDJSON_ASSUME( token.position() == _root );
210 #endif
211 }
212 
213 simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
214  assert_valid_position(token._position + required_tokens - 1);
215 }
216 
217 simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
218 #ifndef SIMDJSON_CLANG_VISUAL_STUDIO
219  SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
220  SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
221 #endif
222 }
223 
224 simdjson_inline bool json_iterator::at_end() const noexcept {
225  return position() == end_position();
226 }
227 simdjson_inline token_position json_iterator::end_position() const noexcept {
228  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
229  return &parser->implementation->structural_indexes[n_structural_indexes];
230 }
231 
232 inline std::string json_iterator::to_string() const noexcept {
233  if( !is_alive() ) { return "dead json_iterator instance"; }
234  const char * current_structural = reinterpret_cast<const char *>(token.peek());
235  return std::string("json_iterator [ depth : ") + std::to_string(_depth)
236  + std::string(", structural : '") + std::string(current_structural,1)
237  + std::string("', offset : ") + std::to_string(token.current_offset())
238  + std::string("', error : ") + error_message(error)
239  + std::string(" ]");
240 }
241 
242 inline simdjson_result<const char *> json_iterator::current_location() const noexcept {
243  if (!is_alive()) { // Unrecoverable error
244  if (!at_root()) {
245  return reinterpret_cast<const char *>(token.peek(-1));
246  } else {
247  return reinterpret_cast<const char *>(token.peek());
248  }
249  }
250  if (at_end()) {
251  return OUT_OF_BOUNDS;
252  }
253  return reinterpret_cast<const char *>(token.peek());
254 }
255 
256 simdjson_inline bool json_iterator::is_alive() const noexcept {
257  return parser;
258 }
259 
260 simdjson_inline void json_iterator::abandon() noexcept {
261  parser = nullptr;
262  _depth = 0;
263 }
264 
265 simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
266 #if SIMDJSON_CHECK_EOF
267  assert_more_tokens();
268 #endif // SIMDJSON_CHECK_EOF
269  return token.return_current_and_advance();
270 }
271 
272 simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
273  // deliberately done without safety guard:
274  return token.peek();
275 }
276 
277 simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
278 #if SIMDJSON_CHECK_EOF
279  assert_more_tokens(delta+1);
280 #endif // SIMDJSON_CHECK_EOF
281  return token.peek(delta);
282 }
283 
284 simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
285 #if SIMDJSON_CHECK_EOF
286  assert_more_tokens(delta+1);
287 #endif // #if SIMDJSON_CHECK_EOF
288  return token.peek_length(delta);
289 }
290 
291 simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
292  // todo: currently we require end-of-string buffering, but the following
293  // assert_valid_position should be turned on if/when we lift that condition.
294  // assert_valid_position(position);
295  // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
296  // is ON by default, we have no choice but to disable it for real with a comment.
297  return token.peek(position);
298 }
299 
300 simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
301 #if SIMDJSON_CHECK_EOF
302  assert_valid_position(position);
303 #endif // SIMDJSON_CHECK_EOF
304  return token.peek_length(position);
305 }
306 simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept {
307 #if SIMDJSON_CHECK_EOF
308  assert_valid_position(position);
309 #endif // SIMDJSON_CHECK_EOF
310  return token.peek_root_length(position);
311 }
312 
313 simdjson_inline token_position json_iterator::last_position() const noexcept {
314  // The following line fails under some compilers...
315  // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
316  // since it has side-effects.
317  uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
318  SIMDJSON_ASSUME(n_structural_indexes > 0);
319  return &parser->implementation->structural_indexes[n_structural_indexes - 1];
320 }
321 simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
322  return token.peek(last_position());
323 }
324 
325 simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
326  SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
327  SIMDJSON_ASSUME(_depth == parent_depth + 1);
328  _depth = parent_depth;
329 }
330 
331 simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
332  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
333  SIMDJSON_ASSUME(_depth == child_depth - 1);
334  _depth = child_depth;
335 }
336 
337 simdjson_inline depth_t json_iterator::depth() const noexcept {
338  return _depth;
339 }
340 
341 simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
342  return _string_buf_loc;
343 }
344 
345 simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
346  SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
347  logger::log_error(*this, message);
348  error = _error;
349  return error;
350 }
351 
352 simdjson_inline token_position json_iterator::position() const noexcept {
353  return token.position();
354 }
355 
356 simdjson_inline simdjson_result<std::string_view> json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
357 #if SIMDJSON_DEVELOPMENT_CHECKS
358  auto result = parser->unescape(in, _string_buf_loc, allow_replacement);
359  SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
360  return result;
361 #else
362  return parser->unescape(in, _string_buf_loc, allow_replacement);
363 #endif
364 }
365 
366 simdjson_inline simdjson_result<std::string_view> json_iterator::unescape_wobbly(raw_json_string in) noexcept {
367 #if SIMDJSON_DEVELOPMENT_CHECKS
368  auto result = parser->unescape_wobbly(in, _string_buf_loc);
369  SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
370  return result;
371 #else
372  return parser->unescape_wobbly(in, _string_buf_loc);
373 #endif
374 }
375 
376 simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
377  SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
378  SIMDJSON_ASSUME(_depth == child_depth - 1);
379 #if SIMDJSON_DEVELOPMENT_CHECKS
380 #ifndef SIMDJSON_CLANG_VISUAL_STUDIO
381  SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
382  SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]);
383 #endif
384 #endif
385  token.set_position(position);
386  _depth = child_depth;
387 }
388 
389 simdjson_inline error_code json_iterator::consume_character(char c) noexcept {
390  if (*peek() == c) {
391  return_current_and_advance();
392  return SUCCESS;
393  }
394  return TAPE_ERROR;
395 }
396 
397 #if SIMDJSON_DEVELOPMENT_CHECKS
398 
399 simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept {
400  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
401  return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0;
402 }
403 
404 simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept {
405  SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
406  if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; }
407 }
408 
409 #endif
410 
411 
412 simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
413  SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
414  logger::log_error(*this, message);
415  return _error;
416 }
417 
418 
419 simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept {
420  // This function is not expected to be called in performance-sensitive settings.
421  // Let us guard against silly cases:
422  if((N < max_len) || (N == 0)) { return false; }
423  // Copy to the buffer.
424  std::memcpy(tmpbuf, json, max_len);
425  if(N > max_len) { // We pad whatever remains with ' '.
426  std::memset(tmpbuf + max_len, ' ', N - max_len);
427  }
428  return true;
429 }
430 
431 } // namespace ondemand
432 } // namespace SIMDJSON_IMPLEMENTATION
433 } // namespace simdjson
434 
435 namespace simdjson {
436 
437 simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept
438  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(value)) {}
439 simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(error_code error) noexcept
440  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(error) {}
441 
442 } // namespace simdjson
443 
444 #endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
int32_t depth_t
Represents the depth of a JSON value (number of nested arrays/objects).
Definition: base.h:18
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition: error-inl.h:20
error_code
All possible errors returned by simdjson.
Definition: error.h:19
@ INCORRECT_TYPE
JSON element has a different type than user expected.
Definition: error.h:37
@ OUT_OF_BOUNDS
Attempted to access location outside of document.
Definition: error.h:50
@ TAPE_ERROR
Something went wrong, this is a generic error.
Definition: error.h:23
@ NO_SUCH_FIELD
JSON field not found in object.
Definition: error.h:40
@ SUCCESS
No error.
Definition: error.h:20
@ INCOMPLETE_ARRAY_OR_OBJECT
The document ends early.
Definition: error.h:48
@ UNINITIALIZED
unknown error, or uninitialized document
Definition: error.h:32