simdjson 4.2.4
Ridiculously Fast JSON
Loading...
Searching...
No Matches
json_iterator-inl.h
1#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
2
3#ifndef SIMDJSON_CONDITIONAL_INCLUDE
4#define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
5#include "simdjson/internal/dom_parser_implementation.h"
6#include "simdjson/generic/ondemand/base.h"
7#include "simdjson/generic/ondemand/json_iterator.h"
8#include "simdjson/generic/ondemand/parser.h"
9#include "simdjson/generic/ondemand/raw_json_string.h"
10#include "simdjson/generic/ondemand/logger-inl.h"
11#include "simdjson/generic/ondemand/parser-inl.h"
12#include "simdjson/generic/ondemand/token_iterator-inl.h"
13#endif // SIMDJSON_CONDITIONAL_INCLUDE
14
15namespace simdjson {
16namespace SIMDJSON_IMPLEMENTATION {
17namespace ondemand {
18
19simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
20 : token(std::forward<token_iterator>(other.token)),
21 parser{other.parser},
22 _string_buf_loc{other._string_buf_loc},
23 error{other.error},
24 _depth{other._depth},
25 _root{other._root},
26 _streaming{other._streaming}
27{
28 other.parser = nullptr;
29}
30simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
31 token = other.token;
32 parser = other.parser;
33 _string_buf_loc = other._string_buf_loc;
34 error = other.error;
35 _depth = other._depth;
36 _root = other._root;
37 _streaming = other._streaming;
38 other.parser = nullptr;
39 return *this;
40}
41
42simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
43 : token(buf, &_parser->implementation->structural_indexes[0]),
44 parser{_parser},
45 _string_buf_loc{parser->string_buf.get()},
46 _depth{1},
47 _root{parser->implementation->structural_indexes.get()},
48 _streaming{false}
49
50{
51 logger::log_headers();
52#if SIMDJSON_CHECK_EOF
53 assert_more_tokens();
54#endif
55}
56
57#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
58simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept
59 : token(buf, &_parser->implementation->structural_indexes[0]),
60 parser{_parser},
61 _string_buf_loc{parser->string_buf.get()},
62 _depth{1},
63 _root{parser->implementation->structural_indexes.get()},
64 _streaming{streaming}
65
66{
67 logger::log_headers();
68#if SIMDJSON_CHECK_EOF
69 assert_more_tokens();
70#endif
71}
72#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
73
74inline void json_iterator::rewind() noexcept {
75 token.set_position( root_position() );
76 logger::log_headers(); // We start again
77 _string_buf_loc = parser->string_buf.get();
78 _depth = 1;
79}
80
81inline bool json_iterator::balanced() const noexcept {
82 token_iterator ti(token);
83 int32_t count{0};
84 ti.set_position( root_position() );
85 while(ti.peek() <= peek_last()) {
86 switch (*ti.return_current_and_advance())
87 {
88 case '[': case '{':
89 count++;
90 break;
91 case ']': case '}':
92 count--;
93 break;
94 default:
95 break;
96 }
97 }
98 return count == 0;
99}
100
101
102// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
// relating depth and parent_depth, which is a desired effect. (The warning does not show up if the
// skip_child() function is not marked inline.)
105SIMDJSON_PUSH_DISABLE_WARNINGS
106SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
107simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
108 if (depth() <= parent_depth) { return SUCCESS; }
109 switch (*return_current_and_advance()) {
110 // TODO consider whether matching braces is a requirement: if non-matching braces indicates
111 // *missing* braces, then future lookups are not in the object/arrays they think they are,
112 // violating the rule "validate enough structure that the user can be confident they are
113 // looking at the right values."
114 // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
115
116 // For the first open array/object in a value, we've already incremented depth, so keep it the same
117 // We never stop at colon, but if we did, it wouldn't affect depth
118 case '[': case '{': case ':':
119 logger::log_start_value(*this, "skip");
120 break;
121 // If there is a comma, we have just finished a value in an array/object, and need to get back in
122 case ',':
123 logger::log_value(*this, "skip");
124 break;
125 // ] or } means we just finished a value and need to jump out of the array/object
126 case ']': case '}':
127 logger::log_end_value(*this, "skip");
128 _depth--;
129 if (depth() <= parent_depth) { return SUCCESS; }
130#if SIMDJSON_CHECK_EOF
131 // If there are no more tokens, the parent is incomplete.
132 if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
133#endif // SIMDJSON_CHECK_EOF
134 break;
135 case '"':
136 if(*peek() == ':') {
137 // We are at a key!!!
138 // This might happen if you just started an object and you skip it immediately.
139 // Performance note: it would be nice to get rid of this check as it is somewhat
140 // expensive.
141 // https://github.com/simdjson/simdjson/issues/1742
142 logger::log_value(*this, "key");
143 return_current_and_advance(); // eat up the ':'
144 break; // important!!!
145 }
146 simdjson_fallthrough;
147 // Anything else must be a scalar value
148 default:
149 // For the first scalar, we will have incremented depth already, so we decrement it here.
150 logger::log_value(*this, "skip");
151 _depth--;
152 if (depth() <= parent_depth) { return SUCCESS; }
153 break;
154 }
155
156 // Now that we've considered the first value, we only increment/decrement for arrays/objects
157 while (position() < end_position()) {
158 switch (*return_current_and_advance()) {
159 case '[': case '{':
160 logger::log_start_value(*this, "skip");
161 _depth++;
162 break;
163 // TODO consider whether matching braces is a requirement: if non-matching braces indicates
164 // *missing* braces, then future lookups are not in the object/arrays they think they are,
165 // violating the rule "validate enough structure that the user can be confident they are
166 // looking at the right values."
167 // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
168 case ']': case '}':
169 logger::log_end_value(*this, "skip");
170 _depth--;
171 if (depth() <= parent_depth) { return SUCCESS; }
172 break;
173 default:
174 logger::log_value(*this, "skip", "");
175 break;
176 }
177 }
178
179 return report_error(TAPE_ERROR, "not enough close braces");
180}
181
182SIMDJSON_POP_DISABLE_WARNINGS
183
184simdjson_inline bool json_iterator::at_root() const noexcept {
185 return position() == root_position();
186}
187
188simdjson_inline bool json_iterator::is_single_token() const noexcept {
189 return parser->implementation->n_structural_indexes == 1;
190}
191
192simdjson_inline bool json_iterator::streaming() const noexcept {
193 return _streaming;
194}
195
196simdjson_inline token_position json_iterator::root_position() const noexcept {
197 return _root;
198}
199
200simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
201 SIMDJSON_ASSUME( _depth == 1 );
202}
203
204simdjson_inline void json_iterator::assert_at_root() const noexcept {
205 SIMDJSON_ASSUME( _depth == 1 );
206#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
207 // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
208 // has side effects that will be discarded.
209 SIMDJSON_ASSUME( token.position() == _root );
210#endif
211}
212
213simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
214 assert_valid_position(token._position + required_tokens - 1);
215}
216
217simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
218#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
219 SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
220 SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
221#else
222 (void)position; // Suppress unused parameter warning
223#endif
224}
225
226simdjson_inline bool json_iterator::at_end() const noexcept {
227 return position() == end_position();
228}
229simdjson_inline token_position json_iterator::end_position() const noexcept {
230 uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
231 return &parser->implementation->structural_indexes[n_structural_indexes];
232}
233
234inline std::string json_iterator::to_string() const noexcept {
235 if( !is_alive() ) { return "dead json_iterator instance"; }
236 const char * current_structural = reinterpret_cast<const char *>(token.peek());
237 return std::string("json_iterator [ depth : ") + std::to_string(_depth)
238 + std::string(", structural : '") + std::string(current_structural,1)
239 + std::string("', offset : ") + std::to_string(token.current_offset())
240 + std::string("', error : ") + error_message(error)
241 + std::string(" ]");
242}
243
244inline simdjson_result<const char *> json_iterator::current_location() const noexcept {
245 if (!is_alive()) { // Unrecoverable error
246 if (!at_root()) {
247 return reinterpret_cast<const char *>(token.peek(-1));
248 } else {
249 return reinterpret_cast<const char *>(token.peek());
250 }
251 }
252 if (at_end()) {
253 return OUT_OF_BOUNDS;
254 }
255 return reinterpret_cast<const char *>(token.peek());
256}
257
258simdjson_inline bool json_iterator::is_alive() const noexcept {
259 return parser;
260}
261
262simdjson_inline void json_iterator::abandon() noexcept {
263 parser = nullptr;
264 _depth = 0;
265}
266
267simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
268#if SIMDJSON_CHECK_EOF
269 assert_more_tokens();
270#endif // SIMDJSON_CHECK_EOF
271 return token.return_current_and_advance();
272}
273
274simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
275 // deliberately done without safety guard:
276 return token.peek();
277}
278
279simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
280#if SIMDJSON_CHECK_EOF
281 assert_more_tokens(delta+1);
282#endif // SIMDJSON_CHECK_EOF
283 return token.peek(delta);
284}
285
286simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
287#if SIMDJSON_CHECK_EOF
288 assert_more_tokens(delta+1);
289#endif // #if SIMDJSON_CHECK_EOF
290 return token.peek_length(delta);
291}
292
293simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
294 // todo: currently we require end-of-string buffering, but the following
295 // assert_valid_position should be turned on if/when we lift that condition.
296 // assert_valid_position(position);
297 // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
298 // is ON by default, we have no choice but to disable it for real with a comment.
299 return token.peek(position);
300}
301
302simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
303#if SIMDJSON_CHECK_EOF
304 assert_valid_position(position);
305#endif // SIMDJSON_CHECK_EOF
306 return token.peek_length(position);
307}
308simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept {
309#if SIMDJSON_CHECK_EOF
310 assert_valid_position(position);
311#endif // SIMDJSON_CHECK_EOF
312 return token.peek_root_length(position);
313}
314
315simdjson_inline token_position json_iterator::last_position() const noexcept {
316 // The following line fails under some compilers...
317 // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
318 // since it has side-effects.
319 uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
320 SIMDJSON_ASSUME(n_structural_indexes > 0);
321 return &parser->implementation->structural_indexes[n_structural_indexes - 1];
322}
323simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
324 return token.peek(last_position());
325}
326
327simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
328 SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
329 SIMDJSON_ASSUME(_depth == parent_depth + 1);
330 _depth = parent_depth;
331}
332
333simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
334 SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
335 SIMDJSON_ASSUME(_depth == child_depth - 1);
336 _depth = child_depth;
337}
338
339simdjson_inline depth_t json_iterator::depth() const noexcept {
340 return _depth;
341}
342
343simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
344 return _string_buf_loc;
345}
346
347simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
348 SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
349 logger::log_error(*this, message);
350 error = _error;
351 return error;
352}
353
354simdjson_inline token_position json_iterator::position() const noexcept {
355 return token.position();
356}
357
358simdjson_inline simdjson_result<std::string_view> json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
359#if SIMDJSON_DEVELOPMENT_CHECKS
360 auto result = parser->unescape(in, _string_buf_loc, allow_replacement);
361 SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
362 return result;
363#else
364 return parser->unescape(in, _string_buf_loc, allow_replacement);
365#endif
366}
367
368simdjson_inline simdjson_result<std::string_view> json_iterator::unescape_wobbly(raw_json_string in) noexcept {
369#if SIMDJSON_DEVELOPMENT_CHECKS
370 auto result = parser->unescape_wobbly(in, _string_buf_loc);
371 SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
372 return result;
373#else
374 return parser->unescape_wobbly(in, _string_buf_loc);
375#endif
376}
377
378simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
379 SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
380 SIMDJSON_ASSUME(_depth == child_depth - 1);
381#if SIMDJSON_DEVELOPMENT_CHECKS
382#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
383 SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
384 SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]);
385#endif
386#endif
387 token.set_position(position);
388 _depth = child_depth;
389}
390
391simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept {
392 if (*peek() == c) {
393 return_current_and_advance();
394 return SUCCESS;
395 }
396 return TAPE_ERROR;
397}
398
399#if SIMDJSON_DEVELOPMENT_CHECKS
400
401simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept {
402 SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
403 return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0;
404}
405
406simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept {
407 SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
408 if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; }
409}
410
411#endif
412
413
414simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
415 SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
416 logger::log_error(*this, message);
417 return _error;
418}
419
420
421simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept {
422 // This function is not expected to be called in performance-sensitive settings.
423 // Let us guard against silly cases:
424 if((N < max_len) || (N == 0)) { return false; }
425 // Copy to the buffer.
426 std::memcpy(tmpbuf, json, max_len);
427 if(N > max_len) { // We pad whatever remains with ' '.
428 std::memset(tmpbuf + max_len, ' ', N - max_len);
429 }
430 return true;
431}
432
433} // namespace ondemand
434} // namespace SIMDJSON_IMPLEMENTATION
435} // namespace simdjson
436
437namespace simdjson {
438
439simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept
440 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(value)) {}
441simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(error_code error) noexcept
442 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(error) {}
443
444} // namespace simdjson
445
446#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
int32_t depth_t
Represents the depth of a JSON value (number of nested arrays/objects).
Definition base.h:18
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition error-inl.h:25
error_code
All possible errors returned by simdjson.
Definition error.h:19
@ INCORRECT_TYPE
JSON element has a different type than user expected.
Definition error.h:37
@ OUT_OF_BOUNDS
Attempted to access location outside of document.
Definition error.h:50
@ TAPE_ERROR
Something went wrong, this is a generic error. Fatal/unrecoverable error.
Definition error.h:23
@ NO_SUCH_FIELD
JSON field not found in object.
Definition error.h:40
@ SUCCESS
No error.
Definition error.h:20
@ INCOMPLETE_ARRAY_OR_OBJECT
The document ends early. Fatal/unrecoverable error.
Definition error.h:48
@ UNINITIALIZED
unknown error, or uninitialized document
Definition error.h:32