simdjson 4.3.1
Ridiculously Fast JSON
Loading...
Searching...
No Matches
json_iterator-inl.h
1#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
2
3#ifndef SIMDJSON_CONDITIONAL_INCLUDE
4#define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
5#include "simdjson/internal/dom_parser_implementation.h"
6#include "simdjson/generic/ondemand/base.h"
7#include "simdjson/generic/ondemand/json_iterator.h"
8#include "simdjson/generic/ondemand/parser.h"
9#include "simdjson/generic/ondemand/raw_json_string.h"
10#include "simdjson/generic/ondemand/logger-inl.h"
11#include "simdjson/generic/ondemand/parser-inl.h"
12#include "simdjson/generic/ondemand/token_iterator-inl.h"
13#endif // SIMDJSON_CONDITIONAL_INCLUDE
14
15namespace simdjson {
16namespace SIMDJSON_IMPLEMENTATION {
17namespace ondemand {
18
19simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept
20 : token(std::forward<token_iterator>(other.token)),
21 parser{other.parser},
22 _string_buf_loc{other._string_buf_loc},
23 error{other.error},
24 _depth{other._depth},
25 _root{other._root},
26 _streaming{other._streaming}
27{
28 other.parser = nullptr;
29}
30simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept {
31 token = other.token;
32 parser = other.parser;
33 _string_buf_loc = other._string_buf_loc;
34 error = other.error;
35 _depth = other._depth;
36 _root = other._root;
37 _streaming = other._streaming;
38 other.parser = nullptr;
39 return *this;
40}
41
42simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept
43 : token(buf, &_parser->implementation->structural_indexes[0]),
44 parser{_parser},
45 _string_buf_loc{parser->string_buf.get()},
46 _depth{1},
47 _root{parser->implementation->structural_indexes.get()},
48 _streaming{false}
49
50{
51 logger::log_headers();
52#if SIMDJSON_CHECK_EOF
53 assert_more_tokens();
54#endif
55}
56
#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
// Same as the two-argument constructor, except that the streaming flag is taken
// from the caller instead of being fixed to false. Only compiled when
// SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON is defined.
simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept
  : token(buf, &_parser->implementation->structural_indexes[0]),
    parser(_parser),
    _string_buf_loc(parser->string_buf.get()),
    _depth(1),
    _root(parser->implementation->structural_indexes.get()),
    _streaming(streaming)
{
  logger::log_headers();
#if SIMDJSON_CHECK_EOF
  // With EOF checking compiled in, assert up front that a token is available.
  assert_more_tokens();
#endif
}
#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
73
74inline void json_iterator::rewind() noexcept {
75 token.set_position( root_position() );
76 logger::log_headers(); // We start again
77 _string_buf_loc = parser->string_buf.get();
78 _depth = 1;
79}
80
81inline bool json_iterator::balanced() const noexcept {
82 token_iterator ti(token);
83 int32_t count{0};
84 ti.set_position( root_position() );
85 while(ti.peek() <= peek_last()) {
86 switch (*ti.return_current_and_advance())
87 {
88 case '[': case '{':
89 count++;
90 break;
91 case ']': case '}':
92 count--;
93 break;
94 default:
95 break;
96 }
97 }
98 return count == 0;
99}
100
101
102// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller
103// relating depth and parent_depth, which is a desired effect. The warning does not show up if the
104// skip_child() function is not marked inline).
105SIMDJSON_PUSH_DISABLE_WARNINGS
106SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING
107simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept {
108 if (depth() <= parent_depth) { return SUCCESS; }
109 switch (*return_current_and_advance()) {
110 // TODO consider whether matching braces is a requirement: if non-matching braces indicates
111 // *missing* braces, then future lookups are not in the object/arrays they think they are,
112 // violating the rule "validate enough structure that the user can be confident they are
113 // looking at the right values."
114 // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
115
116 // For the first open array/object in a value, we've already incremented depth, so keep it the same
117 // We never stop at colon, but if we did, it wouldn't affect depth
118 case '[': case '{': case ':':
119 logger::log_start_value(*this, "skip");
120 break;
121 // If there is a comma, we have just finished a value in an array/object, and need to get back in
122 case ',':
123 logger::log_value(*this, "skip");
124 break;
125 // ] or } means we just finished a value and need to jump out of the array/object
126 case ']': case '}':
127 logger::log_end_value(*this, "skip");
128 _depth--;
129 if (depth() <= parent_depth) { return SUCCESS; }
130#if SIMDJSON_CHECK_EOF
131 // If there are no more tokens, the parent is incomplete.
132 if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); }
133#endif // SIMDJSON_CHECK_EOF
134 break;
135 case '"':
136 if(*peek() == ':') {
137 // We are at a key!!!
138 // This might happen if you just started an object and you skip it immediately.
139 // Performance note: it would be nice to get rid of this check as it is somewhat
140 // expensive.
141 // https://github.com/simdjson/simdjson/issues/1742
142 logger::log_value(*this, "key");
143 return_current_and_advance(); // eat up the ':'
144 break; // important!!!
145 }
146 simdjson_fallthrough;
147 // Anything else must be a scalar value
148 default:
149 // For the first scalar, we will have incremented depth already, so we decrement it here.
150 logger::log_value(*this, "skip");
151 _depth--;
152 if (depth() <= parent_depth) { return SUCCESS; }
153 break;
154 }
155
156 // Now that we've considered the first value, we only increment/decrement for arrays/objects
157 while (position() < end_position()) {
158 switch (*return_current_and_advance()) {
159 case '[': case '{':
160 logger::log_start_value(*this, "skip");
161 _depth++;
162 break;
163 // TODO consider whether matching braces is a requirement: if non-matching braces indicates
164 // *missing* braces, then future lookups are not in the object/arrays they think they are,
165 // violating the rule "validate enough structure that the user can be confident they are
166 // looking at the right values."
167 // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth
168 case ']': case '}':
169 logger::log_end_value(*this, "skip");
170 _depth--;
171 if (depth() <= parent_depth) { return SUCCESS; }
172 break;
173 default:
174 logger::log_value(*this, "skip", "");
175 break;
176 }
177 }
178
179 return report_error(TAPE_ERROR, "not enough close braces");
180}
181
182SIMDJSON_POP_DISABLE_WARNINGS
183
184simdjson_inline bool json_iterator::at_root() const noexcept {
185 return position() == root_position();
186}
187
188simdjson_inline bool json_iterator::is_single_token() const noexcept {
189 return parser->implementation->n_structural_indexes == 1;
190}
191
192simdjson_inline bool json_iterator::streaming() const noexcept {
193 return _streaming;
194}
195
196simdjson_inline token_position json_iterator::root_position() const noexcept {
197 return _root;
198}
199
200simdjson_inline void json_iterator::assert_at_document_depth() const noexcept {
201 SIMDJSON_ASSUME( _depth == 1 );
202}
203
204simdjson_inline void json_iterator::assert_at_root() const noexcept {
205 SIMDJSON_ASSUME( _depth == 1 );
206#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
207 // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
208 // has side effects that will be discarded.
209 SIMDJSON_ASSUME( token.position() == _root );
210#endif
211}
212
213simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept {
214 assert_valid_position(token._position + required_tokens - 1);
215}
216
217simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept {
218 (void)position; // Suppress unused parameter warning
219#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
220 SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] );
221 SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] );
222#endif
223}
224
225simdjson_inline bool json_iterator::at_end() const noexcept {
226 return position() == end_position();
227}
228simdjson_inline token_position json_iterator::end_position() const noexcept {
229 uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
230 return &parser->implementation->structural_indexes[n_structural_indexes];
231}
232
233inline std::string json_iterator::to_string() const noexcept {
234 if( !is_alive() ) { return "dead json_iterator instance"; }
235 const char * current_structural = reinterpret_cast<const char *>(token.peek());
236 return std::string("json_iterator [ depth : ") + std::to_string(_depth)
237 + std::string(", structural : '") + std::string(current_structural,1)
238 + std::string("', offset : ") + std::to_string(token.current_offset())
239 + std::string("', error : ") + error_message(error)
240 + std::string(" ]");
241}
242
243inline simdjson_result<const char *> json_iterator::current_location() const noexcept {
244 if (!is_alive()) { // Unrecoverable error
245 if (!at_root()) {
246 return reinterpret_cast<const char *>(token.peek(-1));
247 } else {
248 return reinterpret_cast<const char *>(token.peek());
249 }
250 }
251 if (at_end()) {
252 return OUT_OF_BOUNDS;
253 }
254 return reinterpret_cast<const char *>(token.peek());
255}
256
257simdjson_inline bool json_iterator::is_alive() const noexcept {
258 return parser;
259}
260
261simdjson_inline void json_iterator::abandon() noexcept {
262 parser = nullptr;
263 _depth = 0;
264}
265
266simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept {
267#if SIMDJSON_CHECK_EOF
268 assert_more_tokens();
269#endif // SIMDJSON_CHECK_EOF
270 return token.return_current_and_advance();
271}
272
273simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept {
274 // deliberately done without safety guard:
275 return token.peek();
276}
277
278simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept {
279#if SIMDJSON_CHECK_EOF
280 assert_more_tokens(delta+1);
281#endif // SIMDJSON_CHECK_EOF
282 return token.peek(delta);
283}
284
285simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept {
286#if SIMDJSON_CHECK_EOF
287 assert_more_tokens(delta+1);
288#endif // #if SIMDJSON_CHECK_EOF
289 return token.peek_length(delta);
290}
291
292simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept {
293 // todo: currently we require end-of-string buffering, but the following
294 // assert_valid_position should be turned on if/when we lift that condition.
295 // assert_valid_position(position);
296 // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF
297 // is ON by default, we have no choice but to disable it for real with a comment.
298 return token.peek(position);
299}
300
301simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept {
302#if SIMDJSON_CHECK_EOF
303 assert_valid_position(position);
304#endif // SIMDJSON_CHECK_EOF
305 return token.peek_length(position);
306}
307simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept {
308#if SIMDJSON_CHECK_EOF
309 assert_valid_position(position);
310#endif // SIMDJSON_CHECK_EOF
311 return token.peek_root_length(position);
312}
313
314simdjson_inline token_position json_iterator::last_position() const noexcept {
315 // The following line fails under some compilers...
316 // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0);
317 // since it has side-effects.
318 uint32_t n_structural_indexes{parser->implementation->n_structural_indexes};
319 SIMDJSON_ASSUME(n_structural_indexes > 0);
320 return &parser->implementation->structural_indexes[n_structural_indexes - 1];
321}
322simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept {
323 return token.peek(last_position());
324}
325
326simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept {
327 SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1);
328 SIMDJSON_ASSUME(_depth == parent_depth + 1);
329 _depth = parent_depth;
330}
331
332simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept {
333 SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
334 SIMDJSON_ASSUME(_depth == child_depth - 1);
335 _depth = child_depth;
336}
337
338simdjson_inline depth_t json_iterator::depth() const noexcept {
339 return _depth;
340}
341
342simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept {
343 return _string_buf_loc;
344}
345
346simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept {
347 SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD);
348 logger::log_error(*this, message);
349 error = _error;
350 return error;
351}
352
353simdjson_inline token_position json_iterator::position() const noexcept {
354 return token.position();
355}
356
357simdjson_inline simdjson_result<std::string_view> json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept {
358#if SIMDJSON_DEVELOPMENT_CHECKS
359 auto result = parser->unescape(in, _string_buf_loc, allow_replacement);
360#if !defined(SIMDJSON_VISUAL_STUDIO) && !defined(SIMDJSON_CLANG_VISUAL_STUDIO)
361 // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
362 // has side effects that will be discarded.
363 SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
364#endif // !defined(SIMDJSON_VISUAL_STUDIO) && !defined(SIMDJSON_CLANG_VISUAL_STUDIO)
365 return result;
366#else
367 return parser->unescape(in, _string_buf_loc, allow_replacement);
368#endif
369}
370
371simdjson_inline simdjson_result<std::string_view> json_iterator::unescape_wobbly(raw_json_string in) noexcept {
372#if SIMDJSON_DEVELOPMENT_CHECKS
373 auto result = parser->unescape_wobbly(in, _string_buf_loc);
374#if !defined(SIMDJSON_VISUAL_STUDIO) && !defined(SIMDJSON_CLANG_VISUAL_STUDIO)
375 // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument
376 // has side effects that will be discarded.
377 SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc));
378#endif // !defined(SIMDJSON_VISUAL_STUDIO) && !defined(SIMDJSON_CLANG_VISUAL_STUDIO)
379 return result;
380#else
381 return parser->unescape_wobbly(in, _string_buf_loc);
382#endif
383}
384
385simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept {
386 SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX);
387 SIMDJSON_ASSUME(_depth == child_depth - 1);
388#if SIMDJSON_DEVELOPMENT_CHECKS
389#ifndef SIMDJSON_CLANG_VISUAL_STUDIO
390 SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth());
391 SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]);
392#endif
393#endif
394 token.set_position(position);
395 _depth = child_depth;
396}
397
398simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept {
399 if (*peek() == c) {
400 return_current_and_advance();
401 return SUCCESS;
402 }
403 return TAPE_ERROR;
404}
405
406#if SIMDJSON_DEVELOPMENT_CHECKS
407
408simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept {
409 SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
410 return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0;
411}
412
413simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept {
414 SIMDJSON_ASSUME(size_t(depth) < parser->max_depth());
415 if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; }
416}
417
418#endif
419
420
421simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept {
422 SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD);
423 logger::log_error(*this, message);
424 return _error;
425}
426
427
428simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept {
429 // This function is not expected to be called in performance-sensitive settings.
430 // Let us guard against silly cases:
431 if((N < max_len) || (N == 0)) { return false; }
432 // Copy to the buffer.
433 std::memcpy(tmpbuf, json, max_len);
434 if(N > max_len) { // We pad whatever remains with ' '.
435 std::memset(tmpbuf + max_len, ' ', N - max_len);
436 }
437 return true;
438}
439
440} // namespace ondemand
441} // namespace SIMDJSON_IMPLEMENTATION
442} // namespace simdjson
443
444namespace simdjson {
445
446simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept
447 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(value)) {}
448simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>::simdjson_result(error_code error) noexcept
449 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::json_iterator>(error) {}
450
451} // namespace simdjson
452
453#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H
int32_t depth_t
Represents the depth of a JSON value (number of nested arrays/objects).
Definition base.h:18
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition error-inl.h:25
error_code
All possible errors returned by simdjson.
Definition error.h:19
@ INCORRECT_TYPE
JSON element has a different type than user expected.
Definition error.h:37
@ OUT_OF_BOUNDS
Attempted to access location outside of document.
Definition error.h:50
@ TAPE_ERROR
Something went wrong, this is a generic error. Fatal/unrecoverable error.
Definition error.h:23
@ NO_SUCH_FIELD
JSON field not found in object.
Definition error.h:40
@ SUCCESS
No error.
Definition error.h:20
@ INCOMPLETE_ARRAY_OR_OBJECT
The document ends early. Fatal/unrecoverable error.
Definition error.h:48
@ UNINITIALIZED
unknown error, or uninitialized document
Definition error.h:32