simdjson 4.2.1
Ridiculously Fast JSON
Loading...
Searching...
No Matches
parser-inl.h
1#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
2
3#ifndef SIMDJSON_CONDITIONAL_INCLUDE
4#define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
5#include "simdjson/padded_string.h"
6#include "simdjson/padded_string_view.h"
7#include "simdjson/implementation.h"
8#include "simdjson/internal/dom_parser_implementation.h"
9#include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY
10#include "simdjson/generic/ondemand/base.h"
11#include "simdjson/generic/ondemand/document_stream.h"
12#include "simdjson/generic/ondemand/parser.h"
13#include "simdjson/generic/ondemand/raw_json_string.h"
14#endif // SIMDJSON_CONDITIONAL_INCLUDE
15
16namespace simdjson {
17namespace SIMDJSON_IMPLEMENTATION {
18namespace ondemand {
19
20simdjson_inline parser::parser(size_t max_capacity) noexcept
21 : _max_capacity{max_capacity} {
22}
23
24simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
25 if (new_capacity > max_capacity()) { return CAPACITY; }
26 if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }
27
28 // string_capacity copied from document::allocate
29 _capacity = 0;
30 size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
31 string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
32#if SIMDJSON_DEVELOPMENT_CHECKS
33 start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
34#endif
35 if (implementation) {
36 SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
37 SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
38 } else {
39 SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
40 }
41 _capacity = new_capacity;
42 _max_depth = new_max_depth;
43 return SUCCESS;
44}
45#if SIMDJSON_DEVELOPMENT_CHECKS
46simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept {
47 return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity());
48}
49#endif
50
51simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(padded_string_view json) & noexcept {
52 if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; }
53
54 json.remove_utf8_bom();
55
56 // Allocate if needed
57 if (capacity() < json.length() || !string_buf) {
58 SIMDJSON_TRY( allocate(json.length(), max_depth()) );
59 }
60
61 // Run stage 1.
62 SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular) );
63 return document::start({ reinterpret_cast<const uint8_t *>(json.data()), this });
64}
65
66#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
67simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept {
68 if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; }
69
70 json.remove_utf8_bom();
71
72 // Allocate if needed
73 if (capacity() < json.length() || !string_buf) {
74 SIMDJSON_TRY( allocate(json.length(), max_depth()) );
75 }
76
77 // Run stage 1.
78 const simdjson::error_code err = implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular);
79 if (err) {
80 if (err != UNCLOSED_STRING)
81 return err;
82 }
83 return document::start({ reinterpret_cast<const uint8_t *>(json.data()), this, true });
84}
85#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
86
87simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const char *json, size_t len, size_t allocated) & noexcept {
88 return iterate(padded_string_view(json, len, allocated));
89}
90
91simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept {
92 return iterate(padded_string_view(json, len, allocated));
93}
94
95simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string_view json, size_t allocated) & noexcept {
96 return iterate(padded_string_view(json, allocated));
97}
98
99simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string &json) & noexcept {
100 return iterate(pad_with_reserve(json));
101}
102
103simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const std::string &json) & noexcept {
104 return iterate(padded_string_view(json));
105}
106
107simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string_view> &result) & noexcept {
108 // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
109 SIMDJSON_TRY( result.error() );
110 padded_string_view json = result.value_unsafe();
111 return iterate(json);
112}
113
114simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string> &result) & noexcept {
115 // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
116 SIMDJSON_TRY( result.error() );
117 const padded_string &json = result.value_unsafe();
118 return iterate(json);
119}
120
121simdjson_warn_unused simdjson_inline simdjson_result<json_iterator> parser::iterate_raw(padded_string_view json) & noexcept {
122 if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; }
123
124 json.remove_utf8_bom();
125
126 // Allocate if needed
127 if (capacity() < json.length()) {
128 SIMDJSON_TRY( allocate(json.length(), max_depth()) );
129 }
130
131 // Run stage 1.
132 SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular) );
133 return json_iterator(reinterpret_cast<const uint8_t *>(json.data()), this);
134}
135
136inline simdjson_result<document_stream> parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept {
137 // Warning: no check is done on the buffer padding. We trust the user.
138 if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
139 if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
140 buf += 3;
141 len -= 3;
142 }
143 if(allow_comma_separated && batch_size < len) { batch_size = len; }
144 return document_stream(*this, buf, len, batch_size, allow_comma_separated);
145}
146
147inline simdjson_result<document_stream> parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept {
148 // Warning: no check is done on the buffer padding. We trust the user.
149 return iterate_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size, allow_comma_separated);
150}
151inline simdjson_result<document_stream> parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept {
152 if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; }
153 return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated);
154}
155inline simdjson_result<document_stream> parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept {
156 return iterate_many(padded_string_view(s), batch_size, allow_comma_separated);
157}
158inline simdjson_result<document_stream> parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept {
159 return iterate_many(padded_string_view(s), batch_size, allow_comma_separated);
160}
161inline simdjson_result<document_stream> parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept {
162 return iterate_many(pad(s), batch_size, allow_comma_separated);
163}
164simdjson_pure simdjson_inline size_t parser::capacity() const noexcept {
165 return _capacity;
166}
167simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept {
168 return _max_capacity;
169}
170simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
171 return _max_depth;
172}
173
174simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
175 if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) {
176 _max_capacity = max_capacity;
177 } else {
178 _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY;
179 }
180}
181
182simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept {
183 uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement);
184 if (!end) { return STRING_ERROR; }
185 std::string_view result(reinterpret_cast<const char *>(dst), end-dst);
186 dst = end;
187 return result;
188}
189
190simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept {
191 uint8_t *end = implementation->parse_wobbly_string(in.buf, dst);
192 if (!end) { return STRING_ERROR; }
193 std::string_view result(reinterpret_cast<const char *>(dst), end-dst);
194 dst = end;
195 return result;
196}
197
198simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() {
199 return *parser::get_parser_instance();
200}
201
202simdjson_inline bool release_parser() {
203 auto &parser_instance = parser::get_threadlocal_parser_if_exists();
204 if (parser_instance) {
205 parser_instance.reset();
206 return true;
207 }
208 return false;
209}
210
211simdjson_inline simdjson_warn_unused std::unique_ptr<ondemand::parser>& parser::get_parser_instance() {
212 std::unique_ptr<ondemand::parser>& parser_instance = get_threadlocal_parser_if_exists();
213 if (!parser_instance) {
214 parser_instance.reset(new ondemand::parser());
215 }
216 return parser_instance;
217}
218
219simdjson_inline simdjson_warn_unused std::unique_ptr<ondemand::parser>& parser::get_threadlocal_parser_if_exists() {
220 // @the-moisrex points out that this could be implemented with std::optional (C++17).
221 thread_local std::unique_ptr<ondemand::parser> parser_instance = nullptr;
222 return parser_instance;
223}
224
225
226} // namespace ondemand
227} // namespace SIMDJSON_IMPLEMENTATION
228} // namespace simdjson
229
230namespace simdjson {
231
232simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept
233 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::parser>(value)) {}
234simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(error_code error) noexcept
235 : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(error) {}
236
237} // namespace simdjson
238
239#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
simdjson_warn_unused simdjson_result< document > iterate(padded_string_view json) &noexcept
Start iterating an on-demand JSON document.
Definition parser-inl.h:51
simdjson_pure simdjson_inline size_t max_capacity() const noexcept
The maximum capacity of this parser (the largest document it is allowed to process).
Definition parser-inl.h:167
simdjson_inline simdjson_result< std::string_view > unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement=false) const noexcept
Unescape this JSON string, replacing \\ with \, \n with newline, etc.
Definition parser-inl.h:182
simdjson_pure simdjson_inline size_t max_depth() const noexcept
The maximum depth of this parser (the most deeply nested objects and arrays it can process).
Definition parser-inl.h:170
simdjson_pure simdjson_inline size_t capacity() const noexcept
The capacity of this parser (the largest document it can process).
Definition parser-inl.h:164
parser(size_t max_capacity=SIMDJSON_MAXSIZE_BYTES) noexcept
Create a JSON parser.
Definition parser-inl.h:20
simdjson_inline simdjson_result< std::string_view > unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept
Unescape this JSON string, replacing \\ with \, \n with newline, etc.
Definition parser-inl.h:190
simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept
Ensure this parser has enough memory to process JSON documents up to capacity bytes in length and max_depth levels of nesting.
Definition parser-inl.h:24
static simdjson_inline simdjson_warn_unused ondemand::parser & get_parser()
Get a unique parser instance corresponding to the current thread.
Definition parser-inl.h:198
simdjson_result< document_stream > iterate_many(const uint8_t *buf, size_t len, size_t batch_size=DEFAULT_BATCH_SIZE, bool allow_comma_separated=false) noexcept
Parse a buffer containing many JSON documents.
Definition parser-inl.h:136
A string escaped per JSON rules, terminated with quote (").
An implementation of simdjson for a particular CPU architecture.
User-provided string that promises it has extra padded bytes at the end for use with parser::parse().
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr< const implementation > & get_active_implementation()
The active implementation.
padded_string_view pad_with_reserve(std::string &s) noexcept
Create a padded_string_view from a string.
error_code
All possible errors returned by simdjson.
Definition error.h:19
@ UNCLOSED_STRING
missing quote at the end
Definition error.h:35
@ CAPACITY
This parser can't support a document that big.
Definition error.h:21
@ STRING_ERROR
Problem while parsing a string.
Definition error.h:25
@ SUCCESS
No error.
Definition error.h:20
@ INSUFFICIENT_PADDING
The JSON doesn't have enough padding for simdjson to safely parse it.
Definition error.h:47
padded_string_view pad(std::string &s) noexcept
Create a padded_string_view from a string.
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition base.h:33
The result of a simdjson operation that could fail.
Definition error.h:278