simdjson  3.11.0
Ridiculously Fast JSON
parser-inl.h
1 #ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
2 
3 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
4 #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
5 #include "simdjson/padded_string.h"
6 #include "simdjson/padded_string_view.h"
7 #include "simdjson/implementation.h"
8 #include "simdjson/internal/dom_parser_implementation.h"
9 #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY
10 #include "simdjson/generic/ondemand/base.h"
11 #include "simdjson/generic/ondemand/document_stream.h"
12 #include "simdjson/generic/ondemand/parser.h"
13 #include "simdjson/generic/ondemand/raw_json_string.h"
14 #endif // SIMDJSON_CONDITIONAL_INCLUDE
15 
16 namespace simdjson {
17 namespace SIMDJSON_IMPLEMENTATION {
18 namespace ondemand {
19 
20 simdjson_inline parser::parser(size_t max_capacity) noexcept
21  : _max_capacity{max_capacity} {
22 }
23 
24 simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept {
25  if (new_capacity > max_capacity()) { return CAPACITY; }
26  if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; }
27 
28  // string_capacity copied from document::allocate
29  _capacity = 0;
30  size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64);
31  string_buf.reset(new (std::nothrow) uint8_t[string_capacity]);
32 #if SIMDJSON_DEVELOPMENT_CHECKS
33  start_positions.reset(new (std::nothrow) token_position[new_max_depth]);
34 #endif
35  if (implementation) {
36  SIMDJSON_TRY( implementation->set_capacity(new_capacity) );
37  SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) );
38  } else {
39  SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) );
40  }
41  _capacity = new_capacity;
42  _max_depth = new_max_depth;
43  return SUCCESS;
44 }
#if SIMDJSON_DEVELOPMENT_CHECKS
/**
 * Development-only bounds check for a position in the string buffer.
 *
 * @param string_buf_loc Position to test.
 * @return true if string_buf_loc is before the start of string_buf, or at an
 *         offset of capacity() bytes or more from it; false otherwise.
 */
simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept {
  const uint8_t *const buffer_start = string_buf.get();
  if (string_buf_loc < buffer_start) { return true; }
  const size_t offset = size_t(string_buf_loc - buffer_start);
  return offset >= capacity();
}
#endif
50 
51 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(padded_string_view json) & noexcept {
52  if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }
53 
54  json.remove_utf8_bom();
55 
56  // Allocate if needed
57  if (capacity() < json.length() || !string_buf) {
58  SIMDJSON_TRY( allocate(json.length(), max_depth()) );
59  }
60 
61  // Run stage 1.
62  SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular) );
63  return document::start({ reinterpret_cast<const uint8_t *>(json.data()), this });
64 }
65 
#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
/**
 * Experimental variant of iterate() that does not fail when stage 1 reports
 * UNCLOSED_STRING; every other stage 1 error is still returned.
 *
 * @param json The JSON text; must report at least SIMDJSON_PADDING bytes of padding.
 * @return INSUFFICIENT_PADDING if the padding is too small, any error from
 *         allocate(), any stage 1 error other than UNCLOSED_STRING, otherwise
 *         the result of document::start().
 */
simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept {
  if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }

  json.remove_utf8_bom();

  // (Re)allocate when there is no string buffer yet or the document does not fit.
  const bool must_allocate = !string_buf || capacity() < json.length();
  if (must_allocate) {
    SIMDJSON_TRY( allocate(json.length(), max_depth()) );
  }

  // Stage 1 runs over the raw bytes of the input.
  const uint8_t *const buf = reinterpret_cast<const uint8_t *>(json.data());
  const simdjson::error_code stage1_error = implementation->stage1(buf, json.length(), stage1_mode::regular);
  // An unclosed string is tolerated here; anything else aborts.
  if (stage1_error && stage1_error != UNCLOSED_STRING) { return stage1_error; }

  // NOTE(review): the trailing `true` presumably marks the iterator as
  // accepting incomplete input - confirm against json_iterator's constructor.
  return document::start({ buf, this, true });
}
#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON
86 
87 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const char *json, size_t len, size_t allocated) & noexcept {
88  return iterate(padded_string_view(json, len, allocated));
89 }
90 
91 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept {
92  return iterate(padded_string_view(json, len, allocated));
93 }
94 
95 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string_view json, size_t allocated) & noexcept {
96  return iterate(padded_string_view(json, allocated));
97 }
98 
99 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(std::string &json) & noexcept {
100  if(json.capacity() - json.size() < SIMDJSON_PADDING) {
101  json.reserve(json.size() + SIMDJSON_PADDING);
102  }
103  return iterate(padded_string_view(json));
104 }
105 
106 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const std::string &json) & noexcept {
107  return iterate(padded_string_view(json));
108 }
109 
110 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string_view> &result) & noexcept {
111  // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
112  SIMDJSON_TRY( result.error() );
113  padded_string_view json = result.value_unsafe();
114  return iterate(json);
115 }
116 
117 simdjson_warn_unused simdjson_inline simdjson_result<document> parser::iterate(const simdjson_result<padded_string> &result) & noexcept {
118  // We don't presently have a way to temporarily get a const T& from a simdjson_result<T> without throwing an exception
119  SIMDJSON_TRY( result.error() );
120  const padded_string &json = result.value_unsafe();
121  return iterate(json);
122 }
123 
124 simdjson_warn_unused simdjson_inline simdjson_result<json_iterator> parser::iterate_raw(padded_string_view json) & noexcept {
125  if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; }
126 
127  json.remove_utf8_bom();
128 
129  // Allocate if needed
130  if (capacity() < json.length()) {
131  SIMDJSON_TRY( allocate(json.length(), max_depth()) );
132  }
133 
134  // Run stage 1.
135  SIMDJSON_TRY( implementation->stage1(reinterpret_cast<const uint8_t *>(json.data()), json.length(), stage1_mode::regular) );
136  return json_iterator(reinterpret_cast<const uint8_t *>(json.data()), this);
137 }
138 
139 inline simdjson_result<document_stream> parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept {
140  if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
141  if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
142  buf += 3;
143  len -= 3;
144  }
145  if(allow_comma_separated && batch_size < len) { batch_size = len; }
146  return document_stream(*this, buf, len, batch_size, allow_comma_separated);
147 }
148 inline simdjson_result<document_stream> parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept {
149  return iterate_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size, allow_comma_separated);
150 }
151 inline simdjson_result<document_stream> parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept {
152  return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated);
153 }
154 inline simdjson_result<document_stream> parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept {
155  return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated);
156 }
157 
158 simdjson_pure simdjson_inline size_t parser::capacity() const noexcept {
159  return _capacity;
160 }
161 simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept {
162  return _max_capacity;
163 }
164 simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
165  return _max_depth;
166 }
167 
168 simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
169  if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) {
170  _max_capacity = max_capacity;
171  } else {
172  _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY;
173  }
174 }
175 
176 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept {
177  uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement);
178  if (!end) { return STRING_ERROR; }
179  std::string_view result(reinterpret_cast<const char *>(dst), end-dst);
180  dst = end;
181  return result;
182 }
183 
184 simdjson_inline simdjson_warn_unused simdjson_result<std::string_view> parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept {
185  uint8_t *end = implementation->parse_wobbly_string(in.buf, dst);
186  if (!end) { return STRING_ERROR; }
187  std::string_view result(reinterpret_cast<const char *>(dst), end-dst);
188  dst = end;
189  return result;
190 }
191 
192 } // namespace ondemand
193 } // namespace SIMDJSON_IMPLEMENTATION
194 } // namespace simdjson
195 
196 namespace simdjson {
197 
198 simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(SIMDJSON_IMPLEMENTATION::ondemand::parser &&value) noexcept
199  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(std::forward<SIMDJSON_IMPLEMENTATION::ondemand::parser>(value)) {}
200 simdjson_inline simdjson_result<SIMDJSON_IMPLEMENTATION::ondemand::parser>::simdjson_result(error_code error) noexcept
201  : implementation_simdjson_result_base<SIMDJSON_IMPLEMENTATION::ondemand::parser>(error) {}
202 
203 } // namespace simdjson
204 
205 #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H
simdjson_warn_unused simdjson_result< document > iterate(padded_string_view json) &noexcept
Start iterating an on-demand JSON document.
Definition: parser-inl.h:51
simdjson_pure simdjson_inline size_t max_capacity() const noexcept
The maximum capacity of this parser (the largest document it is allowed to process).
Definition: parser-inl.h:161
simdjson_inline simdjson_result< std::string_view > unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement=false) const noexcept
Unescape this JSON string, replacing \\ with \, \n with newline, etc.
Definition: parser-inl.h:176
simdjson_pure simdjson_inline size_t max_depth() const noexcept
The maximum depth of this parser (the most deeply nested objects and arrays it can process).
Definition: parser-inl.h:164
simdjson_pure simdjson_inline size_t capacity() const noexcept
The capacity of this parser (the largest document it can process).
Definition: parser-inl.h:158
parser(size_t max_capacity=SIMDJSON_MAXSIZE_BYTES) noexcept
Create a JSON parser.
Definition: parser-inl.h:20
simdjson_inline simdjson_result< std::string_view > unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept
Unescape this JSON string, replacing \\ with \, \n with newline, etc.
Definition: parser-inl.h:184
simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept
Ensure this parser has enough memory to process JSON documents up to capacity bytes in length and max_depth depth.
Definition: parser-inl.h:24
simdjson_result< document_stream > iterate_many(const uint8_t *buf, size_t len, size_t batch_size=DEFAULT_BATCH_SIZE, bool allow_comma_separated=false) noexcept
Parse a buffer containing many JSON documents.
Definition: parser-inl.h:139
A string escaped per JSON rules, terminated with quote (").
An implementation of simdjson for a particular CPU architecture.
User-provided string that promises it has extra padded bytes at the end for use with parser::parse().
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8
error_code
All possible errors returned by simdjson.
Definition: error.h:19
@ UNCLOSED_STRING
missing quote at the end
Definition: error.h:35
@ CAPACITY
This parser can't support a document that big.
Definition: error.h:21
@ STRING_ERROR
Problem while parsing a string.
Definition: error.h:25
@ SUCCESS
No error.
Definition: error.h:20
@ INSUFFICIENT_PADDING
The JSON doesn't have enough padding for simdjson to safely parse it.
Definition: error.h:47
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr< const implementation > & get_active_implementation()
The active implementation.
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition: base.h:32
String with extra allocation for ease of use with parser::parse()
Definition: padded_string.h:23
The result of a simdjson operation that could fail.
Definition: error.h:215