simdjson 4.1.0
Ridiculously Fast JSON
Loading...
Searching...
No Matches
parser-inl.h
1#ifndef SIMDJSON_PARSER_INL_H
2#define SIMDJSON_PARSER_INL_H
3
4#include "simdjson/dom/base.h"
5#include "simdjson/dom/document_stream.h"
6#include "simdjson/implementation.h"
7#include "simdjson/internal/dom_parser_implementation.h"
8
9#include "simdjson/error-inl.h"
10#include "simdjson/padded_string-inl.h"
11#include "simdjson/dom/document_stream-inl.h"
12#include "simdjson/dom/element-inl.h"
13
14#include <climits>
15#include <cstring> /* memcmp */
16
17namespace simdjson {
18namespace dom {
19
20//
21// parser inline implementation
22//
23simdjson_inline parser::parser(size_t max_capacity) noexcept
24 : _max_capacity{max_capacity},
25 loaded_bytes(nullptr) {
26}
27simdjson_inline parser::parser(parser &&other) noexcept = default;
28simdjson_inline parser &parser::operator=(parser &&other) noexcept = default;
29
30inline bool parser::is_valid() const noexcept { return valid; }
31inline int parser::get_error_code() const noexcept { return error; }
32inline std::string parser::get_error_message() const noexcept { return error_message(error); }
33
34inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
35 return valid ? doc.dump_raw_tape(os) : false;
36}
37
38inline simdjson_result<size_t> parser::read_file(std::string_view path) noexcept {
39 // Open the file
40 SIMDJSON_PUSH_DISABLE_WARNINGS
41 SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
42 std::FILE *fp = std::fopen(path.data(), "rb");
43 SIMDJSON_POP_DISABLE_WARNINGS
44
45 if (fp == nullptr) {
46 return IO_ERROR;
47 }
48
49 // Get the file size
50 int ret;
51#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
52 ret = _fseeki64(fp, 0, SEEK_END);
53#else
54 ret = std::fseek(fp, 0, SEEK_END);
55#endif // _WIN64
56 if(ret < 0) {
57 std::fclose(fp);
58 return IO_ERROR;
59 }
60#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
61 __int64 len = _ftelli64(fp);
62 if(len == -1L) {
63 std::fclose(fp);
64 return IO_ERROR;
65 }
66#else
67 long len = std::ftell(fp);
68 if((len < 0) || (len == LONG_MAX)) {
69 std::fclose(fp);
70 return IO_ERROR;
71 }
72#endif
73
74 // Make sure we have enough capacity to load the file
75 if (_loaded_bytes_capacity < size_t(len)) {
76 loaded_bytes.reset( internal::allocate_padded_buffer(len) );
77 if (!loaded_bytes) {
78 std::fclose(fp);
79 return MEMALLOC;
80 }
81 _loaded_bytes_capacity = len;
82 }
83
84 // Read the string
85 std::rewind(fp);
86 size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp);
87 if (std::fclose(fp) != 0 || bytes_read != size_t(len)) {
88 return IO_ERROR;
89 }
90
91 return bytes_read;
92}
93
94inline simdjson_result<element> parser::load(std::string_view path) & noexcept {
95 return load_into_document(doc, path);
96}
97
98inline simdjson_result<element> parser::load_into_document(document& provided_doc, std::string_view path) & noexcept {
99 size_t len;
100 auto _error = read_file(path).get(len);
101 if (_error) { return _error; }
102 return parse_into_document(provided_doc, loaded_bytes.get(), len, false);
103}
104
105inline simdjson_result<document_stream> parser::load_many(std::string_view path, size_t batch_size) noexcept {
106 size_t len;
107 auto _error = read_file(path).get(len);
108 if (_error) { return _error; }
109 if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
110 return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size);
111}
112
113inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
114 // Important: we need to ensure that document has enough capacity.
115 // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!!
116 error_code _error = ensure_capacity(provided_doc, len);
117 if (_error) { return _error; }
118 if (realloc_if_needed) {
119 // Make sure we have enough capacity to copy len bytes
120 if (!loaded_bytes || _loaded_bytes_capacity < len) {
121 loaded_bytes.reset( internal::allocate_padded_buffer(len) );
122 if (!loaded_bytes) {
123 return MEMALLOC;
124 }
125 _loaded_bytes_capacity = len;
126 }
127 std::memcpy(static_cast<void *>(loaded_bytes.get()), buf, len);
128 buf = reinterpret_cast<const uint8_t*>(loaded_bytes.get());
129 }
130
131 if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
132 buf += 3;
133 len -= 3;
134 }
135 _error = implementation->parse(buf, len, provided_doc);
136
137 if (_error) { return _error; }
138
139 return provided_doc.root();
140}
141
142simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept {
143 return parse_into_document(provided_doc, reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
144}
145simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept {
146 return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
147}
148simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept {
149 return parse_into_document(provided_doc, s.data(), s.length(), false);
150}
151
152
153inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
154 return parse_into_document(doc, buf, len, realloc_if_needed);
155}
156
157simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
158 return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
159}
160simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
161 return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
162}
163simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
164 return parse(s.data(), s.length(), false);
165}
166simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept {
167 return parse(v.data(), v.length(), false);
168}
169
170inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
171 if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
172 if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
173 buf += 3;
174 len -= 3;
175 }
176 return document_stream(*this, buf, len, batch_size);
177}
178inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
179 return parse_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size);
180}
181inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept {
182 return parse_many(s.data(), s.length(), batch_size);
183}
184inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
185 return parse_many(s.data(), s.length(), batch_size);
186}
187
188simdjson_inline size_t parser::capacity() const noexcept {
189 return implementation ? implementation->capacity() : 0;
190}
191simdjson_inline size_t parser::max_capacity() const noexcept {
192 return _max_capacity;
193}
194simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
195 return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
196}
197
198simdjson_warn_unused
199inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
200 //
201 // Reallocate implementation if needed
202 //
203 error_code err;
204 if (implementation) {
205 err = implementation->allocate(capacity, max_depth);
206 } else {
207 err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation);
208 }
209 if (err) { return err; }
210 return SUCCESS;
211}
212
213#ifndef SIMDJSON_DISABLE_DEPRECATED_API
214simdjson_warn_unused
215inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
216 return !allocate(capacity, max_depth);
217}
218#endif // SIMDJSON_DISABLE_DEPRECATED_API
219
220inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
221 return ensure_capacity(doc, desired_capacity);
222}
223
224
225inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept {
226 // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes.
227 // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr.
228 if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; }
229 // If we don't have enough capacity, (try to) automatically bump it.
230 // If the document needs allocation, do it too.
231 // Both in one if statement to minimize unlikely branching.
232 //
233 // Note: we must make sure that this function is called if capacity() == 0. We do so because we
234 // ensure that desired_capacity > 0.
235 if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) {
236 if (desired_capacity > max_capacity()) {
237 return error = CAPACITY;
238 }
239 error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS;
240 error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS;
241 if(err1 != SUCCESS) { return error = err1; }
242 if(err2 != SUCCESS) { return error = err2; }
243 }
244 return SUCCESS;
245}
246
247simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
248 if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) {
249 _max_capacity = max_capacity;
250 } else {
251 _max_capacity = MINIMAL_DOCUMENT_CAPACITY;
252 }
253}
254
255} // namespace dom
256} // namespace simdjson
257
258#endif // SIMDJSON_PARSER_INL_H
A forward-only stream of documents.
A parsed JSON document.
Definition document.h:16
simdjson_inline parser(size_t max_capacity=SIMDJSON_MAXSIZE_BYTES) noexcept
Create a JSON parser.
Definition parser-inl.h:23
simdjson_result< element > parse(const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document and return a temporary reference to it.
Definition parser-inl.h:153
simdjson_result< element > parse_into_document(document &doc, const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document into a provided document instance and return a temporary reference to it.
Definition parser-inl.h:113
simdjson_result< document_stream > parse_many(const uint8_t *buf, size_t len, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Parse a buffer containing many JSON documents.
Definition parser-inl.h:170
simdjson_inline void set_max_capacity(size_t max_capacity) noexcept
Set max_capacity.
Definition parser-inl.h:247
simdjson_inline size_t max_capacity() const noexcept
The largest document this parser can automatically support.
Definition parser-inl.h:191
simdjson_result< document_stream > load_many(std::string_view path, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Load a file containing many JSON documents.
Definition parser-inl.h:105
simdjson_result< element > load_into_document(document &doc, std::string_view path) &noexcept
Load a JSON document from a file into a provided document instance and return a temporary reference to...
Definition parser-inl.h:98
simdjson_pure simdjson_inline size_t max_depth() const noexcept
The maximum level of nested object and arrays supported by this parser.
Definition parser-inl.h:194
simdjson_result< element > load(std::string_view path) &noexcept
Load a JSON document from a file and return a reference to it.
Definition parser-inl.h:94
simdjson_inline size_t capacity() const noexcept
The largest document this parser can support without reallocating.
Definition parser-inl.h:188
simdjson_inline parser & operator=(parser &&other) noexcept
Take another parser's buffers and state.
simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept
Ensure this parser has enough memory to process JSON documents up to capacity bytes in length and max...
Definition parser-inl.h:199
An implementation of simdjson for a particular CPU architecture.
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr< const implementation > & get_active_implementation()
The active implementation.
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition error-inl.h:25
constexpr size_t DEFAULT_MAX_DEPTH
By default, simdjson supports this many nested objects and arrays.
Definition base.h:40
error_code
All possible errors returned by simdjson.
Definition error.h:19
@ CAPACITY
This parser can't support a document that big.
Definition error.h:21
@ MEMALLOC
Error allocating memory, most likely out of memory.
Definition error.h:22
@ SUCCESS
No error.
Definition error.h:20
@ IO_ERROR
Error reading a file.
Definition error.h:41
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition base.h:33
The result of a simdjson operation that could fail.
Definition error.h:278
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.
Definition error-inl.h:163