simdjson 4.6.3
Ridiculously Fast JSON
Loading...
Searching...
No Matches
parser-inl.h
1#ifndef SIMDJSON_PARSER_INL_H
2#define SIMDJSON_PARSER_INL_H
3
4#include "simdjson/dom/base.h"
5#include "simdjson/dom/document_stream.h"
6#include "simdjson/implementation.h"
7#include "simdjson/internal/dom_parser_implementation.h"
8
9#include "simdjson/error-inl.h"
10#include "simdjson/padded_string-inl.h"
11#include "simdjson/dom/document_stream-inl.h"
12#include "simdjson/dom/element-inl.h"
13
14#include <climits>
15#include <cstring> /* memcmp */
16
17namespace simdjson {
18namespace dom {
19
20//
21// parser inline implementation
22//
23simdjson_inline parser::parser(size_t max_capacity) noexcept
24 : _max_capacity{max_capacity},
25 loaded_bytes(nullptr) {
26}
27simdjson_inline parser::parser(parser &&other) noexcept = default;
28simdjson_inline parser &parser::operator=(parser &&other) noexcept = default;
29
30inline bool parser::is_valid() const noexcept { return valid; }
31inline int parser::get_error_code() const noexcept { return error; }
32inline std::string parser::get_error_message() const noexcept { return error_message(error); }
33
34inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
35 return valid ? doc.dump_raw_tape(os) : false;
36}
37
38inline simdjson_result<size_t> parser::read_file(std::string_view path) noexcept {
39 const std::string path_copy(path);
40 // Open the file
41 SIMDJSON_PUSH_DISABLE_WARNINGS
42 SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
43 std::FILE *fp = std::fopen(path_copy.c_str(), "rb");
44 SIMDJSON_POP_DISABLE_WARNINGS
45
46 if (fp == nullptr) {
47 return IO_ERROR;
48 }
49
50 // Get the file size
51 int ret;
52#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
53 ret = _fseeki64(fp, 0, SEEK_END);
54#else
55 ret = std::fseek(fp, 0, SEEK_END);
56#endif // _WIN64
57 if(ret < 0) {
58 std::fclose(fp);
59 return IO_ERROR;
60 }
61#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
62 __int64 len = _ftelli64(fp);
63 if(len == -1L) {
64 std::fclose(fp);
65 return IO_ERROR;
66 }
67#else
68 long len = std::ftell(fp);
69 if((len < 0) || (len == LONG_MAX)) {
70 std::fclose(fp);
71 return IO_ERROR;
72 }
73#endif
74
75 // Make sure we have enough capacity to load the file
76 if (_loaded_bytes_capacity < size_t(len)) {
77 loaded_bytes.reset( internal::allocate_padded_buffer(len) );
78 if (!loaded_bytes) {
79 std::fclose(fp);
80 return MEMALLOC;
81 }
82 _loaded_bytes_capacity = len;
83 }
84
85 // Read the string
86 std::rewind(fp);
87 size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp);
88 if (std::fclose(fp) != 0 || bytes_read != size_t(len)) {
89 return IO_ERROR;
90 }
91
92 return bytes_read;
93}
94
95inline simdjson_result<element> parser::load(std::string_view path) & noexcept {
96 return load_into_document(doc, path);
97}
98
99inline simdjson_result<element> parser::load_into_document(document& provided_doc, std::string_view path) & noexcept {
100 size_t len;
101 auto _error = read_file(path).get(len);
102 if (_error) { return _error; }
103 return parse_into_document(provided_doc, loaded_bytes.get(), len, false);
104}
105
106inline simdjson_result<document_stream> parser::load_many(std::string_view path, size_t batch_size) noexcept {
107 size_t len;
108 auto _error = read_file(path).get(len);
109 if (_error) { return _error; }
110 if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
111 return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size);
112}
113
114inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
115 // Important: we need to ensure that document has enough capacity.
116 // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!!
117 error_code _error = ensure_capacity(provided_doc, len);
118 if (_error) { return _error; }
119 if (realloc_if_needed) {
120 // Make sure we have enough capacity to copy len bytes
121 if (!loaded_bytes || _loaded_bytes_capacity < len) {
122 loaded_bytes.reset( internal::allocate_padded_buffer(len) );
123 if (!loaded_bytes) {
124 return MEMALLOC;
125 }
126 _loaded_bytes_capacity = len;
127 }
128 std::memcpy(static_cast<void *>(loaded_bytes.get()), buf, len);
129 buf = reinterpret_cast<const uint8_t*>(loaded_bytes.get());
130 }
131
132 if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
133 buf += 3;
134 len -= 3;
135 }
136 implementation->_number_as_string = _number_as_string;
137 _error = implementation->parse(buf, len, provided_doc);
138
139 if (_error) { return _error; }
140
141 return provided_doc.root();
142}
143
144simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept {
145 return parse_into_document(provided_doc, reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
146}
147simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept {
148 return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
149}
150simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept {
151 return parse_into_document(provided_doc, s.data(), s.length(), false);
152}
153
154
155inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
156 return parse_into_document(doc, buf, len, realloc_if_needed);
157}
158
159simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
160 return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
161}
162simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
163 return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
164}
165simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
166 return parse(s.data(), s.length(), false);
167}
168simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept {
169 return parse(v.data(), v.length(), false);
170}
171
172inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
173 if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
174 if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
175 buf += 3;
176 len -= 3;
177 }
178 return document_stream(*this, buf, len, batch_size);
179}
180inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
181 return parse_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size);
182}
183inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept {
184 return parse_many(s.data(), s.length(), batch_size);
185}
186inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
187 return parse_many(s.data(), s.length(), batch_size);
188}
189
190simdjson_inline size_t parser::capacity() const noexcept {
191 return implementation ? implementation->capacity() : 0;
192}
193simdjson_inline size_t parser::max_capacity() const noexcept {
194 return _max_capacity;
195}
196simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
197 return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
198}
199
200simdjson_warn_unused
201inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
202 //
203 // Reallocate implementation if needed
204 //
205 error_code err;
206 if (implementation) {
207 err = implementation->allocate(capacity, max_depth);
208 } else {
209 err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation);
210 }
211 if (err) { return err; }
212 return SUCCESS;
213}
214
215#ifndef SIMDJSON_DISABLE_DEPRECATED_API
216simdjson_warn_unused
217inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
218 return !allocate(capacity, max_depth);
219}
220#endif // SIMDJSON_DISABLE_DEPRECATED_API
221
222inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
223 return ensure_capacity(doc, desired_capacity);
224}
225
226
227inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept {
228 // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes.
229 // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr.
230 if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; }
231 // If we don't have enough capacity, (try to) automatically bump it.
232 // If the document needs allocation, do it too.
233 // Both in one if statement to minimize unlikely branching.
234 //
235 // Note: we must make sure that this function is called if capacity() == 0. We do so because we
236 // ensure that desired_capacity > 0.
237 if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) {
238 if (desired_capacity > max_capacity()) {
239 return error = CAPACITY;
240 }
241 error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS;
242 error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS;
243 if(err1 != SUCCESS) { return error = err1; }
244 if(err2 != SUCCESS) { return error = err2; }
245 }
246 return SUCCESS;
247}
248
249simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
250 if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) {
251 _max_capacity = max_capacity;
252 } else {
253 _max_capacity = MINIMAL_DOCUMENT_CAPACITY;
254 }
255}
256
257} // namespace dom
258} // namespace simdjson
259
260#endif // SIMDJSON_PARSER_INL_H
A forward-only stream of documents.
A parsed JSON document.
Definition document.h:16
simdjson_inline parser(size_t max_capacity=SIMDJSON_MAXSIZE_BYTES) noexcept
Create a JSON parser.
Definition parser-inl.h:23
simdjson_result< element > parse(const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document and return a temporary reference to it.
Definition parser-inl.h:155
simdjson_result< element > parse_into_document(document &doc, const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document into a provided document instance and return a temporary reference to it.
Definition parser-inl.h:114
simdjson_result< document_stream > parse_many(const uint8_t *buf, size_t len, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Parse a buffer containing many JSON documents.
Definition parser-inl.h:172
simdjson_inline void set_max_capacity(size_t max_capacity) noexcept
Set max_capacity.
Definition parser-inl.h:249
simdjson_inline size_t max_capacity() const noexcept
The largest document this parser can automatically support.
Definition parser-inl.h:193
simdjson_result< document_stream > load_many(std::string_view path, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Load a file containing many JSON documents.
Definition parser-inl.h:106
simdjson_result< element > load_into_document(document &doc, std::string_view path) &noexcept
Load a JSON document from a file into a provided document instance and return a temporary reference to...
Definition parser-inl.h:99
simdjson_pure simdjson_inline size_t max_depth() const noexcept
The maximum level of nested object and arrays supported by this parser.
Definition parser-inl.h:196
simdjson_result< element > load(std::string_view path) &noexcept
Load a JSON document from a file and return a reference to it.
Definition parser-inl.h:95
simdjson_inline size_t capacity() const noexcept
The largest document this parser can support without reallocating.
Definition parser-inl.h:190
simdjson_inline parser & operator=(parser &&other) noexcept
Take another parser's buffers and state.
simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept
Ensure this parser has enough memory to process JSON documents up to capacity bytes in length and max...
Definition parser-inl.h:201
An implementation of simdjson for a particular CPU architecture.
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr< const implementation > & get_active_implementation()
The active implementation.
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition error-inl.h:25
constexpr size_t DEFAULT_MAX_DEPTH
By default, simdjson supports this many nested objects and arrays.
Definition base.h:40
error_code
All possible errors returned by simdjson.
Definition error.h:19
@ CAPACITY
This parser can't support a document that big.
Definition error.h:21
@ MEMALLOC
Error allocating memory, most likely out of memory.
Definition error.h:22
@ SUCCESS
No error.
Definition error.h:20
@ IO_ERROR
Error reading a file.
Definition error.h:41
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition base.h:33
The result of a simdjson operation that could fail.
Definition error.h:280
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.
Definition error-inl.h:163