simdjson  3.11.0
Ridiculously Fast JSON
parser-inl.h
1 #ifndef SIMDJSON_PARSER_INL_H
2 #define SIMDJSON_PARSER_INL_H
3 
4 #include "simdjson/dom/base.h"
5 #include "simdjson/dom/document_stream.h"
6 #include "simdjson/implementation.h"
7 #include "simdjson/internal/dom_parser_implementation.h"
8 
9 #include "simdjson/error-inl.h"
10 #include "simdjson/padded_string-inl.h"
11 #include "simdjson/dom/document_stream-inl.h"
12 #include "simdjson/dom/element-inl.h"
13 
14 #include <climits>
15 #include <cstring> /* memcmp */
16 
17 namespace simdjson {
18 namespace dom {
19 
20 //
21 // parser inline implementation
22 //
23 simdjson_inline parser::parser(size_t max_capacity) noexcept
24  : _max_capacity{max_capacity},
25  loaded_bytes(nullptr) {
26 }
27 simdjson_inline parser::parser(parser &&other) noexcept = default;
28 simdjson_inline parser &parser::operator=(parser &&other) noexcept = default;
29 
30 inline bool parser::is_valid() const noexcept { return valid; }
31 inline int parser::get_error_code() const noexcept { return error; }
32 inline std::string parser::get_error_message() const noexcept { return error_message(error); }
33 
34 inline bool parser::dump_raw_tape(std::ostream &os) const noexcept {
35  return valid ? doc.dump_raw_tape(os) : false;
36 }
37 
38 inline simdjson_result<size_t> parser::read_file(const std::string &path) noexcept {
39  // Open the file
40  SIMDJSON_PUSH_DISABLE_WARNINGS
41  SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
42  std::FILE *fp = std::fopen(path.c_str(), "rb");
43  SIMDJSON_POP_DISABLE_WARNINGS
44 
45  if (fp == nullptr) {
46  return IO_ERROR;
47  }
48 
49  // Get the file size
50  int ret;
51 #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
52  ret = _fseeki64(fp, 0, SEEK_END);
53 #else
54  ret = std::fseek(fp, 0, SEEK_END);
55 #endif // _WIN64
56  if(ret < 0) {
57  std::fclose(fp);
58  return IO_ERROR;
59  }
60 #if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
61  __int64 len = _ftelli64(fp);
62  if(len == -1L) {
63  std::fclose(fp);
64  return IO_ERROR;
65  }
66 #else
67  long len = std::ftell(fp);
68  if((len < 0) || (len == LONG_MAX)) {
69  std::fclose(fp);
70  return IO_ERROR;
71  }
72 #endif
73 
74  // Make sure we have enough capacity to load the file
75  if (_loaded_bytes_capacity < size_t(len)) {
76  loaded_bytes.reset( internal::allocate_padded_buffer(len) );
77  if (!loaded_bytes) {
78  std::fclose(fp);
79  return MEMALLOC;
80  }
81  _loaded_bytes_capacity = len;
82  }
83 
84  // Read the string
85  std::rewind(fp);
86  size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp);
87  if (std::fclose(fp) != 0 || bytes_read != size_t(len)) {
88  return IO_ERROR;
89  }
90 
91  return bytes_read;
92 }
93 
94 inline simdjson_result<element> parser::load(const std::string &path) & noexcept {
95  return load_into_document(doc, path);
96 }
97 
98 inline simdjson_result<element> parser::load_into_document(document& provided_doc, const std::string &path) & noexcept {
99  size_t len;
100  auto _error = read_file(path).get(len);
101  if (_error) { return _error; }
102  return parse_into_document(provided_doc, loaded_bytes.get(), len, false);
103 }
104 
105 inline simdjson_result<document_stream> parser::load_many(const std::string &path, size_t batch_size) noexcept {
106  size_t len;
107  auto _error = read_file(path).get(len);
108  if (_error) { return _error; }
109  if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
110  return document_stream(*this, reinterpret_cast<const uint8_t*>(loaded_bytes.get()), len, batch_size);
111 }
112 
113 inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
114  // Important: we need to ensure that document has enough capacity.
115  // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!!
116  error_code _error = ensure_capacity(provided_doc, len);
117  if (_error) { return _error; }
118  if (realloc_if_needed) {
119  // Make sure we have enough capacity to copy len bytes
120  if (!loaded_bytes || _loaded_bytes_capacity < len) {
121  loaded_bytes.reset( internal::allocate_padded_buffer(len) );
122  if (!loaded_bytes) {
123  return MEMALLOC;
124  }
125  _loaded_bytes_capacity = len;
126  }
127  std::memcpy(static_cast<void *>(loaded_bytes.get()), buf, len);
128  buf = reinterpret_cast<const uint8_t*>(loaded_bytes.get());
129  }
130 
131  if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
132  buf += 3;
133  len -= 3;
134  }
135  _error = implementation->parse(buf, len, provided_doc);
136 
137  if (_error) { return _error; }
138 
139  return provided_doc.root();
140 }
141 
142 simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept {
143  return parse_into_document(provided_doc, reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
144 }
145 simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept {
146  return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
147 }
148 simdjson_inline simdjson_result<element> parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept {
149  return parse_into_document(provided_doc, s.data(), s.length(), false);
150 }
151 
152 
153 inline simdjson_result<element> parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept {
154  return parse_into_document(doc, buf, len, realloc_if_needed);
155 }
156 
157 simdjson_inline simdjson_result<element> parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept {
158  return parse(reinterpret_cast<const uint8_t *>(buf), len, realloc_if_needed);
159 }
160 simdjson_inline simdjson_result<element> parser::parse(const std::string &s) & noexcept {
161  return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING);
162 }
163 simdjson_inline simdjson_result<element> parser::parse(const padded_string &s) & noexcept {
164  return parse(s.data(), s.length(), false);
165 }
166 simdjson_inline simdjson_result<element> parser::parse(const padded_string_view &v) & noexcept {
167  return parse(v.data(), v.length(), false);
168 }
169 
170 inline simdjson_result<document_stream> parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept {
171  if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; }
172  if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) {
173  buf += 3;
174  len -= 3;
175  }
176  return document_stream(*this, buf, len, batch_size);
177 }
178 inline simdjson_result<document_stream> parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept {
179  return parse_many(reinterpret_cast<const uint8_t *>(buf), len, batch_size);
180 }
181 inline simdjson_result<document_stream> parser::parse_many(const std::string &s, size_t batch_size) noexcept {
182  return parse_many(s.data(), s.length(), batch_size);
183 }
184 inline simdjson_result<document_stream> parser::parse_many(const padded_string &s, size_t batch_size) noexcept {
185  return parse_many(s.data(), s.length(), batch_size);
186 }
187 
188 simdjson_inline size_t parser::capacity() const noexcept {
189  return implementation ? implementation->capacity() : 0;
190 }
191 simdjson_inline size_t parser::max_capacity() const noexcept {
192  return _max_capacity;
193 }
194 simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept {
195  return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH;
196 }
197 
198 simdjson_warn_unused
199 inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept {
200  //
201  // Reallocate implementation if needed
202  //
203  error_code err;
204  if (implementation) {
205  err = implementation->allocate(capacity, max_depth);
206  } else {
207  err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation);
208  }
209  if (err) { return err; }
210  return SUCCESS;
211 }
212 
213 #ifndef SIMDJSON_DISABLE_DEPRECATED_API
214 simdjson_warn_unused
215 inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept {
216  return !allocate(capacity, max_depth);
217 }
218 #endif // SIMDJSON_DISABLE_DEPRECATED_API
219 
220 inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept {
221  return ensure_capacity(doc, desired_capacity);
222 }
223 
224 
225 inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept {
226  // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes.
227  // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr.
228  if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; }
229  // If we don't have enough capacity, (try to) automatically bump it.
230  // If the document needs allocation, do it too.
231  // Both in one if statement to minimize unlikely branching.
232  //
233  // Note: we must make sure that this function is called if capacity() == 0. We do so because we
234  // ensure that desired_capacity > 0.
235  if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) {
236  if (desired_capacity > max_capacity()) {
237  return error = CAPACITY;
238  }
239  error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS;
240  error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS;
241  if(err1 != SUCCESS) { return error = err1; }
242  if(err2 != SUCCESS) { return error = err2; }
243  }
244  return SUCCESS;
245 }
246 
247 simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept {
248  if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) {
249  _max_capacity = max_capacity;
250  } else {
251  _max_capacity = MINIMAL_DOCUMENT_CAPACITY;
252  }
253 }
254 
255 } // namespace dom
256 } // namespace simdjson
257 
258 #endif // SIMDJSON_PARSER_INL_H
A forward-only stream of documents.
A parsed JSON document.
Definition: document.h:16
simdjson_inline parser(size_t max_capacity=SIMDJSON_MAXSIZE_BYTES) noexcept
Create a JSON parser.
Definition: parser-inl.h:23
simdjson_result< element > parse(const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document and return a temporary reference to it.
Definition: parser-inl.h:153
simdjson_result< element > parse_into_document(document &doc, const uint8_t *buf, size_t len, bool realloc_if_needed=true) &noexcept
Parse a JSON document into a provided document instance and return a temporary reference to it.
Definition: parser-inl.h:113
simdjson_result< document_stream > load_many(const std::string &path, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Load a file containing many JSON documents.
Definition: parser-inl.h:105
simdjson_inline void set_max_capacity(size_t max_capacity) noexcept
Set max_capacity.
Definition: parser-inl.h:247
simdjson_inline size_t max_capacity() const noexcept
The largest document this parser can automatically support.
Definition: parser-inl.h:191
simdjson_pure simdjson_inline size_t max_depth() const noexcept
The maximum level of nested object and arrays supported by this parser.
Definition: parser-inl.h:194
simdjson_result< element > load(const std::string &path) &noexcept
Load a JSON document from a file and return a reference to it.
Definition: parser-inl.h:94
simdjson_result< element > load_into_document(document &doc, const std::string &path) &noexcept
Load a JSON document from a file into a provided document instance and return a temporary reference to...
Definition: parser-inl.h:98
simdjson_inline size_t capacity() const noexcept
The largest document this parser can support without reallocating.
Definition: parser-inl.h:188
simdjson_inline parser & operator=(parser &&other) noexcept
Take another parser's buffers and state.
simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept
Ensure this parser has enough memory to process JSON documents up to capacity bytes in length and max...
Definition: parser-inl.h:199
simdjson_result< document_stream > parse_many(const uint8_t *buf, size_t len, size_t batch_size=dom::DEFAULT_BATCH_SIZE) noexcept
Parse a buffer containing many JSON documents.
Definition: parser-inl.h:170
An implementation of simdjson for a particular CPU architecture.
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8
const char * error_message(error_code error) noexcept
It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whethe...
Definition: error-inl.h:20
constexpr size_t DEFAULT_MAX_DEPTH
By default, simdjson supports this many nested objects and arrays.
Definition: base.h:39
error_code
All possible errors returned by simdjson.
Definition: error.h:19
@ CAPACITY
This parser can't support a document that big.
Definition: error.h:21
@ MEMALLOC
Error allocating memory, most likely out of memory.
Definition: error.h:22
@ SUCCESS
No error.
Definition: error.h:20
@ IO_ERROR
Error reading a file.
Definition: error.h:41
SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr< const implementation > & get_active_implementation()
The active implementation.
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition: base.h:32
The result of a simdjson operation that could fail.
Definition: error.h:215