simdjson 4.6.3
Ridiculously Fast JSON
Loading...
Searching...
No Matches
padded_string-inl.h
1#ifndef SIMDJSON_PADDED_STRING_INL_H
2#define SIMDJSON_PADDED_STRING_INL_H
3
4#include "simdjson/padded_string.h"
5#include "simdjson/padded_string_view.h"
6
7#include "simdjson/error-inl.h"
8#include "simdjson/padded_string_view-inl.h"
9
10#include <climits>
11#include <cwchar>
12
13#ifndef _WIN32
14#include <fcntl.h>
15#include <stdio.h>
16#include <sys/mman.h>
17#include <sys/stat.h>
18#include <unistd.h>
19#endif
20
21namespace simdjson {
22namespace internal {
23
24// The allocate_padded_buffer function is a low-level function to allocate memory
25// with padding so we can read past the "length" bytes safely. It is used by
26// the padded_string class automatically. It returns nullptr in case
27// of error: the caller should check for a null pointer.
28// The length parameter is the maximum size in bytes of the string.
29// The caller is responsible to free the memory (e.g., delete[] (...)).
30inline char *allocate_padded_buffer(size_t length) noexcept {
31 const size_t totalpaddedlength = length + SIMDJSON_PADDING;
32 if(totalpaddedlength<length) {
33 // overflow
34 return nullptr;
35 }
36#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
37 // avoid getting out of memory
38 if (totalpaddedlength>(1UL<<20)) {
39 return nullptr;
40 }
41#endif
42
43 char *padded_buffer = new (std::nothrow) char[totalpaddedlength];
44 if (padded_buffer == nullptr) {
45 return nullptr;
46 }
47 // We write nulls in the padded region to avoid having uninitialized
48 // content which may trigger warning for some sanitizers
49 std::memset(padded_buffer + length, 0, totalpaddedlength - length);
50 return padded_buffer;
51} // allocate_padded_buffer()
52
53} // namespace internal
54
55
56inline padded_string::padded_string() noexcept = default;
57inline padded_string::padded_string(size_t length) noexcept
58 : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
59}
60inline padded_string::padded_string(const char *data, size_t length) noexcept
61 : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
62 if ((data != nullptr) && (data_ptr != nullptr)) {
63 std::memcpy(data_ptr, data, length);
64 }
65 if (data_ptr == nullptr) {
66 viable_size = 0;
67 }
68}
#ifdef __cpp_char8_t
// char8_t flavour of the (pointer, length) constructor: copies `length` bytes
// from `data` when `data` is not null. If the allocation fails, the string is
// left empty (null data pointer, size 0).
inline padded_string::padded_string(const char8_t *data, size_t length) noexcept
  : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) {
  if (data_ptr == nullptr) {
    // Allocation failed: there is nothing to copy into.
    viable_size = 0;
    return;
  }
  if (data != nullptr) {
    std::memcpy(data_ptr, reinterpret_cast<const char *>(data), length);
  }
}
#endif
80// note: do not pass std::string arguments by value
81inline padded_string::padded_string(const std::string & str_ ) noexcept
82 : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) {
83 if (data_ptr == nullptr) {
84 viable_size = 0;
85 } else {
86 std::memcpy(data_ptr, str_.data(), str_.size());
87 }
88}
89// note: do pass std::string_view arguments by value
90inline padded_string::padded_string(std::string_view sv_) noexcept
91 : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) {
92 if(simdjson_unlikely(!data_ptr)) {
93 //allocation failed or zero size
94 viable_size = 0;
95 return;
96 }
97 if (sv_.size()) {
98 std::memcpy(data_ptr, sv_.data(), sv_.size());
99 }
100}
102 : viable_size(o.viable_size), data_ptr(o.data_ptr) {
103 o.data_ptr = nullptr; // we take ownership
104 o.viable_size = 0;
105}
106
108 delete[] data_ptr;
109 data_ptr = o.data_ptr;
110 viable_size = o.viable_size;
111 o.data_ptr = nullptr; // we take ownership
112 o.viable_size = 0;
113 return *this;
114}
115
116inline void padded_string::swap(padded_string &o) noexcept {
117 size_t tmp_viable_size = viable_size;
118 char *tmp_data_ptr = data_ptr;
119 viable_size = o.viable_size;
120 data_ptr = o.data_ptr;
121 o.data_ptr = tmp_data_ptr;
122 o.viable_size = tmp_viable_size;
123}
124
125inline padded_string::~padded_string() noexcept {
126 delete[] data_ptr;
127}
128
129inline size_t padded_string::size() const noexcept { return viable_size; }
130
131inline size_t padded_string::length() const noexcept { return viable_size; }
132
133inline const char *padded_string::data() const noexcept { return data_ptr; }
134
135inline char *padded_string::data() noexcept { return data_ptr; }
136
137inline bool padded_string::append(const char *data, size_t length) noexcept {
138 if (length == 0) {
139 return true; // Nothing to append
140 }
141 size_t new_size = viable_size + length;
142 if (new_size < viable_size) {
143 // Overflow, cannot append
144 return false;
145 }
146 char *new_data_ptr = internal::allocate_padded_buffer(new_size);
147 if (new_data_ptr == nullptr) {
148 // Allocation failed, cannot append
149 return false;
150 }
151 // Copy existing data
152 if (viable_size > 0) {
153 std::memcpy(new_data_ptr, data_ptr, viable_size);
154 }
155 // Copy new data
156 std::memcpy(new_data_ptr + viable_size, data, length);
157 // Update
158 delete[] data_ptr;
159 data_ptr = new_data_ptr;
160 viable_size = new_size;
161 return true;
162}
163
164inline padded_string::operator std::string_view() const simdjson_lifetime_bound { return std::string_view(data(), length()); }
165
166inline padded_string::operator padded_string_view() const noexcept simdjson_lifetime_bound {
167 return padded_string_view(data(), length(), length() + SIMDJSON_PADDING);
168}
169
170inline simdjson_result<padded_string> padded_string::load(std::string_view filename) noexcept {
171 // std::string_view is not guaranteed to be null-terminated, but std::fopen requires
172 // a null-terminated C string. Construct a temporary std::string to ensure null-termination.
173 const std::string null_terminated_filename(filename);
174 // Open the file
175 SIMDJSON_PUSH_DISABLE_WARNINGS
176 SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
177 std::FILE *fp = std::fopen(null_terminated_filename.c_str(), "rb");
178 SIMDJSON_POP_DISABLE_WARNINGS
179
180 if (fp == nullptr) {
181 return IO_ERROR;
182 }
183
184 // Get the file size
185 int ret;
186#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
187 ret = _fseeki64(fp, 0, SEEK_END);
188#else
189 ret = std::fseek(fp, 0, SEEK_END);
190#endif // _WIN64
191 if(ret < 0) {
192 std::fclose(fp);
193 return IO_ERROR;
194 }
195#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
196 __int64 llen = _ftelli64(fp);
197 if(llen == -1L) {
198 std::fclose(fp);
199 return IO_ERROR;
200 }
201#else
202 long llen = std::ftell(fp);
203 if((llen < 0) || (llen == LONG_MAX)) {
204 std::fclose(fp);
205 return IO_ERROR;
206 }
207#endif
208
209 // Allocate the padded_string
210 size_t len = static_cast<size_t>(llen);
211 padded_string s(len);
212 if (s.data() == nullptr) {
213 std::fclose(fp);
214 return MEMALLOC;
215 }
216
217 // Read the padded_string
218 std::rewind(fp);
219 size_t bytes_read = std::fread(s.data(), 1, len, fp);
220 if (std::fclose(fp) != 0 || bytes_read != len) {
221 return IO_ERROR;
222 }
223
224 return s;
225}
226
227#if defined(_WIN32) && SIMDJSON_CPLUSPLUS17
228inline simdjson_result<padded_string> padded_string::load(std::wstring_view filename) noexcept {
229 // std::wstring_view is not guaranteed to be null-terminated, but _wfopen requires
230 // a null-terminated wide C string. Construct a temporary std::wstring to ensure null-termination.
231 const std::wstring null_terminated_filename(filename);
232 // Open the file using the wide characters
233 SIMDJSON_PUSH_DISABLE_WARNINGS
234 SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
235 std::FILE *fp = _wfopen(null_terminated_filename.c_str(), L"rb");
236 SIMDJSON_POP_DISABLE_WARNINGS
237
238 if (fp == nullptr) {
239 return IO_ERROR;
240 }
241
242 // Get the file size
243 int ret;
244#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
245 ret = _fseeki64(fp, 0, SEEK_END);
246#else
247 ret = std::fseek(fp, 0, SEEK_END);
248#endif // _WIN64
249 if(ret < 0) {
250 std::fclose(fp);
251 return IO_ERROR;
252 }
253#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS
254 __int64 llen = _ftelli64(fp);
255 if(llen == -1L) {
256 std::fclose(fp);
257 return IO_ERROR;
258 }
259#else
260 long llen = std::ftell(fp);
261 if((llen < 0) || (llen == LONG_MAX)) {
262 std::fclose(fp);
263 return IO_ERROR;
264 }
265#endif
266
267 // Allocate the padded_string
268 size_t len = static_cast<size_t>(llen);
269 padded_string s(len);
270 if (s.data() == nullptr) {
271 std::fclose(fp);
272 return MEMALLOC;
273 }
274
275 // Read the padded_string
276 std::rewind(fp);
277 size_t bytes_read = std::fread(s.data(), 1, len, fp);
278 if (std::fclose(fp) != 0 || bytes_read != len) {
279 return IO_ERROR;
280 }
281
282 return s;
283}
284#endif
285
286// padded_string_builder implementations
287
288inline padded_string_builder::padded_string_builder() noexcept = default;
289
290inline padded_string_builder::padded_string_builder(size_t new_capacity) noexcept {
291 if (new_capacity > 0) {
292 data = internal::allocate_padded_buffer(new_capacity);
293 if (data != nullptr) {
294 this->capacity = new_capacity;
295 }
296 }
297}
298
300 : size(o.size), capacity(o.capacity), data(o.data) {
301 o.size = 0;
302 o.capacity = 0;
303 o.data = nullptr;
304}
305
307 if (this != &o) {
308 delete[] data;
309 size = o.size;
310 capacity = o.capacity;
311 data = o.data;
312 o.size = 0;
313 o.capacity = 0;
314 o.data = nullptr;
315 }
316 return *this;
317}
318
320 delete[] data;
321}
322
323inline bool padded_string_builder::append(const char *newdata, size_t length) noexcept {
324 if (length == 0) {
325 return true;
326 }
327 if (!reserve(length)) {
328 return false;
329 }
330 std::memcpy(data + size, newdata, length);
331 size += length;
332 return true;
333}
334
335inline bool padded_string_builder::append(std::string_view sv) noexcept {
336 return append(sv.data(), sv.size());
337}
338
339inline size_t padded_string_builder::length() const noexcept {
340 return size;
341}
342
344 return padded_string(data, size);
345}
346
348 padded_string result{};
349 result.data_ptr = data;
350 result.viable_size = size;
351 data = nullptr;
352 size = 0;
353 capacity = 0;
354 return result;
355}
356
357inline bool padded_string_builder::reserve(size_t additional) noexcept {
358 if (simdjson_unlikely(additional + size < size)) {
359 return false; // overflow: cannot satisfy request
360 }
361 size_t needed = size + additional;
362 if (needed <= capacity) {
363 return true;
364 }
365 size_t new_capacity = needed;
366 // We are going to grow the capacity exponentially to avoid
367 // repeated allocations.
368 if (new_capacity < 4096) {
369 new_capacity *= 2;
370 // overflow guard: ensure new_capacity + new_capacity/2 does not overflow
371 } else if (new_capacity + new_capacity / 2 > new_capacity) {
372 new_capacity += new_capacity / 2; // grow by 1.5x
373 }
374 char *new_data = internal::allocate_padded_buffer(new_capacity);
375 if (new_data == nullptr) {
376 return false; // Allocation failed
377 }
378 if (size > 0) {
379 std::memcpy(new_data, data, size);
380 }
381 delete[] data;
382 data = new_data;
383 capacity = new_capacity;
384 return true;
385}
386
387
388#ifndef _WIN32
389simdjson_inline padded_memory_map::padded_memory_map(const char *filename) noexcept {
390
391 int fd = open(filename, O_RDONLY);
392 if (fd == -1) {
393 return; // file not found or cannot be opened, data will be nullptr
394 }
395 struct stat st;
396 if (fstat(fd, &st) == -1) {
397 close(fd);
398 return; // failed to get file size, data will be nullptr
399 }
400 size = static_cast<size_t>(st.st_size);
401 size_t total_size = size + simdjson::SIMDJSON_PADDING;
402 void *anon_map =
403 mmap(NULL, total_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
404 if (anon_map == MAP_FAILED) {
405 close(fd);
406 return; // failed to create anonymous mapping, data will be nullptr
407 }
408 void *file_map =
409 mmap(anon_map, size, PROT_READ, MAP_SHARED | MAP_FIXED, fd, 0);
410 if (file_map == MAP_FAILED) {
411 munmap(anon_map, total_size);
412 close(fd);
413 return; // failed to mmap file, data will be nullptr
414 }
415 data = static_cast<const char *>(file_map);
416 close(fd); // no longer needed after mapping
417}
418
419simdjson_inline padded_memory_map::~padded_memory_map() noexcept {
420 if (data != nullptr) {
421 munmap(const_cast<char *>(data), size + simdjson::SIMDJSON_PADDING);
422 }
423}
424
425
426simdjson_inline simdjson::padded_string_view padded_memory_map::view() const noexcept simdjson_lifetime_bound {
427 if(!is_valid()) {
428 return simdjson::padded_string_view(); // return an empty view if mapping failed
429 }
431}
432
433simdjson_inline bool padded_memory_map::is_valid() const noexcept {
434 return data != nullptr;
435}
436#endif // _WIN32
437
438} // namespace simdjson
439
440inline simdjson::padded_string operator ""_padded(const char *str, size_t len) {
441 return simdjson::padded_string(str, len);
442}
443#ifdef __cpp_char8_t
444inline simdjson::padded_string operator ""_padded(const char8_t *str, size_t len) {
445 return simdjson::padded_string(reinterpret_cast<const char *>(str), len);
446}
447#endif
448
449#endif // SIMDJSON_PADDED_STRING_INL_H
simdjson_inline bool is_valid() const noexcept
Check if the memory map is valid.
simdjson_inline ~padded_memory_map() noexcept
Destroy the padded memory map and release any resources.
simdjson_inline simdjson::padded_string_view view() const noexcept simdjson_lifetime_bound
Get a view of the memory-mapped file.
Builder for constructing padded_string incrementally.
padded_string convert() noexcept
Convert the current content into a padded_string.
padded_string_builder & operator=(padded_string_builder &&o) noexcept
Move assignment.
~padded_string_builder() noexcept
Destructor.
size_t length() const noexcept
Get the current length of the built string.
padded_string build() const noexcept
Build a padded_string from the current content.
bool append(const char *newdata, size_t length) noexcept
Append data to the builder.
padded_string_builder() noexcept
Create a new, empty padded string builder.
User-provided string that promises it has extra padded bytes at the end for use with parser::parse().
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
@ MEMALLOC
Error allocating memory, most likely out of memory.
Definition error.h:22
@ IO_ERROR
Error reading a file.
Definition error.h:41
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition base.h:33
String with extra allocation for ease of use with parser::parse()
size_t size() const noexcept
The length of the string.
bool append(const char *data, size_t length) noexcept
Append data to the padded string.
size_t length() const noexcept
The length of the string.
padded_string() noexcept
Create a new, empty padded string.
padded_string & operator=(padded_string &&o) noexcept
Move one padded string into another.
const char * data() const noexcept
The string data.
static simdjson_result< padded_string > load(std::string_view path) noexcept
Load this padded string from a file.
The result of a simdjson operation that could fail.
Definition error.h:280