8#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H
10#ifndef SIMDJSON_CONDITIONAL_INCLUDE
11#define SIMDJSON_GENERIC_STRING_BUILDER_INL_H
12#include "simdjson/generic/builder/json_string_builder.h"
21#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
22 (defined(_M_AMD64) || defined(_M_X64) || \
23 (defined(_M_IX86_FP) && _M_IX86_FP == 2))
24#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2
25#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1
29#if defined(__aarch64__) || defined(_M_ARM64)
30#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON
31#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1
34#if SIMDJSON_EXPERIMENTAL_HAS_NEON
37#if SIMDJSON_EXPERIMENTAL_HAS_SSE2
42namespace SIMDJSON_IMPLEMENTATION {
45static SIMDJSON_CONSTEXPR_LAMBDA std::array<uint8_t, 256>
46 json_quotable_character = {
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
51 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
76SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline
bool
77simple_needs_escaping(std::string_view v) {
80 if (json_quotable_character[
static_cast<uint8_t
>(c)]) {
87#if SIMDJSON_EXPERIMENTAL_HAS_NEON
88simdjson_inline
bool fast_needs_escaping(std::string_view view) {
89 if (view.size() < 16) {
90 return simple_needs_escaping(view);
93 uint8x16_t running = vdupq_n_u8(0);
94 uint8x16_t v34 = vdupq_n_u8(34);
95 uint8x16_t v92 = vdupq_n_u8(92);
97 for (; i + 15 < view.size(); i += 16) {
98 uint8x16_t word = vld1q_u8((
const uint8_t *)view.data() + i);
99 running = vorrq_u8(running, vceqq_u8(word, v34));
100 running = vorrq_u8(running, vceqq_u8(word, v92));
101 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
103 if (i < view.size()) {
105 vld1q_u8((
const uint8_t *)view.data() + view.length() - 16);
106 running = vorrq_u8(running, vceqq_u8(word, v34));
107 running = vorrq_u8(running, vceqq_u8(word, v92));
108 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
110 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
112#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
113simdjson_inline
bool fast_needs_escaping(std::string_view view) {
114 if (view.size() < 16) {
115 return simple_needs_escaping(view);
118 __m128i running = _mm_setzero_si128();
119 for (; i + 15 < view.size(); i += 16) {
122 _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(view.data() + i));
123 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
124 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
125 running = _mm_or_si128(
126 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
127 _mm_setzero_si128()));
129 if (i < view.size()) {
130 __m128i word = _mm_loadu_si128(
131 reinterpret_cast<const __m128i *
>(view.data() + view.length() - 16));
132 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
133 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
134 running = _mm_or_si128(
135 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
136 _mm_setzero_si128()));
138 return _mm_movemask_epi8(running) != 0;
141simdjson_inline
bool fast_needs_escaping(std::string_view view) {
142 return simple_needs_escaping(view);
146SIMDJSON_CONSTEXPR_LAMBDA
inline size_t
147find_next_json_quotable_character(
const std::string_view view,
148 size_t location)
noexcept {
150 for (
auto pos = view.begin() + location; pos != view.end(); ++pos) {
151 if (json_quotable_character[
static_cast<uint8_t
>(*pos)]) {
152 return pos - view.begin();
155 return size_t(view.size());
158SIMDJSON_CONSTEXPR_LAMBDA
static std::string_view control_chars[] = {
159 "\\u0000",
"\\u0001",
"\\u0002",
"\\u0003",
"\\u0004",
"\\u0005",
"\\u0006",
160 "\\u0007",
"\\b",
"\\t",
"\\n",
"\\u000b",
"\\f",
"\\r",
161 "\\u000e",
"\\u000f",
"\\u0010",
"\\u0011",
"\\u0012",
"\\u0013",
"\\u0014",
162 "\\u0015",
"\\u0016",
"\\u0017",
"\\u0018",
"\\u0019",
"\\u001a",
"\\u001b",
163 "\\u001c",
"\\u001d",
"\\u001e",
"\\u001f"};
170SIMDJSON_CONSTEXPR_LAMBDA
void escape_json_char(
char c,
char *&out) {
172 memcpy(out,
"\\\"", 2);
174 }
else if (c ==
'\\') {
175 memcpy(out,
"\\\\", 2);
178 std::string_view v = control_chars[uint8_t(c)];
179 memcpy(out, v.data(), v.size());
184inline size_t write_string_escaped(
const std::string_view input,
char *out) {
185 size_t mysize = input.size();
186 if (!fast_needs_escaping(input)) {
187 memcpy(out, input.data(), input.size());
190 const char *
const initout = out;
191 size_t location = find_next_json_quotable_character(input, 0);
192 memcpy(out, input.data(), location);
194 escape_json_char(input[location], out);
196 while (location < mysize) {
197 size_t newlocation = find_next_json_quotable_character(input, location);
198 memcpy(out, input.data() + location, newlocation - location);
199 out += newlocation - location;
200 location = newlocation;
201 if (location == mysize) {
204 escape_json_char(input[location], out);
207 return out - initout;
210simdjson_inline string_builder::string_builder(
size_t initial_capacity)
211 : buffer(new(std::nothrow) char[initial_capacity]), position(0),
212 capacity(buffer.get() != nullptr ? initial_capacity : 0),
213 is_valid(buffer.get() != nullptr) {}
215simdjson_inline
bool string_builder::capacity_check(
size_t upcoming_bytes) {
219 if (simdjson_likely(upcoming_bytes <= capacity - position)) {
223 if (simdjson_likely(position + upcoming_bytes < position)) {
227 grow_buffer((std::max)(capacity * 2, position + upcoming_bytes));
232simdjson_inline
void string_builder::grow_buffer(
size_t desired_capacity) {
236 std::unique_ptr<char[]> new_buffer(
new (std::nothrow)
char[desired_capacity]);
237 if (new_buffer.get() ==
nullptr) {
241 std::memcpy(new_buffer.get(), buffer.get(), position);
242 buffer.swap(new_buffer);
243 capacity = desired_capacity;
246simdjson_inline
void string_builder::set_valid(
bool valid)
noexcept {
257simdjson_inline
size_t string_builder::size() const noexcept {
261simdjson_inline
void string_builder::append(
char c)
noexcept {
262 if (capacity_check(1)) {
263 buffer.get()[position++] = c;
267simdjson_inline
void string_builder::append_null() noexcept {
268 constexpr char null_literal[] =
"null";
269 constexpr size_t null_len =
sizeof(null_literal) - 1;
270 if (capacity_check(null_len)) {
271 std::memcpy(buffer.get() + position, null_literal, null_len);
272 position += null_len;
276simdjson_inline
void string_builder::clear() noexcept {
288template <
typename number_type,
typename =
typename std::enable_if<
289 std::is_unsigned<number_type>::value>::type>
290simdjson_really_inline
int int_log2(number_type x) {
291 return 63 - leading_zeroes(uint64_t(x) | 1);
294simdjson_really_inline
int fast_digit_count_32(uint32_t x) {
295 static uint64_t table[] = {
296 4294967296, 8589934582, 8589934582, 8589934582, 12884901788,
297 12884901788, 12884901788, 17179868184, 17179868184, 17179868184,
298 21474826480, 21474826480, 21474826480, 21474826480, 25769703776,
299 25769703776, 25769703776, 30063771072, 30063771072, 30063771072,
300 34349738368, 34349738368, 34349738368, 34349738368, 38554705664,
301 38554705664, 38554705664, 41949672960, 41949672960, 41949672960,
302 42949672960, 42949672960};
303 return uint32_t((x + table[int_log2(x)]) >> 32);
306simdjson_really_inline
int fast_digit_count_64(uint64_t x) {
307 static uint64_t table[] = {9,
323 99999999999999999ULL,
324 999999999999999999ULL,
325 9999999999999999999ULL};
326 int y = (19 * int_log2(x) >> 6);
331template <
typename number_type,
typename =
typename std::enable_if<
332 std::is_unsigned<number_type>::value>::type>
333simdjson_really_inline
size_t digit_count(number_type v)
noexcept {
334 static_assert(
sizeof(number_type) == 8 ||
sizeof(number_type) == 4 ||
335 sizeof(number_type) == 2 ||
sizeof(number_type) == 1,
336 "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers");
337 SIMDJSON_IF_CONSTEXPR(
sizeof(number_type) <= 4) {
338 return fast_digit_count_32(
static_cast<uint32_t
>(v));
341 return fast_digit_count_64(
static_cast<uint64_t
>(v));
// Two ASCII digits for every value 0..99: entries [2 * k] and [2 * k + 1] hold
// the tens and units characters of k, so two digits can be copied with one
// two-byte memcpy.
static const char decimal_table[200] = {
    0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35,
    0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31,
    0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37,
    0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33,
    0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39,
    0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35,
    0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31,
    0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37,
    0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33,
    0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39,
    0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35,
    0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31,
    0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37,
    0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33,
    0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39,
    0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35,
    0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39,
};
365template <
typename number_type,
typename>
366simdjson_inline
void string_builder::append(number_type v)
noexcept {
367 static_assert(std::is_same<number_type, bool>::value ||
368 std::is_integral<number_type>::value ||
369 std::is_floating_point<number_type>::value,
370 "Unsupported number type");
372 SIMDJSON_IF_CONSTEXPR(std::is_same<number_type, bool>::value) {
374 constexpr char true_literal[] =
"true";
375 constexpr size_t true_len =
sizeof(true_literal) - 1;
376 if (capacity_check(true_len)) {
377 std::memcpy(buffer.get() + position, true_literal, true_len);
378 position += true_len;
381 constexpr char false_literal[] =
"false";
382 constexpr size_t false_len =
sizeof(false_literal) - 1;
383 if (capacity_check(false_len)) {
384 std::memcpy(buffer.get() + position, false_literal, false_len);
385 position += false_len;
389 else SIMDJSON_IF_CONSTEXPR(std::is_unsigned<number_type>::value) {
390 constexpr size_t max_number_size = 20;
391 if (capacity_check(max_number_size)) {
392 using unsigned_type =
typename std::make_unsigned<number_type>::type;
393 unsigned_type pv =
static_cast<unsigned_type
>(v);
394 size_t dc = internal::digit_count(pv);
395 char *write_pointer = buffer.get() + position + dc - 1;
397 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
402 *write_pointer-- = char(
'0' + (pv % 10));
405 *write_pointer = char(
'0' + pv);
409 else SIMDJSON_IF_CONSTEXPR(std::is_integral<number_type>::value) {
410 constexpr size_t max_number_size = 20;
411 if (capacity_check(max_number_size)) {
412 using unsigned_type =
typename std::make_unsigned<number_type>::type;
413 bool negative = v < 0;
414 unsigned_type pv =
static_cast<unsigned_type
>(v);
418 size_t dc = internal::digit_count(pv);
420 buffer.get()[position++] =
'-';
422 char *write_pointer = buffer.get() + position + dc - 1;
424 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
429 *write_pointer-- = char(
'0' + (pv % 10));
432 *write_pointer = char(
'0' + pv);
436 else SIMDJSON_IF_CONSTEXPR(std::is_floating_point<number_type>::value) {
437 constexpr size_t max_number_size = 24;
438 if (capacity_check(max_number_size)) {
440 char *end = simdjson::internal::to_chars(buffer.get() + position,
nullptr,
442 position = end - buffer.get();
448string_builder::escape_and_append(std::string_view input)
noexcept {
450 if (capacity_check(6 * input.size())) {
451 position += write_string_escaped(input, buffer.get() + position);
456string_builder::escape_and_append_with_quotes(std::string_view input)
noexcept {
458 if (capacity_check(2 + 6 * input.size())) {
459 buffer.get()[position++] =
'"';
460 position += write_string_escaped(input, buffer.get() + position);
461 buffer.get()[position++] =
'"';
466string_builder::escape_and_append_with_quotes(
char input)
noexcept {
468 if (capacity_check(2 + 6 * 1)) {
469 buffer.get()[position++] =
'"';
470 std::string_view cinput(&input, 1);
471 position += write_string_escaped(cinput, buffer.get() + position);
472 buffer.get()[position++] =
'"';
477string_builder::escape_and_append_with_quotes(
const char *input)
noexcept {
478 std::string_view cinput(input);
479 escape_and_append_with_quotes(cinput);
481#if SIMDJSON_SUPPORTS_CONCEPTS
482template <constevalutil::fixed_
string key>
483simdjson_inline
void string_builder::escape_and_append_with_quotes() noexcept {
484 escape_and_append_with_quotes(constevalutil::string_constant<key>::value);
488simdjson_inline
void string_builder::append_raw(
const char *c)
noexcept {
489 size_t len = std::strlen(c);
494string_builder::append_raw(std::string_view input)
noexcept {
495 if (capacity_check(input.size())) {
496 std::memcpy(buffer.get() + position, input.data(), input.size());
497 position += input.size();
501simdjson_inline
void string_builder::append_raw(
const char *str,
502 size_t len)
noexcept {
503 if (capacity_check(len)) {
504 std::memcpy(buffer.get() + position, str, len);
508#if SIMDJSON_SUPPORTS_CONCEPTS
510template <concepts::optional_type T>
511 requires(!require_custom_serialization<T>)
512simdjson_inline
void string_builder::append(
const T &opt) {
521 requires(require_custom_serialization<T>)
522simdjson_inline
void string_builder::append(
const T &val) {
523 serialize(*
this, val);
527 requires(std::is_convertible<T, std::string_view>::value ||
528 std::is_same<T, const char *>::value)
529simdjson_inline
void string_builder::append(
const T &value) {
530 escape_and_append_with_quotes(value);
534#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS
536template <std::ranges::range R>
537 requires(!std::is_convertible<R, std::string_view>::value)
538simdjson_inline
void string_builder::append(
const R &range)
noexcept {
539 auto it = std::ranges::begin(range);
540 auto end = std::ranges::end(range);
541 if constexpr (concepts::is_pair<typename R::value_type>) {
549 append_key_value(it->first, it->second);
553 for (; it != end; ++it) {
555 append_key_value(it->first, it->second);
570 for (; it != end; ++it) {
580#if SIMDJSON_EXCEPTIONS
581simdjson_inline string_builder::operator std::string() const noexcept(false) {
582 return std::string(
operator std::string_view());
585simdjson_inline string_builder::operator std::string_view() const
586 noexcept(false) simdjson_lifetime_bound {
592string_builder::view() const noexcept {
596 return std::string_view(buffer.get(), position);
600 if (capacity_check(1)) {
601 buffer.
get()[position] =
'\0';
607simdjson_inline
bool string_builder::validate_unicode() const noexcept {
611simdjson_inline
void string_builder::start_object() noexcept {
612 if (capacity_check(1)) {
613 buffer.get()[position++] =
'{';
617simdjson_inline
void string_builder::end_object() noexcept {
618 if (capacity_check(1)) {
619 buffer.get()[position++] =
'}';
623simdjson_inline
void string_builder::start_array() noexcept {
624 if (capacity_check(1)) {
625 buffer.get()[position++] =
'[';
629simdjson_inline
void string_builder::end_array() noexcept {
630 if (capacity_check(1)) {
631 buffer.get()[position++] =
']';
635simdjson_inline
void string_builder::append_comma() noexcept {
636 if (capacity_check(1)) {
637 buffer.get()[position++] =
',';
641simdjson_inline
void string_builder::append_colon() noexcept {
642 if (capacity_check(1)) {
643 buffer.get()[position++] =
':';
647template <
typename key_type,
typename value_type>
649string_builder::append_key_value(key_type key, value_type value)
noexcept {
650 static_assert(std::is_same<key_type, const char *>::value ||
651 std::is_convertible<key_type, std::string_view>::value,
652 "Unsupported key type");
653 escape_and_append_with_quotes(key);
655 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
658 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
659 escape_and_append_with_quotes(value);
661 else SIMDJSON_IF_CONSTEXPR(
662 std::is_convertible<value_type, std::string_view>::value) {
663 escape_and_append_with_quotes(value);
665 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
666 escape_and_append_with_quotes(value);
673#if SIMDJSON_SUPPORTS_CONCEPTS
674template <constevalutil::fixed_
string key,
typename value_type>
676string_builder::append_key_value(value_type value)
noexcept {
677 escape_and_append_with_quotes<key>();
679 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
682 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
683 escape_and_append_with_quotes(value);
685 else SIMDJSON_IF_CONSTEXPR(
686 std::is_convertible<value_type, std::string_view>::value) {
687 escape_and_append_with_quotes(value);
689 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
690 escape_and_append_with_quotes(value);
The top level simdjson namespace, containing everything the library provides.
simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept
Validate the UTF-8 string.
OUT_OF_CAPACITY
The capacity was exceeded; we cannot allocate enough memory.
The result of a simdjson operation that could fail.
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.