simdjson 4.6.4
Ridiculously Fast JSON
Loading...
Searching...
No Matches
json_string_builder-inl.h
1#include <array>
2#include <cstring>
3#include <limits>
4#include <type_traits>
5#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H
6
7#ifndef SIMDJSON_CONDITIONAL_INCLUDE
8#define SIMDJSON_GENERIC_STRING_BUILDER_INL_H
9#include "simdjson/generic/builder/json_string_builder.h"
10#endif // SIMDJSON_CONDITIONAL_INCLUDE
11
12/*
13 * Empirically, we have found that an inlined optimization is important for
14 * performance. The following macros are not ideal. We should find a better
15 * way to inline the code.
16 */
17
18#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
19 (defined(_M_AMD64) || defined(_M_X64) || \
20 (defined(_M_IX86_FP) && _M_IX86_FP == 2))
21#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2
22#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1
23#endif
24#endif
25
26#if defined(__aarch64__) || defined(_M_ARM64)
27#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON
28#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1
29#endif
30#endif
31#if defined(__loongarch_sx)
32#ifndef SIMDJSON_EXPERIMENTAL_HAS_LSX
33#define SIMDJSON_EXPERIMENTAL_HAS_LSX 1
34#endif
35#endif
36#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 && \
37 defined(__riscv_vector)
38#ifndef SIMDJSON_EXPERIMENTAL_HAS_RVV
39#define SIMDJSON_EXPERIMENTAL_HAS_RVV 1
40#endif
41#endif
42#if (defined(__PPC64__) || defined(_M_PPC64)) && defined(__ALTIVEC__)
43#ifndef SIMDJSON_EXPERIMENTAL_HAS_PPC64
44#define SIMDJSON_EXPERIMENTAL_HAS_PPC64 1
45#endif
46#endif
47#if SIMDJSON_EXPERIMENTAL_HAS_NEON
48#include <arm_neon.h>
49#ifdef _MSC_VER
50#include <intrin.h>
51#endif
52#endif
53#if SIMDJSON_EXPERIMENTAL_HAS_SSE2
54#include <emmintrin.h>
55#ifdef _MSC_VER
56#include <intrin.h>
57#endif
58#endif
59#if SIMDJSON_EXPERIMENTAL_HAS_LSX
60#include <lsxintrin.h>
61#endif
62#if SIMDJSON_EXPERIMENTAL_HAS_RVV
63#include <riscv_vector.h>
64#endif
65#if SIMDJSON_EXPERIMENTAL_HAS_PPC64
66#include <altivec.h>
67#ifdef bool
68#undef bool
69#endif
70#ifdef vector
71#undef vector
72#endif
73#endif
74
75
76namespace simdjson {
77namespace SIMDJSON_IMPLEMENTATION {
78namespace builder {
79
80static SIMDJSON_CONSTEXPR_LAMBDA std::array<uint8_t, 256>
81 json_quotable_character = {
82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
93
111SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool
112simple_needs_escaping(std::string_view v) {
113 for (char c : v) {
114 // a table lookup is faster than a series of comparisons
115 if (json_quotable_character[static_cast<uint8_t>(c)]) {
116 return true;
117 }
118 }
119 return false;
120}
121
122#if SIMDJSON_EXPERIMENTAL_HAS_NEON
123simdjson_inline bool fast_needs_escaping(std::string_view view) {
124 if (view.size() < 16) {
125 return simple_needs_escaping(view);
126 }
127 size_t i = 0;
128 uint8x16_t running = vdupq_n_u8(0);
129 uint8x16_t v34 = vdupq_n_u8(34);
130 uint8x16_t v92 = vdupq_n_u8(92);
131
132 for (; i + 15 < view.size(); i += 16) {
133 uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i);
134 running = vorrq_u8(running, vceqq_u8(word, v34));
135 running = vorrq_u8(running, vceqq_u8(word, v92));
136 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
137 }
138 if (i < view.size()) {
139 uint8x16_t word =
140 vld1q_u8((const uint8_t *)view.data() + view.length() - 16);
141 running = vorrq_u8(running, vceqq_u8(word, v34));
142 running = vorrq_u8(running, vceqq_u8(word, v92));
143 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
144 }
145 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
146}
147#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
148simdjson_inline bool fast_needs_escaping(std::string_view view) {
149 if (view.size() < 16) {
150 return simple_needs_escaping(view);
151 }
152 size_t i = 0;
153 __m128i running = _mm_setzero_si128();
154 for (; i + 15 < view.size(); i += 16) {
155
156 __m128i word =
157 _mm_loadu_si128(reinterpret_cast<const __m128i *>(view.data() + i));
158 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
159 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
160 running = _mm_or_si128(
161 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
162 _mm_setzero_si128()));
163 }
164 if (i < view.size()) {
165 __m128i word = _mm_loadu_si128(
166 reinterpret_cast<const __m128i *>(view.data() + view.length() - 16));
167 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
168 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
169 running = _mm_or_si128(
170 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
171 _mm_setzero_si128()));
172 }
173 return _mm_movemask_epi8(running) != 0;
174}
175#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
176simdjson_inline bool fast_needs_escaping(std::string_view view) {
177 if (view.size() < 16) {
178 return simple_needs_escaping(view);
179 }
180 size_t i = 0;
181 __vector unsigned char running = vec_splats((unsigned char)0);
182 __vector unsigned char v34 = vec_splats((unsigned char)34);
183 __vector unsigned char v92 = vec_splats((unsigned char)92);
184 __vector unsigned char v32 = vec_splats((unsigned char)32);
185
186 for (; i + 15 < view.size(); i += 16) {
187 __vector unsigned char word =
188 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(view.data() + i));
189 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
190 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
191 running = vec_or(running,
192 (__vector unsigned char)vec_cmplt(word, v32));
193 }
194 if (i < view.size()) {
195 __vector unsigned char word = vec_vsx_ld(
196 0, reinterpret_cast<const unsigned char *>(view.data() + view.length() - 16));
197 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
198 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
199 running = vec_or(running,
200 (__vector unsigned char)vec_cmplt(word, v32));
201 }
202 return !vec_all_eq(running, vec_splats((unsigned char)0));
203}
204#else
205simdjson_inline bool fast_needs_escaping(std::string_view view) {
206 return simple_needs_escaping(view);
207}
208#endif
209
210// Scalar fallback for finding next quotable character
211SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
212find_next_json_quotable_character_scalar(const std::string_view view,
213 size_t location) noexcept {
214 for (auto pos = view.begin() + location; pos != view.end(); ++pos) {
215 if (json_quotable_character[static_cast<uint8_t>(*pos)]) {
216 return pos - view.begin();
217 }
218 }
219 return size_t(view.size());
220}
221
222// SIMD-accelerated position finding that directly locates the first quotable
223// character, combining detection and position extraction in a single pass to
224// minimize redundant work.
225#if SIMDJSON_EXPERIMENTAL_HAS_NEON
226simdjson_inline size_t
227find_next_json_quotable_character(const std::string_view view,
228 size_t location) noexcept {
229 const size_t len = view.size();
230 const uint8_t *ptr =
231 reinterpret_cast<const uint8_t *>(view.data()) + location;
232 size_t remaining = len - location;
233
234 // SIMD constants for characters requiring escape
235 uint8x16_t v34 = vdupq_n_u8(34); // '"'
236 uint8x16_t v92 = vdupq_n_u8(92); // '\\'
237 uint8x16_t v32 = vdupq_n_u8(32); // control char threshold
238
239 while (remaining >= 16) {
240 uint8x16_t word = vld1q_u8(ptr);
241
242 // Check for quotable characters: '"', '\\', or control chars (< 32)
243 uint8x16_t needs_escape = vceqq_u8(word, v34);
244 needs_escape = vorrq_u8(needs_escape, vceqq_u8(word, v92));
245 needs_escape = vorrq_u8(needs_escape, vcltq_u8(word, v32));
246
247 const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(needs_escape), 4);
248 const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
249 if(mask != 0) {
250 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
251 auto trailing_zero = trailing_zeroes(mask);
252 return offset + (trailing_zero >> 2);
253 }
254 ptr += 16;
255 remaining -= 16;
256 }
257
258 // Scalar fallback for remaining bytes
259 size_t current = len - remaining;
260 return find_next_json_quotable_character_scalar(view, current);
261}
262#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
263simdjson_inline size_t
264find_next_json_quotable_character(const std::string_view view,
265 size_t location) noexcept {
266 const size_t len = view.size();
267 const uint8_t *ptr =
268 reinterpret_cast<const uint8_t *>(view.data()) + location;
269 size_t remaining = len - location;
270
271 // SIMD constants
272 __m128i v34 = _mm_set1_epi8(34); // '"'
273 __m128i v92 = _mm_set1_epi8(92); // '\\'
274 __m128i v31 = _mm_set1_epi8(31); // for control char detection
275
276 while (remaining >= 16) {
277 __m128i word = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
278
279 // Check for quotable characters
280 __m128i needs_escape = _mm_cmpeq_epi8(word, v34);
281 needs_escape = _mm_or_si128(needs_escape, _mm_cmpeq_epi8(word, v92));
282 needs_escape = _mm_or_si128(
283 needs_escape,
284 _mm_cmpeq_epi8(_mm_subs_epu8(word, v31), _mm_setzero_si128()));
285
286 int mask = _mm_movemask_epi8(needs_escape);
287 if (mask != 0) {
288 // Found quotable character - use trailing zero count to find position
289 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
290 return offset + trailing_zeroes(mask);
291 }
292 ptr += 16;
293 remaining -= 16;
294 }
295
296 // Scalar fallback for remaining bytes
297 size_t current = len - remaining;
298 return find_next_json_quotable_character_scalar(view, current);
299}
300#elif SIMDJSON_EXPERIMENTAL_HAS_LSX
301simdjson_inline size_t
302find_next_json_quotable_character(const std::string_view view,
303 size_t location) noexcept {
304 const size_t len = view.size();
305 const uint8_t *ptr =
306 reinterpret_cast<const uint8_t *>(view.data()) + location;
307 size_t remaining = len - location;
308
309 //SIMD constants for characters requiring escape
310 __m128i v34 = __lsx_vreplgr2vr_b(34); // '"'
311 __m128i v92 = __lsx_vreplgr2vr_b(92); // '\\'
312 __m128i v32 = __lsx_vreplgr2vr_b(32); // control char threshold
313
314 while (remaining >= 16){
315 __m128i word = __lsx_vld(ptr, 0);
316
317 //Check for the quotable characters: '"', '\\', or control char (<32)
318 __m128i needs_escape = __lsx_vseq_b(word, v34);
319 needs_escape = __lsx_vor_v(needs_escape, __lsx_vseq_b(word, v92));
320 needs_escape = __lsx_vor_v(needs_escape, __lsx_vslt_bu(word, v32));
321
322 if (!__lsx_bz_v(needs_escape)){
323
324 //Found quotable character - extract exact byte position
325 uint64_t lo = __lsx_vpickve2gr_du(needs_escape,0);
326 uint64_t hi = __lsx_vpickve2gr_du(needs_escape,1);
327 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
328 if ( lo != 0) {
329 return offset + trailing_zeroes(lo) / 8;
330 } else {
331 return offset + 8 + trailing_zeroes(hi) / 8;
332 }
333 }
334 ptr += 16;
335 remaining -= 16;
336 }
337 size_t current = len - remaining;
338 return find_next_json_quotable_character_scalar(view, current);
339}
340#elif SIMDJSON_EXPERIMENTAL_HAS_RVV
341simdjson_inline size_t
342find_next_json_quotable_character(const std::string_view view,
343 size_t location) noexcept {
344 const size_t len = view.size();
345 const uint8_t *ptr =
346 reinterpret_cast<const uint8_t *>(view.data()) + location;
347 size_t remaining = len - location;
348
349 while (remaining > 0) {
350 size_t vl = __riscv_vsetvl_e8m1(remaining);
351 vuint8m1_t word = __riscv_vle8_v_u8m1(ptr, vl);
352
353 // Check for quotable characters: '"', '\\', or control chars (< 32)
354 vbool8_t needs_escape = __riscv_vmseq(word, (uint8_t)34, vl);
355 needs_escape = __riscv_vmor(needs_escape,
356 __riscv_vmseq(word, (uint8_t)92, vl), vl);
357 needs_escape = __riscv_vmor(needs_escape,
358 __riscv_vmsltu(word, (uint8_t)32, vl), vl);
359
360 long first = __riscv_vfirst(needs_escape, vl);
361 if (first >= 0) {
362 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
363 return offset + first;
364 }
365 ptr += vl;
366 remaining -= vl;
367 }
368
369 return len;
370}
371#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
372simdjson_inline size_t
373find_next_json_quotable_character(const std::string_view view,
374 size_t location) noexcept {
375 const size_t len = view.size();
376 const uint8_t *ptr =
377 reinterpret_cast<const uint8_t *>(view.data()) + location;
378 size_t remaining = len - location;
379
380 // SIMD constants for characters requiring escape
381 __vector unsigned char v34 = vec_splats((unsigned char)34); // '"'
382 __vector unsigned char v92 = vec_splats((unsigned char)92); // '\\'
383 __vector unsigned char v32 = vec_splats((unsigned char)32); // control char threshold
384
385 // Bitmask for vec_vbpermq to extract one bit per byte
386 const __vector unsigned char perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50,
387 0x48, 0x40, 0x38, 0x30, 0x28, 0x20,
388 0x18, 0x10, 0x08, 0x00};
389
390 while (remaining >= 16) {
391 __vector unsigned char word =
392 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(ptr));
393
394 // Check for quotable characters: '"', '\\', or control chars (< 32)
395 __vector unsigned char needs_escape =
396 (__vector unsigned char)vec_cmpeq(word, v34);
397 needs_escape = vec_or(needs_escape,
398 (__vector unsigned char)vec_cmpeq(word, v92));
399 needs_escape = vec_or(needs_escape,
400 (__vector unsigned char)vec_cmplt(word, v32));
401
402 __vector unsigned long long result =
403 (__vector unsigned long long)vec_vbpermq(needs_escape, perm_mask);
404#ifdef __LITTLE_ENDIAN__
405 unsigned int mask = static_cast<unsigned int>(result[1]);
406#else
407 unsigned int mask = static_cast<unsigned int>(result[0]);
408#endif
409 if (mask != 0) {
410 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
411 return offset + __builtin_ctz(mask);
412 }
413 ptr += 16;
414 remaining -= 16;
415 }
416
417 // Scalar fallback for remaining bytes
418 size_t current = len - remaining;
419 return find_next_json_quotable_character_scalar(view, current);
420}
421#else
422SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
423find_next_json_quotable_character(const std::string_view view,
424 size_t location) noexcept {
425 return find_next_json_quotable_character_scalar(view, location);
426}
427#endif
428
429SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = {
430 "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006",
431 "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r",
432 "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014",
433 "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b",
434 "\\u001c", "\\u001d", "\\u001e", "\\u001f"};
435
436// All Unicode characters may be placed within the quotation marks, except for
437// the characters that MUST be escaped: quotation mark, reverse solidus, and the
438// control characters (U+0000 through U+001F). There are two-character sequence
439// escape representations of some popular characters:
440// \", \\, \b, \f, \n, \r, \t.
441SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline void escape_json_char(char c, char *&out) {
442 if (c == '"') {
443 memcpy(out, "\\\"", 2);
444 out += 2;
445 } else if (c == '\\') {
446 memcpy(out, "\\\\", 2);
447 out += 2;
448 } else {
449 std::string_view v = control_chars[uint8_t(c)];
450 memcpy(out, v.data(), v.size());
451 out += v.size();
452 }
453}
454
455// Writes the escaped version of input to out, returning the number of bytes
456// written. Uses SIMD position finding to locate quotable characters efficiently.
457inline size_t write_string_escaped(const std::string_view input, char *out) {
458 size_t mysize = input.size();
459
460 // Use SIMD position finder directly - it returns mysize if no escape needed
461 size_t location = find_next_json_quotable_character(input, 0);
462 if (location == mysize) {
463 // Fast path: no escaping needed
464 memcpy(out, input.data(), input.size());
465 return input.size();
466 }
467
468 const char *const initout = out;
469 memcpy(out, input.data(), location);
470 out += location;
471 escape_json_char(input[location], out);
472 location += 1;
473 while (location < mysize) {
474 size_t newlocation = find_next_json_quotable_character(input, location);
475 memcpy(out, input.data() + location, newlocation - location);
476 out += newlocation - location;
477 location = newlocation;
478 if (location == mysize) {
479 break;
480 }
481 escape_json_char(input[location], out);
482 location += 1;
483 }
484 return out - initout;
485}
486
487simdjson_inline string_builder::string_builder(size_t initial_capacity)
488 : buffer(new(std::nothrow) char[initial_capacity]), position(0),
489 capacity(buffer.get() != nullptr ? initial_capacity : 0),
490 is_valid(buffer.get() != nullptr) {}
491
492simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) {
493 // We use the convention that when is_valid is false, then the capacity and
494 // the position are 0.
495 // Most of the time, this function will return true.
496 if (simdjson_likely(upcoming_bytes <= capacity - position)) {
497 return true;
498 }
499 // check for overflow, most of the time there is no overflow
500 if (simdjson_unlikely(position + upcoming_bytes < position)) {
501 return false;
502 }
503 // We will rarely get here.
504 grow_buffer((std::max)(capacity * 2, position + upcoming_bytes));
505 // If the buffer allocation failed, we set is_valid to false.
506 return is_valid;
507}
508
509simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) {
510 if (!is_valid) {
511 return;
512 }
513 std::unique_ptr<char[]> new_buffer(new (std::nothrow) char[desired_capacity]);
514 if (new_buffer.get() == nullptr) {
515 set_valid(false);
516 return;
517 }
518 std::memcpy(new_buffer.get(), buffer.get(), position);
519 buffer.swap(new_buffer);
520 capacity = desired_capacity;
521}
522
523simdjson_inline void string_builder::set_valid(bool valid) noexcept {
524 if (!valid) {
525 is_valid = false;
526 capacity = 0;
527 position = 0;
528 buffer.reset();
529 } else {
530 is_valid = true;
531 }
532}
533
534simdjson_inline size_t string_builder::size() const noexcept {
535 return position;
536}
537
538simdjson_inline void string_builder::append(char c) noexcept {
539 if (capacity_check(1)) {
540 buffer.get()[position++] = c;
541 }
542}
543
544simdjson_inline void string_builder::append_null() noexcept {
545 constexpr char null_literal[] = "null";
546 constexpr size_t null_len = sizeof(null_literal) - 1;
547 if (capacity_check(null_len)) {
548 std::memcpy(buffer.get() + position, null_literal, null_len);
549 position += null_len;
550 }
551}
552
553simdjson_inline void string_builder::clear() noexcept {
554 position = 0;
555 // if it was invalid, we should try to repair it
556 if (!is_valid) {
557 capacity = 0;
558 buffer.reset();
559 is_valid = true;
560 }
561}
562
563namespace internal {
564
565template <typename number_type, typename = typename std::enable_if<
566 std::is_unsigned<number_type>::value>::type>
567simdjson_really_inline int int_log2(number_type x) {
568 return 63 - leading_zeroes(uint64_t(x) | 1);
569}
570
571simdjson_really_inline int fast_digit_count_32(uint32_t x) {
572 static uint64_t table[] = {
573 4294967296, 8589934582, 8589934582, 8589934582, 12884901788,
574 12884901788, 12884901788, 17179868184, 17179868184, 17179868184,
575 21474826480, 21474826480, 21474826480, 21474826480, 25769703776,
576 25769703776, 25769703776, 30063771072, 30063771072, 30063771072,
577 34349738368, 34349738368, 34349738368, 34349738368, 38554705664,
578 38554705664, 38554705664, 41949672960, 41949672960, 41949672960,
579 42949672960, 42949672960};
580 return uint32_t((x + table[int_log2(x)]) >> 32);
581}
582
583simdjson_really_inline int fast_digit_count_64(uint64_t x) {
584 static uint64_t table[] = {9,
585 99,
586 999,
587 9999,
588 99999,
589 999999,
590 9999999,
591 99999999,
592 999999999,
593 9999999999,
594 99999999999,
595 999999999999,
596 9999999999999,
597 99999999999999,
598 999999999999999ULL,
599 9999999999999999ULL,
600 99999999999999999ULL,
601 999999999999999999ULL,
602 9999999999999999999ULL};
603 int y = (19 * int_log2(x) >> 6);
604 y += x > table[y];
605 return y + 1;
606}
607
608template <typename number_type, typename = typename std::enable_if<
609 std::is_unsigned<number_type>::value>::type>
610simdjson_really_inline size_t digit_count(number_type v) noexcept {
611 static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 ||
612 sizeof(number_type) == 2 || sizeof(number_type) == 1,
613 "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers");
614 SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) {
615 return fast_digit_count_32(static_cast<uint32_t>(v));
616 }
617 else {
618 return fast_digit_count_64(static_cast<uint64_t>(v));
619 }
620}
// ASCII digit pairs "00" through "99" stored back to back: the two characters
// of the value n start at index 2 * n. Each row below covers one tens digit.
static const char decimal_table[200] = {
    // 00 - 09
    0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34,
    0x30, 0x35, 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39,
    // 10 - 19
    0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34,
    0x31, 0x35, 0x31, 0x36, 0x31, 0x37, 0x31, 0x38, 0x31, 0x39,
    // 20 - 29
    0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34,
    0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39,
    // 30 - 39
    0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34,
    0x33, 0x35, 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39,
    // 40 - 49
    0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34,
    0x34, 0x35, 0x34, 0x36, 0x34, 0x37, 0x34, 0x38, 0x34, 0x39,
    // 50 - 59
    0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34,
    0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39,
    // 60 - 69
    0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34,
    0x36, 0x35, 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39,
    // 70 - 79
    0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34,
    0x37, 0x35, 0x37, 0x36, 0x37, 0x37, 0x37, 0x38, 0x37, 0x39,
    // 80 - 89
    0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34,
    0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39,
    // 90 - 99
    0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34,
    0x39, 0x35, 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39,
};
640} // namespace internal
641
642template <typename number_type, typename>
643simdjson_inline void string_builder::append(number_type v) noexcept {
644 static_assert(std::is_same<number_type, bool>::value ||
645 std::is_integral<number_type>::value ||
646 std::is_floating_point<number_type>::value,
647 "Unsupported number type");
648 // If C++17 is available, we can 'if constexpr' here.
649 SIMDJSON_IF_CONSTEXPR(std::is_same<number_type, bool>::value) {
650 if (v) {
651 constexpr char true_literal[] = "true";
652 constexpr size_t true_len = sizeof(true_literal) - 1;
653 if (capacity_check(true_len)) {
654 std::memcpy(buffer.get() + position, true_literal, true_len);
655 position += true_len;
656 }
657 } else {
658 constexpr char false_literal[] = "false";
659 constexpr size_t false_len = sizeof(false_literal) - 1;
660 if (capacity_check(false_len)) {
661 std::memcpy(buffer.get() + position, false_literal, false_len);
662 position += false_len;
663 }
664 }
665 }
666 else SIMDJSON_IF_CONSTEXPR(std::is_unsigned<number_type>::value) {
667 // Process 4 digits at a time instead of 2, reducing store operations
668 // and divisions by approximately half for large numbers.
669 constexpr size_t max_number_size = 20;
670 if (capacity_check(max_number_size)) {
671 using unsigned_type = typename std::make_unsigned<number_type>::type;
672 unsigned_type pv = static_cast<unsigned_type>(v);
673 size_t dc = internal::digit_count(pv);
674 char *write_pointer = buffer.get() + position + dc - 1;
675
676 // Process 4 digits per iteration for large numbers
677 while (pv >= 10000) {
678 unsigned_type q = pv / 10000;
679 unsigned_type r = pv % 10000;
680 unsigned_type r_hi = r / 100; // High 2 digits of remainder
681 unsigned_type r_lo = r % 100; // Low 2 digits of remainder
682 // Write low 2 digits first (rightmost), then high 2 digits
683 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
684 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
685 write_pointer -= 4;
686 pv = q;
687 }
688
689 // Handle remaining 1-4 digits with original 2-digit loop
690 while (pv >= 100) {
691 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
692 write_pointer -= 2;
693 pv /= 100;
694 }
695 if (pv >= 10) {
696 *write_pointer-- = char('0' + (pv % 10));
697 pv /= 10;
698 }
699 *write_pointer = char('0' + pv);
700 position += dc;
701 }
702 }
703 else SIMDJSON_IF_CONSTEXPR(std::is_integral<number_type>::value) {
704 // Same 4-digit batching as unsigned path for signed integers
705 constexpr size_t max_number_size = 20;
706 if (capacity_check(max_number_size)) {
707 using unsigned_type = typename std::make_unsigned<number_type>::type;
708 bool negative = v < 0;
709 unsigned_type pv = static_cast<unsigned_type>(v);
710 if (negative) {
711 pv = 0 - pv; // the 0 is for Microsoft
712 }
713 size_t dc = internal::digit_count(pv);
714 // by always writing the minus sign, we avoid the branch.
715 buffer.get()[position] = '-';
716 position += negative ? 1 : 0;
717 char *write_pointer = buffer.get() + position + dc - 1;
718
719 // Process 4 digits per iteration for large numbers
720 while (pv >= 10000) {
721 unsigned_type q = pv / 10000;
722 unsigned_type r = pv % 10000;
723 unsigned_type r_hi = r / 100;
724 unsigned_type r_lo = r % 100;
725 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
726 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
727 write_pointer -= 4;
728 pv = q;
729 }
730
731 // Handle remaining 1-4 digits
732 while (pv >= 100) {
733 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
734 write_pointer -= 2;
735 pv /= 100;
736 }
737 if (pv >= 10) {
738 *write_pointer-- = char('0' + (pv % 10));
739 pv /= 10;
740 }
741 *write_pointer = char('0' + pv);
742 position += dc;
743 }
744 }
745 else SIMDJSON_IF_CONSTEXPR(std::is_floating_point<number_type>::value) {
746 constexpr size_t max_number_size = 24;
747 if (capacity_check(max_number_size)) {
748 // We could specialize for float.
749 char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr,
750 double(v));
751 position = end - buffer.get();
752 }
753 }
754}
755
756simdjson_inline void
757string_builder::escape_and_append(std::string_view input) noexcept {
758 // escaping might turn a control character into \x00xx so 6 characters.
759 // Guard against size_t overflow in the multiplication below.
760 if (input.size() > (std::numeric_limits<size_t>::max)() / 6) {
761 set_valid(false);
762 return;
763 }
764 if (capacity_check(6 * input.size())) {
765 position += write_string_escaped(input, buffer.get() + position);
766 }
767}
768
769simdjson_inline void
770string_builder::escape_and_append_with_quotes(std::string_view input) noexcept {
771 // escaping might turn a control character into \x00xx so 6 characters.
772 // Guard against size_t overflow in the arithmetic below.
773 if (input.size() > ((std::numeric_limits<size_t>::max)() - 2) / 6) {
774 set_valid(false);
775 return;
776 }
777 if (capacity_check(2 + 6 * input.size())) {
778 buffer.get()[position++] = '"';
779 position += write_string_escaped(input, buffer.get() + position);
780 buffer.get()[position++] = '"';
781 }
782}
783
784simdjson_inline void
785string_builder::escape_and_append_with_quotes(char input) noexcept {
786 // escaping might turn a control character into \x00xx so 6 characters.
787 if (capacity_check(2 + 6 * 1)) {
788 buffer.get()[position++] = '"';
789 std::string_view cinput(&input, 1);
790 position += write_string_escaped(cinput, buffer.get() + position);
791 buffer.get()[position++] = '"';
792 }
793}
794
795simdjson_inline void
796string_builder::escape_and_append_with_quotes(const char *input) noexcept {
797 std::string_view cinput(input);
798 escape_and_append_with_quotes(cinput);
799}
800#if SIMDJSON_SUPPORTS_CONCEPTS
801template <constevalutil::fixed_string key>
802simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept {
803 escape_and_append_with_quotes(constevalutil::string_constant<key>::value);
804}
805#endif
806
807simdjson_inline void string_builder::append_raw(const char *c) noexcept {
808 size_t len = std::strlen(c);
809 append_raw(c, len);
810}
811
812simdjson_inline void
813string_builder::append_raw(std::string_view input) noexcept {
814 if (capacity_check(input.size())) {
815 std::memcpy(buffer.get() + position, input.data(), input.size());
816 position += input.size();
817 }
818}
819
820simdjson_inline void string_builder::append_raw(const char *str,
821 size_t len) noexcept {
822 if (capacity_check(len)) {
823 std::memcpy(buffer.get() + position, str, len);
824 position += len;
825 }
826}
827#if SIMDJSON_SUPPORTS_CONCEPTS
828// Support for optional types (std::optional, etc.)
829template <concepts::optional_type T>
830 requires(!require_custom_serialization<T>)
831simdjson_inline void string_builder::append(const T &opt) {
832 if (opt) {
833 append(*opt);
834 } else {
835 append_null();
836 }
837}
838
839template <typename T>
840 requires(require_custom_serialization<T>)
841simdjson_inline void string_builder::append(T &&val) {
842 serialize(*this, std::forward<T>(val));
843}
844
845template <typename T>
846 requires(std::is_convertible<T, std::string_view>::value ||
847 std::is_same<T, const char *>::value)
848simdjson_inline void string_builder::append(const T &value) {
849 escape_and_append_with_quotes(value);
850}
851#endif
852
853#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS
854// Support for range-based appending (std::ranges::view, etc.)
855template <std::ranges::range R>
856 requires(!std::is_convertible<R, std::string_view>::value && !require_custom_serialization<R>)
857simdjson_inline void string_builder::append(const R &range) noexcept {
858 auto it = std::ranges::begin(range);
859 auto end = std::ranges::end(range);
860 if constexpr (concepts::is_pair<std::ranges::range_value_t<R>>) {
861 start_object();
862
863 if (it == end) {
864 end_object();
865 return; // Handle empty range
866 }
867 // Append first item without leading comma
868 append_key_value(it->first, it->second);
869 ++it;
870
871 // Append remaining items with preceding commas
872 for (; it != end; ++it) {
873 append_comma();
874 append_key_value(it->first, it->second);
875 }
876 end_object();
877 } else {
878 start_array();
879 if (it == end) {
880 end_array();
881 return; // Handle empty range
882 }
883
884 // Append first item without leading comma
885 append(*it);
886 ++it;
887
888 // Append remaining items with preceding commas
889 for (; it != end; ++it) {
890 append_comma();
891 append(*it);
892 }
893 end_array();
894 }
895}
896
897#endif
898
899#if SIMDJSON_EXCEPTIONS
900simdjson_inline string_builder::operator std::string() const noexcept(false) {
901 return std::string(operator std::string_view());
902}
903
904simdjson_inline string_builder::operator std::string_view() const
905 noexcept(false) simdjson_lifetime_bound {
906 return view();
907}
908#endif
909
911string_builder::view() const noexcept {
912 if (!is_valid) {
914 }
915 return std::string_view(buffer.get(), position);
916}
917
918simdjson_inline simdjson_result<const char *> string_builder::c_str() noexcept {
919 if (capacity_check(1)) {
920 buffer.get()[position] = '\0';
921 return buffer.get();
922 }
924}
925
926simdjson_inline bool string_builder::validate_unicode() const noexcept {
927 return simdjson::validate_utf8(buffer.get(), position);
928}
929
930simdjson_inline void string_builder::start_object() noexcept {
931 if (capacity_check(1)) {
932 buffer.get()[position++] = '{';
933 }
934}
935
936simdjson_inline void string_builder::end_object() noexcept {
937 if (capacity_check(1)) {
938 buffer.get()[position++] = '}';
939 }
940}
941
942simdjson_inline void string_builder::start_array() noexcept {
943 if (capacity_check(1)) {
944 buffer.get()[position++] = '[';
945 }
946}
947
948simdjson_inline void string_builder::end_array() noexcept {
949 if (capacity_check(1)) {
950 buffer.get()[position++] = ']';
951 }
952}
953
954simdjson_inline void string_builder::append_comma() noexcept {
955 if (capacity_check(1)) {
956 buffer.get()[position++] = ',';
957 }
958}
959
960simdjson_inline void string_builder::append_colon() noexcept {
961 if (capacity_check(1)) {
962 buffer.get()[position++] = ':';
963 }
964}
965
966template <typename key_type, typename value_type>
967simdjson_inline void
968string_builder::append_key_value(key_type key, value_type value) noexcept {
969 static_assert(std::is_same<key_type, const char *>::value ||
970 std::is_convertible<key_type, std::string_view>::value,
971 "Unsupported key type");
972 escape_and_append_with_quotes(key);
973 append_colon();
974 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
975 append_null();
976 }
977 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
978 escape_and_append_with_quotes(value);
979 }
980 else SIMDJSON_IF_CONSTEXPR(
981 std::is_convertible<value_type, std::string_view>::value) {
982 escape_and_append_with_quotes(value);
983 }
984 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
985 escape_and_append_with_quotes(value);
986 }
987 else {
988 append(value);
989 }
990}
991
992#if SIMDJSON_SUPPORTS_CONCEPTS
993template <constevalutil::fixed_string key, typename value_type>
994simdjson_inline void
995string_builder::append_key_value(value_type value) noexcept {
996 escape_and_append_with_quotes<key>();
997 append_colon();
998 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
999 append_null();
1000 }
1001 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
1002 escape_and_append_with_quotes(value);
1003 }
1004 else SIMDJSON_IF_CONSTEXPR(
1005 std::is_convertible<value_type, std::string_view>::value) {
1006 escape_and_append_with_quotes(value);
1007 }
1008 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
1009 escape_and_append_with_quotes(value);
1010 }
1011 else {
1012 append(value);
1013 }
1014}
1015#endif
1016
1017} // namespace builder
1018} // namespace SIMDJSON_IMPLEMENTATION
1019} // namespace simdjson
1020
1021#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept
Validate the UTF-8 string.
@ OUT_OF_CAPACITY
The capacity was exceeded, we cannot allocate enough memory.
Definition error.h:52
The result of a simdjson operation that could fail.
Definition error.h:280
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.
Definition error-inl.h:163