simdjson 4.4.0
Ridiculously Fast JSON
Loading...
Searching...
No Matches
json_string_builder-inl.h
1#include <array>
2#include <cstring>
3#include <type_traits>
4#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H
5
6#ifndef SIMDJSON_CONDITIONAL_INCLUDE
7#define SIMDJSON_GENERIC_STRING_BUILDER_INL_H
8#include "simdjson/generic/builder/json_string_builder.h"
9#endif // SIMDJSON_CONDITIONAL_INCLUDE
10
11/*
12 * Empirically, we have found that an inlined optimization is important for
13 * performance. The following macros are not ideal. We should find a better
14 * way to inline the code.
15 */
16
17#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
18 (defined(_M_AMD64) || defined(_M_X64) || \
19 (defined(_M_IX86_FP) && _M_IX86_FP == 2))
20#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2
21#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1
22#endif
23#endif
24
25#if defined(__aarch64__) || defined(_M_ARM64)
26#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON
27#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1
28#endif
29#endif
30#if defined(__loongarch_sx)
31#ifndef SIMDJSON_EXPERIMENTAL_HAS_LSX
32#define SIMDJSON_EXPERIMENTAL_HAS_LSX 1
33#endif
34#endif
35#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000
36#ifndef SIMDJSON_EXPERIMENTAL_HAS_RVV
37#define SIMDJSON_EXPERIMENTAL_HAS_RVV 1
38#endif
39#endif
40#if defined(__PPC64__) || defined(_M_PPC64)
41#ifndef SIMDJSON_EXPERIMENTAL_HAS_PPC64
42#define SIMDJSON_EXPERIMENTAL_HAS_PPC64 1
43#endif
44#endif
45#if SIMDJSON_EXPERIMENTAL_HAS_NEON
46#include <arm_neon.h>
47#ifdef _MSC_VER
48#include <intrin.h>
49#endif
50#endif
51#if SIMDJSON_EXPERIMENTAL_HAS_SSE2
52#include <emmintrin.h>
53#ifdef _MSC_VER
54#include <intrin.h>
55#endif
56#endif
57#if SIMDJSON_EXPERIMENTAL_HAS_LSX
58#include <lsxintrin.h>
59#endif
60#if SIMDJSON_EXPERIMENTAL_HAS_RVV
61#include <riscv_vector.h>
62#endif
63#if SIMDJSON_EXPERIMENTAL_HAS_PPC64
64#include <altivec.h>
65#ifdef bool
66#undef bool
67#endif
68#ifdef vector
69#undef vector
70#endif
71#endif
72
73
74namespace simdjson {
75namespace SIMDJSON_IMPLEMENTATION {
76namespace builder {
77
78static SIMDJSON_CONSTEXPR_LAMBDA std::array<uint8_t, 256>
79 json_quotable_character = {
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
91
109SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool
110simple_needs_escaping(std::string_view v) {
111 for (char c : v) {
112 // a table lookup is faster than a series of comparisons
113 if (json_quotable_character[static_cast<uint8_t>(c)]) {
114 return true;
115 }
116 }
117 return false;
118}
119
120#if SIMDJSON_EXPERIMENTAL_HAS_NEON
121simdjson_inline bool fast_needs_escaping(std::string_view view) {
122 if (view.size() < 16) {
123 return simple_needs_escaping(view);
124 }
125 size_t i = 0;
126 uint8x16_t running = vdupq_n_u8(0);
127 uint8x16_t v34 = vdupq_n_u8(34);
128 uint8x16_t v92 = vdupq_n_u8(92);
129
130 for (; i + 15 < view.size(); i += 16) {
131 uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i);
132 running = vorrq_u8(running, vceqq_u8(word, v34));
133 running = vorrq_u8(running, vceqq_u8(word, v92));
134 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
135 }
136 if (i < view.size()) {
137 uint8x16_t word =
138 vld1q_u8((const uint8_t *)view.data() + view.length() - 16);
139 running = vorrq_u8(running, vceqq_u8(word, v34));
140 running = vorrq_u8(running, vceqq_u8(word, v92));
141 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
142 }
143 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
144}
145#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
146simdjson_inline bool fast_needs_escaping(std::string_view view) {
147 if (view.size() < 16) {
148 return simple_needs_escaping(view);
149 }
150 size_t i = 0;
151 __m128i running = _mm_setzero_si128();
152 for (; i + 15 < view.size(); i += 16) {
153
154 __m128i word =
155 _mm_loadu_si128(reinterpret_cast<const __m128i *>(view.data() + i));
156 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
157 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
158 running = _mm_or_si128(
159 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
160 _mm_setzero_si128()));
161 }
162 if (i < view.size()) {
163 __m128i word = _mm_loadu_si128(
164 reinterpret_cast<const __m128i *>(view.data() + view.length() - 16));
165 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
166 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
167 running = _mm_or_si128(
168 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
169 _mm_setzero_si128()));
170 }
171 return _mm_movemask_epi8(running) != 0;
172}
173#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
174simdjson_inline bool fast_needs_escaping(std::string_view view) {
175 if (view.size() < 16) {
176 return simple_needs_escaping(view);
177 }
178 size_t i = 0;
179 __vector unsigned char running = vec_splats((unsigned char)0);
180 __vector unsigned char v34 = vec_splats((unsigned char)34);
181 __vector unsigned char v92 = vec_splats((unsigned char)92);
182 __vector unsigned char v32 = vec_splats((unsigned char)32);
183
184 for (; i + 15 < view.size(); i += 16) {
185 __vector unsigned char word =
186 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(view.data() + i));
187 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
188 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
189 running = vec_or(running,
190 (__vector unsigned char)vec_cmplt(word, v32));
191 }
192 if (i < view.size()) {
193 __vector unsigned char word = vec_vsx_ld(
194 0, reinterpret_cast<const unsigned char *>(view.data() + view.length() - 16));
195 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
196 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
197 running = vec_or(running,
198 (__vector unsigned char)vec_cmplt(word, v32));
199 }
200 return !vec_all_eq(running, vec_splats((unsigned char)0));
201}
202#else
203simdjson_inline bool fast_needs_escaping(std::string_view view) {
204 return simple_needs_escaping(view);
205}
206#endif
207
208// Scalar fallback for finding next quotable character
209SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
210find_next_json_quotable_character_scalar(const std::string_view view,
211 size_t location) noexcept {
212 for (auto pos = view.begin() + location; pos != view.end(); ++pos) {
213 if (json_quotable_character[static_cast<uint8_t>(*pos)]) {
214 return pos - view.begin();
215 }
216 }
217 return size_t(view.size());
218}
219
220// SIMD-accelerated position finding that directly locates the first quotable
221// character, combining detection and position extraction in a single pass to
222// minimize redundant work.
223#if SIMDJSON_EXPERIMENTAL_HAS_NEON
224simdjson_inline size_t
225find_next_json_quotable_character(const std::string_view view,
226 size_t location) noexcept {
227 const size_t len = view.size();
228 const uint8_t *ptr =
229 reinterpret_cast<const uint8_t *>(view.data()) + location;
230 size_t remaining = len - location;
231
232 // SIMD constants for characters requiring escape
233 uint8x16_t v34 = vdupq_n_u8(34); // '"'
234 uint8x16_t v92 = vdupq_n_u8(92); // '\\'
235 uint8x16_t v32 = vdupq_n_u8(32); // control char threshold
236
237 while (remaining >= 16) {
238 uint8x16_t word = vld1q_u8(ptr);
239
240 // Check for quotable characters: '"', '\\', or control chars (< 32)
241 uint8x16_t needs_escape = vceqq_u8(word, v34);
242 needs_escape = vorrq_u8(needs_escape, vceqq_u8(word, v92));
243 needs_escape = vorrq_u8(needs_escape, vcltq_u8(word, v32));
244
245 const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(needs_escape), 4);
246 const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
247 if(mask != 0) {
248 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
249 auto trailing_zero = trailing_zeroes(mask);
250 return offset + (trailing_zero >> 2);
251 }
252 ptr += 16;
253 remaining -= 16;
254 }
255
256 // Scalar fallback for remaining bytes
257 size_t current = len - remaining;
258 return find_next_json_quotable_character_scalar(view, current);
259}
260#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
261simdjson_inline size_t
262find_next_json_quotable_character(const std::string_view view,
263 size_t location) noexcept {
264 const size_t len = view.size();
265 const uint8_t *ptr =
266 reinterpret_cast<const uint8_t *>(view.data()) + location;
267 size_t remaining = len - location;
268
269 // SIMD constants
270 __m128i v34 = _mm_set1_epi8(34); // '"'
271 __m128i v92 = _mm_set1_epi8(92); // '\\'
272 __m128i v31 = _mm_set1_epi8(31); // for control char detection
273
274 while (remaining >= 16) {
275 __m128i word = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
276
277 // Check for quotable characters
278 __m128i needs_escape = _mm_cmpeq_epi8(word, v34);
279 needs_escape = _mm_or_si128(needs_escape, _mm_cmpeq_epi8(word, v92));
280 needs_escape = _mm_or_si128(
281 needs_escape,
282 _mm_cmpeq_epi8(_mm_subs_epu8(word, v31), _mm_setzero_si128()));
283
284 int mask = _mm_movemask_epi8(needs_escape);
285 if (mask != 0) {
286 // Found quotable character - use trailing zero count to find position
287 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
288 return offset + trailing_zeroes(mask);
289 }
290 ptr += 16;
291 remaining -= 16;
292 }
293
294 // Scalar fallback for remaining bytes
295 size_t current = len - remaining;
296 return find_next_json_quotable_character_scalar(view, current);
297}
298#elif SIMDJSON_EXPERIMENTAL_HAS_LSX
299simdjson_inline size_t
300find_next_json_quotable_character(const std::string_view view,
301 size_t location) noexcept {
302 const size_t len = view.size();
303 const uint8_t *ptr =
304 reinterpret_cast<const uint8_t *>(view.data()) + location;
305 size_t remaining = len - location;
306
307 //SIMD constants for characters requiring escape
308 __m128i v34 = __lsx_vreplgr2vr_b(34); // '"'
309 __m128i v92 = __lsx_vreplgr2vr_b(92); // '\\'
310 __m128i v32 = __lsx_vreplgr2vr_b(32); // control char threshold
311
312 while (remaining >= 16){
313 __m128i word = __lsx_vld(ptr, 0);
314
315 //Check for the quotable characters: '"', '\\', or control char (<32)
316 __m128i needs_escape = __lsx_vseq_b(word, v34);
317 needs_escape = __lsx_vor_v(needs_escape, __lsx_vseq_b(word, v92));
318 needs_escape = __lsx_vor_v(needs_escape, __lsx_vslt_bu(word, v32));
319
320 if (!__lsx_bz_v(needs_escape)){
321
322 //Found quotable character - extract exact byte position
323 uint64_t lo = __lsx_vpickve2gr_du(needs_escape,0);
324 uint64_t hi = __lsx_vpickve2gr_du(needs_escape,1);
325 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
326 if ( lo != 0) {
327 return offset + trailing_zeroes(lo) / 8;
328 } else {
329 return offset + 8 + trailing_zeroes(hi) / 8;
330 }
331 }
332 ptr += 16;
333 remaining -= 16;
334 }
335 size_t current = len - remaining;
336 return find_next_json_quotable_character_scalar(view, current);
337}
338#elif SIMDJSON_EXPERIMENTAL_HAS_RVV
339simdjson_inline size_t
340find_next_json_quotable_character(const std::string_view view,
341 size_t location) noexcept {
342 const size_t len = view.size();
343 const uint8_t *ptr =
344 reinterpret_cast<const uint8_t *>(view.data()) + location;
345 size_t remaining = len - location;
346
347 while (remaining > 0) {
348 size_t vl = __riscv_vsetvl_e8m1(remaining);
349 vuint8m1_t word = __riscv_vle8_v_u8m1(ptr, vl);
350
351 // Check for quotable characters: '"', '\\', or control chars (< 32)
352 vbool8_t needs_escape = __riscv_vmseq(word, (uint8_t)34, vl);
353 needs_escape = __riscv_vmor(needs_escape,
354 __riscv_vmseq(word, (uint8_t)92, vl), vl);
355 needs_escape = __riscv_vmor(needs_escape,
356 __riscv_vmsltu(word, (uint8_t)32, vl), vl);
357
358 long first = __riscv_vfirst(needs_escape, vl);
359 if (first >= 0) {
360 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
361 return offset + first;
362 }
363 ptr += vl;
364 remaining -= vl;
365 }
366
367 return len;
368}
369#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
370simdjson_inline size_t
371find_next_json_quotable_character(const std::string_view view,
372 size_t location) noexcept {
373 const size_t len = view.size();
374 const uint8_t *ptr =
375 reinterpret_cast<const uint8_t *>(view.data()) + location;
376 size_t remaining = len - location;
377
378 // SIMD constants for characters requiring escape
379 __vector unsigned char v34 = vec_splats((unsigned char)34); // '"'
380 __vector unsigned char v92 = vec_splats((unsigned char)92); // '\\'
381 __vector unsigned char v32 = vec_splats((unsigned char)32); // control char threshold
382
383 // Bitmask for vec_vbpermq to extract one bit per byte
384 const __vector unsigned char perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50,
385 0x48, 0x40, 0x38, 0x30, 0x28, 0x20,
386 0x18, 0x10, 0x08, 0x00};
387
388 while (remaining >= 16) {
389 __vector unsigned char word =
390 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(ptr));
391
392 // Check for quotable characters: '"', '\\', or control chars (< 32)
393 __vector unsigned char needs_escape =
394 (__vector unsigned char)vec_cmpeq(word, v34);
395 needs_escape = vec_or(needs_escape,
396 (__vector unsigned char)vec_cmpeq(word, v92));
397 needs_escape = vec_or(needs_escape,
398 (__vector unsigned char)vec_cmplt(word, v32));
399
400 __vector unsigned long long result =
401 (__vector unsigned long long)vec_vbpermq(needs_escape, perm_mask);
402#ifdef __LITTLE_ENDIAN__
403 unsigned int mask = static_cast<unsigned int>(result[1]);
404#else
405 unsigned int mask = static_cast<unsigned int>(result[0]);
406#endif
407 if (mask != 0) {
408 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
409 return offset + __builtin_ctz(mask);
410 }
411 ptr += 16;
412 remaining -= 16;
413 }
414
415 // Scalar fallback for remaining bytes
416 size_t current = len - remaining;
417 return find_next_json_quotable_character_scalar(view, current);
418}
419#else
420SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
421find_next_json_quotable_character(const std::string_view view,
422 size_t location) noexcept {
423 return find_next_json_quotable_character_scalar(view, location);
424}
425#endif
426
427SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = {
428 "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006",
429 "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r",
430 "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014",
431 "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b",
432 "\\u001c", "\\u001d", "\\u001e", "\\u001f"};
433
434// All Unicode characters may be placed within the quotation marks, except for
435// the characters that MUST be escaped: quotation mark, reverse solidus, and the
436// control characters (U+0000 through U+001F). There are two-character sequence
437// escape representations of some popular characters:
438// \", \\, \b, \f, \n, \r, \t.
439SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline void escape_json_char(char c, char *&out) {
440 if (c == '"') {
441 memcpy(out, "\\\"", 2);
442 out += 2;
443 } else if (c == '\\') {
444 memcpy(out, "\\\\", 2);
445 out += 2;
446 } else {
447 std::string_view v = control_chars[uint8_t(c)];
448 memcpy(out, v.data(), v.size());
449 out += v.size();
450 }
451}
452
453// Writes the escaped version of input to out, returning the number of bytes
454// written. Uses SIMD position finding to locate quotable characters efficiently.
455inline size_t write_string_escaped(const std::string_view input, char *out) {
456 size_t mysize = input.size();
457
458 // Use SIMD position finder directly - it returns mysize if no escape needed
459 size_t location = find_next_json_quotable_character(input, 0);
460 if (location == mysize) {
461 // Fast path: no escaping needed
462 memcpy(out, input.data(), input.size());
463 return input.size();
464 }
465
466 const char *const initout = out;
467 memcpy(out, input.data(), location);
468 out += location;
469 escape_json_char(input[location], out);
470 location += 1;
471 while (location < mysize) {
472 size_t newlocation = find_next_json_quotable_character(input, location);
473 memcpy(out, input.data() + location, newlocation - location);
474 out += newlocation - location;
475 location = newlocation;
476 if (location == mysize) {
477 break;
478 }
479 escape_json_char(input[location], out);
480 location += 1;
481 }
482 return out - initout;
483}
484
485simdjson_inline string_builder::string_builder(size_t initial_capacity)
486 : buffer(new(std::nothrow) char[initial_capacity]), position(0),
487 capacity(buffer.get() != nullptr ? initial_capacity : 0),
488 is_valid(buffer.get() != nullptr) {}
489
490simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) {
491 // We use the convention that when is_valid is false, then the capacity and
492 // the position are 0.
493 // Most of the time, this function will return true.
494 if (simdjson_likely(upcoming_bytes <= capacity - position)) {
495 return true;
496 }
497 // check for overflow, most of the time there is no overflow
498 if (simdjson_unlikely(position + upcoming_bytes < position)) {
499 return false;
500 }
501 // We will rarely get here.
502 grow_buffer((std::max)(capacity * 2, position + upcoming_bytes));
503 // If the buffer allocation failed, we set is_valid to false.
504 return is_valid;
505}
506
507simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) {
508 if (!is_valid) {
509 return;
510 }
511 std::unique_ptr<char[]> new_buffer(new (std::nothrow) char[desired_capacity]);
512 if (new_buffer.get() == nullptr) {
513 set_valid(false);
514 return;
515 }
516 std::memcpy(new_buffer.get(), buffer.get(), position);
517 buffer.swap(new_buffer);
518 capacity = desired_capacity;
519}
520
521simdjson_inline void string_builder::set_valid(bool valid) noexcept {
522 if (!valid) {
523 is_valid = false;
524 capacity = 0;
525 position = 0;
526 buffer.reset();
527 } else {
528 is_valid = true;
529 }
530}
531
532simdjson_inline size_t string_builder::size() const noexcept {
533 return position;
534}
535
536simdjson_inline void string_builder::append(char c) noexcept {
537 if (capacity_check(1)) {
538 buffer.get()[position++] = c;
539 }
540}
541
542simdjson_inline void string_builder::append_null() noexcept {
543 constexpr char null_literal[] = "null";
544 constexpr size_t null_len = sizeof(null_literal) - 1;
545 if (capacity_check(null_len)) {
546 std::memcpy(buffer.get() + position, null_literal, null_len);
547 position += null_len;
548 }
549}
550
551simdjson_inline void string_builder::clear() noexcept {
552 position = 0;
553 // if it was invalid, we should try to repair it
554 if (!is_valid) {
555 capacity = 0;
556 buffer.reset();
557 is_valid = true;
558 }
559}
560
561namespace internal {
562
563template <typename number_type, typename = typename std::enable_if<
564 std::is_unsigned<number_type>::value>::type>
565simdjson_really_inline int int_log2(number_type x) {
566 return 63 - leading_zeroes(uint64_t(x) | 1);
567}
568
569simdjson_really_inline int fast_digit_count_32(uint32_t x) {
570 static uint64_t table[] = {
571 4294967296, 8589934582, 8589934582, 8589934582, 12884901788,
572 12884901788, 12884901788, 17179868184, 17179868184, 17179868184,
573 21474826480, 21474826480, 21474826480, 21474826480, 25769703776,
574 25769703776, 25769703776, 30063771072, 30063771072, 30063771072,
575 34349738368, 34349738368, 34349738368, 34349738368, 38554705664,
576 38554705664, 38554705664, 41949672960, 41949672960, 41949672960,
577 42949672960, 42949672960};
578 return uint32_t((x + table[int_log2(x)]) >> 32);
579}
580
581simdjson_really_inline int fast_digit_count_64(uint64_t x) {
582 static uint64_t table[] = {9,
583 99,
584 999,
585 9999,
586 99999,
587 999999,
588 9999999,
589 99999999,
590 999999999,
591 9999999999,
592 99999999999,
593 999999999999,
594 9999999999999,
595 99999999999999,
596 999999999999999ULL,
597 9999999999999999ULL,
598 99999999999999999ULL,
599 999999999999999999ULL,
600 9999999999999999999ULL};
601 int y = (19 * int_log2(x) >> 6);
602 y += x > table[y];
603 return y + 1;
604}
605
606template <typename number_type, typename = typename std::enable_if<
607 std::is_unsigned<number_type>::value>::type>
608simdjson_really_inline size_t digit_count(number_type v) noexcept {
609 static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 ||
610 sizeof(number_type) == 2 || sizeof(number_type) == 1,
611 "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers");
612 SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) {
613 return fast_digit_count_32(static_cast<uint32_t>(v));
614 }
615 else {
616 return fast_digit_count_64(static_cast<uint64_t>(v));
617 }
618}
// The two-character decimal representations "00" through "99", stored back
// to back as ASCII codes: the digits of n are at offsets 2*n and 2*n + 1.
// One row per tens digit.
static const char decimal_table[200] = {
    /* 00-09 */ 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34,
                0x30, 0x35, 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39,
    /* 10-19 */ 0x31, 0x30, 0x31, 0x31, 0x31, 0x32, 0x31, 0x33, 0x31, 0x34,
                0x31, 0x35, 0x31, 0x36, 0x31, 0x37, 0x31, 0x38, 0x31, 0x39,
    /* 20-29 */ 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, 0x32, 0x34,
                0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39,
    /* 30-39 */ 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34,
                0x33, 0x35, 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39,
    /* 40-49 */ 0x34, 0x30, 0x34, 0x31, 0x34, 0x32, 0x34, 0x33, 0x34, 0x34,
                0x34, 0x35, 0x34, 0x36, 0x34, 0x37, 0x34, 0x38, 0x34, 0x39,
    /* 50-59 */ 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, 0x35, 0x34,
                0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39,
    /* 60-69 */ 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34,
                0x36, 0x35, 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39,
    /* 70-79 */ 0x37, 0x30, 0x37, 0x31, 0x37, 0x32, 0x37, 0x33, 0x37, 0x34,
                0x37, 0x35, 0x37, 0x36, 0x37, 0x37, 0x37, 0x38, 0x37, 0x39,
    /* 80-89 */ 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, 0x38, 0x34,
                0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39,
    /* 90-99 */ 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34,
                0x39, 0x35, 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39,
};
638} // namespace internal
639
640template <typename number_type, typename>
641simdjson_inline void string_builder::append(number_type v) noexcept {
642 static_assert(std::is_same<number_type, bool>::value ||
643 std::is_integral<number_type>::value ||
644 std::is_floating_point<number_type>::value,
645 "Unsupported number type");
646 // If C++17 is available, we can 'if constexpr' here.
647 SIMDJSON_IF_CONSTEXPR(std::is_same<number_type, bool>::value) {
648 if (v) {
649 constexpr char true_literal[] = "true";
650 constexpr size_t true_len = sizeof(true_literal) - 1;
651 if (capacity_check(true_len)) {
652 std::memcpy(buffer.get() + position, true_literal, true_len);
653 position += true_len;
654 }
655 } else {
656 constexpr char false_literal[] = "false";
657 constexpr size_t false_len = sizeof(false_literal) - 1;
658 if (capacity_check(false_len)) {
659 std::memcpy(buffer.get() + position, false_literal, false_len);
660 position += false_len;
661 }
662 }
663 }
664 else SIMDJSON_IF_CONSTEXPR(std::is_unsigned<number_type>::value) {
665 // Process 4 digits at a time instead of 2, reducing store operations
666 // and divisions by approximately half for large numbers.
667 constexpr size_t max_number_size = 20;
668 if (capacity_check(max_number_size)) {
669 using unsigned_type = typename std::make_unsigned<number_type>::type;
670 unsigned_type pv = static_cast<unsigned_type>(v);
671 size_t dc = internal::digit_count(pv);
672 char *write_pointer = buffer.get() + position + dc - 1;
673
674 // Process 4 digits per iteration for large numbers
675 while (pv >= 10000) {
676 unsigned_type q = pv / 10000;
677 unsigned_type r = pv % 10000;
678 unsigned_type r_hi = r / 100; // High 2 digits of remainder
679 unsigned_type r_lo = r % 100; // Low 2 digits of remainder
680 // Write low 2 digits first (rightmost), then high 2 digits
681 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
682 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
683 write_pointer -= 4;
684 pv = q;
685 }
686
687 // Handle remaining 1-4 digits with original 2-digit loop
688 while (pv >= 100) {
689 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
690 write_pointer -= 2;
691 pv /= 100;
692 }
693 if (pv >= 10) {
694 *write_pointer-- = char('0' + (pv % 10));
695 pv /= 10;
696 }
697 *write_pointer = char('0' + pv);
698 position += dc;
699 }
700 }
701 else SIMDJSON_IF_CONSTEXPR(std::is_integral<number_type>::value) {
702 // Same 4-digit batching as unsigned path for signed integers
703 constexpr size_t max_number_size = 20;
704 if (capacity_check(max_number_size)) {
705 using unsigned_type = typename std::make_unsigned<number_type>::type;
706 bool negative = v < 0;
707 unsigned_type pv = static_cast<unsigned_type>(v);
708 if (negative) {
709 pv = 0 - pv; // the 0 is for Microsoft
710 }
711 size_t dc = internal::digit_count(pv);
712 // by always writing the minus sign, we avoid the branch.
713 buffer.get()[position] = '-';
714 position += negative ? 1 : 0;
715 char *write_pointer = buffer.get() + position + dc - 1;
716
717 // Process 4 digits per iteration for large numbers
718 while (pv >= 10000) {
719 unsigned_type q = pv / 10000;
720 unsigned_type r = pv % 10000;
721 unsigned_type r_hi = r / 100;
722 unsigned_type r_lo = r % 100;
723 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
724 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
725 write_pointer -= 4;
726 pv = q;
727 }
728
729 // Handle remaining 1-4 digits
730 while (pv >= 100) {
731 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
732 write_pointer -= 2;
733 pv /= 100;
734 }
735 if (pv >= 10) {
736 *write_pointer-- = char('0' + (pv % 10));
737 pv /= 10;
738 }
739 *write_pointer = char('0' + pv);
740 position += dc;
741 }
742 }
743 else SIMDJSON_IF_CONSTEXPR(std::is_floating_point<number_type>::value) {
744 constexpr size_t max_number_size = 24;
745 if (capacity_check(max_number_size)) {
746 // We could specialize for float.
747 char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr,
748 double(v));
749 position = end - buffer.get();
750 }
751 }
752}
753
754simdjson_inline void
755string_builder::escape_and_append(std::string_view input) noexcept {
756 // escaping might turn a control character into \x00xx so 6 characters.
757 if (capacity_check(6 * input.size())) {
758 position += write_string_escaped(input, buffer.get() + position);
759 }
760}
761
762simdjson_inline void
763string_builder::escape_and_append_with_quotes(std::string_view input) noexcept {
764 // escaping might turn a control character into \x00xx so 6 characters.
765 if (capacity_check(2 + 6 * input.size())) {
766 buffer.get()[position++] = '"';
767 position += write_string_escaped(input, buffer.get() + position);
768 buffer.get()[position++] = '"';
769 }
770}
771
772simdjson_inline void
773string_builder::escape_and_append_with_quotes(char input) noexcept {
774 // escaping might turn a control character into \x00xx so 6 characters.
775 if (capacity_check(2 + 6 * 1)) {
776 buffer.get()[position++] = '"';
777 std::string_view cinput(&input, 1);
778 position += write_string_escaped(cinput, buffer.get() + position);
779 buffer.get()[position++] = '"';
780 }
781}
782
783simdjson_inline void
784string_builder::escape_and_append_with_quotes(const char *input) noexcept {
785 std::string_view cinput(input);
786 escape_and_append_with_quotes(cinput);
787}
788#if SIMDJSON_SUPPORTS_CONCEPTS
789template <constevalutil::fixed_string key>
790simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept {
791 escape_and_append_with_quotes(constevalutil::string_constant<key>::value);
792}
793#endif
794
795simdjson_inline void string_builder::append_raw(const char *c) noexcept {
796 size_t len = std::strlen(c);
797 append_raw(c, len);
798}
799
800simdjson_inline void
801string_builder::append_raw(std::string_view input) noexcept {
802 if (capacity_check(input.size())) {
803 std::memcpy(buffer.get() + position, input.data(), input.size());
804 position += input.size();
805 }
806}
807
808simdjson_inline void string_builder::append_raw(const char *str,
809 size_t len) noexcept {
810 if (capacity_check(len)) {
811 std::memcpy(buffer.get() + position, str, len);
812 position += len;
813 }
814}
815#if SIMDJSON_SUPPORTS_CONCEPTS
816// Support for optional types (std::optional, etc.)
817template <concepts::optional_type T>
818 requires(!require_custom_serialization<T>)
819simdjson_inline void string_builder::append(const T &opt) {
820 if (opt) {
821 append(*opt);
822 } else {
823 append_null();
824 }
825}
826
827template <typename T>
828 requires(require_custom_serialization<T>)
829simdjson_inline void string_builder::append(T &&val) {
830 serialize(*this, std::forward<T>(val));
831}
832
833template <typename T>
834 requires(std::is_convertible<T, std::string_view>::value ||
835 std::is_same<T, const char *>::value)
836simdjson_inline void string_builder::append(const T &value) {
837 escape_and_append_with_quotes(value);
838}
839#endif
840
841#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS
842// Support for range-based appending (std::ranges::view, etc.)
843template <std::ranges::range R>
844 requires(!std::is_convertible<R, std::string_view>::value && !require_custom_serialization<R>)
845simdjson_inline void string_builder::append(const R &range) noexcept {
846 auto it = std::ranges::begin(range);
847 auto end = std::ranges::end(range);
848 if constexpr (concepts::is_pair<std::ranges::range_value_t<R>>) {
849 start_object();
850
851 if (it == end) {
852 end_object();
853 return; // Handle empty range
854 }
855 // Append first item without leading comma
856 append_key_value(it->first, it->second);
857 ++it;
858
859 // Append remaining items with preceding commas
860 for (; it != end; ++it) {
861 append_comma();
862 append_key_value(it->first, it->second);
863 }
864 end_object();
865 } else {
866 start_array();
867 if (it == end) {
868 end_array();
869 return; // Handle empty range
870 }
871
872 // Append first item without leading comma
873 append(*it);
874 ++it;
875
876 // Append remaining items with preceding commas
877 for (; it != end; ++it) {
878 append_comma();
879 append(*it);
880 }
881 end_array();
882 }
883}
884
885#endif
886
887#if SIMDJSON_EXCEPTIONS
888simdjson_inline string_builder::operator std::string() const noexcept(false) {
889 return std::string(operator std::string_view());
890}
891
892simdjson_inline string_builder::operator std::string_view() const
893 noexcept(false) simdjson_lifetime_bound {
894 return view();
895}
896#endif
897
899string_builder::view() const noexcept {
900 if (!is_valid) {
902 }
903 return std::string_view(buffer.get(), position);
904}
905
906simdjson_inline simdjson_result<const char *> string_builder::c_str() noexcept {
907 if (capacity_check(1)) {
908 buffer.get()[position] = '\0';
909 return buffer.get();
910 }
912}
913
914simdjson_inline bool string_builder::validate_unicode() const noexcept {
915 return simdjson::validate_utf8(buffer.get(), position);
916}
917
918simdjson_inline void string_builder::start_object() noexcept {
919 if (capacity_check(1)) {
920 buffer.get()[position++] = '{';
921 }
922}
923
924simdjson_inline void string_builder::end_object() noexcept {
925 if (capacity_check(1)) {
926 buffer.get()[position++] = '}';
927 }
928}
929
930simdjson_inline void string_builder::start_array() noexcept {
931 if (capacity_check(1)) {
932 buffer.get()[position++] = '[';
933 }
934}
935
936simdjson_inline void string_builder::end_array() noexcept {
937 if (capacity_check(1)) {
938 buffer.get()[position++] = ']';
939 }
940}
941
942simdjson_inline void string_builder::append_comma() noexcept {
943 if (capacity_check(1)) {
944 buffer.get()[position++] = ',';
945 }
946}
947
948simdjson_inline void string_builder::append_colon() noexcept {
949 if (capacity_check(1)) {
950 buffer.get()[position++] = ':';
951 }
952}
953
954template <typename key_type, typename value_type>
955simdjson_inline void
956string_builder::append_key_value(key_type key, value_type value) noexcept {
957 static_assert(std::is_same<key_type, const char *>::value ||
958 std::is_convertible<key_type, std::string_view>::value,
959 "Unsupported key type");
960 escape_and_append_with_quotes(key);
961 append_colon();
962 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
963 append_null();
964 }
965 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
966 escape_and_append_with_quotes(value);
967 }
968 else SIMDJSON_IF_CONSTEXPR(
969 std::is_convertible<value_type, std::string_view>::value) {
970 escape_and_append_with_quotes(value);
971 }
972 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
973 escape_and_append_with_quotes(value);
974 }
975 else {
976 append(value);
977 }
978}
979
980#if SIMDJSON_SUPPORTS_CONCEPTS
981template <constevalutil::fixed_string key, typename value_type>
982simdjson_inline void
983string_builder::append_key_value(value_type value) noexcept {
984 escape_and_append_with_quotes<key>();
985 append_colon();
986 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
987 append_null();
988 }
989 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
990 escape_and_append_with_quotes(value);
991 }
992 else SIMDJSON_IF_CONSTEXPR(
993 std::is_convertible<value_type, std::string_view>::value) {
994 escape_and_append_with_quotes(value);
995 }
996 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
997 escape_and_append_with_quotes(value);
998 }
999 else {
1000 append(value);
1001 }
1002}
1003#endif
1004
1005} // namespace builder
1006} // namespace SIMDJSON_IMPLEMENTATION
1007} // namespace simdjson
1008
1009#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept
Validate the UTF-8 string.
@ OUT_OF_CAPACITY
The capacity was exceeded, we cannot allocate enough memory.
Definition error.h:52
The result of a simdjson operation that could fail.
Definition error.h:280
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.
Definition error-inl.h:163