simdjson 4.6.3
Ridiculously Fast JSON
Loading...
Searching...
No Matches
json_string_builder-inl.h
1#include <array>
2#include <cstring>
3#include <type_traits>
4#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H
5
6#ifndef SIMDJSON_CONDITIONAL_INCLUDE
7#define SIMDJSON_GENERIC_STRING_BUILDER_INL_H
8#include "simdjson/generic/builder/json_string_builder.h"
9#endif // SIMDJSON_CONDITIONAL_INCLUDE
10
11/*
12 * Empirically, we have found that an inlined optimization is important for
13 * performance. The following macros are not ideal. We should find a better
14 * way to inline the code.
15 */
16
17#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
18 (defined(_M_AMD64) || defined(_M_X64) || \
19 (defined(_M_IX86_FP) && _M_IX86_FP == 2))
20#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2
21#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1
22#endif
23#endif
24
25#if defined(__aarch64__) || defined(_M_ARM64)
26#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON
27#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1
28#endif
29#endif
30#if defined(__loongarch_sx)
31#ifndef SIMDJSON_EXPERIMENTAL_HAS_LSX
32#define SIMDJSON_EXPERIMENTAL_HAS_LSX 1
33#endif
34#endif
35#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic >= 11000 && \
36 defined(__riscv_vector)
37#ifndef SIMDJSON_EXPERIMENTAL_HAS_RVV
38#define SIMDJSON_EXPERIMENTAL_HAS_RVV 1
39#endif
40#endif
41#if (defined(__PPC64__) || defined(_M_PPC64)) && defined(__ALTIVEC__)
42#ifndef SIMDJSON_EXPERIMENTAL_HAS_PPC64
43#define SIMDJSON_EXPERIMENTAL_HAS_PPC64 1
44#endif
45#endif
46#if SIMDJSON_EXPERIMENTAL_HAS_NEON
47#include <arm_neon.h>
48#ifdef _MSC_VER
49#include <intrin.h>
50#endif
51#endif
52#if SIMDJSON_EXPERIMENTAL_HAS_SSE2
53#include <emmintrin.h>
54#ifdef _MSC_VER
55#include <intrin.h>
56#endif
57#endif
58#if SIMDJSON_EXPERIMENTAL_HAS_LSX
59#include <lsxintrin.h>
60#endif
61#if SIMDJSON_EXPERIMENTAL_HAS_RVV
62#include <riscv_vector.h>
63#endif
64#if SIMDJSON_EXPERIMENTAL_HAS_PPC64
65#include <altivec.h>
66#ifdef bool
67#undef bool
68#endif
69#ifdef vector
70#undef vector
71#endif
72#endif
73
74
75namespace simdjson {
76namespace SIMDJSON_IMPLEMENTATION {
77namespace builder {
78
// Byte-indexed lookup table: an entry is 1 when that byte value must be
// escaped inside a JSON string, i.e. the control characters 0x00-0x1F, the
// quotation mark (34) and the reverse solidus (92). Every other entry is 0.
79static SIMDJSON_CONSTEXPR_LAMBDA std::array<uint8_t, 256>
80 json_quotable_character = {
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
92
110SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool
111simple_needs_escaping(std::string_view v) {
112 for (char c : v) {
113 // a table lookup is faster than a series of comparisons
114 if (json_quotable_character[static_cast<uint8_t>(c)]) {
115 return true;
116 }
117 }
118 return false;
119}
120
121#if SIMDJSON_EXPERIMENTAL_HAS_NEON
122simdjson_inline bool fast_needs_escaping(std::string_view view) {
123 if (view.size() < 16) {
124 return simple_needs_escaping(view);
125 }
126 size_t i = 0;
127 uint8x16_t running = vdupq_n_u8(0);
128 uint8x16_t v34 = vdupq_n_u8(34);
129 uint8x16_t v92 = vdupq_n_u8(92);
130
131 for (; i + 15 < view.size(); i += 16) {
132 uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i);
133 running = vorrq_u8(running, vceqq_u8(word, v34));
134 running = vorrq_u8(running, vceqq_u8(word, v92));
135 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
136 }
137 if (i < view.size()) {
138 uint8x16_t word =
139 vld1q_u8((const uint8_t *)view.data() + view.length() - 16);
140 running = vorrq_u8(running, vceqq_u8(word, v34));
141 running = vorrq_u8(running, vceqq_u8(word, v92));
142 running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32)));
143 }
144 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
145}
146#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
147simdjson_inline bool fast_needs_escaping(std::string_view view) {
148 if (view.size() < 16) {
149 return simple_needs_escaping(view);
150 }
151 size_t i = 0;
152 __m128i running = _mm_setzero_si128();
153 for (; i + 15 < view.size(); i += 16) {
154
155 __m128i word =
156 _mm_loadu_si128(reinterpret_cast<const __m128i *>(view.data() + i));
157 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
158 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
159 running = _mm_or_si128(
160 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
161 _mm_setzero_si128()));
162 }
163 if (i < view.size()) {
164 __m128i word = _mm_loadu_si128(
165 reinterpret_cast<const __m128i *>(view.data() + view.length() - 16));
166 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34)));
167 running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92)));
168 running = _mm_or_si128(
169 running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)),
170 _mm_setzero_si128()));
171 }
172 return _mm_movemask_epi8(running) != 0;
173}
174#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
175simdjson_inline bool fast_needs_escaping(std::string_view view) {
176 if (view.size() < 16) {
177 return simple_needs_escaping(view);
178 }
179 size_t i = 0;
180 __vector unsigned char running = vec_splats((unsigned char)0);
181 __vector unsigned char v34 = vec_splats((unsigned char)34);
182 __vector unsigned char v92 = vec_splats((unsigned char)92);
183 __vector unsigned char v32 = vec_splats((unsigned char)32);
184
185 for (; i + 15 < view.size(); i += 16) {
186 __vector unsigned char word =
187 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(view.data() + i));
188 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
189 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
190 running = vec_or(running,
191 (__vector unsigned char)vec_cmplt(word, v32));
192 }
193 if (i < view.size()) {
194 __vector unsigned char word = vec_vsx_ld(
195 0, reinterpret_cast<const unsigned char *>(view.data() + view.length() - 16));
196 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v34));
197 running = vec_or(running, (__vector unsigned char)vec_cmpeq(word, v92));
198 running = vec_or(running,
199 (__vector unsigned char)vec_cmplt(word, v32));
200 }
201 return !vec_all_eq(running, vec_splats((unsigned char)0));
202}
203#else
// Portable fallback, compiled when none of the SIMD branches of this #if
// chain is selected: defers to the table-driven scalar scan.
204simdjson_inline bool fast_needs_escaping(std::string_view view) {
205 return simple_needs_escaping(view);
206}
207#endif
208
209// Scalar fallback for finding next quotable character
210SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
211find_next_json_quotable_character_scalar(const std::string_view view,
212 size_t location) noexcept {
213 for (auto pos = view.begin() + location; pos != view.end(); ++pos) {
214 if (json_quotable_character[static_cast<uint8_t>(*pos)]) {
215 return pos - view.begin();
216 }
217 }
218 return size_t(view.size());
219}
220
221// SIMD-accelerated position finding that directly locates the first quotable
222// character, combining detection and position extraction in a single pass to
223// minimize redundant work.
224#if SIMDJSON_EXPERIMENTAL_HAS_NEON
225simdjson_inline size_t
226find_next_json_quotable_character(const std::string_view view,
227 size_t location) noexcept {
228 const size_t len = view.size();
229 const uint8_t *ptr =
230 reinterpret_cast<const uint8_t *>(view.data()) + location;
231 size_t remaining = len - location;
232
233 // SIMD constants for characters requiring escape
234 uint8x16_t v34 = vdupq_n_u8(34); // '"'
235 uint8x16_t v92 = vdupq_n_u8(92); // '\\'
236 uint8x16_t v32 = vdupq_n_u8(32); // control char threshold
237
238 while (remaining >= 16) {
239 uint8x16_t word = vld1q_u8(ptr);
240
241 // Check for quotable characters: '"', '\\', or control chars (< 32)
242 uint8x16_t needs_escape = vceqq_u8(word, v34);
243 needs_escape = vorrq_u8(needs_escape, vceqq_u8(word, v92));
244 needs_escape = vorrq_u8(needs_escape, vcltq_u8(word, v32));
245
246 const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(needs_escape), 4);
247 const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
248 if(mask != 0) {
249 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
250 auto trailing_zero = trailing_zeroes(mask);
251 return offset + (trailing_zero >> 2);
252 }
253 ptr += 16;
254 remaining -= 16;
255 }
256
257 // Scalar fallback for remaining bytes
258 size_t current = len - remaining;
259 return find_next_json_quotable_character_scalar(view, current);
260}
261#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2
262simdjson_inline size_t
263find_next_json_quotable_character(const std::string_view view,
264 size_t location) noexcept {
265 const size_t len = view.size();
266 const uint8_t *ptr =
267 reinterpret_cast<const uint8_t *>(view.data()) + location;
268 size_t remaining = len - location;
269
270 // SIMD constants
271 __m128i v34 = _mm_set1_epi8(34); // '"'
272 __m128i v92 = _mm_set1_epi8(92); // '\\'
273 __m128i v31 = _mm_set1_epi8(31); // for control char detection
274
275 while (remaining >= 16) {
276 __m128i word = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
277
278 // Check for quotable characters
279 __m128i needs_escape = _mm_cmpeq_epi8(word, v34);
280 needs_escape = _mm_or_si128(needs_escape, _mm_cmpeq_epi8(word, v92));
281 needs_escape = _mm_or_si128(
282 needs_escape,
283 _mm_cmpeq_epi8(_mm_subs_epu8(word, v31), _mm_setzero_si128()));
284
285 int mask = _mm_movemask_epi8(needs_escape);
286 if (mask != 0) {
287 // Found quotable character - use trailing zero count to find position
288 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
289 return offset + trailing_zeroes(mask);
290 }
291 ptr += 16;
292 remaining -= 16;
293 }
294
295 // Scalar fallback for remaining bytes
296 size_t current = len - remaining;
297 return find_next_json_quotable_character_scalar(view, current);
298}
299#elif SIMDJSON_EXPERIMENTAL_HAS_LSX
300simdjson_inline size_t
301find_next_json_quotable_character(const std::string_view view,
302 size_t location) noexcept {
303 const size_t len = view.size();
304 const uint8_t *ptr =
305 reinterpret_cast<const uint8_t *>(view.data()) + location;
306 size_t remaining = len - location;
307
308 //SIMD constants for characters requiring escape
309 __m128i v34 = __lsx_vreplgr2vr_b(34); // '"'
310 __m128i v92 = __lsx_vreplgr2vr_b(92); // '\\'
311 __m128i v32 = __lsx_vreplgr2vr_b(32); // control char threshold
312
313 while (remaining >= 16){
314 __m128i word = __lsx_vld(ptr, 0);
315
316 //Check for the quotable characters: '"', '\\', or control char (<32)
317 __m128i needs_escape = __lsx_vseq_b(word, v34);
318 needs_escape = __lsx_vor_v(needs_escape, __lsx_vseq_b(word, v92));
319 needs_escape = __lsx_vor_v(needs_escape, __lsx_vslt_bu(word, v32));
320
321 if (!__lsx_bz_v(needs_escape)){
322
323 //Found quotable character - extract exact byte position
324 uint64_t lo = __lsx_vpickve2gr_du(needs_escape,0);
325 uint64_t hi = __lsx_vpickve2gr_du(needs_escape,1);
326 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
327 if ( lo != 0) {
328 return offset + trailing_zeroes(lo) / 8;
329 } else {
330 return offset + 8 + trailing_zeroes(hi) / 8;
331 }
332 }
333 ptr += 16;
334 remaining -= 16;
335 }
336 size_t current = len - remaining;
337 return find_next_json_quotable_character_scalar(view, current);
338}
339#elif SIMDJSON_EXPERIMENTAL_HAS_RVV
340simdjson_inline size_t
341find_next_json_quotable_character(const std::string_view view,
342 size_t location) noexcept {
343 const size_t len = view.size();
344 const uint8_t *ptr =
345 reinterpret_cast<const uint8_t *>(view.data()) + location;
346 size_t remaining = len - location;
347
348 while (remaining > 0) {
349 size_t vl = __riscv_vsetvl_e8m1(remaining);
350 vuint8m1_t word = __riscv_vle8_v_u8m1(ptr, vl);
351
352 // Check for quotable characters: '"', '\\', or control chars (< 32)
353 vbool8_t needs_escape = __riscv_vmseq(word, (uint8_t)34, vl);
354 needs_escape = __riscv_vmor(needs_escape,
355 __riscv_vmseq(word, (uint8_t)92, vl), vl);
356 needs_escape = __riscv_vmor(needs_escape,
357 __riscv_vmsltu(word, (uint8_t)32, vl), vl);
358
359 long first = __riscv_vfirst(needs_escape, vl);
360 if (first >= 0) {
361 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
362 return offset + first;
363 }
364 ptr += vl;
365 remaining -= vl;
366 }
367
368 return len;
369}
370#elif SIMDJSON_EXPERIMENTAL_HAS_PPC64
371simdjson_inline size_t
372find_next_json_quotable_character(const std::string_view view,
373 size_t location) noexcept {
374 const size_t len = view.size();
375 const uint8_t *ptr =
376 reinterpret_cast<const uint8_t *>(view.data()) + location;
377 size_t remaining = len - location;
378
379 // SIMD constants for characters requiring escape
380 __vector unsigned char v34 = vec_splats((unsigned char)34); // '"'
381 __vector unsigned char v92 = vec_splats((unsigned char)92); // '\\'
382 __vector unsigned char v32 = vec_splats((unsigned char)32); // control char threshold
383
384 // Bitmask for vec_vbpermq to extract one bit per byte
385 const __vector unsigned char perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50,
386 0x48, 0x40, 0x38, 0x30, 0x28, 0x20,
387 0x18, 0x10, 0x08, 0x00};
388
389 while (remaining >= 16) {
390 __vector unsigned char word =
391 vec_vsx_ld(0, reinterpret_cast<const unsigned char *>(ptr));
392
393 // Check for quotable characters: '"', '\\', or control chars (< 32)
394 __vector unsigned char needs_escape =
395 (__vector unsigned char)vec_cmpeq(word, v34);
396 needs_escape = vec_or(needs_escape,
397 (__vector unsigned char)vec_cmpeq(word, v92));
398 needs_escape = vec_or(needs_escape,
399 (__vector unsigned char)vec_cmplt(word, v32));
400
401 __vector unsigned long long result =
402 (__vector unsigned long long)vec_vbpermq(needs_escape, perm_mask);
403#ifdef __LITTLE_ENDIAN__
404 unsigned int mask = static_cast<unsigned int>(result[1]);
405#else
406 unsigned int mask = static_cast<unsigned int>(result[0]);
407#endif
408 if (mask != 0) {
409 size_t offset = ptr - reinterpret_cast<const uint8_t *>(view.data());
410 return offset + __builtin_ctz(mask);
411 }
412 ptr += 16;
413 remaining -= 16;
414 }
415
416 // Scalar fallback for remaining bytes
417 size_t current = len - remaining;
418 return find_next_json_quotable_character_scalar(view, current);
419}
420#else
// Portable fallback, compiled when none of the SIMD branches of this #if
// chain is selected: forwards to the scalar search unchanged.
421SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline size_t
422find_next_json_quotable_character(const std::string_view view,
423 size_t location) noexcept {
424 return find_next_json_quotable_character_scalar(view, location);
425}
426#endif
427
// Escape sequences for the 32 control characters, indexed by code point
// (0x00-0x1F). The two-character forms \b, \t, \n, \f and \r are used for
// 8, 9, 10, 12 and 13; every other entry is the six-character \u00XX form.
428SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = {
429 "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006",
430 "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r",
431 "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014",
432 "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b",
433 "\\u001c", "\\u001d", "\\u001e", "\\u001f"};
434
435// All Unicode characters may be placed within the quotation marks, except for
436// the characters that MUST be escaped: quotation mark, reverse solidus, and the
437// control characters (U+0000 through U+001F). There are two-character sequence
438// escape representations of some popular characters:
439// \", \\, \b, \f, \n, \r, \t.
440SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline void escape_json_char(char c, char *&out) {
441 if (c == '"') {
442 memcpy(out, "\\\"", 2);
443 out += 2;
444 } else if (c == '\\') {
445 memcpy(out, "\\\\", 2);
446 out += 2;
447 } else {
448 std::string_view v = control_chars[uint8_t(c)];
449 memcpy(out, v.data(), v.size());
450 out += v.size();
451 }
452}
453
454// Writes the escaped version of input to out, returning the number of bytes
455// written. Uses SIMD position finding to locate quotable characters efficiently.
456inline size_t write_string_escaped(const std::string_view input, char *out) {
457 size_t mysize = input.size();
458
459 // Use SIMD position finder directly - it returns mysize if no escape needed
460 size_t location = find_next_json_quotable_character(input, 0);
461 if (location == mysize) {
462 // Fast path: no escaping needed
463 memcpy(out, input.data(), input.size());
464 return input.size();
465 }
466
467 const char *const initout = out;
468 memcpy(out, input.data(), location);
469 out += location;
470 escape_json_char(input[location], out);
471 location += 1;
472 while (location < mysize) {
473 size_t newlocation = find_next_json_quotable_character(input, location);
474 memcpy(out, input.data() + location, newlocation - location);
475 out += newlocation - location;
476 location = newlocation;
477 if (location == mysize) {
478 break;
479 }
480 escape_json_char(input[location], out);
481 location += 1;
482 }
483 return out - initout;
484}
485
// Allocates the initial buffer with a non-throwing new. If the allocation
// fails, the builder starts out invalid with a capacity of zero.
486simdjson_inline string_builder::string_builder(size_t initial_capacity)
487 : buffer(new(std::nothrow) char[initial_capacity]), position(0),
488 capacity(buffer.get() != nullptr ? initial_capacity : 0),
489 is_valid(buffer.get() != nullptr) {}
490
491simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) {
492 // We use the convention that when is_valid is false, then the capacity and
493 // the position are 0.
494 // Most of the time, this function will return true.
495 if (simdjson_likely(upcoming_bytes <= capacity - position)) {
496 return true;
497 }
498 // check for overflow, most of the time there is no overflow
499 if (simdjson_unlikely(position + upcoming_bytes < position)) {
500 return false;
501 }
502 // We will rarely get here.
503 grow_buffer((std::max)(capacity * 2, position + upcoming_bytes));
504 // If the buffer allocation failed, we set is_valid to false.
505 return is_valid;
506}
507
508simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) {
509 if (!is_valid) {
510 return;
511 }
512 std::unique_ptr<char[]> new_buffer(new (std::nothrow) char[desired_capacity]);
513 if (new_buffer.get() == nullptr) {
514 set_valid(false);
515 return;
516 }
517 std::memcpy(new_buffer.get(), buffer.get(), position);
518 buffer.swap(new_buffer);
519 capacity = desired_capacity;
520}
521
522simdjson_inline void string_builder::set_valid(bool valid) noexcept {
523 if (!valid) {
524 is_valid = false;
525 capacity = 0;
526 position = 0;
527 buffer.reset();
528 } else {
529 is_valid = true;
530 }
531}
532
// Returns the current write position, i.e. the number of bytes written so far.
533simdjson_inline size_t string_builder::size() const noexcept {
534 return position;
535}
536
537simdjson_inline void string_builder::append(char c) noexcept {
538 if (capacity_check(1)) {
539 buffer.get()[position++] = c;
540 }
541}
542
543simdjson_inline void string_builder::append_null() noexcept {
544 constexpr char null_literal[] = "null";
545 constexpr size_t null_len = sizeof(null_literal) - 1;
546 if (capacity_check(null_len)) {
547 std::memcpy(buffer.get() + position, null_literal, null_len);
548 position += null_len;
549 }
550}
551
552simdjson_inline void string_builder::clear() noexcept {
553 position = 0;
554 // if it was invalid, we should try to repair it
555 if (!is_valid) {
556 capacity = 0;
557 buffer.reset();
558 is_valid = true;
559 }
560}
561
562namespace internal {
563
564template <typename number_type, typename = typename std::enable_if<
565 std::is_unsigned<number_type>::value>::type>
566simdjson_really_inline int int_log2(number_type x) {
567 return 63 - leading_zeroes(uint64_t(x) | 1);
568}
569
570simdjson_really_inline int fast_digit_count_32(uint32_t x) {
571 static uint64_t table[] = {
572 4294967296, 8589934582, 8589934582, 8589934582, 12884901788,
573 12884901788, 12884901788, 17179868184, 17179868184, 17179868184,
574 21474826480, 21474826480, 21474826480, 21474826480, 25769703776,
575 25769703776, 25769703776, 30063771072, 30063771072, 30063771072,
576 34349738368, 34349738368, 34349738368, 34349738368, 38554705664,
577 38554705664, 38554705664, 41949672960, 41949672960, 41949672960,
578 42949672960, 42949672960};
579 return uint32_t((x + table[int_log2(x)]) >> 32);
580}
581
582simdjson_really_inline int fast_digit_count_64(uint64_t x) {
583 static uint64_t table[] = {9,
584 99,
585 999,
586 9999,
587 99999,
588 999999,
589 9999999,
590 99999999,
591 999999999,
592 9999999999,
593 99999999999,
594 999999999999,
595 9999999999999,
596 99999999999999,
597 999999999999999ULL,
598 9999999999999999ULL,
599 99999999999999999ULL,
600 999999999999999999ULL,
601 9999999999999999999ULL};
602 int y = (19 * int_log2(x) >> 6);
603 y += x > table[y];
604 return y + 1;
605}
606
// Returns the number of decimal digits of the unsigned value v. Types of at
// most 4 bytes are widened to uint32_t and use fast_digit_count_32; 8-byte
// types use fast_digit_count_64.
607template <typename number_type, typename = typename std::enable_if<
608 std::is_unsigned<number_type>::value>::type>
609simdjson_really_inline size_t digit_count(number_type v) noexcept {
610 static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 ||
611 sizeof(number_type) == 2 || sizeof(number_type) == 1,
612 "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers");
613 SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) {
614 return fast_digit_count_32(static_cast<uint32_t>(v));
615 }
616 else {
617 return fast_digit_count_64(static_cast<uint64_t>(v));
618 }
619}
// The ASCII digit pairs "00", "01", ..., "99" stored back to back: the two
// characters for a value k in [0, 100) start at decimal_table[2 * k].
620static const char decimal_table[200] = {
621 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35,
622 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31,
623 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37,
624 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33,
625 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39,
626 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35,
627 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31,
628 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37,
629 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33,
630 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39,
631 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35,
632 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31,
633 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37,
634 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33,
635 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39,
636 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35,
637 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39,
638};
639} // namespace internal
640
641template <typename number_type, typename>
642simdjson_inline void string_builder::append(number_type v) noexcept {
643 static_assert(std::is_same<number_type, bool>::value ||
644 std::is_integral<number_type>::value ||
645 std::is_floating_point<number_type>::value,
646 "Unsupported number type");
647 // If C++17 is available, we can 'if constexpr' here.
648 SIMDJSON_IF_CONSTEXPR(std::is_same<number_type, bool>::value) {
649 if (v) {
650 constexpr char true_literal[] = "true";
651 constexpr size_t true_len = sizeof(true_literal) - 1;
652 if (capacity_check(true_len)) {
653 std::memcpy(buffer.get() + position, true_literal, true_len);
654 position += true_len;
655 }
656 } else {
657 constexpr char false_literal[] = "false";
658 constexpr size_t false_len = sizeof(false_literal) - 1;
659 if (capacity_check(false_len)) {
660 std::memcpy(buffer.get() + position, false_literal, false_len);
661 position += false_len;
662 }
663 }
664 }
665 else SIMDJSON_IF_CONSTEXPR(std::is_unsigned<number_type>::value) {
666 // Process 4 digits at a time instead of 2, reducing store operations
667 // and divisions by approximately half for large numbers.
668 constexpr size_t max_number_size = 20;
669 if (capacity_check(max_number_size)) {
670 using unsigned_type = typename std::make_unsigned<number_type>::type;
671 unsigned_type pv = static_cast<unsigned_type>(v);
672 size_t dc = internal::digit_count(pv);
673 char *write_pointer = buffer.get() + position + dc - 1;
674
675 // Process 4 digits per iteration for large numbers
676 while (pv >= 10000) {
677 unsigned_type q = pv / 10000;
678 unsigned_type r = pv % 10000;
679 unsigned_type r_hi = r / 100; // High 2 digits of remainder
680 unsigned_type r_lo = r % 100; // Low 2 digits of remainder
681 // Write low 2 digits first (rightmost), then high 2 digits
682 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
683 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
684 write_pointer -= 4;
685 pv = q;
686 }
687
688 // Handle remaining 1-4 digits with original 2-digit loop
689 while (pv >= 100) {
690 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
691 write_pointer -= 2;
692 pv /= 100;
693 }
694 if (pv >= 10) {
695 *write_pointer-- = char('0' + (pv % 10));
696 pv /= 10;
697 }
698 *write_pointer = char('0' + pv);
699 position += dc;
700 }
701 }
702 else SIMDJSON_IF_CONSTEXPR(std::is_integral<number_type>::value) {
703 // Same 4-digit batching as unsigned path for signed integers
704 constexpr size_t max_number_size = 20;
705 if (capacity_check(max_number_size)) {
706 using unsigned_type = typename std::make_unsigned<number_type>::type;
707 bool negative = v < 0;
708 unsigned_type pv = static_cast<unsigned_type>(v);
709 if (negative) {
710 pv = 0 - pv; // the 0 is for Microsoft
711 }
712 size_t dc = internal::digit_count(pv);
713 // by always writing the minus sign, we avoid the branch.
714 buffer.get()[position] = '-';
715 position += negative ? 1 : 0;
716 char *write_pointer = buffer.get() + position + dc - 1;
717
718 // Process 4 digits per iteration for large numbers
719 while (pv >= 10000) {
720 unsigned_type q = pv / 10000;
721 unsigned_type r = pv % 10000;
722 unsigned_type r_hi = r / 100;
723 unsigned_type r_lo = r % 100;
724 memcpy(write_pointer - 1, &internal::decimal_table[r_lo * 2], 2);
725 memcpy(write_pointer - 3, &internal::decimal_table[r_hi * 2], 2);
726 write_pointer -= 4;
727 pv = q;
728 }
729
730 // Handle remaining 1-4 digits
731 while (pv >= 100) {
732 memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2);
733 write_pointer -= 2;
734 pv /= 100;
735 }
736 if (pv >= 10) {
737 *write_pointer-- = char('0' + (pv % 10));
738 pv /= 10;
739 }
740 *write_pointer = char('0' + pv);
741 position += dc;
742 }
743 }
744 else SIMDJSON_IF_CONSTEXPR(std::is_floating_point<number_type>::value) {
745 constexpr size_t max_number_size = 24;
746 if (capacity_check(max_number_size)) {
747 // We could specialize for float.
748 char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr,
749 double(v));
750 position = end - buffer.get();
751 }
752 }
753}
754
755simdjson_inline void
756string_builder::escape_and_append(std::string_view input) noexcept {
757 // escaping might turn a control character into \x00xx so 6 characters.
758 if (capacity_check(6 * input.size())) {
759 position += write_string_escaped(input, buffer.get() + position);
760 }
761}
762
763simdjson_inline void
764string_builder::escape_and_append_with_quotes(std::string_view input) noexcept {
765 // escaping might turn a control character into \x00xx so 6 characters.
766 if (capacity_check(2 + 6 * input.size())) {
767 buffer.get()[position++] = '"';
768 position += write_string_escaped(input, buffer.get() + position);
769 buffer.get()[position++] = '"';
770 }
771}
772
773simdjson_inline void
774string_builder::escape_and_append_with_quotes(char input) noexcept {
775 // escaping might turn a control character into \x00xx so 6 characters.
776 if (capacity_check(2 + 6 * 1)) {
777 buffer.get()[position++] = '"';
778 std::string_view cinput(&input, 1);
779 position += write_string_escaped(cinput, buffer.get() + position);
780 buffer.get()[position++] = '"';
781 }
782}
783
784simdjson_inline void
785string_builder::escape_and_append_with_quotes(const char *input) noexcept {
786 std::string_view cinput(input);
787 escape_and_append_with_quotes(cinput);
788}
789#if SIMDJSON_SUPPORTS_CONCEPTS
// Compile-time key overload: forwards the string constant built from the
// template argument to one of the runtime overloads.
790template <constevalutil::fixed_string key>
791simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept {
792 escape_and_append_with_quotes(constevalutil::string_constant<key>::value);
793}
794#endif
795
796simdjson_inline void string_builder::append_raw(const char *c) noexcept {
797 size_t len = std::strlen(c);
798 append_raw(c, len);
799}
800
801simdjson_inline void
802string_builder::append_raw(std::string_view input) noexcept {
803 if (capacity_check(input.size())) {
804 std::memcpy(buffer.get() + position, input.data(), input.size());
805 position += input.size();
806 }
807}
808
809simdjson_inline void string_builder::append_raw(const char *str,
810 size_t len) noexcept {
811 if (capacity_check(len)) {
812 std::memcpy(buffer.get() + position, str, len);
813 position += len;
814 }
815}
816#if SIMDJSON_SUPPORTS_CONCEPTS
817// Support for optional types (std::optional, etc.)
818template <concepts::optional_type T>
819 requires(!require_custom_serialization<T>)
820simdjson_inline void string_builder::append(const T &opt) {
821 if (opt) {
822 append(*opt);
823 } else {
824 append_null();
825 }
826}
827
// Types that satisfy require_custom_serialization are handed to a serialize()
// function called with (*this, val), val being perfectly forwarded.
828template <typename T>
829 requires(require_custom_serialization<T>)
830simdjson_inline void string_builder::append(T &&val) {
831 serialize(*this, std::forward<T>(val));
832}
833
// String-like values (anything convertible to std::string_view, or exactly
// const char *) are written as quoted, escaped JSON strings.
834template <typename T>
835 requires(std::is_convertible<T, std::string_view>::value ||
836 std::is_same<T, const char *>::value)
837simdjson_inline void string_builder::append(const T &value) {
838 escape_and_append_with_quotes(value);
839}
840#endif
841
842#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS
843// Support for range-based appending (std::ranges::view, etc.)
844template <std::ranges::range R>
845 requires(!std::is_convertible<R, std::string_view>::value && !require_custom_serialization<R>)
846simdjson_inline void string_builder::append(const R &range) noexcept {
847 auto it = std::ranges::begin(range);
848 auto end = std::ranges::end(range);
849 if constexpr (concepts::is_pair<std::ranges::range_value_t<R>>) {
850 start_object();
851
852 if (it == end) {
853 end_object();
854 return; // Handle empty range
855 }
856 // Append first item without leading comma
857 append_key_value(it->first, it->second);
858 ++it;
859
860 // Append remaining items with preceding commas
861 for (; it != end; ++it) {
862 append_comma();
863 append_key_value(it->first, it->second);
864 }
865 end_object();
866 } else {
867 start_array();
868 if (it == end) {
869 end_array();
870 return; // Handle empty range
871 }
872
873 // Append first item without leading comma
874 append(*it);
875 ++it;
876
877 // Append remaining items with preceding commas
878 for (; it != end; ++it) {
879 append_comma();
880 append(*it);
881 }
882 end_array();
883 }
884}
885
886#endif
887
888#if SIMDJSON_EXCEPTIONS
// Conversion to std::string: copies the bytes exposed by the std::string_view
// conversion operator. Declared noexcept(false).
889simdjson_inline string_builder::operator std::string() const noexcept(false) {
890 return std::string(operator std::string_view());
891}
892
// Conversion to std::string_view: forwards to view(). Declared noexcept(false).
// NOTE(review): presumably converting view()'s result throws when the builder
// is invalid - confirm against the result type's conversion operator.
893simdjson_inline string_builder::operator std::string_view() const
894 noexcept(false) simdjson_lifetime_bound {
895 return view();
896}
897#endif
898
900string_builder::view() const noexcept {
901 if (!is_valid) {
903 }
904 return std::string_view(buffer.get(), position);
905}
906
907simdjson_inline simdjson_result<const char *> string_builder::c_str() noexcept {
908 if (capacity_check(1)) {
909 buffer.get()[position] = '\0';
910 return buffer.get();
911 }
913}
914
// Returns the result of simdjson::validate_utf8 on the bytes written so far.
915simdjson_inline bool string_builder::validate_unicode() const noexcept {
916 return simdjson::validate_utf8(buffer.get(), position);
917}
918
919simdjson_inline void string_builder::start_object() noexcept {
920 if (capacity_check(1)) {
921 buffer.get()[position++] = '{';
922 }
923}
924
925simdjson_inline void string_builder::end_object() noexcept {
926 if (capacity_check(1)) {
927 buffer.get()[position++] = '}';
928 }
929}
930
931simdjson_inline void string_builder::start_array() noexcept {
932 if (capacity_check(1)) {
933 buffer.get()[position++] = '[';
934 }
935}
936
937simdjson_inline void string_builder::end_array() noexcept {
938 if (capacity_check(1)) {
939 buffer.get()[position++] = ']';
940 }
941}
942
943simdjson_inline void string_builder::append_comma() noexcept {
944 if (capacity_check(1)) {
945 buffer.get()[position++] = ',';
946 }
947}
948
949simdjson_inline void string_builder::append_colon() noexcept {
950 if (capacity_check(1)) {
951 buffer.get()[position++] = ':';
952 }
953}
954
955template <typename key_type, typename value_type>
956simdjson_inline void
957string_builder::append_key_value(key_type key, value_type value) noexcept {
958 static_assert(std::is_same<key_type, const char *>::value ||
959 std::is_convertible<key_type, std::string_view>::value,
960 "Unsupported key type");
961 escape_and_append_with_quotes(key);
962 append_colon();
963 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
964 append_null();
965 }
966 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
967 escape_and_append_with_quotes(value);
968 }
969 else SIMDJSON_IF_CONSTEXPR(
970 std::is_convertible<value_type, std::string_view>::value) {
971 escape_and_append_with_quotes(value);
972 }
973 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
974 escape_and_append_with_quotes(value);
975 }
976 else {
977 append(value);
978 }
979}
980
981#if SIMDJSON_SUPPORTS_CONCEPTS
982template <constevalutil::fixed_string key, typename value_type>
983simdjson_inline void
984string_builder::append_key_value(value_type value) noexcept {
985 escape_and_append_with_quotes<key>();
986 append_colon();
987 SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, std::nullptr_t>::value) {
988 append_null();
989 }
990 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, char>::value) {
991 escape_and_append_with_quotes(value);
992 }
993 else SIMDJSON_IF_CONSTEXPR(
994 std::is_convertible<value_type, std::string_view>::value) {
995 escape_and_append_with_quotes(value);
996 }
997 else SIMDJSON_IF_CONSTEXPR(std::is_same<value_type, const char *>::value) {
998 escape_and_append_with_quotes(value);
999 }
1000 else {
1001 append(value);
1002 }
1003}
1004#endif
1005
1006} // namespace builder
1007} // namespace SIMDJSON_IMPLEMENTATION
1008} // namespace simdjson
1009
1010#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8
simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept
Validate the UTF-8 string.
@ OUT_OF_CAPACITY
The capacity was exceeded, we cannot allocate enough memory.
Definition error.h:52
The result of a simdjson operation that could fail.
Definition error.h:280
simdjson_warn_unused simdjson_inline error_code get(T &value) &&noexcept
Move the value to the provided variable.
Definition error-inl.h:163