simdjson  3.11.0
Ridiculously Fast JSON
jsoncharutils.h
1 #ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H
2 
3 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
4 #define SIMDJSON_GENERIC_JSONCHARUTILS_H
5 #include "simdjson/generic/base.h"
6 #include "simdjson/internal/jsoncharutils_tables.h"
7 #include "simdjson/internal/numberparsing_tables.h"
8 #endif // SIMDJSON_CONDITIONAL_INCLUDE
9 
10 namespace simdjson {
11 namespace SIMDJSON_IMPLEMENTATION {
12 namespace {
13 namespace jsoncharutils {
14 
15 // return non-zero if not a structural or whitespace char
16 // zero otherwise
17 simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
18  return internal::structural_or_whitespace_negated[c];
19 }
20 
21 simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) {
22  return internal::structural_or_whitespace[c];
23 }
24 
25 // returns a value with the high 16 bits set if not valid
26 // otherwise returns the conversion of the 4 hex digits at src into the bottom
27 // 16 bits of the 32-bit return register
28 //
29 // see
30 // https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
31 static inline uint32_t hex_to_u32_nocheck(
32  const uint8_t *src) { // strictly speaking, static inline is a C-ism
33  uint32_t v1 = internal::digit_to_val32[630 + src[0]];
34  uint32_t v2 = internal::digit_to_val32[420 + src[1]];
35  uint32_t v3 = internal::digit_to_val32[210 + src[2]];
36  uint32_t v4 = internal::digit_to_val32[0 + src[3]];
37  return v1 | v2 | v3 | v4;
38 }
39 
// Encodes the code point cp as UTF-8 into the buffer c.
//
// Returns the number of bytes written (1 to 4). A return value of 0 means
// the code point was too large (above 0x10FFFF); nothing is written to c in
// that case. The caller must provide room for up to 4 bytes.
//
// This could possibly be made faster using pdep, clz and table lookups, but
// JSON documents have few escaped code points and this routine looks cheap.
//
// Note: surrogates (0xD800..0xDFFF) are NOT rejected here; we assume they
// are treated separately by the caller. They are encoded as ordinary
// 3-byte sequences.
simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
  if (cp > 0x10FFFF) {
    // Not needed if you already know you have a valid code point.
    return 0;
  }
  if (cp < 0x80) {
    // ASCII: a single byte, copied as is.
    c[0] = uint8_t(cp);
    return 1;
  }
  if (cp < 0x800) {
    // Two bytes: 110xxxxx 10xxxxxx
    c[0] = uint8_t(0xC0 | (cp >> 6));
    c[1] = uint8_t(0x80 | (cp & 0x3F));
    return 2;
  }
  if (cp < 0x10000) {
    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    c[0] = uint8_t(0xE0 | (cp >> 12));
    c[1] = uint8_t(0x80 | ((cp >> 6) & 0x3F));
    c[2] = uint8_t(0x80 | (cp & 0x3F));
    return 3;
  }
  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  c[0] = uint8_t(0xF0 | (cp >> 18));
  c[1] = uint8_t(0x80 | ((cp >> 12) & 0x3F));
  c[2] = uint8_t(0x80 | ((cp >> 6) & 0x3F));
  c[3] = uint8_t(0x80 | (cp & 0x3F));
  return 4;
}
80 
81 #if SIMDJSON_IS_32BITS // _umul128 for x86, arm
// Part of the slow emulation path used on 32-bit builds.
//
// Widening multiply: returns the full 64-bit product of the two 32-bit
// operands x and y (the multiplication is carried out in 64 bits, so no
// bits of the product are lost).
static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
  const uint64_t wide_x = x;
  return wide_x * y;
}
87 static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
88  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
89  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
90  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
91  uint64_t adbc_carry = !!(adbc < ad);
92  uint64_t lo = bd + (adbc << 32);
93  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
94  (adbc_carry << 32) + !!(lo < bd);
95  return lo;
96 }
97 #endif
98 
99 } // namespace jsoncharutils
100 } // unnamed namespace
101 } // namespace SIMDJSON_IMPLEMENTATION
102 } // namespace simdjson
103 
104 #endif // SIMDJSON_GENERIC_JSONCHARUTILS_H
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8