simdjson  3.11.0
Ridiculously Fast JSON
atomparsing.h
1 #ifndef SIMDJSON_GENERIC_ATOMPARSING_H
2 
3 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
4 #define SIMDJSON_GENERIC_ATOMPARSING_H
5 #include "simdjson/generic/base.h"
6 #include "simdjson/generic/jsoncharutils.h"
7 #endif // SIMDJSON_CONDITIONAL_INCLUDE
8 
9 #include <cstring>
10 
11 namespace simdjson {
12 namespace SIMDJSON_IMPLEMENTATION {
13 namespace {
15 namespace atomparsing {
16 
17 // The string_to_uint32 is exclusively used to map literal strings to 32-bit values.
18 // We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot
19 // be certain that the character pointer will be properly aligned.
20 // You might think that using memcpy makes this function expensive, but you'd be wrong.
21 // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false");
22 // to the compile-time constant 1936482662.
23 simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; }
24 
25 
26 // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive.
27 // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about.
28 simdjson_warn_unused
29 simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
30  uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++)
31  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
32  std::memcpy(&srcval, src, sizeof(uint32_t));
33  return srcval ^ string_to_uint32(atom);
34 }
35 
36 simdjson_warn_unused
37 simdjson_inline bool is_valid_true_atom(const uint8_t *src) {
38  return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
39 }
40 
41 simdjson_warn_unused
42 simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
43  if (len > 4) { return is_valid_true_atom(src); }
44  else if (len == 4) { return !str4ncmp(src, "true"); }
45  else { return false; }
46 }
47 
48 simdjson_warn_unused
49 simdjson_inline bool is_valid_false_atom(const uint8_t *src) {
50  return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0;
51 }
52 
53 simdjson_warn_unused
54 simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
55  if (len > 5) { return is_valid_false_atom(src); }
56  else if (len == 5) { return !str4ncmp(src+1, "alse"); }
57  else { return false; }
58 }
59 
60 simdjson_warn_unused
61 simdjson_inline bool is_valid_null_atom(const uint8_t *src) {
62  return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
63 }
64 
65 simdjson_warn_unused
66 simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
67  if (len > 4) { return is_valid_null_atom(src); }
68  else if (len == 4) { return !str4ncmp(src, "null"); }
69  else { return false; }
70 }
71 
72 } // namespace atomparsing
73 } // unnamed namespace
74 } // namespace SIMDJSON_IMPLEMENTATION
75 } // namespace simdjson
76 
77 #endif // SIMDJSON_GENERIC_ATOMPARSING_H
The top level simdjson namespace, containing everything the library provides.
Definition: base.h:8
constexpr size_t SIMDJSON_PADDING
The amount of padding needed in a buffer to parse JSON.
Definition: base.h:32