1 #ifndef SIMDJSON_WESTMERE_SIMD_H
2 #define SIMDJSON_WESTMERE_SIMD_H
4 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
5 #include "simdjson/westmere/base.h"
6 #include "simdjson/westmere/bitmanipulation.h"
7 #include "simdjson/internal/simdprune_tables.h"
15 template<
typename Child>
20 simdjson_inline base() : value{__m128i()} {}
23 simdjson_inline base(
const __m128i _value) : value(_value) {}
26 simdjson_inline
operator const __m128i&()
const {
return this->value; }
27 simdjson_inline
operator __m128i&() {
return this->value; }
30 simdjson_inline Child operator|(
const Child other)
const {
return _mm_or_si128(*
this, other); }
31 simdjson_inline Child operator&(
const Child other)
const {
return _mm_and_si128(*
this, other); }
32 simdjson_inline Child operator^(
const Child other)
const {
return _mm_xor_si128(*
this, other); }
33 simdjson_inline Child bit_andnot(
const Child other)
const {
return _mm_andnot_si128(other, *
this); }
34 simdjson_inline Child& operator|=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast | other;
return *this_cast; }
35 simdjson_inline Child& operator&=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast & other;
return *this_cast; }
36 simdjson_inline Child& operator^=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast ^ other;
return *this_cast; }
39 template<
typename T,
typename Mask=simd8<
bool>>
40 struct base8: base<simd8<T>> {
// Unsigned integer types for lane bitmasks: a 16-bit one and a 32-bit one.
// NOTE(review): presumably bitmask_t holds one bit per byte lane of a single
// 128-bit vector and bitmask2_t the masks of two vectors -- confirm at the
// use sites.
using bitmask_t = uint16_t;
using bitmask2_t = uint32_t;
44 simdjson_inline base8() : base<simd8<T>>() {}
45 simdjson_inline base8(
const __m128i _value) : base<simd8<T>>(_value) {}
47 friend simdjson_inline Mask
operator==(
const simd8<T> lhs,
const simd8<T> rhs) {
return _mm_cmpeq_epi8(lhs, rhs); }
49 static const int SIZE =
sizeof(base<simd8<T>>::value);
// Returns a vector made of the last N bytes of prev_chunk followed by the
// first 16 - N bytes of this vector: _mm_alignr_epi8 concatenates *this
// (high half) with prev_chunk (low half) and shifts the 32-byte result right
// by 16 - N bytes.
// NOTE(review): N is not declared in the visible lines -- presumably a
// template parameter of this method; confirm.
52 simdjson_inline simd8<T> prev(
const simd8<T> prev_chunk)
const {
53 return _mm_alignr_epi8(*
this, prev_chunk, 16 - N);
59 struct simd8<bool>: base8<bool> {
60 static simdjson_inline simd8<bool> splat(
bool _value) {
return _mm_set1_epi8(uint8_t(-(!!_value))); }
62 simdjson_inline simd8() : base8() {}
63 simdjson_inline simd8(
const __m128i _value) : base8<bool>(_value) {}
65 simdjson_inline simd8(
bool _value) : base8<bool>(splat(_value)) {}
67 simdjson_inline
int to_bitmask()
const {
return _mm_movemask_epi8(*
this); }
68 simdjson_inline
bool any()
const {
return !_mm_testz_si128(*
this, *
this); }
69 simdjson_inline simd8<bool> operator~()
const {
return *
this ^
true; }
73 struct base8_numeric: base8<T> {
74 static simdjson_inline simd8<T> splat(T _value) {
return _mm_set1_epi8(_value); }
75 static simdjson_inline simd8<T> zero() {
return _mm_setzero_si128(); }
// Loads 16 bytes starting at `values` into a vector. The unaligned load
// intrinsic is used, so `values` does not need 16-byte alignment.
76 static simdjson_inline simd8<T> load(
const T values[16]) {
77 return _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(values));
// Takes sixteen lane values v0..v15 and passes them on, in the same order.
// NOTE(review): the lines between the parameter list and the argument list
// are not visible here, so the callee that receives v0..v15 cannot be
// confirmed from this view.
80 static simdjson_inline simd8<T> repeat_16(
81 T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
82 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
85 v0, v1, v2, v3, v4, v5, v6, v7,
86 v8, v9, v10,v11,v12,v13,v14,v15
90 simdjson_inline base8_numeric() : base8<T>() {}
91 simdjson_inline base8_numeric(
const __m128i _value) : base8<T>(_value) {}
94 simdjson_inline
void store(T dst[16])
const {
return _mm_storeu_si128(
reinterpret_cast<__m128i *
>(dst), *
this); }
97 simdjson_inline simd8<T> operator~()
const {
return *
this ^ 0xFFu; }
100 simdjson_inline simd8<T> operator+(
const simd8<T> other)
const {
return _mm_add_epi8(*
this, other); }
101 simdjson_inline simd8<T> operator-(
const simd8<T> other)
const {
return _mm_sub_epi8(*
this, other); }
102 simdjson_inline simd8<T>& operator+=(
const simd8<T> other) { *
this = *
this + other;
return *
static_cast<simd8<T>*
>(
this); }
103 simdjson_inline simd8<T>& operator-=(
const simd8<T> other) { *
this = *
this - other;
return *
static_cast<simd8<T>*
>(
this); }
// Table lookup: each byte of this vector is used as a shuffle index into
// lookup_table (_mm_shuffle_epi8). The low 4 bits of an index select the
// table byte; an index with its top bit set yields 0.
// NOTE(review): L is not declared in the visible lines -- presumably a
// template parameter of this method; confirm.
107 simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table)
const {
108 return _mm_shuffle_epi8(lookup_table, *
this);
// Shuffles the bytes of this vector according to table entries selected by
// `mask`, then stores 16 bytes at `output` with an unaligned store.
// NOTE(review): presumably this packs the bytes whose mask bit is 0 to the
// front of `output`; the contents of the lookup tables are not visible here,
// so confirm against simdprune_tables.h.
119 simdjson_inline
void compress(uint16_t mask, L * output)
const {
120 using internal::thintable_epi8;
121 using internal::BitsSetTable256mul2;
122 using internal::pshufb_combine_table;
// The 16-bit mask is handled as two independent 8-bit halves.
125 uint8_t mask1 = uint8_t(mask);
126 uint8_t mask2 = uint8_t(mask >> 8);
// Low 64 bits come from thintable_epi8[mask1], high 64 bits from
// thintable_epi8[mask2].
130 __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]);
// Adds 8 to every byte of the upper half of shufmask.
// NOTE(review): as shown, the result of this call is discarded and shufmask
// is left unchanged. A `shufmask =` assignment appears to be missing from
// this view -- confirm against the full file.
133 _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0));
// Per-half shuffle of this vector.
135 __m128i pruned = _mm_shuffle_epi8(*
this, shufmask);
// Value looked up for the low half; used below as an offset (times 8) into
// pshufb_combine_table.
138 int pop1 = BitsSetTable256mul2[mask1];
143 __m128i compactmask =
144 _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(pshufb_combine_table + pop1 * 8));
// Second shuffle merges the two halves; the full 16 bytes are always written.
145 __m128i answer = _mm_shuffle_epi8(pruned, compactmask);
146 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(output), answer);
// Convenience overload: builds the lookup table from sixteen individual
// entries via simd8<L>::repeat_16 and forwards to the table-taking
// lookup_16 overload.
// NOTE(review): the end of the call expression is not visible in this view.
150 simdjson_inline simd8<L> lookup_16(
151 L replace0, L replace1, L replace2, L replace3,
152 L replace4, L replace5, L replace6, L replace7,
153 L replace8, L replace9, L replace10, L replace11,
154 L replace12, L replace13, L replace14, L replace15)
const {
155 return lookup_16(simd8<L>::repeat_16(
156 replace0, replace1, replace2, replace3,
157 replace4, replace5, replace6, replace7,
158 replace8, replace9, replace10, replace11,
159 replace12, replace13, replace14, replace15
166 struct simd8<int8_t> : base8_numeric<int8_t> {
167 simdjson_inline simd8() : base8_numeric<int8_t>() {}
168 simdjson_inline simd8(
const __m128i _value) : base8_numeric<int8_t>(_value) {}
170 simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {}
172 simdjson_inline simd8(
const int8_t* values) : simd8(load(values)) {}
// Member-wise constructor: builds the vector from sixteen explicit lane
// values. _mm_setr_epi8 takes its arguments in memory order, so v0 becomes
// the lowest byte.
// NOTE(review): the end of the initializer and the constructor body are not
// visible in this view.
174 simdjson_inline simd8(
175 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
176 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
177 ) : simd8(_mm_setr_epi8(
178 v0, v1, v2, v3, v4, v5, v6, v7,
179 v8, v9, v10,v11,v12,v13,v14,v15
// Builds a simd8<int8_t> from sixteen lane values by forwarding them, in
// order, to the sixteen-argument constructor.
// NOTE(review): the closing lines of this function are not visible in this view.
182 simdjson_inline
static simd8<int8_t> repeat_16(
183 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
184 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
186 return simd8<int8_t>(
187 v0, v1, v2, v3, v4, v5, v6, v7,
188 v8, v9, v10,v11,v12,v13,v14,v15
193 simdjson_inline simd8<int8_t> max_val(
const simd8<int8_t> other)
const {
return _mm_max_epi8(*
this, other); }
194 simdjson_inline simd8<int8_t> min_val(
const simd8<int8_t> other)
const {
return _mm_min_epi8(*
this, other); }
195 simdjson_inline simd8<bool> operator>(
const simd8<int8_t> other)
const {
return _mm_cmpgt_epi8(*
this, other); }
196 simdjson_inline simd8<bool> operator<(
const simd8<int8_t> other)
const {
return _mm_cmpgt_epi8(other, *
this); }
201 struct simd8<uint8_t>: base8_numeric<uint8_t> {
202 simdjson_inline simd8() : base8_numeric<uint8_t>() {}
203 simdjson_inline simd8(
const __m128i _value) : base8_numeric<uint8_t>(_value) {}
205 simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
207 simdjson_inline simd8(
const uint8_t* values) : simd8(load(values)) {}
// Member-wise constructor: builds the vector from sixteen explicit lane
// values. _mm_setr_epi8 takes its arguments in memory order, so v0 becomes
// the lowest byte.
// NOTE(review): the end of the initializer and the constructor body are not
// visible in this view.
209 simdjson_inline simd8(
210 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
211 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
212 ) : simd8(_mm_setr_epi8(
213 v0, v1, v2, v3, v4, v5, v6, v7,
214 v8, v9, v10,v11,v12,v13,v14,v15
// Builds a simd8<uint8_t> from sixteen lane values by forwarding them, in
// order, to the sixteen-argument constructor.
// NOTE(review): the closing lines of this function are not visible in this view.
217 simdjson_inline
static simd8<uint8_t> repeat_16(
218 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
219 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
221 return simd8<uint8_t>(
222 v0, v1, v2, v3, v4, v5, v6, v7,
223 v8, v9, v10,v11,v12,v13,v14,v15
228 simdjson_inline simd8<uint8_t> saturating_add(
const simd8<uint8_t> other)
const {
return _mm_adds_epu8(*
this, other); }
229 simdjson_inline simd8<uint8_t> saturating_sub(
const simd8<uint8_t> other)
const {
return _mm_subs_epu8(*
this, other); }
232 simdjson_inline simd8<uint8_t> max_val(
const simd8<uint8_t> other)
const {
return _mm_max_epu8(*
this, other); }
233 simdjson_inline simd8<uint8_t> min_val(
const simd8<uint8_t> other)
const {
return _mm_min_epu8(*
this, other); }
235 simdjson_inline simd8<uint8_t> gt_bits(
const simd8<uint8_t> other)
const {
return this->saturating_sub(other); }
237 simdjson_inline simd8<uint8_t> lt_bits(
const simd8<uint8_t> other)
const {
return other.saturating_sub(*
this); }
238 simdjson_inline simd8<bool> operator<=(
const simd8<uint8_t> other)
const {
return other.max_val(*
this) == other; }
239 simdjson_inline simd8<bool> operator>=(
const simd8<uint8_t> other)
const {
return other.min_val(*
this) == other; }
240 simdjson_inline simd8<bool> operator>(
const simd8<uint8_t> other)
const {
return this->gt_bits(other).any_bits_set(); }
241 simdjson_inline simd8<bool> operator<(
const simd8<uint8_t> other)
const {
return this->gt_bits(other).any_bits_set(); }
244 simdjson_inline simd8<bool> bits_not_set()
const {
return *
this == uint8_t(0); }
245 simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits)
const {
return (*
this & bits).bits_not_set(); }
246 simdjson_inline simd8<bool> any_bits_set()
const {
return ~this->bits_not_set(); }
247 simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits)
const {
return ~this->bits_not_set(bits); }
248 simdjson_inline
bool is_ascii()
const {
return _mm_movemask_epi8(*
this) == 0; }
249 simdjson_inline
bool bits_not_set_anywhere()
const {
return _mm_testz_si128(*
this, *
this); }
250 simdjson_inline
bool any_bits_set_anywhere()
const {
return !bits_not_set_anywhere(); }
251 simdjson_inline
bool bits_not_set_anywhere(simd8<uint8_t> bits)
const {
return _mm_testz_si128(*
this, bits); }
252 simdjson_inline
bool any_bits_set_anywhere(simd8<uint8_t> bits)
const {
return !bits_not_set_anywhere(bits); }
254 simdjson_inline simd8<uint8_t> shr()
const {
return simd8<uint8_t>(_mm_srli_epi16(*
this, N)) & uint8_t(0xFFu >> N); }
256 simdjson_inline simd8<uint8_t> shl()
const {
return simd8<uint8_t>(_mm_slli_epi16(*
this, N)) & uint8_t(0xFFu << N); }
260 simdjson_inline
int get_bit()
const {
return _mm_movemask_epi8(_mm_slli_epi16(*
this, 7-N)); }
265 static constexpr
int NUM_CHUNKS = 64 /
sizeof(simd8<T>);
266 static_assert(NUM_CHUNKS == 4,
"Westmere kernel should use four registers per 64-byte block.");
267 const simd8<T> chunks[NUM_CHUNKS];
269 simd8x64(
const simd8x64<T>& o) =
delete;
270 simd8x64<T>& operator=(
const simd8<T>& other) =
delete;
273 simdjson_inline simd8x64(
const simd8<T> chunk0,
const simd8<T> chunk1,
const simd8<T> chunk2,
const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
274 simdjson_inline simd8x64(
const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
// Writes the four chunks back to `ptr`, one after another. Chunk k is stored
// at ptr + sizeof(simd8<T>) * k.
276 simdjson_inline
void store(T ptr[64])
const {
277 this->chunks[0].store(ptr+
sizeof(simd8<T>)*0);
278 this->chunks[1].store(ptr+
sizeof(simd8<T>)*1);
279 this->chunks[2].store(ptr+
sizeof(simd8<T>)*2);
280 this->chunks[3].store(ptr+
sizeof(simd8<T>)*3);
// Bitwise OR of all four chunks, combined pairwise.
283 simdjson_inline simd8<T> reduce_or()
const {
284 return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
// Compresses each chunk with its own 16-bit slice of `mask`. Chunk k writes
// at output + 16*k minus the number of set bits in the mask slices below it,
// and the function returns 64 - count_ones(mask).
// NOTE(review): these offsets imply that each chunk contributes one output
// byte per zero bit of its mask slice -- confirm against the per-chunk
// compress.
287 simdjson_inline uint64_t compress(uint64_t mask, T * output)
const {
288 this->chunks[0].compress(uint16_t(mask), output);
289 this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF));
290 this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF));
291 this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
292 return 64 - count_ones(mask);
// Combines the per-chunk bitmasks into one 64-bit value: chunk k's mask is
// placed at bit offset 16*k.
295 simdjson_inline uint64_t to_bitmask()
const {
// Each value is widened to 64 bits before shifting; r0 goes through uint32_t first.
296 uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() );
297 uint64_t r1 = this->chunks[1].to_bitmask() ;
298 uint64_t r2 = this->chunks[2].to_bitmask() ;
299 uint64_t r3 = this->chunks[3].to_bitmask() ;
300 return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
// Compares every element of the block against the scalar `m`: `m` is
// broadcast to a vector and each chunk is compared against it lane-wise.
// NOTE(review): the end of the return expression is not visible here. The
// uint64_t result presumably comes from calling to_bitmask() on the
// simd8x64<bool> built below -- confirm.
303 simdjson_inline uint64_t eq(
const T m)
const {
304 const simd8<T> mask = simd8<T>::splat(m);
305 return simd8x64<bool>(
306 this->chunks[0] == mask,
307 this->chunks[1] == mask,
308 this->chunks[2] == mask,
309 this->chunks[3] == mask
// Compares this block with `other` chunk by chunk, lane-wise.
// NOTE(review): the end of the return expression is not visible here. The
// uint64_t result presumably comes from calling to_bitmask() on the
// simd8x64<bool> built below -- confirm.
313 simdjson_inline uint64_t eq(
const simd8x64<uint8_t> &other)
const {
314 return simd8x64<bool>(
315 this->chunks[0] == other.chunks[0],
316 this->chunks[1] == other.chunks[1],
317 this->chunks[2] == other.chunks[2],
318 this->chunks[3] == other.chunks[3]
// Tests every element of the block for "<= m": `m` is broadcast to a vector
// and each chunk is compared against it with operator<=.
// NOTE(review): the end of the return expression is not visible here. The
// uint64_t result presumably comes from calling to_bitmask() on the
// simd8x64<bool> built below -- confirm.
322 simdjson_inline uint64_t lteq(
const T m)
const {
323 const simd8<T> mask = simd8<T>::splat(m);
324 return simd8x64<bool>(
325 this->chunks[0] <= mask,
326 this->chunks[1] <= mask,
327 this->chunks[2] <= mask,
328 this->chunks[3] <= mask
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept
Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user i...
The top level simdjson namespace, containing everything the library provides.