1 #ifndef SIMDJSON_PPC64_SIMD_H
2 #define SIMDJSON_PPC64_SIMD_H
4 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
5 #include "simdjson/ppc64/base.h"
6 #include "simdjson/ppc64/bitmanipulation.h"
7 #include "simdjson/internal/simdprune_tables.h"
10 #include <type_traits>
17 using __m128i = __vector
unsigned char;
19 template <
typename Child>
struct base {
23 simdjson_inline base() : value{__m128i()} {}
26 simdjson_inline base(
const __m128i _value) : value(_value) {}
29 simdjson_inline
operator const __m128i &()
const {
32 simdjson_inline
operator __m128i &() {
return this->value; }
35 simdjson_inline Child operator|(
const Child other)
const {
36 return vec_or(this->value, (__m128i)other);
38 simdjson_inline Child operator&(
const Child other)
const {
39 return vec_and(this->value, (__m128i)other);
41 simdjson_inline Child operator^(
const Child other)
const {
42 return vec_xor(this->value, (__m128i)other);
44 simdjson_inline Child bit_andnot(
const Child other)
const {
45 return vec_andc(this->value, (__m128i)other);
47 simdjson_inline Child &operator|=(
const Child other) {
48 auto this_cast =
static_cast<Child*
>(
this);
49 *this_cast = *this_cast | other;
52 simdjson_inline Child &operator&=(
const Child other) {
53 auto this_cast =
static_cast<Child*
>(
this);
54 *this_cast = *this_cast & other;
57 simdjson_inline Child &operator^=(
const Child other) {
58 auto this_cast =
static_cast<Child*
>(
this);
59 *this_cast = *this_cast ^ other;
64 template <
typename T,
typename Mask = simd8<
bool>>
65 struct base8 : base<simd8<T>> {
66 typedef uint16_t bitmask_t;
67 typedef uint32_t bitmask2_t;
69 simdjson_inline base8() : base<simd8<T>>() {}
70 simdjson_inline base8(
const __m128i _value) : base<simd8<T>>(_value) {}
72 friend simdjson_inline Mask
operator==(
const simd8<T> lhs,
const simd8<T> rhs) {
73 return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs);
76 static const int SIZE =
sizeof(base<simd8<T>>::value);
79 simdjson_inline simd8<T> prev(simd8<T> prev_chunk)
const {
80 __m128i chunk = this->value;
81 #ifdef __LITTLE_ENDIAN__
82 chunk = (__m128i)vec_reve(this->value);
83 prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk);
85 chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N);
86 #ifdef __LITTLE_ENDIAN__
87 chunk = (__m128i)vec_reve((__m128i)chunk);
94 template <>
struct simd8<bool> : base8<bool> {
95 static simdjson_inline simd8<bool> splat(
bool _value) {
96 return (__m128i)vec_splats((
unsigned char)(-(!!_value)));
99 simdjson_inline simd8() : base8<bool>() {}
100 simdjson_inline simd8(
const __m128i _value)
101 : base8<bool>(_value) {}
103 simdjson_inline simd8(
bool _value)
104 : base8<bool>(splat(_value)) {}
106 simdjson_inline
int to_bitmask()
const {
107 __vector
unsigned long long result;
108 const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
109 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00};
111 result = ((__vector
unsigned long long)vec_vbpermq((__m128i)this->value,
112 (__m128i)perm_mask));
113 #ifdef __LITTLE_ENDIAN__
114 return static_cast<int>(result[1]);
116 return static_cast<int>(result[0]);
119 simdjson_inline
bool any()
const {
120 return !vec_all_eq(this->value, (__m128i)vec_splats(0));
122 simdjson_inline simd8<bool> operator~()
const {
123 return this->value ^ (__m128i)splat(
true);
127 template <
typename T>
struct base8_numeric : base8<T> {
128 static simdjson_inline simd8<T> splat(T value) {
130 return (__m128i)vec_splats(value);
132 static simdjson_inline simd8<T> zero() {
return splat(0); }
133 static simdjson_inline simd8<T> load(
const T values[16]) {
134 return (__m128i)(vec_vsx_ld(0,
reinterpret_cast<const uint8_t *
>(values)));
137 static simdjson_inline simd8<T> repeat_16(T v0, T v1, T v2, T v3, T v4,
138 T v5, T v6, T v7, T v8, T v9,
139 T v10, T v11, T v12, T v13,
141 return simd8<T>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
145 simdjson_inline base8_numeric() : base8<T>() {}
146 simdjson_inline base8_numeric(
const __m128i _value)
147 : base8<T>(_value) {}
150 simdjson_inline
void store(T dst[16])
const {
151 vec_vsx_st(this->value, 0,
reinterpret_cast<__m128i *
>(dst));
155 simdjson_inline simd8<T> operator~()
const {
return *
this ^ 0xFFu; }
158 simdjson_inline simd8<T> operator+(
const simd8<T> other)
const {
159 return (__m128i)((__m128i)this->value + (__m128i)other);
161 simdjson_inline simd8<T> operator-(
const simd8<T> other)
const {
162 return (__m128i)((__m128i)this->value - (__m128i)other);
164 simdjson_inline simd8<T> &operator+=(
const simd8<T> other) {
165 *
this = *
this + other;
166 return *
static_cast<simd8<T> *
>(
this);
168 simdjson_inline simd8<T> &operator-=(
const simd8<T> other) {
169 *
this = *
this - other;
170 return *
static_cast<simd8<T> *
>(
this);
175 template <
typename L>
176 simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table)
const {
177 return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value);
186 template <
typename L>
187 simdjson_inline
void compress(uint16_t mask, L *output)
const {
188 using internal::BitsSetTable256mul2;
189 using internal::pshufb_combine_table;
190 using internal::thintable_epi8;
193 uint8_t mask1 = uint8_t(mask);
194 uint8_t mask2 = uint8_t(mask >> 8);
198 #ifdef __LITTLE_ENDIAN__
199 __m128i shufmask = (__m128i)(__vector
unsigned long long){
200 thintable_epi8[mask1], thintable_epi8[mask2]};
202 __m128i shufmask = (__m128i)(__vector
unsigned long long){
203 thintable_epi8[mask2], thintable_epi8[mask1]};
204 shufmask = (__m128i)vec_reve((__m128i)shufmask);
207 shufmask = ((__m128i)shufmask) +
208 ((__m128i)(__vector
int){0, 0, 0x08080808, 0x08080808});
211 __m128i pruned = vec_perm(this->value, this->value, shufmask);
214 int pop1 = BitsSetTable256mul2[mask1];
219 __m128i compactmask =
220 vec_vsx_ld(0,
reinterpret_cast<const uint8_t *
>(pshufb_combine_table + pop1 * 8));
221 __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask);
222 vec_vsx_st(answer, 0,
reinterpret_cast<__m128i *
>(output));
225 template <
typename L>
226 simdjson_inline simd8<L>
227 lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
228 L replace5, L replace6, L replace7, L replace8, L replace9,
229 L replace10, L replace11, L replace12, L replace13, L replace14,
231 return lookup_16(simd8<L>::repeat_16(
232 replace0, replace1, replace2, replace3, replace4, replace5, replace6,
233 replace7, replace8, replace9, replace10, replace11, replace12,
234 replace13, replace14, replace15));
239 template <>
struct simd8<int8_t> : base8_numeric<int8_t> {
240 simdjson_inline simd8() : base8_numeric<int8_t>() {}
241 simdjson_inline simd8(
const __m128i _value)
242 : base8_numeric<int8_t>(_value) {}
244 simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {}
246 simdjson_inline simd8(
const int8_t *values) : simd8(load(values)) {}
248 simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
249 int8_t v4, int8_t v5, int8_t v6, int8_t v7,
250 int8_t v8, int8_t v9, int8_t v10, int8_t v11,
251 int8_t v12, int8_t v13, int8_t v14, int8_t v15)
252 : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7,
253 v8, v9, v10, v11, v12, v13, v14,
256 simdjson_inline
static simd8<int8_t>
257 repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
258 int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
259 int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
260 return simd8<int8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
265 simdjson_inline simd8<int8_t>
266 max_val(
const simd8<int8_t> other)
const {
267 return (__m128i)vec_max((__vector
signed char)this->value,
268 (__vector
signed char)(__m128i)other);
270 simdjson_inline simd8<int8_t>
271 min_val(
const simd8<int8_t> other)
const {
272 return (__m128i)vec_min((__vector
signed char)this->value,
273 (__vector
signed char)(__m128i)other);
275 simdjson_inline simd8<bool>
276 operator>(
const simd8<int8_t> other)
const {
277 return (__m128i)vec_cmpgt((__vector
signed char)this->value,
278 (__vector
signed char)(__m128i)other);
280 simdjson_inline simd8<bool>
281 operator<(
const simd8<int8_t> other)
const {
282 return (__m128i)vec_cmplt((__vector
signed char)this->value,
283 (__vector
signed char)(__m128i)other);
288 template <>
struct simd8<uint8_t> : base8_numeric<uint8_t> {
289 simdjson_inline simd8() : base8_numeric<uint8_t>() {}
290 simdjson_inline simd8(
const __m128i _value)
291 : base8_numeric<uint8_t>(_value) {}
293 simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
295 simdjson_inline simd8(
const uint8_t *values) : simd8(load(values)) {}
298 simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
299 uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
300 uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
301 : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
304 simdjson_inline
static simd8<uint8_t>
305 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
306 uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
307 uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
309 return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
314 simdjson_inline simd8<uint8_t>
315 saturating_add(
const simd8<uint8_t> other)
const {
316 return (__m128i)vec_adds(this->value, (__m128i)other);
318 simdjson_inline simd8<uint8_t>
319 saturating_sub(
const simd8<uint8_t> other)
const {
320 return (__m128i)vec_subs(this->value, (__m128i)other);
324 simdjson_inline simd8<uint8_t>
325 max_val(
const simd8<uint8_t> other)
const {
326 return (__m128i)vec_max(this->value, (__m128i)other);
328 simdjson_inline simd8<uint8_t>
329 min_val(
const simd8<uint8_t> other)
const {
330 return (__m128i)vec_min(this->value, (__m128i)other);
333 simdjson_inline simd8<uint8_t>
334 gt_bits(
const simd8<uint8_t> other)
const {
335 return this->saturating_sub(other);
338 simdjson_inline simd8<uint8_t>
339 lt_bits(
const simd8<uint8_t> other)
const {
340 return other.saturating_sub(*
this);
342 simdjson_inline simd8<bool>
343 operator<=(
const simd8<uint8_t> other)
const {
344 return other.max_val(*
this) == other;
346 simdjson_inline simd8<bool>
347 operator>=(
const simd8<uint8_t> other)
const {
348 return other.min_val(*
this) == other;
350 simdjson_inline simd8<bool>
351 operator>(
const simd8<uint8_t> other)
const {
352 return this->gt_bits(other).any_bits_set();
354 simdjson_inline simd8<bool>
355 operator<(
const simd8<uint8_t> other)
const {
356 return this->gt_bits(other).any_bits_set();
360 simdjson_inline simd8<bool> bits_not_set()
const {
361 return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0)));
363 simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits)
const {
364 return (*
this & bits).bits_not_set();
366 simdjson_inline simd8<bool> any_bits_set()
const {
367 return ~this->bits_not_set();
369 simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits)
const {
370 return ~this->bits_not_set(bits);
372 simdjson_inline
bool bits_not_set_anywhere()
const {
373 return vec_all_eq(this->value, (__m128i)vec_splats(0));
375 simdjson_inline
bool any_bits_set_anywhere()
const {
376 return !bits_not_set_anywhere();
378 simdjson_inline
bool bits_not_set_anywhere(simd8<uint8_t> bits)
const {
379 return vec_all_eq(vec_and(this->value, (__m128i)bits),
380 (__m128i)vec_splats(0));
382 simdjson_inline
bool any_bits_set_anywhere(simd8<uint8_t> bits)
const {
383 return !bits_not_set_anywhere(bits);
385 template <
int N> simdjson_inline simd8<uint8_t> shr()
const {
386 return simd8<uint8_t>(
387 (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N)));
389 template <
int N> simdjson_inline simd8<uint8_t> shl()
const {
390 return simd8<uint8_t>(
391 (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N)));
395 template <
typename T>
struct simd8x64 {
396 static constexpr
int NUM_CHUNKS = 64 /
sizeof(simd8<T>);
397 static_assert(NUM_CHUNKS == 4,
398 "PPC64 kernel should use four registers per 64-byte block.");
399 const simd8<T> chunks[NUM_CHUNKS];
401 simd8x64(
const simd8x64<T> &o) =
delete;
403 operator=(
const simd8<T>& other) =
delete;
406 simdjson_inline simd8x64(
const simd8<T> chunk0,
const simd8<T> chunk1,
407 const simd8<T> chunk2,
const simd8<T> chunk3)
408 : chunks{chunk0, chunk1, chunk2, chunk3} {}
409 simdjson_inline simd8x64(
const T ptr[64])
410 : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr + 16),
411 simd8<T>::load(ptr + 32), simd8<T>::load(ptr + 48)} {}
413 simdjson_inline
void store(T ptr[64])
const {
414 this->chunks[0].store(ptr +
sizeof(simd8<T>) * 0);
415 this->chunks[1].store(ptr +
sizeof(simd8<T>) * 1);
416 this->chunks[2].store(ptr +
sizeof(simd8<T>) * 2);
417 this->chunks[3].store(ptr +
sizeof(simd8<T>) * 3);
420 simdjson_inline simd8<T> reduce_or()
const {
421 return (this->chunks[0] | this->chunks[1]) |
422 (this->chunks[2] | this->chunks[3]);
425 simdjson_inline uint64_t compress(uint64_t mask, T *output)
const {
426 this->chunks[0].compress(uint16_t(mask), output);
427 this->chunks[1].compress(uint16_t(mask >> 16),
428 output + 16 - count_ones(mask & 0xFFFF));
429 this->chunks[2].compress(uint16_t(mask >> 32),
430 output + 32 - count_ones(mask & 0xFFFFFFFF));
431 this->chunks[3].compress(uint16_t(mask >> 48),
432 output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
433 return 64 - count_ones(mask);
436 simdjson_inline uint64_t to_bitmask()
const {
437 uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
438 uint64_t r1 = this->chunks[1].to_bitmask();
439 uint64_t r2 = this->chunks[2].to_bitmask();
440 uint64_t r3 = this->chunks[3].to_bitmask();
441 return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
444 simdjson_inline uint64_t eq(
const T m)
const {
445 const simd8<T> mask = simd8<T>::splat(m);
446 return simd8x64<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
447 this->chunks[2] == mask, this->chunks[3] == mask)
451 simdjson_inline uint64_t eq(
const simd8x64<uint8_t> &other)
const {
452 return simd8x64<bool>(this->chunks[0] == other.chunks[0],
453 this->chunks[1] == other.chunks[1],
454 this->chunks[2] == other.chunks[2],
455 this->chunks[3] == other.chunks[3])
459 simdjson_inline uint64_t lteq(
const T m)
const {
460 const simd8<T> mask = simd8<T>::splat(m);
461 return simd8x64<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
462 this->chunks[2] <= mask, this->chunks[3] <= mask)
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept
Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user i...
The top level simdjson namespace, containing everything the library provides.