1 #ifndef SIMDJSON_LSX_SIMD_H
2 #define SIMDJSON_LSX_SIMD_H
4 #ifndef SIMDJSON_CONDITIONAL_INCLUDE
5 #include "simdjson/lsx/base.h"
6 #include "simdjson/lsx/bitmanipulation.h"
7 #include "simdjson/internal/simdprune_tables.h"
// Common wrapper around a single 128-bit LSX register; Child is the CRTP
// derived type (simd8<T>). NOTE(review): the `struct base {` header line is
// not visible in this chunk — presumably it sits between the template
// parameter list and the first constructor.
16 template<
typename Child>
// Zero-initialize the underlying vector register.
21 simdjson_inline base() : value{__m128i()} {}
// Wrap an existing 128-bit register value.
24 simdjson_inline base(
const __m128i _value) : value(_value) {}
// Implicit conversions so the wrapper can be passed directly to __lsx_*
// intrinsics (const and mutable __m128i views).
27 simdjson_inline
operator const __m128i&()
const {
return this->value; }
28 simdjson_inline
operator __m128i&() {
return this->value; }
// Reinterpreting views as the GCC vector type v16i8 (16 x int8).
29 simdjson_inline
operator const v16i8&()
const {
return (v16i8&)this->value; }
30 simdjson_inline
operator v16i8&() {
return (v16i8&)this->value; }
33 simdjson_inline Child operator|(
const Child other)
const {
return __lsx_vor_v(*
this, other); }
34 simdjson_inline Child operator&(
const Child other)
const {
return __lsx_vand_v(*
this, other); }
35 simdjson_inline Child operator^(
const Child other)
const {
return __lsx_vxor_v(*
this, other); }
// Computes (*this) & ~other. NOTE(review): operand order is deliberate —
// __lsx_vandn_v(vj, vk) is presumed to compute vk & ~vj (AND-NOT of the
// FIRST operand), so `other` must come first. Confirm against the LSX
// intrinsics reference before touching this line.
36 simdjson_inline Child bit_andnot(
const Child other)
const {
return __lsx_vandn_v(other, *
this); }
37 simdjson_inline Child& operator|=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast | other;
return *this_cast; }
38 simdjson_inline Child& operator&=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast & other;
return *this_cast; }
39 simdjson_inline Child& operator^=(
const Child other) {
auto this_cast =
static_cast<Child*
>(
this); *this_cast = *this_cast ^ other;
return *this_cast; }
// Base class for 8-bit-lane vectors; Mask is the boolean vector type
// produced by comparisons.
46 template<
typename T,
typename Mask=simd8<
bool>>
47 struct base8: base<simd8<T>> {
48 simdjson_inline base8() : base<simd8<T>>() {}
49 simdjson_inline base8(
const __m128i _value) : base<simd8<T>>(_value) {}
// Lane-wise equality: vseq_b yields 0xFF per equal byte, 0x00 otherwise.
51 friend simdjson_really_inline Mask
operator==(
const simd8<T> lhs,
const simd8<T> rhs) {
return __lsx_vseq_b(lhs, rhs); }
// Number of bytes in the register (16 for LSX).
53 static const int SIZE =
sizeof(base<simd8<T>>::value);
// Shift this chunk left by N bytes, filling vacated low bytes with the
// high bytes of prev_chunk (cross-chunk byte window). NOTE(review): N is a
// template parameter whose `template<int N>` line is not visible in this
// chunk; the closing brace of prev() is also missing here.
56 simdjson_inline simd8<T> prev(
const simd8<T> prev_chunk)
const {
57 return __lsx_vor_v(__lsx_vbsll_v(*
this, N), __lsx_vbsrl_v(prev_chunk, 16 - N));
// Boolean vector: each lane is 0xFF (true) or 0x00 (false).
63 struct simd8<bool>: base8<bool> {
// Broadcast a single bool to all 16 lanes: -(!!v) is 0xFF or 0x00.
64 static simdjson_inline simd8<bool> splat(
bool _value) {
65 return __lsx_vreplgr2vr_b(uint8_t(-(!!_value)));
68 simdjson_inline simd8() : base8() {}
69 simdjson_inline simd8(
const __m128i _value) : base8<bool>(_value) {}
// Splatting constructor: broadcast one bool to every lane.
71 simdjson_inline simd8(
bool _value) : base8<bool>(splat(_value)) {}
// Pack the 16 lane sign bits (lane truth values) into the low 16 bits of
// an int, lane 0 in bit 0.
73 simdjson_inline
int to_bitmask()
const {
return __lsx_vpickve2gr_w(__lsx_vmskltz_b(*
this), 0); }
74 simdjson_inline
bool any()
const {
return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*
this), 0); }
75 simdjson_inline simd8<bool> operator~()
const {
return *
this ^
true; }
// Numeric 8-bit-lane vector base (shared by int8_t and uint8_t).
79 struct base8_numeric: base8<T> {
// Broadcast one value to all 16 lanes.
80 static simdjson_inline simd8<T> splat(T _value) {
return __lsx_vreplgr2vr_b(_value); }
// All-zero vector (vldi with immediate 0).
81 static simdjson_inline simd8<T> zero() {
return __lsx_vldi(0); }
// Load 16 lanes from (possibly unaligned) memory.
82 static simdjson_inline simd8<T> load(
const T values[16]) {
83 return __lsx_vld(
reinterpret_cast<const __m128i *
>(values), 0);
// Build a vector from 16 explicit lane values. NOTE(review): the
// `) { return simd8<T>(` line between the parameter list and the value
// list is missing from this chunk.
86 static simdjson_inline simd8<T> repeat_16(
87 T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7,
88 T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15
91 v0, v1, v2, v3, v4, v5, v6, v7,
92 v8, v9, v10,v11,v12,v13,v14,v15
96 simdjson_inline base8_numeric() : base8<T>() {}
97 simdjson_inline base8_numeric(
const __m128i _value) : base8<T>(_value) {}
// Store all 16 lanes to (possibly unaligned) memory.
100 simdjson_inline
void store(T dst[16])
const {
101 return __lsx_vst(*
this,
reinterpret_cast<__m128i *
>(dst), 0);
105 simdjson_inline simd8<T> operator+(
const simd8<T> other)
const {
return __lsx_vadd_b(*
this, other); }
106 simdjson_inline simd8<T> operator-(
const simd8<T> other)
const {
return __lsx_vsub_b(*
this, other); }
107 simdjson_inline simd8<T>& operator+=(
const simd8<T> other) { *
this = *
this + other;
return *
static_cast<simd8<T>*
>(
this); }
108 simdjson_inline simd8<T>& operator-=(
const simd8<T> other) { *
this = *
this - other;
return *
static_cast<simd8<T>*
>(
this); }
111 simdjson_inline simd8<T> operator~()
const {
return *
this ^ 0xFFu; }
// Table lookup: each lane of *this selects a byte from lookup_table
// (vshuf_b uses *this as the per-lane index vector).
115 simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table)
const {
116 return __lsx_vshuf_b(lookup_table, lookup_table, *
this);
// Write only the lanes whose mask bit is 0 to `output`, packed together
// (classic simdjson "compress" using the shared prune tables).
// NOTE(review): several statements of this routine (and its closing brace)
// are missing from this chunk — treat the visible lines as a fragment.
124 simdjson_inline
void compress(uint16_t mask, L * output)
const {
125 using internal::thintable_epi8;
126 using internal::BitsSetTable256mul2;
127 using internal::pshufb_combine_table;
// Split the 16-bit mask into two per-64-bit-half byte masks.
130 uint8_t mask1 = uint8_t(mask);
131 uint8_t mask2 = uint8_t(mask >> 8);
// thintable_epi8 gives a 64-bit shuffle for each half; +0x0808.. rebases
// the second half's indices to bytes 8..15.
134 __m128i shufmask = {int64_t(thintable_epi8[mask1]), int64_t(thintable_epi8[mask2]) + 0x0808080808080808};
// Compact each half independently.
136 __m128i pruned = __lsx_vshuf_b(*
this, *
this, shufmask);
// pop1 = 2 * popcount(~mask1): byte offset into the combine table.
139 int pop1 = BitsSetTable256mul2[mask1];
// Second shuffle glues the two compacted halves together.
141 __m128i compactmask = __lsx_vldx(
reinterpret_cast<void*
>(
reinterpret_cast<unsigned long>(pshufb_combine_table)), pop1 * 8);
142 __m128i answer = __lsx_vshuf_b(pruned, pruned, compactmask);
// Store may write up to 16 bytes; caller must provide enough room.
143 __lsx_vst(answer,
reinterpret_cast<uint8_t*
>(output), 0);
// Convenience overload: build the lookup table from 16 scalars.
147 simdjson_inline simd8<L> lookup_16(
148 L replace0, L replace1, L replace2, L replace3,
149 L replace4, L replace5, L replace6, L replace7,
150 L replace8, L replace9, L replace10, L replace11,
151 L replace12, L replace13, L replace14, L replace15)
const {
152 return lookup_16(simd8<L>::repeat_16(
153 replace0, replace1, replace2, replace3,
154 replace4, replace5, replace6, replace7,
155 replace8, replace9, replace10, replace11,
156 replace12, replace13, replace14, replace15
// Signed 8-bit lane vector.
163 struct simd8<int8_t> : base8_numeric<int8_t> {
164 simdjson_inline simd8() : base8_numeric<int8_t>() {}
165 simdjson_inline simd8(
const __m128i _value) : base8_numeric<int8_t>(_value) {}
// Splat a single value to all lanes.
167 simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {}
// Load 16 lanes from memory.
169 simdjson_inline simd8(
const int8_t values[16]) : simd8(load(values)) {}
// Member-wise constructor. NOTE(review): the line that forwards the
// arguments (presumably `) : simd8(__m128i(v16i8{`) is missing from this
// chunk.
171 simdjson_inline simd8(
172 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
173 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
175 v0, v1, v2, v3, v4, v5, v6, v7,
176 v8, v9, v10,v11,v12,v13,v14,v15
// Static 16-scalar factory (mirrors base8_numeric::repeat_16).
179 simdjson_inline
static simd8<int8_t> repeat_16(
180 int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
181 int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
183 return simd8<int8_t>(
184 v0, v1, v2, v3, v4, v5, v6, v7,
185 v8, v9, v10,v11,v12,v13,v14,v15
190 simdjson_inline simd8<int8_t> max_val(
const simd8<int8_t> other)
const {
return __lsx_vmax_b(*
this, other); }
191 simdjson_inline simd8<int8_t> min_val(
const simd8<int8_t> other)
const {
return __lsx_vmin_b(*
this, other); }
// Signed greater-than: a > b implemented as b < a, so `other` is the
// FIRST operand of vslt_b. Operand order is behavior-critical here.
192 simdjson_inline simd8<bool> operator>(
const simd8<int8_t> other)
const {
return __lsx_vslt_b(other, *
this); }
// Signed less-than: *this < other, natural operand order.
193 simdjson_inline simd8<bool> operator<(
const simd8<int8_t> other)
const {
return __lsx_vslt_b(*
this, other); }
// Unsigned 8-bit lane vector.
198 struct simd8<uint8_t>: base8_numeric<uint8_t> {
199 simdjson_inline simd8() : base8_numeric<uint8_t>() {}
200 simdjson_inline simd8(
const __m128i _value) : base8_numeric<uint8_t>(_value) {}
// Splat a single value to all lanes.
202 simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
// Load 16 lanes from memory.
204 simdjson_inline simd8(
const uint8_t values[16]) : simd8(load(values)) {}
// Member-wise constructor via a GCC v16u8 vector literal.
206 simdjson_inline simd8(
207 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
208 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
209 ) : simd8(__m128i(v16u8{
210 v0, v1, v2, v3, v4, v5, v6, v7,
211 v8, v9, v10,v11,v12,v13,v14,v15
// Static 16-scalar factory (mirrors base8_numeric::repeat_16).
214 simdjson_inline
static simd8<uint8_t> repeat_16(
215 uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
216 uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
218 return simd8<uint8_t>(
219 v0, v1, v2, v3, v4, v5, v6, v7,
220 v8, v9, v10,v11,v12,v13,v14,v15
225 simdjson_inline simd8<uint8_t> saturating_add(
const simd8<uint8_t> other)
const {
return __lsx_vsadd_bu(*
this, other); }
226 simdjson_inline simd8<uint8_t> saturating_sub(
const simd8<uint8_t> other)
const {
return __lsx_vssub_bu(*
this, other); }
// Lane-wise unsigned maximum.
229 simdjson_inline simd8<uint8_t> max_val(
const simd8<uint8_t> other)
const {
return __lsx_vmax_bu(*
this, other); }
// Lane-wise unsigned minimum (vmin.bu is commutative, so the swapped
// operand order is equivalent).
230 simdjson_inline simd8<uint8_t> min_val(
const simd8<uint8_t> other)
const {
return __lsx_vmin_bu(other, *
this); }
// Non-zero lanes where *this > other: saturating subtraction clamps to 0
// whenever *this <= other.
232 simdjson_inline simd8<uint8_t> gt_bits(
const simd8<uint8_t> other)
const {
return this->saturating_sub(other); }
// Non-zero lanes where *this < other (mirror of gt_bits).
234 simdjson_inline simd8<uint8_t> lt_bits(
const simd8<uint8_t> other)
const {
return other.saturating_sub(*
this); }
// *this <= other  iff  max(*this, other) == other.
235 simdjson_inline simd8<bool> operator<=(
const simd8<uint8_t> other)
const {
return other.max_val(*
this) == other; }
// *this >= other  iff  min(*this, other) == other.
236 simdjson_inline simd8<bool> operator>=(
const simd8<uint8_t> other)
const {
return other.min_val(*
this) == other; }
237 simdjson_inline simd8<bool> operator>(
const simd8<uint8_t> other)
const {
return this->gt_bits(other).any_bits_set(); }
238 simdjson_inline simd8<bool> operator<(
const simd8<uint8_t> other)
const {
return this->lt_bits(other).any_bits_set(); }
// Per-lane: true where the lane is exactly zero.
241 simdjson_inline simd8<bool> bits_not_set()
const {
return *
this == uint8_t(0); }
// Per-lane: true where (*this & bits) is zero.
242 simdjson_inline simd8<bool> bits_not_set(simd8<uint8_t> bits)
const {
return (*
this & bits).bits_not_set(); }
// Per-lane: true where the lane has at least one bit set.
243 simdjson_inline simd8<bool> any_bits_set()
const {
return ~this->bits_not_set(); }
// Per-lane: true where (*this & bits) is non-zero.
244 simdjson_inline simd8<bool> any_bits_set(simd8<uint8_t> bits)
const {
return ~this->bits_not_set(bits); }
// Whole-vector: true when no byte has its high bit set (all bytes < 0x80),
// i.e. the 16 bytes are pure ASCII.
245 simdjson_inline
bool is_ascii()
const {
return 0 == __lsx_vpickve2gr_w(__lsx_vmskltz_b(*
this), 0); }
// Whole-vector: true when every byte is zero (vmsknz_b mask is 0).
246 simdjson_inline
bool bits_not_set_anywhere()
const {
return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(*
this), 0); }
// Whole-vector: true when any byte is non-zero.
247 simdjson_inline
bool any_bits_set_anywhere()
const {
return !bits_not_set_anywhere(); }
// Whole-vector: true when (*this & bits) is all-zero.
248 simdjson_inline
bool bits_not_set_anywhere(simd8<uint8_t> bits)
const {
249 return 0 == __lsx_vpickve2gr_hu(__lsx_vmsknz_b(__lsx_vand_v(*
this, bits)), 0);
// Whole-vector: true when (*this & bits) has any bit set.
251 simdjson_inline
bool any_bits_set_anywhere(simd8<uint8_t> bits)
const {
return !bits_not_set_anywhere(bits); }
// Logical shift right of every lane by N bits. NOTE(review): N is a
// template parameter; the `template<int N>` lines preceding shr/shl are
// not visible in this chunk.
253 simdjson_inline simd8<uint8_t> shr()
const {
return simd8<uint8_t>(__lsx_vsrli_b(*
this, N)); }
// Logical shift left of every lane by N bits.
255 simdjson_inline simd8<uint8_t> shl()
const {
return simd8<uint8_t>(__lsx_vslli_b(*
this, N)); }
// 64-byte block processed as four 16-byte LSX registers. NOTE(review): the
// `template<typename T> struct simd8x64 {` header is not visible in this
// chunk.
260 static constexpr
int NUM_CHUNKS = 64 /
sizeof(simd8<T>);
261 static_assert(NUM_CHUNKS == 4,
"LSX kernel should use four registers per 64-byte block.");
262 const simd8<T> chunks[NUM_CHUNKS];
// Copying / reassigning a whole 64-byte block is deliberately forbidden.
264 simd8x64(
const simd8x64<T>& o) =
delete;
265 simd8x64<T>& operator=(
const simd8<T>& other) =
delete;
// Build from four pre-loaded chunks.
268 simdjson_inline simd8x64(
const simd8<T> chunk0,
const simd8<T> chunk1,
const simd8<T> chunk2,
const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
// Build by loading 64 consecutive bytes from memory.
269 simdjson_inline simd8x64(
const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
// Compress the 64 lanes whose mask bit is 0 into `output`, returning the
// number of bytes written. NOTE(review): the statements that advance
// `voutput` between the four per-chunk compress calls (presumably
// `voutput += 16 - zcntN;`) are missing from this chunk — as shown, the
// four calls would overwrite each other; treat this as a fragment.
271 simdjson_inline uint64_t compress(uint64_t mask, T * output)
const {
// One 16-bit mask per 16-byte chunk.
272 uint16_t mask1 = uint16_t(mask);
273 uint16_t mask2 = uint16_t(mask >> 16);
274 uint16_t mask3 = uint16_t(mask >> 32);
275 uint16_t mask4 = uint16_t(mask >> 48);
// Per-chunk popcount of kept (zero-mask) lanes, computed in one vpcnt.h.
276 __m128i zcnt = __lsx_vpcnt_h(__m128i(v2u64{~mask, 0}));
277 uint64_t zcnt1 = __lsx_vpickve2gr_hu(zcnt, 0);
278 uint64_t zcnt2 = __lsx_vpickve2gr_hu(zcnt, 1);
279 uint64_t zcnt3 = __lsx_vpickve2gr_hu(zcnt, 2);
280 uint64_t zcnt4 = __lsx_vpickve2gr_hu(zcnt, 3);
// Byte cursor into the output buffer.
281 uint8_t *voutput =
reinterpret_cast<uint8_t*
>(output);
284 this->chunks[0].compress(mask1,
reinterpret_cast<T*
>(voutput));
287 this->chunks[1].compress(mask2,
reinterpret_cast<T*
>(voutput));
290 this->chunks[2].compress(mask3,
reinterpret_cast<T*
>(voutput));
293 this->chunks[3].compress(mask4,
reinterpret_cast<T*
>(voutput));
// Bytes written = final cursor minus start of buffer.
295 return reinterpret_cast<uint64_t
>(voutput) -
reinterpret_cast<uint64_t
>(output);
// Store all four chunks to 64 consecutive bytes of memory.
298 simdjson_inline
void store(T ptr[64])
const {
299 this->chunks[0].store(ptr+
sizeof(simd8<T>)*0);
300 this->chunks[1].store(ptr+
sizeof(simd8<T>)*1);
301 this->chunks[2].store(ptr+
sizeof(simd8<T>)*2);
302 this->chunks[3].store(ptr+
sizeof(simd8<T>)*3);
// Pack the sign bit of each of the 64 lanes into one uint64_t
// (chunk 0 in the low 16 bits). vmskltz_b leaves a 16-bit mask in each
// chunk's low half-word; two interleave steps merge the four masks.
305 simdjson_inline uint64_t to_bitmask()
const {
306 __m128i mask1 = __lsx_vmskltz_b(this->chunks[0]);
307 __m128i mask2 = __lsx_vmskltz_b(this->chunks[1]);
308 __m128i mask3 = __lsx_vmskltz_b(this->chunks[2]);
309 __m128i mask4 = __lsx_vmskltz_b(this->chunks[3]);
// Interleave 16-bit masks pairwise, then 32-bit halves, then extract the
// low 64 bits.
310 mask1 = __lsx_vilvl_h(mask2, mask1);
311 mask2 = __lsx_vilvl_h(mask4, mask3);
312 return __lsx_vpickve2gr_du(__lsx_vilvl_w(mask2, mask1), 0);
// OR all four chunks into one vector (tree-shaped for shorter dependency
// chains). NOTE(review): the closing brace is not visible in this chunk.
315 simdjson_inline simd8<T> reduce_or()
const {
316 return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
// 64-bit bitmask of lanes equal to the scalar m. NOTE(review): the
// trailing `).to_bitmask();` lines of these three methods are not visible
// in this chunk.
319 simdjson_inline uint64_t eq(
const T m)
const {
320 const simd8<T> mask = simd8<T>::splat(m);
321 return simd8x64<bool>(
322 this->chunks[0] == mask,
323 this->chunks[1] == mask,
324 this->chunks[2] == mask,
325 this->chunks[3] == mask
// 64-bit bitmask of lanes equal to the corresponding lane of `other`.
329 simdjson_inline uint64_t eq(
const simd8x64<uint8_t> &other)
const {
330 return simd8x64<bool>(
331 this->chunks[0] == other.chunks[0],
332 this->chunks[1] == other.chunks[1],
333 this->chunks[2] == other.chunks[2],
334 this->chunks[3] == other.chunks[3]
// 64-bit bitmask of lanes less than or equal to the scalar m.
338 simdjson_inline uint64_t lteq(
const T m)
const {
339 const simd8<T> mask = simd8<T>::splat(m);
340 return simd8x64<bool>(
341 this->chunks[0] <= mask,
342 this->chunks[1] <= mask,
343 this->chunks[2] <= mask,
344 this->chunks[3] <= mask
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept
Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible for providing a string with no unescaped quote characters.
The top level simdjson namespace, containing everything the library provides.