simdjson 4.4.0
Ridiculously Fast JSON
Loading...
Searching...
No Matches
simd.h
#ifndef SIMDJSON_RVV_VLS_SIMD_H
#define SIMDJSON_RVV_VLS_SIMD_H

#ifndef SIMDJSON_CONDITIONAL_INCLUDE
#include "simdjson/rvv-vls/base.h"
#include "simdjson/rvv-vls/bitmanipulation.h"
#include "simdjson/internal/simdprune_tables.h"
#endif // SIMDJSON_CONDITIONAL_INCLUDE

namespace simdjson {
namespace rvv_vls {
namespace {
namespace simd {

// Fixed-length RVV ("VLS") type selection.
// VL8 = number of 8-bit lanes handled per simd8 register. The single-register
// types are capped at 512 bits even when __riscv_v_fixed_vlen is wider, so
// that vbitmask_t (one bit per byte lane) always fits in a uint64_t.
#if __riscv_v_fixed_vlen >= 512
  static constexpr size_t VL8 = 512/8;
  using vint8_t = vint8m1_t __attribute__((riscv_rvv_vector_bits(512)));
  using vuint8_t = vuint8m1_t __attribute__((riscv_rvv_vector_bits(512)));
  using vbool_t = vbool8_t __attribute__((riscv_rvv_vector_bits(512/8)));
  using vbitmask_t = uint64_t;
#else
  // Narrower hardware: one full LMUL=1 register per simd8.
  static constexpr size_t VL8 = __riscv_v_fixed_vlen/8;
  using vint8_t = vint8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
  using vuint8_t = vuint8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen)));
  using vbool_t = vbool8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen/8)));
  // Bitmask type sized to hold exactly VL8 bits (one per byte lane).
  #if __riscv_v_fixed_vlen == 128
    using vbitmask_t = uint16_t;
  #elif __riscv_v_fixed_vlen == 256
    using vbitmask_t = uint32_t;
  #endif
#endif

// 64-byte (512-bit) types backing simd8x64: pick the LMUL register grouping
// that yields exactly 512 bits — m4 at VLEN=128, m2 at 256, m1 at >=512.
// The matching mask type has one bit per byte lane (512/8 bits).
#if __riscv_v_fixed_vlen == 128
  using vuint8x64_t = vuint8m4_t __attribute__((riscv_rvv_vector_bits(512)));
  using vboolx64_t = vbool2_t __attribute__((riscv_rvv_vector_bits(512/8)));
#elif __riscv_v_fixed_vlen == 256
  using vuint8x64_t = vuint8m2_t __attribute__((riscv_rvv_vector_bits(512)));
  using vboolx64_t = vbool4_t __attribute__((riscv_rvv_vector_bits(512/8)));
#else
  using vuint8x64_t = vuint8m1_t __attribute__((riscv_rvv_vector_bits(512)));
  using vboolx64_t = vbool8_t __attribute__((riscv_rvv_vector_bits(512/8)));
#endif
44 template<typename T>
45 struct simd8;
46
47 // SIMD byte mask type (returned by things like eq and gt)
48 template<>
49 struct simd8<bool> {
50 vbool_t value;
51 using bitmask_t = vbitmask_t;
52 static constexpr int SIZE = sizeof(value);
53
54 simdjson_inline simd8(const vbool_t _value) : value(_value) {}
55 simdjson_inline simd8() : simd8(__riscv_vmclr_m_b8(VL8)) {}
56 simdjson_inline simd8(bool _value) : simd8(splat(_value)) {}
57
58 simdjson_inline operator const vbool_t&() const { return value; }
59 simdjson_inline operator vbool_t&() { return value; }
60
61 static simdjson_inline simd8<bool> splat(bool _value) {
62 return __riscv_vreinterpret_b8(__riscv_vmv_v_x_u64m1(((uint64_t)!_value)-1, 1));
63 }
64
65 simdjson_inline vbitmask_t to_bitmask() const {
66#if __riscv_v_fixed_vlen == 128
67 return __riscv_vmv_x(__riscv_vreinterpret_u16m1(value));
68#elif __riscv_v_fixed_vlen == 256
69 return __riscv_vmv_x(__riscv_vreinterpret_u32m1(value));
70#else
71 return __riscv_vmv_x(__riscv_vreinterpret_u64m1(value));
72#endif
73 }
74
75 // Bit operations
76 simdjson_inline simd8<bool> operator|(const simd8<bool> other) const { return __riscv_vmor(*this, other, VL8); }
77 simdjson_inline simd8<bool> operator&(const simd8<bool> other) const { return __riscv_vmand(*this, other, VL8); }
78 simdjson_inline simd8<bool> operator^(const simd8<bool> other) const { return __riscv_vmxor(*this, other, VL8); }
79 simdjson_inline simd8<bool> bit_andnot(const simd8<bool> other) const { return __riscv_vmandn(other, *this, VL8); }
80 simdjson_inline simd8<bool> operator~() const { return __riscv_vmnot(*this, VL8); }
81 simdjson_inline simd8<bool>& operator|=(const simd8<bool> other) { auto this_cast = static_cast<simd8<bool>*>(this); *this_cast = *this_cast | other; return *this_cast; }
82 simdjson_inline simd8<bool>& operator&=(const simd8<bool> other) { auto this_cast = static_cast<simd8<bool>*>(this); *this_cast = *this_cast & other; return *this_cast; }
83 simdjson_inline simd8<bool>& operator^=(const simd8<bool> other) { auto this_cast = static_cast<simd8<bool>*>(this); *this_cast = *this_cast ^ other; return *this_cast; }
84 };
85
  // Unsigned bytes
  template<>
  struct simd8<uint8_t> {

    vuint8_t value;
    static constexpr int SIZE = sizeof(value);

    simdjson_inline simd8(const vuint8_t _value) : value(_value) {}
    simdjson_inline simd8() : simd8(zero()) {}
    simdjson_inline simd8(const uint8_t values[VL8]) : simd8(load(values)) {}
    simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
    // Expand a lane mask into 0x00/0xFF bytes (merge -1 over zero under mask).
    simdjson_inline simd8(simd8<bool> mask) : value(__riscv_vmerge_vxm_u8m1(zero(), -1, (vbool_t)mask, VL8)) {}

    simdjson_inline operator const vuint8_t&() const { return this->value; }
    simdjson_inline operator vuint8_t&() { return this->value; }

    // Build from 16 explicit byte values; lanes past 15 (when VL8 > 16) are
    // zero-initialized, which is sufficient for the lookup_16 use below since
    // gather indices are expected to stay in 0..15.
    simdjson_inline simd8(
      uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
      uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
    ) : simd8(vuint8_t{
      v0, v1, v2, v3, v4, v5, v6, v7,
      v8, v9, v10,v11,v12,v13,v14,v15
    }) {}

    // Repeat 16 values as many times as necessary (usually for lookup tables)
    // NOTE(review): this does not actually repeat — lanes 16.. are zero; safe
    // as long as gather indices never exceed 15. Verify against callers.
    simdjson_inline static simd8<uint8_t> repeat_16(
      uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7,
      uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15
    ) {
      return simd8<uint8_t>(
        v0, v1, v2, v3, v4, v5, v6, v7,
        v8, v9, v10,v11,v12,v13,v14,v15
      );
    }

    static simdjson_inline vuint8_t splat(uint8_t _value) { return __riscv_vmv_v_x_u8m1(_value, VL8); }
    static simdjson_inline vuint8_t zero() { return splat(0); }
    static simdjson_inline vuint8_t load(const uint8_t values[VL8]) { return __riscv_vle8_v_u8m1(values, VL8); }

    // Bit operations
    simdjson_inline simd8<uint8_t> operator|(const simd8<uint8_t> other) const { return __riscv_vor_vv_u8m1( value, other, VL8); }
    simdjson_inline simd8<uint8_t> operator&(const simd8<uint8_t> other) const { return __riscv_vand_vv_u8m1( value, other, VL8); }
    simdjson_inline simd8<uint8_t> operator^(const simd8<uint8_t> other) const { return __riscv_vxor_vv_u8m1( value, other, VL8); }
    simdjson_inline simd8<uint8_t> operator~() const { return __riscv_vnot_v_u8m1(value, VL8); }
#if __riscv_zvbb
    // Zvbb provides a fused and-not; vandn(a, b) computes a & ~b.
    simdjson_inline simd8<uint8_t> bit_andnot(const simd8<uint8_t> other) const { return __riscv_vandn_vv_u8m1(other, value, VL8); }
#else
    // Fallback: same result (other & ~*this) via two ops.
    simdjson_inline simd8<uint8_t> bit_andnot(const simd8<uint8_t> other) const { return other & ~*this; }
#endif
    simdjson_inline simd8<uint8_t>& operator|=(const simd8<uint8_t> other) { value = *this | other; return *this; }
    simdjson_inline simd8<uint8_t>& operator&=(const simd8<uint8_t> other) { value = *this & other; return *this; }
    simdjson_inline simd8<uint8_t>& operator^=(const simd8<uint8_t> other) { value = *this ^ other; return *this; }

    simdjson_inline simd8<bool> operator==(const simd8<uint8_t> other) const { return __riscv_vmseq(value, other, VL8); }
    simdjson_inline simd8<bool> operator==(uint8_t other) const { return __riscv_vmseq(value, other, VL8); }

    // Shift the last N bytes of prev_chunk into the front of this chunk:
    // slidedown brings prev_chunk's top N bytes to lanes 0..N-1, then slideup
    // overlays this chunk's bytes starting at lane N.
    template<int N=1>
    simdjson_inline simd8<uint8_t> prev(const simd8<uint8_t> prev_chunk) const {
      return __riscv_vslideup(__riscv_vslidedown(prev_chunk, VL8-N, VL8), value, N, VL8);
    }

    // Store to array
    simdjson_inline void store(uint8_t dst[VL8]) const { return __riscv_vse8(dst, value, VL8); }

    // Saturated math
    simdjson_inline simd8<uint8_t> saturating_add(const simd8<uint8_t> other) const { return __riscv_vsaddu(value, other, VL8); }
    simdjson_inline simd8<uint8_t> saturating_sub(const simd8<uint8_t> other) const { return __riscv_vssubu(value, other, VL8); }

    // Addition/subtraction are the same for signed and unsigned
    simdjson_inline simd8<uint8_t> operator+(const simd8<uint8_t> other) const { return __riscv_vadd(value, other, VL8); }
    simdjson_inline simd8<uint8_t> operator-(const simd8<uint8_t> other) const { return __riscv_vsub(value, other, VL8); }
    simdjson_inline simd8<uint8_t>& operator+=(const simd8<uint8_t> other) { value = *this + other; return *this; }
    simdjson_inline simd8<uint8_t>& operator-=(const simd8<uint8_t> other) { value = *this - other; return *this; }

    // Order-specific operations (unsigned comparisons)
    simdjson_inline simd8<bool> operator<=(const simd8<uint8_t> other) const { return __riscv_vmsleu(value, other, VL8); }
    simdjson_inline simd8<bool> operator>=(const simd8<uint8_t> other) const { return __riscv_vmsgeu(value, other, VL8); }
    simdjson_inline simd8<bool> operator<(const simd8<uint8_t> other) const { return __riscv_vmsltu(value, other, VL8); }
    simdjson_inline simd8<bool> operator>(const simd8<uint8_t> other) const { return __riscv_vmsgtu(value, other, VL8); }

    // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero.
    simdjson_inline simd8<uint8_t> gt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this > other); }
    // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero.
    simdjson_inline simd8<uint8_t> lt_bits(const simd8<uint8_t> other) const { return simd8<uint8_t>(*this < other); }

    // Bit-specific operations
    // True if any lane is nonzero (vfirst returns -1 when the mask is empty).
    simdjson_inline bool any_bits_set_anywhere() const {
      return __riscv_vfirst(__riscv_vmsne(value, 0, VL8), VL8) >= 0;
    }
    simdjson_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const { return (*this & bits).any_bits_set_anywhere(); }
    template<int N>
    simdjson_inline simd8<uint8_t> shr() const { return __riscv_vsrl(value, N, VL8); }
    template<int N>
    simdjson_inline simd8<uint8_t> shl() const { return __riscv_vsll(value, N, VL8); }


    // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values)
    // (vrgather indexes the full register, so indices 0..15 always hit the
    // table's first 16 lanes regardless of VL8.)
    template<typename L>
    simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
      return __riscv_vrgather(lookup_table, value, VL8);
    }

    // compress inactive elements, to match AVX-512 behavior
    // A set bit in `mask` means "discard this lane"; the kept lanes are packed
    // to the front and exactly count_ones(~mask) bytes are stored to output.
    template<typename L>
    simdjson_inline void compress(vbitmask_t mask, L * output) const {
      mask = (vbitmask_t)~mask; // vcompress keeps ACTIVE lanes, so invert
#if __riscv_v_fixed_vlen == 128
      vbool8_t m = __riscv_vreinterpret_b8(__riscv_vmv_s_x_u16m1(mask, 1));
#elif __riscv_v_fixed_vlen == 256
      vbool8_t m = __riscv_vreinterpret_b8(__riscv_vmv_s_x_u32m1(mask, 1));
#else
      vbool8_t m = __riscv_vreinterpret_b8(__riscv_vmv_s_x_u64m1(mask, 1));
#endif
      __riscv_vse8_v_u8m1(output, __riscv_vcompress(value, m, VL8), count_ones(mask));
    }

    template<typename L>
    simdjson_inline simd8<L> lookup_16(
      L replace0, L replace1, L replace2, L replace3,
      L replace4, L replace5, L replace6, L replace7,
      L replace8, L replace9, L replace10, L replace11,
      L replace12, L replace13, L replace14, L replace15) const {
      return lookup_16(simd8<L>::repeat_16(
        replace0, replace1, replace2, replace3,
        replace4, replace5, replace6, replace7,
        replace8, replace9, replace10, replace11,
        replace12, replace13, replace14, replace15
      ));
    }
  };
216
  // Signed bytes
  template<>
  struct simd8<int8_t> {
    vint8_t value;
    static constexpr int SIZE = sizeof(value);

    simdjson_inline simd8(const vint8_t _value) : value(_value) {}
    simdjson_inline simd8() : simd8(zero()) {}
    simdjson_inline simd8(const int8_t values[VL8]) : simd8(load(values)) {}
    simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {}

    simdjson_inline operator const vint8_t&() const { return this->value; }
    simdjson_inline operator vint8_t&() { return this->value; }

    // Build from 16 explicit byte values; lanes past 15 (when VL8 > 16) are
    // zero-initialized — sufficient for lookup_16, whose indices stay in 0..15.
    simdjson_inline simd8(
      int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
      int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
    ) : simd8(vint8_t{
      v0, v1, v2, v3, v4, v5, v6, v7,
      v8, v9, v10,v11,v12,v13,v14,v15
    }) {}

    // Repeat 16 values as many times as necessary (usually for lookup tables)
    // NOTE(review): like the unsigned variant, lanes 16.. are zero, not
    // repeated; safe while gather indices never exceed 15.
    simdjson_inline static simd8<int8_t> repeat_16(
      int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7,
      int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15
    ) {
      return simd8<int8_t>(
        v0, v1, v2, v3, v4, v5, v6, v7,
        v8, v9, v10,v11,v12,v13,v14,v15
      );
    }

    static simdjson_inline vint8_t splat(int8_t _value) { return __riscv_vmv_v_x_i8m1(_value, VL8); }
    static simdjson_inline vint8_t zero() { return splat(0); }
    static simdjson_inline vint8_t load(const int8_t values[VL8]) { return __riscv_vle8_v_i8m1(values, VL8); }


    // Store to array
    simdjson_inline void store(int8_t dst[VL8]) const { return __riscv_vse8(dst, value, VL8); }

    // Explicit conversion to/from unsigned (bit-preserving reinterpret)
    simdjson_inline explicit simd8(const vuint8_t other): simd8(__riscv_vreinterpret_i8m1(other)) {}
    simdjson_inline explicit operator simd8<uint8_t>() const { return __riscv_vreinterpret_u8m1(value); }

    // Math
    simdjson_inline simd8<int8_t> operator+(const simd8<int8_t> other) const { return __riscv_vadd(value, other, VL8); }
    simdjson_inline simd8<int8_t> operator-(const simd8<int8_t> other) const { return __riscv_vsub(value, other, VL8); }
    simdjson_inline simd8<int8_t>& operator+=(const simd8<int8_t> other) { value = *this + other; return *this; }
    simdjson_inline simd8<int8_t>& operator-=(const simd8<int8_t> other) { value = *this - other; return *this; }

    // Order-sensitive comparisons (signed)
    simdjson_inline simd8<int8_t> max_val( const simd8<int8_t> other) const { return __riscv_vmax( value, other, VL8); }
    simdjson_inline simd8<int8_t> min_val( const simd8<int8_t> other) const { return __riscv_vmin( value, other, VL8); }
    simdjson_inline simd8<bool> operator>( const simd8<int8_t> other) const { return __riscv_vmsgt(value, other, VL8); }
    simdjson_inline simd8<bool> operator<( const simd8<int8_t> other) const { return __riscv_vmslt(value, other, VL8); }
    simdjson_inline simd8<bool> operator==(const simd8<int8_t> other) const { return __riscv_vmseq(value, other, VL8); }

    // Shift the last N bytes of prev_chunk into the front of this chunk
    // (same slide-down/slide-up combination as simd8<uint8_t>::prev).
    template<int N=1>
    simdjson_inline simd8<int8_t> prev(const simd8<int8_t> prev_chunk) const {
      return __riscv_vslideup(__riscv_vslidedown(prev_chunk, VL8-N, VL8), value, N, VL8);
    }

    // Perform a lookup assuming no value is larger than 16
    template<typename L>
    simdjson_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
      return __riscv_vrgather(lookup_table, value, VL8);
    }
    template<typename L>
    simdjson_inline simd8<L> lookup_16(
      L replace0, L replace1, L replace2, L replace3,
      L replace4, L replace5, L replace6, L replace7,
      L replace8, L replace9, L replace10, L replace11,
      L replace12, L replace13, L replace14, L replace15) const {
      return lookup_16(simd8<L>::repeat_16(
        replace0, replace1, replace2, replace3,
        replace4, replace5, replace6, replace7,
        replace8, replace9, replace10, replace11,
        replace12, replace13, replace14, replace15
      ));
    }
  };
298
  template<typename T>
  struct simd8x64;
  // 64 bytes viewed as one wide RVV value (vuint8x64_t picks the LMUL that
  // yields 512 bits for the current VLEN), processed with vl=64.
  template<>
  struct simd8x64<uint8_t> {
    static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<uint8_t>);
    vuint8x64_t value;

#if __riscv_v_fixed_vlen >= 512
    // Single register holds all 64 bytes; every chunk index maps to it.
    template<int idx> simd8<uint8_t> get() const { return value; }
#else
    // Extract the idx-th LMUL=1 register from the register group.
    template<int idx> simd8<uint8_t> get() const { return __riscv_vget_u8m1(value, idx); }
#endif

    simdjson_inline operator const vuint8x64_t&() const { return this->value; }
    simdjson_inline operator vuint8x64_t&() { return this->value; }

    simd8x64(const simd8x64<uint8_t>& o) = delete; // no copy allowed
    simd8x64<uint8_t>& operator=(const simd8<uint8_t>& other) = delete; // no assignment allowed
    simd8x64() = delete; // no default constructor allowed

    // Load up to n bytes (default: a full 64-byte block) with the load
    // intrinsic matching this VLEN's register grouping.
#if __riscv_v_fixed_vlen == 128
    simdjson_inline simd8x64(const uint8_t *ptr, size_t n = 64) : value(__riscv_vle8_v_u8m4(ptr, n)) {}
#elif __riscv_v_fixed_vlen == 256
    simdjson_inline simd8x64(const uint8_t *ptr, size_t n = 64) : value(__riscv_vle8_v_u8m2(ptr, n)) {}
#else
    simdjson_inline simd8x64(const uint8_t *ptr, size_t n = 64) : value(__riscv_vle8_v_u8m1(ptr, n)) {}
#endif

    simdjson_inline void store(uint8_t ptr[64]) const {
      __riscv_vse8(ptr, value, 64);
    }

    // True when no byte has its high bit set (i.e. no lane is negative when
    // reinterpreted as signed; vfirst < 0 means the mask is empty).
    simdjson_inline bool is_ascii() const {
#if __riscv_v_fixed_vlen == 128
      return __riscv_vfirst(__riscv_vmslt(__riscv_vreinterpret_i8m4(value), 0, 64), 64) < 0;
#elif __riscv_v_fixed_vlen == 256
      return __riscv_vfirst(__riscv_vmslt(__riscv_vreinterpret_i8m2(value), 0, 64), 64) < 0;
#else
      return __riscv_vfirst(__riscv_vmslt(__riscv_vreinterpret_i8m1(value), 0, 64), 64) < 0;
#endif
    }

    // compress inactive elements, to match AVX-512 behavior
    // A set bit in `mask` discards the corresponding byte; kept bytes are
    // packed to the front of `output`. Returns the number of bytes written.
    simdjson_inline uint64_t compress(uint64_t mask, uint8_t * output) const {
      mask = ~mask; // vcompress keeps ACTIVE lanes, so invert
#if __riscv_v_fixed_vlen == 128
      vboolx64_t m = __riscv_vreinterpret_b2(__riscv_vmv_s_x_u64m1(mask, 1));
#elif __riscv_v_fixed_vlen == 256
      vboolx64_t m = __riscv_vreinterpret_b4(__riscv_vmv_s_x_u64m1(mask, 1));
#else
      vboolx64_t m = __riscv_vreinterpret_b8(__riscv_vmv_s_x_u64m1(mask, 1));
#endif
      size_t cnt = count_ones(mask);
      __riscv_vse8(output, __riscv_vcompress(value, m, 64), cnt);
      return cnt;
    }

    // 64-bit bitmask of lanes equal to m (bit i = byte i).
    simdjson_inline uint64_t eq(const uint8_t m) const {
      return __riscv_vmv_x(__riscv_vreinterpret_u64m1(__riscv_vmseq(value, m, 64)));
    }

    // 64-bit bitmask of lanes less than or equal to m (unsigned compare).
    simdjson_inline uint64_t lteq(const uint8_t m) const {
      return __riscv_vmv_x(__riscv_vreinterpret_u64m1(__riscv_vmsleu(value, m, 64)));
    }
  }; // struct simd8x64<uint8_t>
364
365} // namespace simd
366} // unnamed namespace
367} // namespace rvv_vls
368} // namespace simdjson
369
370#endif // SIMDJSON_RVV_VLS_SIMD_H
simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept
Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible for providing a string with no unescaped quote.
The top level simdjson namespace, containing everything the library provides.
Definition base.h:8