20 #ifndef AVX_VECTORHELPER_H
21 #define AVX_VECTORHELPER_H
39 m128i tmp0 = _mm_srli_epi32(avx_cast<m128i>(v), 23);
40 m128i tmp1 = _mm_srli_epi32(avx_cast<m128i>(
hi128(v)), 23);
41 tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x7f));
42 tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x7f));
43 return _mm256_cvtepi32_ps(
concat(tmp0, tmp1));
47 m128i tmp0 = _mm_srli_epi64(avx_cast<m128i>(v), 52);
48 m128i tmp1 = _mm_srli_epi64(avx_cast<m128i>(
hi128(v)), 52);
49 tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x3ff));
50 tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x3ff));
51 return _mm256_cvtepi32_pd(avx_cast<m128i>(Mem::shuffle<X0, X2, Y0, Y2>(avx_cast<m128>(tmp0), avx_cast<m128>(tmp1))));
// Generator macros: declare a static, always-inline, const member operation
// named `name` taking 0..3 VectorType arguments (a, b, c) and returning the
// expression `code`. Used throughout the per-type VectorHelper specializations.
55 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }
56 #define OP1(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a) { return code; }
57 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a, VTArg b) { return code; }
58 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a, VTArg b, VTArg c) { return code; }
63 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
64 typedef const VectorType & VTArg;
// Bitwise and blend primitives for the 8 x float (m256) helper, generated via
// the OPn macros. blend selects b where the sign bit of c is set, else a.
87 OP0(zero, _mm256_setzero_ps())
88 OP2(or_, _mm256_or_ps(a, b))
89 OP2(xor_, _mm256_xor_ps(a, b))
90 OP2(and_, _mm256_and_ps(a, b))
91 OP2(andnot_, _mm256_andnot_ps(a, b))
92 OP3(blend, _mm256_blendv_ps(a, b, c))
98 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
99 typedef const VectorType & VTArg;
// cdab: swap neighbouring doubles within each 128-bit lane,
// [d c | b a] -> [c d | a b]. Immediate 5 = 0b0101 picks the opposite
// element of each pair in both lanes.
113 static VectorType cdab(VTArg x) {
return _mm256_permute_pd(x, 5); }
// badc: swap the two 128-bit halves of the vector, [d c | b a] -> [b a | d c],
// using a cross-lane permute (immediate 1 = exchange lanes).
114 static VectorType
badc(VTArg
x) {
return _mm256_permute2f128_pd(x, x, 1); }
118 avx_cast<m128i>(
hi128(x)),
sizeof(
double)));
// Bitwise and blend primitives for the 4 x double (m256d) helper, generated
// via the OPn macros. blend selects b where the sign bit of c is set, else a.
124 OP0(zero, _mm256_setzero_pd())
125 OP2(or_, _mm256_or_pd(a, b))
126 OP2(xor_, _mm256_xor_pd(a, b))
127 OP2(and_, _mm256_and_pd(a, b))
128 OP2(andnot_, _mm256_andnot_pd(a, b))
129 OP3(blend, _mm256_blendv_pd(a, b, c))
135 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
136 typedef const VectorType & VTArg;
148 template<typename
T> static
void store(
T *mem, VTArg x, VTArg
m, AlignedFlag);
149 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, UnalignedFlag);
150 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag);
151 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag);
// Per-lane element shuffles for a 32-bit-element AVX vector: the value is
// reinterpreted as m256 and _mm256_permute_ps applies the same 4-element
// pattern to both 128-bit lanes. Names read the resulting element order from
// the highest element down (input per lane is [d c b a]).
// cdab: [d c b a] -> [c d a b] (swap neighbouring elements).
153 static VectorType cdab(VTArg x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(2, 3, 0, 1))); }
// badc: [d c b a] -> [b a d c] (swap element pairs within the lane).
154 static VectorType
badc(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(1, 0, 3, 2))); }
// aaaa: broadcast element 0 of each lane to all four positions.
155 static VectorType
aaaa(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(0, 0, 0, 0))); }
// bbbb: broadcast element 1 of each lane.
156 static VectorType
bbbb(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(1, 1, 1, 1))); }
// cccc: broadcast element 2 of each lane.
157 static VectorType
cccc(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(2, 2, 2, 2))); }
// dddd: broadcast element 3 of each lane.
158 static VectorType
dddd(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(3, 3, 3, 3))); }
// dacb: [d c b a] -> [d a c b] (rotate the low three elements).
159 static VectorType
dacb(VTArg
x) {
return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(3, 0, 2, 1))); }
// All-zero 256-bit integer vector.
162 OP0(zero, _mm256_setzero_si256())
173 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
174 typedef const VectorType & VTArg;
186 template<typename
T> static
void store(
T *mem, VTArg x, VTArg
m, AlignedFlag);
187 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, UnalignedFlag);
188 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag);
189 template<typename
T> static
void store(
T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag);
// Element shuffles for an 8 x 16-bit SSE vector. Each pattern is applied
// first to the low four words (_mm_shufflelo_epi16) and then to the high
// four words (_mm_shufflehi_epi16), i.e. independently within each group of
// four. Names read the resulting order from the highest element of a group
// down (input per group is [d c b a]).
// cdab: [d c b a] -> [c d a b] (swap neighbouring words).
191 static VectorType cdab(VTArg x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 3, 0, 1));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)); }
// badc: [d c b a] -> [b a d c] (swap word pairs within the group).
192 static VectorType
badc(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)); }
// aaaa: broadcast word 0 of each group of four.
193 static VectorType
aaaa(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(0, 0, 0, 0));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(0, 0, 0, 0)); }
// bbbb: broadcast word 1 of each group of four.
194 static VectorType
bbbb(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 1, 1, 1));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(1, 1, 1, 1)); }
// cccc: broadcast word 2 of each group of four.
195 static VectorType
cccc(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 2, 2, 2));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(2, 2, 2, 2)); }
// dddd: broadcast word 3 of each group of four.
196 static VectorType
dddd(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(3, 3, 3, 3));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(3, 3, 3, 3)); }
// dacb: [d c b a] -> [d a c b] (rotate the low three words of each group).
197 static VectorType
dacb(VTArg
x) {
const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(3, 0, 2, 1));
return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(3, 0, 2, 1)); }
// Bitwise and blend primitives for a 128-bit integer helper, generated via
// the OPn macros. blend uses the SSE4.1 byte-blend: selects bytes of b where
// the high bit of the corresponding byte of c is set, else bytes of a.
200 OP0(zero, _mm_setzero_si128())
201 OP2(or_, _mm_or_si128(a, b))
202 OP2(xor_, _mm_xor_si128(a, b))
203 OP2(and_, _mm_and_si128(a, b))
204 OP2(andnot_, _mm_andnot_si128(a, b))
205 OP3(blend, _mm_blendv_epi8(a, b, c))
212 static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a) { return CAT(_mm256_##op##_, SUFFIX)(a); }
214 static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op##_ , SUFFIX)(a, b); }
216 static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op , SUFFIX)(a, b); }
// OPx: like a binary OP, but for members whose name (`op`) differs from the
// intrinsic mnemonic (`op2`); expands to _mm256_<op2>_<SUFFIX>(a, b).
217 #define OPx(op, op2) \
218 static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op2##_, SUFFIX)(a, b); }
220 static Vc_INTRINSIC VectorType Vc_CONST cmp##op(VTArg a, VTArg b) { return CAT(_mm256_cmp##op##_, SUFFIX)(a, b); }
221 #define OP_CAST_(op) \
222 static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_castps_, SUFFIX)( \
223 _mm256_##op##ps(CAT(CAT(_mm256_cast, SUFFIX), _ps)(a), \
224 CAT(CAT(_mm256_cast, SUFFIX), _ps)(b))); \
227 static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return CAT(_mm256_min_, SUFFIX)(a, b); } \
228 static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return CAT(_mm256_max_, SUFFIX)(a, b); }
232 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
233 typedef const VectorType & VTArg;
249 static inline void fma(VectorType &
v1, VTArg v2, VTArg v3) {
251 v1 = _mm256_macc_pd(v1, v2, v3);
253 VectorType
h1 = _mm256_and_pd(v1, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
254 VectorType h2 = _mm256_and_pd(v2, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
255 #if defined(VC_GCC) && VC_GCC < 0x40703
258 asm(
"":
"+x"(
h1),
"+x"(h2));
260 const VectorType l1 = _mm256_sub_pd(v1, h1);
261 const VectorType l2 = _mm256_sub_pd(v2, h2);
262 const VectorType ll = mul(l1, l2);
263 const VectorType lh = add(mul(l1, h2), mul(h1, l2));
264 const VectorType hh = mul(h1, h2);
266 const VectorType lh_lt_v3 = cmplt(
abs(lh),
abs(v3));
267 const VectorType b = _mm256_blendv_pd(v3, lh, lh_lt_v3);
268 const VectorType c = _mm256_blendv_pd(lh, v3, lh_lt_v3);
269 v1 = add(add(ll, b), add(c, hh));
280 return _mm256_div_pd(one(),
sqrt(x));
283 return _mm256_div_pd(one(), x);
297 m128d b = _mm_min_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
298 b = _mm_min_sd(b, _mm_unpackhi_pd(b, b));
299 return _mm_cvtsd_f64(b);
302 m128d b = _mm_max_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
303 b = _mm_max_sd(b, _mm_unpackhi_pd(b, b));
304 return _mm_cvtsd_f64(b);
307 m128d b = _mm_mul_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
308 b = _mm_mul_sd(b, _mm_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1)));
309 return _mm_cvtsd_f64(b);
312 m128d b = _mm_add_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
313 b = _mm_hadd_pd(b, b);
314 return _mm_cvtsd_f64(b);
318 return _mm256_round_pd(a, _MM_FROUND_NINT);
325 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
326 typedef const VectorType & VTArg;
336 const float e,
const float f,
const float g,
const float h) {
342 static inline void fma(VectorType &
v1, VTArg v2, VTArg v3) {
344 v1 = _mm256_macc_ps(v1, v2, v3);
346 m256d v1_0 = _mm256_cvtps_pd(
lo128(v1));
347 m256d v1_1 = _mm256_cvtps_pd(
hi128(v1));
348 m256d v2_0 = _mm256_cvtps_pd(
lo128(v2));
349 m256d v2_1 = _mm256_cvtps_pd(
hi128(v2));
350 m256d v3_0 = _mm256_cvtps_pd(
lo128(v3));
351 m256d v3_1 = _mm256_cvtps_pd(
hi128(v3));
353 _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_0, v2_0), v3_0)),
354 _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_1, v2_1), v3_1)));
371 return _mm256_rcp_ps(x);
379 m128 b = _mm_min_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
380 b = _mm_min_ps(b, _mm_movehl_ps(b, b));
381 b = _mm_min_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
382 return _mm_cvtss_f32(b);
385 m128 b = _mm_max_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
386 b = _mm_max_ps(b, _mm_movehl_ps(b, b));
387 b = _mm_max_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
388 return _mm_cvtss_f32(b);
391 m128 b = _mm_mul_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
392 b = _mm_mul_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
393 b = _mm_mul_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
394 return _mm_cvtss_f32(b);
397 m128 b = _mm_add_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
398 b = _mm_add_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
399 b = _mm_add_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
400 return _mm_cvtss_f32(b);
404 return _mm256_round_ps(a, _MM_FROUND_NINT);
413 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
414 typedef const VectorType & VTArg;
430 const int e,
const int f,
const int g,
const int h) {
445 m128i b = _mm_min_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
446 b = _mm_min_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
447 b = _mm_min_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
448 return _mm_cvtsi128_si32(b);
451 m128i b = _mm_max_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
452 b = _mm_max_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
453 b = _mm_max_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
454 return _mm_cvtsi128_si32(b);
457 m128i b = _mm_add_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
458 b = _mm_add_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
459 b = _mm_add_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
460 return _mm_cvtsi128_si32(b);
463 m128i b = _mm_mullo_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
464 b = _mm_mullo_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
465 b = _mm_mullo_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
466 return _mm_cvtsi128_si32(b);
486 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
487 typedef const VectorType & VTArg;
503 m128i b = _mm_min_epu32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
504 b = _mm_min_epu32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
505 b = _mm_min_epu32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
506 return _mm_cvtsi128_si32(b);
509 m128i b = _mm_max_epu32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
510 b = _mm_max_epu32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
511 b = _mm_max_epu32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
512 return _mm_cvtsi128_si32(b);
515 m128i b = _mm_add_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
516 b = _mm_add_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
517 b = _mm_add_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
518 return _mm_cvtsi128_si32(b);
521 m128i b = _mm_mullo_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
522 b = _mm_mullo_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
523 b = _mm_mullo_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
524 return _mm_cvtsi128_si32(b);
539 static Vc_INTRINSIC VectorType
Vc_CONST set(
const unsigned int a,
const unsigned int b,
const unsigned int c,
const unsigned int d,
540 const unsigned int e,
const unsigned int f,
const unsigned int g,
const unsigned int h) {
547 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
568 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
569 typedef const VectorType & VTArg;
593 const EntryType e,
const EntryType
f,
const EntryType
g,
const EntryType
h) {
598 v1 = add(mul(v1, v2), v3);
608 VectorType a =
min(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
609 a =
min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
610 a =
min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
611 return _mm_cvtsi128_si32(a);
615 VectorType a =
max(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
616 a =
max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
617 a =
max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
618 return _mm_cvtsi128_si32(a);
621 VectorType a = mul(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
622 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
623 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
624 return _mm_cvtsi128_si32(a);
627 VectorType a = add(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
628 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
629 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
630 return _mm_cvtsi128_si32(a);
648 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
649 typedef const VectorType & VTArg;
676 VectorType a =
min(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
677 a =
min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
678 a =
min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
679 return _mm_cvtsi128_si32(a);
683 VectorType a =
max(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
684 a =
max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
685 a =
max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
686 return _mm_cvtsi128_si32(a);
690 VectorType a = mul(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
691 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
692 a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
693 return _mm_cvtsi128_si32(a);
697 VectorType a = add(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
698 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
699 a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
700 return _mm_cvtsi128_si32(a);
704 const EntryType d,
const EntryType e,
const EntryType
f,
705 const EntryType
g,
const EntryType
h) {
715 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
// Signed 16-bit element-wise comparisons; each result element is all-ones
// where the predicate holds, all-zeros otherwise.
719 static Vc_INTRINSIC VectorType
Vc_CONST cmplt(VTArg a, VTArg b) {
return _mm_cmplt_epi16(a, b); }
720 static Vc_INTRINSIC VectorType
Vc_CONST cmpgt(VTArg a, VTArg b) {
return _mm_cmpgt_epi16(a, b); }
737 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
738 typedef const VectorType & VTArg;
749 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
750 typedef const VectorType & VTArg;
762 #include "vectorhelper.tcc"
765 #endif // AVX_VECTORHELPER_H
static Vc_INTRINSIC VectorType Vc_CONST cmple(VTArg a, VTArg b)
VectorTypeHelper< signed short >::Type VectorType
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
OP_(or_) OP_(and_) OP_(xor_) static Vc_INTRINSIC VectorType Vc_CONST zero()
static Vc_ALWAYS_INLINE Vc_CONST VectorType aaaa(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b)
VectorTypeHelper< unsigned short >::Type VectorType
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
static Vc_INTRINSIC m256 Vc_CONST _mm256_setabsmask_ps()
static Vc_INTRINSIC VectorType Vc_CONST cmple(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x)
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a)
static Vc_INTRINSIC m256i Vc_CONST _mm256_andnot_si256(param256i x, param256i y)
static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a)
static VectorType dddd(VTArg x)
Small helper to encapsulate whether to return the value pointed to by the iterator or its address...
static Vc_INTRINSIC VectorType Vc_CONST one()
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a)
static Vc_INTRINSIC VectorType Vc_CONST set(const int a)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask)
Namespace for new ROOT classes and functions.
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
static Vc_ALWAYS_INLINE VectorType one()
static Vc_INTRINSIC_L T avx_cast(param128 v) Vc_INTRINSIC_R
unsigned short ConcatType
static Vc_INTRINSIC m128i _mm_cmplt_epu16(param128i a, param128i b)
static Vc_INTRINSIC m128i Vc_CONST _mm_setallone_si128()
static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
static void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a)
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST one()
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
static void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a)
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a)
VectorTypeHelper< unsigned char >::Type VectorType
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask)
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
static Vc_INTRINSIC VectorType Vc_CONST one()
static Vc_ALWAYS_INLINE Vc_CONST VectorType bbbb(VTArg x)
StreamingAndUnalignedFlag
unsigned long long ConcatType
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
static VectorType cccc(VTArg x)
Vc_INTRINSIC Vc_CONST m128 hi128(param256 v)
static Vc_ALWAYS_INLINE VectorType set(const double a, const double b, const double c, const double d)
static VectorType aaaa(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
static VectorType dacb(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
VectorTypeHelper< char >::Type VectorType
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
Vc_INTRINSIC m256i Vc_CONST _mm256_blendv_epi8(param256i a0, param256i b0, param256i m0)
static VectorType dacb(VTArg x)
static Vc_INTRINSIC m256d Vc_CONST _mm256_cmpunord_pd(param256d a, param256d b)
static VectorType badc(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST zero()
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a)
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x)
static Vc_INTRINSIC m128i Vc_CONST _mm_setone_epu16()
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
static Vc_ALWAYS_INLINE Vector< T > abs(const Vector< T > &x)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
static Vc_INTRINSIC m256d Vc_CONST _mm256_setabsmask_pd()
static Vc_INTRINSIC m256i Vc_CONST _mm256_cmplt_epu32(param256i _a, param256i _b)
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b)
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST zero()
static Vc_ALWAYS_INLINE Vector< T > sqrt(const Vector< T > &x)
static Vc_ALWAYS_INLINE Vc_CONST m256 concat(param256d a, param256d b)
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a)
#define Vc_ALWAYS_INLINE_R
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask)
static Vc_INTRINSIC m256 Vc_CONST _mm256_cmpunord_ps(param256 a, param256 b)
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask)
static VectorType dacb(VTArg x)
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b)
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a)
static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d, const unsigned int e, const unsigned int f, const unsigned int g, const unsigned int h)
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a)
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_INTRINSIC VectorType Vc_CONST zero()
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a)
static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a)
static MINMAX Vc_INTRINSIC EntryType Vc_CONST min(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b)
static VectorType dddd(VTArg x)
static VectorType badc(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x)
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a)
static VectorType bbbb(VTArg x)
static Vc_ALWAYS_INLINE Vector< T > rsqrt(const Vector< T > &x)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST abs(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a)
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) OP1(sqrt) OP1(rsqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST one()
static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a)
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a)
static Vc_INTRINSIC m128i _mm_cmpgt_epu16(param128i a, param128i b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cccc(VTArg x)
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a)
static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b)
static VectorType badc(VTArg x)
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
static Vc_INTRINSIC m256i Vc_CONST _mm256_cmpgt_epu32(param256i _a, param256i _b)
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
static Vc_ALWAYS_INLINE Vc_CONST VectorType dacb(VTArg x)
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
static Vc_INTRINSIC VectorType Vc_CONST set(const int a, const int b, const int c, const int d, const int e, const int f, const int g, const int h)
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d, const float e, const float f, const float g, const float h)
static Vc_ALWAYS_INLINE VectorType zero()
#define Vc_ALWAYS_INLINE_L
static Vc_ALWAYS_INLINE VectorType notMaskedToZero(VTArg a, param256 mask)
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b)
static Vc_INTRINSIC m256 Vc_CONST _mm256_setallone_ps()
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a)
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a)
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b)
Vc_INTRINSIC Vc_CONST m256 exponent(param256 v)
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE VectorType set(const double a)
static Vc_INTRINSIC m256i Vc_CONST _mm256_and_si256(param256i x, param256i y)
static Vc_INTRINSIC m256d Vc_CONST _mm256_setallone_pd()
static Vc_INTRINSIC m256 Vc_CONST _mm256_cmpord_ps(param256 a, param256 b)
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a)
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType dddd(VTArg x)
static VectorType cccc(VTArg x)
static Vc_INTRINSIC m256i Vc_CONST _mm256_xor_si256(param256i x, param256i y)
static VectorType bbbb(VTArg x)
static VectorType aaaa(VTArg x)
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
static Vc_INTRINSIC m256d Vc_CONST _mm256_cmpord_pd(param256d a, param256d b)
static Vc_INTRINSIC m256i Vc_CONST _mm256_or_si256(param256i x, param256i y)
static Vc_ALWAYS_INLINE Vc_CONST VectorType badc(VTArg x)
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a)
static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b)
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VTArg a, param256 mask)
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a)
Vc_INTRINSIC Vc_CONST m128 lo128(param256 v)
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x)
static Vc_INTRINSIC m256i Vc_CONST _mm256_setallone_si256()