ROOT  6.06/09
Reference Guide
vectorhelper.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef AVX_VECTORHELPER_H
21 #define AVX_VECTORHELPER_H
22 
23 #include <limits>
24 #include "types.h"
25 #include "intrinsics.h"
26 #include "casts.h"
27 #include "macros.h"
28 
29 namespace ROOT {
30 namespace Vc
31 {
32 namespace AVX
33 {
34 
// Bit-level helpers that work on the raw IEEE-754 representation of AVX vectors.
// NOTE(review): the signature lines of both functions (listing lines 37 and 45)
// are absent from this listing, so their names and parameter types are not visible here.
35 namespace Internal
36 {
    // First helper. `v` is presumably a vector of 8 floats — TODO confirm against the header.
38 {
    // Logical right shift by 23 on every 32-bit element; applied to the part reached
    // through avx_cast<m128i>(v) and to hi128(v) separately.
39  m128i tmp0 = _mm_srli_epi32(avx_cast<m128i>(v), 23);
40  m128i tmp1 = _mm_srli_epi32(avx_cast<m128i>(hi128(v)), 23);
    // 0x7f (127) is the single-precision exponent bias.
    // NOTE(review): the sign bit is not masked off before this subtraction, so the
    // result looks meaningful only for non-negative inputs — confirm with callers.
41  tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x7f));
42  tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x7f));
    // Rejoin both halves and convert the 32-bit integers to float.
43  return _mm256_cvtepi32_ps(concat(tmp0, tmp1));
44 }
    // Second helper. `v` is presumably a vector of 4 doubles — TODO confirm against the header.
46 {
    // Logical right shift by 52 on every 64-bit element.
47  m128i tmp0 = _mm_srli_epi64(avx_cast<m128i>(v), 52);
48  m128i tmp1 = _mm_srli_epi64(avx_cast<m128i>(hi128(v)), 52);
    // 0x3ff (1023) is the double-precision exponent bias. The subtraction works on
    // 32-bit elements, so the upper 32 bits of each 64-bit element are modified too.
49  tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x3ff));
50  tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x3ff));
    // Mem::shuffle<X0, X2, Y0, Y2> presumably gathers 32-bit elements 0 and 2 of each
    // operand (the low words of the 64-bit elements) — TODO confirm; the four integers
    // are then converted to double.
51  return _mm256_cvtepi32_pd(avx_cast<m128i>(Mem::shuffle<X0, X2, Y0, Y2>(avx_cast<m128>(tmp0), avx_cast<m128>(tmp1))));
52 }
53 } // namespace Internal
54 
// Generators for static member functions of the VectorHelper specializations below.
// OPn(name, code) defines `name` taking n arguments of type VTArg (called a, b, c in
// that order) and returning `code` as a VectorType. They rely on the enclosing struct
// providing the VectorType and VTArg typedefs.
55 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }
56 #define OP1(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a) { return code; }
57 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a, VTArg b) { return code; }
58 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VTArg a, VTArg b, VTArg c) { return code; }
59 
// VectorHelper specialization keyed on the register type m256: load/store
// declarations, element permutations and bitwise operations.
60  template<> struct VectorHelper<m256>
61  {
62  typedef m256 VectorType;
    // VTArg is how vectors are passed to the helpers: by const reference when
    // VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined, by const value otherwise.
63 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
64  typedef const VectorType & VTArg;
65 #else
66  typedef const VectorType VTArg;
67 #endif
    // Load/store are only declared here. The trailing unnamed parameter is a tag type
    // that selects the overload; the overloads taking `m` receive an extra vector
    // (presumably a write mask — TODO confirm in the definitions).
68  template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const float *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;
69  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, AlignedFlag) Vc_ALWAYS_INLINE_R;
70  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
71  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
72  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
73  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, VTArg m, AlignedFlag) Vc_ALWAYS_INLINE_R;
74  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, VTArg m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
75  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, VTArg m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
76  static Vc_ALWAYS_INLINE_L void store(float *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
77 
    // Element permutations. _mm256_permute_ps applies the same 4-element selector to
    // each 128-bit half independently; _MM_SHUFFLE lists the source indices from the
    // highest destination element down to the lowest.
    // E.g. cdab swaps neighbouring elements, badc swaps neighbouring pairs, aaaa..dddd
    // broadcast one element of each half.
78  static Vc_ALWAYS_INLINE Vc_CONST VectorType cdab(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(2, 3, 0, 1)); }
79  static Vc_ALWAYS_INLINE Vc_CONST VectorType badc(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(1, 0, 3, 2)); }
80  static Vc_ALWAYS_INLINE Vc_CONST VectorType aaaa(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(0, 0, 0, 0)); }
81  static Vc_ALWAYS_INLINE Vc_CONST VectorType bbbb(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1)); }
82  static Vc_ALWAYS_INLINE Vc_CONST VectorType cccc(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2)); }
83  static Vc_ALWAYS_INLINE Vc_CONST VectorType dddd(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3)); }
84  static Vc_ALWAYS_INLINE Vc_CONST VectorType dacb(VTArg x) { return _mm256_permute_ps(x, _MM_SHUFFLE(3, 0, 2, 1)); }
85 
    // Constants and bitwise operations generated by the OP0/OP2/OP3 macros.
    // allone: _mm256_setallone_ps is defined elsewhere; presumably all bits set.
    // andnot_(a, b) keeps the intrinsic's operand order: (~a) & b.
    // blend(a, b, c) takes an element from b where the sign bit of the matching
    // element of c is set, otherwise from a.
86  OP0(allone, _mm256_setallone_ps())
87  OP0(zero, _mm256_setzero_ps())
88  OP2(or_, _mm256_or_ps(a, b))
89  OP2(xor_, _mm256_xor_ps(a, b))
90  OP2(and_, _mm256_and_ps(a, b))
91  OP2(andnot_, _mm256_andnot_ps(a, b))
92  OP3(blend, _mm256_blendv_ps(a, b, c))
93  };
94 
// VectorHelper specialization keyed on the register type m256d: load/store
// declarations, element permutations and bitwise operations.
95  template<> struct VectorHelper<m256d>
96  {
97  typedef m256d VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
98 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
99  typedef const VectorType & VTArg;
100 #else
101  typedef const VectorType VTArg;
102 #endif
    // Load/store are only declared here; the trailing tag parameter selects the
    // overload. `m` is an extra vector argument (presumably a write mask — TODO confirm).
103  template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const double *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;
104  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, AlignedFlag) Vc_ALWAYS_INLINE_R;
105  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
106  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
107  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
108  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, VTArg m, AlignedFlag) Vc_ALWAYS_INLINE_R;
109  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, VTArg m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
110  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, VTArg m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
111  static Vc_ALWAYS_INLINE_L void store(double *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
112 
    // cdab: selector 5 (binary 0101) swaps the two doubles inside each 128-bit half.
113  static VectorType cdab(VTArg x) { return _mm256_permute_pd(x, 5); }
    // badc: selector 1 with both sources equal to x swaps the two 128-bit halves.
114  static VectorType badc(VTArg x) { return _mm256_permute2f128_pd(x, x, 1); }
115  // aaaa bbbb cccc dddd specialized in vector.tcc
    // dacb: built from a byte-wise align of the two halves plus a blend.
    // NOTE(review): the original author flagged the operand order of both steps as
    // unverified (see the XXX remarks); confirm the resulting element order before
    // relying on this function.
116  static VectorType dacb(VTArg x) {
117  const m128d cb = avx_cast<m128d>(_mm_alignr_epi8(avx_cast<m128i>(lo128(x)),
118  avx_cast<m128i>(hi128(x)), sizeof(double))); // XXX: lo and hi swapped?
119  const m128d da = _mm_blend_pd(lo128(x), hi128(x), 0 + 2); // XXX: lo and hi swapped?
120  return concat(cb, da);
121  }
122 
    // Constants and bitwise operations generated by the OP0/OP2/OP3 macros.
    // andnot_(a, b) is (~a) & b; blend(a, b, c) picks from b where the sign bit of the
    // matching element of c is set, otherwise from a.
123  OP0(allone, _mm256_setallone_pd())
124  OP0(zero, _mm256_setzero_pd())
125  OP2(or_, _mm256_or_pd(a, b))
126  OP2(xor_, _mm256_xor_pd(a, b))
127  OP2(and_, _mm256_and_pd(a, b))
128  OP2(andnot_, _mm256_andnot_pd(a, b))
129  OP3(blend, _mm256_blendv_pd(a, b, c))
130  };
131 
// VectorHelper specialization keyed on the register type m256i: load/store
// declarations (templated on the element pointer type), permutations of 32-bit
// elements and bitwise operations.
132  template<> struct VectorHelper<m256i>
133  {
134  typedef m256i VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
135 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
136  typedef const VectorType & VTArg;
137 #else
138  typedef const VectorType VTArg;
139 #endif
    // Load/store are only declared here; the trailing tag parameter selects the
    // overload. `m` is an extra vector argument (presumably a write mask — TODO confirm).
140  template<typename T> static VectorType load(const T *x, AlignedFlag) Vc_PURE;
141  template<typename T> static VectorType load(const T *x, UnalignedFlag) Vc_PURE;
142  template<typename T> static VectorType load(const T *x, StreamingAndAlignedFlag) Vc_PURE;
143  template<typename T> static VectorType load(const T *x, StreamingAndUnalignedFlag) Vc_PURE;
144  template<typename T> static void store(T *mem, VTArg x, AlignedFlag);
145  template<typename T> static void store(T *mem, VTArg x, UnalignedFlag);
146  template<typename T> static void store(T *mem, VTArg x, StreamingAndAlignedFlag);
147  template<typename T> static void store(T *mem, VTArg x, StreamingAndUnalignedFlag);
148  template<typename T> static void store(T *mem, VTArg x, VTArg m, AlignedFlag);
149  template<typename T> static void store(T *mem, VTArg x, VTArg m, UnalignedFlag);
150  template<typename T> static void store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag);
151  template<typename T> static void store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag);
152 
    // Permutations reuse the float permute: the register is cast to m256, its 32-bit
    // elements are rearranged with the same selector in each 128-bit half, and the
    // result is cast back.
153  static VectorType cdab(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(2, 3, 0, 1))); }
154  static VectorType badc(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(1, 0, 3, 2))); }
155  static VectorType aaaa(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(0, 0, 0, 0))); }
156  static VectorType bbbb(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(1, 1, 1, 1))); }
157  static VectorType cccc(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(2, 2, 2, 2))); }
158  static VectorType dddd(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(3, 3, 3, 3))); }
159  static VectorType dacb(VTArg x) { return avx_cast<VectorType>(_mm256_permute_ps(avx_cast<m256>(x), _MM_SHUFFLE(3, 0, 2, 1))); }
160 
    // Constants and bitwise operations generated by the OP0/OP2/OP3 macros.
    // NOTE(review): the 256-bit integer functions used here may be provided by the
    // project's intrinsics header rather than the compiler — confirm. If blend maps to
    // the standard blendv_epi8 semantics, selection is per byte on the top bit of c.
161  OP0(allone, _mm256_setallone_si256())
162  OP0(zero, _mm256_setzero_si256())
163  OP2(or_, _mm256_or_si256(a, b))
164  OP2(xor_, _mm256_xor_si256(a, b))
165  OP2(and_, _mm256_and_si256(a, b))
166  OP2(andnot_, _mm256_andnot_si256(a, b))
167  OP3(blend, _mm256_blendv_epi8(a, b, c))
168  };
169 
// VectorHelper specialization keyed on the register type m128i: load/store
// declarations, permutations of 16-bit elements and bitwise operations.
170  template<> struct VectorHelper<m128i>
171  {
172  typedef m128i VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
173 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
174  typedef const VectorType & VTArg;
175 #else
176  typedef const VectorType VTArg;
177 #endif
    // Load/store are only declared here; the trailing tag parameter selects the
    // overload. `m` is an extra vector argument (presumably a write mask — TODO confirm).
178  template<typename T> static VectorType load(const T *x, AlignedFlag) Vc_PURE;
179  template<typename T> static VectorType load(const T *x, UnalignedFlag) Vc_PURE;
180  template<typename T> static VectorType load(const T *x, StreamingAndAlignedFlag) Vc_PURE;
181  template<typename T> static VectorType load(const T *x, StreamingAndUnalignedFlag) Vc_PURE;
182  template<typename T> static void store(T *mem, VTArg x, AlignedFlag);
183  template<typename T> static void store(T *mem, VTArg x, UnalignedFlag);
184  template<typename T> static void store(T *mem, VTArg x, StreamingAndAlignedFlag);
185  template<typename T> static void store(T *mem, VTArg x, StreamingAndUnalignedFlag);
186  template<typename T> static void store(T *mem, VTArg x, VTArg m, AlignedFlag);
187  template<typename T> static void store(T *mem, VTArg x, VTArg m, UnalignedFlag);
188  template<typename T> static void store(T *mem, VTArg x, VTArg m, StreamingAndAlignedFlag);
189  template<typename T> static void store(T *mem, VTArg x, VTArg m, StreamingAndUnalignedFlag);
190 
    // Each permutation is done in two steps with the same selector: first the four
    // 16-bit elements of the low 64 bits (shufflelo), then the four of the high
    // 64 bits (shufflehi). Each step leaves the other 64 bits untouched.
191  static VectorType cdab(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)); }
192  static VectorType badc(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2)); }
193  static VectorType aaaa(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(0, 0, 0, 0)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(0, 0, 0, 0)); }
194  static VectorType bbbb(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 1, 1, 1)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(1, 1, 1, 1)); }
195  static VectorType cccc(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 2, 2, 2)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(2, 2, 2, 2)); }
196  static VectorType dddd(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(3, 3, 3, 3)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(3, 3, 3, 3)); }
197  static VectorType dacb(VTArg x) { const __m128i tmp = _mm_shufflelo_epi16(x, _MM_SHUFFLE(3, 0, 2, 1)); return _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(3, 0, 2, 1)); }
198 
    // Constants and bitwise operations generated by the OP0/OP2/OP3 macros.
    // andnot_(a, b) is (~a) & b; blend(a, b, c) selects per byte, taking b where the
    // top bit of the matching byte of c is set, otherwise a.
199  OP0(allone, _mm_setallone_si128())
200  OP0(zero, _mm_setzero_si128())
201  OP2(or_, _mm_or_si128(a, b))
202  OP2(xor_, _mm_xor_si128(a, b))
203  OP2(and_, _mm_and_si128(a, b))
204  OP2(andnot_, _mm_andnot_si128(a, b))
205  OP3(blend, _mm_blendv_epi8(a, b, c))
206  };
207 #undef OP1
208 #undef OP2
209 #undef OP3
210 
// Second generation of generator macros, used by the element-type specializations
// below. All of them build the intrinsic name from the SUFFIX macro that is defined
// at the point of use (e.g. pd, ps, si256, epi32) and expect VectorType/VTArg to be
// typedefs of the enclosing struct. CAT is defined elsewhere; presumably it pastes
// its two arguments after macro expansion.
//
// OP1(op): unary function `op(a)` forwarding to _mm256_<op>_<SUFFIX>(a).
211 #define OP1(op) \
212  static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a) { return CAT(_mm256_##op##_, SUFFIX)(a); }
// OP(op): binary function `op(a, b)` forwarding to _mm256_<op>_<SUFFIX>(a, b).
213 #define OP(op) \
214  static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op##_ , SUFFIX)(a, b); }
// OP_(op): like OP, for names that already end in an underscore (or_, and_, xor_),
// so no extra underscore is inserted before SUFFIX.
215 #define OP_(op) \
216  static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op , SUFFIX)(a, b); }
// OPx(op, op2): binary function named `op` that forwards to _mm256_<op2>_<SUFFIX>.
217 #define OPx(op, op2) \
218  static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_##op2##_, SUFFIX)(a, b); }
// OPcmp(op): comparison `cmp<op>(a, b)` forwarding to _mm256_cmp<op>_<SUFFIX>(a, b).
219 #define OPcmp(op) \
220  static Vc_INTRINSIC VectorType Vc_CONST cmp##op(VTArg a, VTArg b) { return CAT(_mm256_cmp##op##_, SUFFIX)(a, b); }
// OP_CAST_(op): binary function that casts both operands to the float vector type,
// applies _mm256_<op>ps, and casts the result back to the SUFFIX type.
221 #define OP_CAST_(op) \
222  static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return CAT(_mm256_castps_, SUFFIX)( \
223  _mm256_##op##ps(CAT(CAT(_mm256_cast, SUFFIX), _ps)(a), \
224  CAT(CAT(_mm256_cast, SUFFIX), _ps)(b))); \
225  }
// MINMAX: element-wise binary min(a, b) and max(a, b) via _mm256_min/max_<SUFFIX>.
226 #define MINMAX \
227  static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return CAT(_mm256_min_, SUFFIX)(a, b); } \
228  static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return CAT(_mm256_max_, SUFFIX)(a, b); }
229 
// VectorHelper specialization for element type double (vector type m256d):
// constructors, arithmetic, comparisons, classification and horizontal reductions.
230  template<> struct VectorHelper<double> {
231  typedef m256d VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
232 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
233  typedef const VectorType & VTArg;
234 #else
235  typedef const VectorType VTArg;
236 #endif
237  typedef double EntryType;
238  typedef double ConcatType;
    // SUFFIX selects the `_pd` intrinsics for CAT(...) and the generator macros below.
239 #define SUFFIX pd
240 
    // notMaskedToZero: bitwise AND of `a` with the mask reinterpreted as doubles.
241  static Vc_ALWAYS_INLINE VectorType notMaskedToZero(VTArg a, param256 mask) { return CAT(_mm256_and_, SUFFIX)(_mm256_castps_pd(mask), a); }
    // set(a): broadcast. set(a, b, c, d): forwards to _mm256_set_pd, which takes its
    // arguments from the highest element down to the lowest.
242  static Vc_ALWAYS_INLINE VectorType set(const double a) { return CAT(_mm256_set1_, SUFFIX)(a); }
243  static Vc_ALWAYS_INLINE VectorType set(const double a, const double b, const double c, const double d) {
244  return CAT(_mm256_set_, SUFFIX)(a, b, c, d);
245  }
246  static Vc_ALWAYS_INLINE VectorType zero() { return CAT(_mm256_setzero_, SUFFIX)(); }
247  static Vc_ALWAYS_INLINE VectorType one() { return CAT(_mm256_setone_, SUFFIX)(); }// set(1.); }
248 
    // fma: v1 = v1 * v2 + v3, written back into v1.
    // With VC_IMPL_FMA4 this is a single _mm256_macc_pd. Otherwise v1 and v2 are each
    // split into a masked part h and the remainder l = v - h, the partial products
    // ll, lh and hh are formed, and everything is summed together with v3.
    // c_general::highMaskDouble is defined elsewhere; presumably it keeps only the
    // upper mantissa bits so the partial products lose less precision — TODO confirm.
249  static inline void fma(VectorType &v1, VTArg v2, VTArg v3) {
250 #ifdef VC_IMPL_FMA4
251  v1 = _mm256_macc_pd(v1, v2, v3);
252 #else
253  VectorType h1 = _mm256_and_pd(v1, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
254  VectorType h2 = _mm256_and_pd(v2, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
255 #if defined(VC_GCC) && VC_GCC < 0x40703
256  // GCC before 4.7.3 uses an incorrect optimization where it replaces the subtraction with an andnot
257  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54703
    // The empty asm with read-write operands hides h1/h2 from the optimizer.
258  asm("":"+x"(h1), "+x"(h2));
259 #endif
260  const VectorType l1 = _mm256_sub_pd(v1, h1);
261  const VectorType l2 = _mm256_sub_pd(v2, h2);
262  const VectorType ll = mul(l1, l2);
263  const VectorType lh = add(mul(l1, h2), mul(h1, l2));
264  const VectorType hh = mul(h1, h2);
265  // ll < lh < hh for all entries is certain
266  const VectorType lh_lt_v3 = cmplt(abs(lh), abs(v3)); // |lh| < |v3|
    // b receives whichever of lh / v3 has the smaller magnitude, c the other one, so
    // the final sum pairs small with small (ll + b) and large with large (c + hh).
267  const VectorType b = _mm256_blendv_pd(v3, lh, lh_lt_v3);
268  const VectorType c = _mm256_blendv_pd(lh, v3, lh_lt_v3);
269  v1 = add(add(ll, b), add(c, hh));
270 #endif
271  }
272 
    // Element-wise arithmetic and comparisons generated from the `_pd` intrinsics.
273  OP(add) OP(sub) OP(mul)
274  OPcmp(eq) OPcmp(neq)
275  OPcmp(lt) OPcmp(nlt)
276  OPcmp(le) OPcmp(nle)
277 
278  OP1(sqrt)
    // rsqrt and reciprocal are computed with a real division, not an approximation.
279  static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VTArg x) {
280  return _mm256_div_pd(one(), sqrt(x));
281  }
282  static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) {
283  return _mm256_div_pd(one(), x);
284  }
    // isNaN: an unordered self-comparison is true only for NaN elements.
285  static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x) {
286  return _mm256_cmpunord_pd(x, x);
287  }
    // isFinite: 0 * x is NaN when x is infinite or NaN, so the ordered comparison
    // fails exactly for the non-finite elements.
288  static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x) {
289  return _mm256_cmpord_pd(x, _mm256_mul_pd(zero(), x));
290  }
    // abs: bitwise AND with the mask from _mm256_setabsmask_pd (defined elsewhere;
    // presumably all bits except the sign bit).
291  static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) {
292  return CAT(_mm256_and_, SUFFIX)(a, _mm256_setabsmask_pd());
293  }
294 
    // Element-wise min/max, followed by the horizontal reductions: each reduction
    // first combines the low and high 128-bit halves, then the two remaining
    // elements, and returns the scalar in the lowest element.
295  MINMAX
296  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) {
297  m128d b = _mm_min_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
298  b = _mm_min_sd(b, _mm_unpackhi_pd(b, b));
299  return _mm_cvtsd_f64(b);
300  }
301  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) {
302  m128d b = _mm_max_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
303  b = _mm_max_sd(b, _mm_unpackhi_pd(b, b));
304  return _mm_cvtsd_f64(b);
305  }
306  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) {
307  m128d b = _mm_mul_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
    // The shuffle swaps the two elements of b, so element 0 becomes b0 * b1.
308  b = _mm_mul_sd(b, _mm_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1)));
309  return _mm_cvtsd_f64(b);
310  }
311  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) {
312  m128d b = _mm_add_pd(avx_cast<m128d>(a), _mm256_extractf128_pd(a, 1));
313  b = _mm_hadd_pd(b, b); // or: b = _mm_add_sd(b, _mm_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1)));
314  return _mm_cvtsd_f64(b);
315  }
316 #undef SUFFIX
    // round: rounds every element to the nearest integer value.
317  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) {
318  return _mm256_round_pd(a, _MM_FROUND_NINT);
319  }
320  };
321 
// VectorHelper specialization for element type float (vector type m256):
// constructors, arithmetic, comparisons, classification and horizontal reductions.
322  template<> struct VectorHelper<float> {
323  typedef float EntryType;
324  typedef m256 VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
325 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
326  typedef const VectorType & VTArg;
327 #else
328  typedef const VectorType VTArg;
329 #endif
330  typedef double ConcatType;
    // SUFFIX selects the `_ps` intrinsics for CAT(...) and the generator macros below.
331 #define SUFFIX ps
332 
    // notMaskedToZero: bitwise AND of `a` with the mask.
333  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VTArg a, param256 mask) { return CAT(_mm256_and_, SUFFIX)(mask, a); }
    // set(a): broadcast. The 8-argument set forwards to _mm256_set_ps, which takes
    // its arguments from the highest element down to the lowest.
334  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) { return CAT(_mm256_set1_, SUFFIX)(a); }
335  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d,
336  const float e, const float f, const float g, const float h) {
337  return CAT(_mm256_set_, SUFFIX)(a, b, c, d, e, f, g, h); }
338  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm256_setzero_, SUFFIX)(); }
339  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm256_setone_, SUFFIX)(); }// set(1.f); }
    // concat: narrows two double vectors to float; `a` fills the low 128 bits of the
    // result and `b` is inserted as the high 128 bits.
340  static Vc_ALWAYS_INLINE Vc_CONST m256 concat(param256d a, param256d b) { return _mm256_insertf128_ps(avx_cast<m256>(_mm256_cvtpd_ps(a)), _mm256_cvtpd_ps(b), 1); }
341 
    // fma: v1 = v1 * v2 + v3, written back into v1.
    // With VC_IMPL_FMA4 this is a single _mm256_macc_ps. Otherwise both halves of
    // each operand are widened to double, multiplied and added there, and the results
    // are narrowed back to float and rejoined.
342  static inline void fma(VectorType &v1, VTArg v2, VTArg v3) {
343 #ifdef VC_IMPL_FMA4
344  v1 = _mm256_macc_ps(v1, v2, v3);
345 #else
346  m256d v1_0 = _mm256_cvtps_pd(lo128(v1));
347  m256d v1_1 = _mm256_cvtps_pd(hi128(v1));
348  m256d v2_0 = _mm256_cvtps_pd(lo128(v2));
349  m256d v2_1 = _mm256_cvtps_pd(hi128(v2));
350  m256d v3_0 = _mm256_cvtps_pd(lo128(v3));
351  m256d v3_1 = _mm256_cvtps_pd(hi128(v3));
352  v1 = AVX::concat(
353  _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_0, v2_0), v3_0)),
354  _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_1, v2_1), v3_1)));
355 #endif
356  }
357 
    // Element-wise arithmetic and comparisons generated from the `_ps` intrinsics.
358  OP(add) OP(sub) OP(mul)
359  OPcmp(eq) OPcmp(neq)
360  OPcmp(lt) OPcmp(nlt)
361  OPcmp(le) OPcmp(nle)
362 
    // rsqrt (here) and reciprocal (below) map to _mm256_rsqrt_ps / _mm256_rcp_ps,
    // which are hardware approximations rather than exact divisions.
363  OP1(sqrt) OP1(rsqrt)
    // isNaN: an unordered self-comparison is true only for NaN elements.
364  static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x) {
365  return _mm256_cmpunord_ps(x, x);
366  }
    // isFinite: 0 * x is NaN when x is infinite or NaN, so the ordered comparison
    // fails exactly for the non-finite elements.
367  static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x) {
368  return _mm256_cmpord_ps(x, _mm256_mul_ps(zero(), x));
369  }
370  static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) {
371  return _mm256_rcp_ps(x);
372  }
    // abs: bitwise AND with the mask from _mm256_setabsmask_ps (defined elsewhere;
    // presumably all bits except the sign bit).
373  static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) {
374  return CAT(_mm256_and_, SUFFIX)(a, _mm256_setabsmask_ps());
375  }
376 
    // Element-wise min/max, followed by the horizontal reductions: each reduction
    // folds 8 -> 4 -> 2 -> 1 elements and returns the scalar in the lowest element.
377  MINMAX
378  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) {
379  m128 b = _mm_min_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
380  b = _mm_min_ps(b, _mm_movehl_ps(b, b)); // b = min(a0, a2), min(a1, a3), min(a2, a2), min(a3, a3)
381  b = _mm_min_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = min(a0, a1), a1, a2, a3
382  return _mm_cvtss_f32(b);
383  }
384  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) {
385  m128 b = _mm_max_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
386  b = _mm_max_ps(b, _mm_movehl_ps(b, b)); // b = max(a0, a2), max(a1, a3), max(a2, a2), max(a3, a3)
387  b = _mm_max_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = max(a0, a1), a1, a2, a3
388  return _mm_cvtss_f32(b);
389  }
390  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) {
391  m128 b = _mm_mul_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
    // Multiply by the reversed vector: element 0 = b0*b3, element 1 = b1*b2.
392  b = _mm_mul_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
    // The shuffle brings element 1 into position 0 for the final scalar multiply.
393  b = _mm_mul_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
394  return _mm_cvtss_f32(b);
395  }
396  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) {
397  m128 b = _mm_add_ps(avx_cast<m128>(a), _mm256_extractf128_ps(a, 1));
    // Same folding pattern as mul(VTArg) above, with additions.
398  b = _mm_add_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3)));
399  b = _mm_add_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1)));
400  return _mm_cvtss_f32(b);
401  }
402 #undef SUFFIX
    // round: rounds every element to the nearest integer value.
403  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) {
404  return _mm256_round_ps(a, _MM_FROUND_NINT);
405  }
406  };
407 
// sfloat adds no members of its own: it inherits everything from VectorHelper<float>.
408  template<> struct VectorHelper<sfloat> : public VectorHelper<float> {};
409 
// VectorHelper specialization for element type int (vector type m256i): bitwise
// operations, constructors, shifts, arithmetic, comparisons and horizontal reductions.
410  template<> struct VectorHelper<int> {
411  typedef int EntryType;
412  typedef m256i VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
413 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
414  typedef const VectorType & VTArg;
415 #else
416  typedef const VectorType VTArg;
417 #endif
418  typedef long long ConcatType;
    // Whole-register operations use the si256 suffix ...
419 #define SUFFIX si256
420 
421  OP_(or_) OP_(and_) OP_(xor_)
422  static Vc_INTRINSIC VectorType Vc_CONST zero() { return CAT(_mm256_setzero_, SUFFIX)(); }
    // notMaskedToZero: bitwise AND of `a` with the mask reinterpreted as integers.
423  static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask) { return CAT(_mm256_and_, SUFFIX)(_mm256_castps_si256(mask), a); }
424 #undef SUFFIX
    // ... while per-element operations use the epi32 suffix.
425 #define SUFFIX epi32
426  static Vc_INTRINSIC VectorType Vc_CONST one() { return CAT(_mm256_setone_, SUFFIX)(); }
427 
    // set(a): broadcast. The 8-argument set forwards to _mm256_set_epi32, which takes
    // its arguments from the highest element down to the lowest.
428  static Vc_INTRINSIC VectorType Vc_CONST set(const int a) { return CAT(_mm256_set1_, SUFFIX)(a); }
429  static Vc_INTRINSIC VectorType Vc_CONST set(const int a, const int b, const int c, const int d,
430  const int e, const int f, const int g, const int h) {
431  return CAT(_mm256_set_, SUFFIX)(a, b, c, d, e, f, g, h); }
432 
    // fma: plain multiply followed by add, written back into v1.
433  static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3) { v1 = add(mul(v1, v2), v3); }
434 
435  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift) {
436  return CAT(_mm256_slli_, SUFFIX)(a, shift);
437  }
    // shiftRight uses the arithmetic shift (srai), which replicates the sign bit.
438  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift) {
439  return CAT(_mm256_srai_, SUFFIX)(a, shift);
440  }
441  OP1(abs)
442 
    // Element-wise min/max, followed by the horizontal reductions. Each reduction
    // combines the two 128-bit halves, then the two 64-bit halves, then the two lowest
    // 32-bit elements (swapped via a 16-bit shuffle of the low 64 bits), and returns
    // the lowest element.
443  MINMAX
444  static Vc_INTRINSIC EntryType Vc_CONST min(VTArg a) {
445  m128i b = _mm_min_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
446  b = _mm_min_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
447  b = _mm_min_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2))); // using lo_epi16 for speed here
448  return _mm_cvtsi128_si32(b);
449  }
450  static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a) {
451  m128i b = _mm_max_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
452  b = _mm_max_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
453  b = _mm_max_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2))); // using lo_epi16 for speed here
454  return _mm_cvtsi128_si32(b);
455  }
456  static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a) {
457  m128i b = _mm_add_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
458  b = _mm_add_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
459  b = _mm_add_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
460  return _mm_cvtsi128_si32(b);
461  }
462  static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a) {
463  m128i b = _mm_mullo_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
464  b = _mm_mullo_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
465  b = _mm_mullo_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
466  return _mm_cvtsi128_si32(b);
467  }
468 
    // Element-wise multiply keeps the low 32 bits of each product.
469  static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mullo_epi32(a, b); }
470 
471  OP(add) OP(sub)
472  OPcmp(eq)
473  OPcmp(lt)
474  OPcmp(gt)
    // The negated comparisons invert a base comparison: andnot(x, all-ones) is ~x.
    // cmple is NOT(a > b), and cmpnle is simply a > b.
475  static Vc_INTRINSIC VectorType Vc_CONST cmpneq(VTArg a, VTArg b) { m256i x = cmpeq(a, b); return _mm256_andnot_si256(x, _mm256_setallone_si256()); }
476  static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b) { m256i x = cmplt(a, b); return _mm256_andnot_si256(x, _mm256_setallone_si256()); }
477  static Vc_INTRINSIC VectorType Vc_CONST cmple (VTArg a, VTArg b) { m256i x = cmpgt(a, b); return _mm256_andnot_si256(x, _mm256_setallone_si256()); }
478  static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b) { return cmpgt(a, b); }
479 #undef SUFFIX
    // round: identity, integers are already whole numbers.
480  static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a) { return a; }
481  };
482 
// VectorHelper specialization for element type unsigned int (vector type m256i):
// bitwise operations, constructors, shifts, arithmetic, comparisons and reductions.
483  template<> struct VectorHelper<unsigned int> {
484  typedef unsigned int EntryType;
485  typedef m256i VectorType;
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
486 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
487  typedef const VectorType & VTArg;
488 #else
489  typedef const VectorType VTArg;
490 #endif
491  typedef unsigned long long ConcatType;
    // With SUFFIX si256, OP_CAST_ performs the bitwise operations through the float
    // intrinsics, casting the operands to m256 and the result back.
492 #define SUFFIX si256
493  OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_)
494  static Vc_INTRINSIC VectorType Vc_CONST zero() { return CAT(_mm256_setzero_, SUFFIX)(); }
    // notMaskedToZero: bitwise AND of `a` with the mask reinterpreted as integers.
495  static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask) { return CAT(_mm256_and_, SUFFIX)(_mm256_castps_si256(mask), a); }
496 
497 #undef SUFFIX
    // epu32 selects the unsigned variants for one() and MINMAX.
498 #define SUFFIX epu32
499  static Vc_INTRINSIC VectorType Vc_CONST one() { return CAT(_mm256_setone_, SUFFIX)(); }
500 
    // Element-wise min/max, followed by the horizontal reductions. Each reduction
    // combines the two 128-bit halves, then the two 64-bit halves, then the two lowest
    // 32-bit elements, and returns the lowest element (the int result of
    // _mm_cvtsi128_si32 is converted to EntryType on return).
501  MINMAX
502  static Vc_INTRINSIC EntryType Vc_CONST min(VTArg a) {
503  m128i b = _mm_min_epu32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
504  b = _mm_min_epu32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
505  b = _mm_min_epu32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2))); // using lo_epi16 for speed here
506  return _mm_cvtsi128_si32(b);
507  }
508  static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a) {
509  m128i b = _mm_max_epu32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
510  b = _mm_max_epu32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
511  b = _mm_max_epu32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2))); // using lo_epi16 for speed here
512  return _mm_cvtsi128_si32(b);
513  }
    // add/mul reductions use the epi32 intrinsics; the low 32 bits of a sum or
    // product are the same for signed and unsigned operands.
514  static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a) {
515  m128i b = _mm_add_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
516  b = _mm_add_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
517  b = _mm_add_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
518  return _mm_cvtsi128_si32(b);
519  }
520  static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a) {
521  m128i b = _mm_mullo_epi32(avx_cast<m128i>(a), _mm256_extractf128_si256(a, 1));
522  b = _mm_mullo_epi32(b, _mm_shuffle_epi32(b, _MM_SHUFFLE(1, 0, 3, 2)));
523  b = _mm_mullo_epi32(b, _mm_shufflelo_epi16(b, _MM_SHUFFLE(1, 0, 3, 2)));
524  return _mm_cvtsi128_si32(b);
525  }
526 
527  static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mullo_epi32(a, b); }
    // fma: plain multiply followed by add, written back into v1.
528  static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3) { v1 = add(mul(v1, v2), v3); }
529 
530 #undef SUFFIX
    // From here on the epi32 suffix is used (shifts, set, add/sub, cmpeq).
531 #define SUFFIX epi32
532  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift) {
533  return CAT(_mm256_slli_, SUFFIX)(a, shift);
534  }
    // shiftRight uses the logical shift (srli), which fills with zero bits.
535  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift) {
536  return CAT(_mm256_srli_, SUFFIX)(a, shift);
537  }
    // set(a): broadcast. The 8-argument set forwards to _mm256_set_epi32, which takes
    // its arguments from the highest element down to the lowest.
538  static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a) { return CAT(_mm256_set1_, SUFFIX)(a); }
539  static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d,
540  const unsigned int e, const unsigned int f, const unsigned int g, const unsigned int h) {
541  return CAT(_mm256_set_, SUFFIX)(a, b, c, d, e, f, g, h); }
542 
543  OP(add) OP(sub)
544  OPcmp(eq)
    // andnot(x, all-ones) is ~x, i.e. the inverted comparison result.
545  static Vc_INTRINSIC VectorType Vc_CONST cmpneq(VTArg a, VTArg b) { return _mm256_andnot_si256(cmpeq(a, b), _mm256_setallone_si256()); }
546 
    // Ordering comparisons: by default the dedicated unsigned helpers
    // (_mm256_cmplt_epu32 / _mm256_cmpgt_epu32, defined elsewhere) are used. Defining
    // USE_INCORRECT_UNSIGNED_COMPARE switches to the signed epi32 comparisons instead.
547 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
548  static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b) {
549  return _mm256_cmplt_epu32(a, b);
550  }
551  static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b) {
552  return _mm256_cmpgt_epu32(a, b);
553  }
554 #else
555  OPcmp(lt)
556  OPcmp(gt)
557 #endif
    // cmpnlt is NOT(a < b), cmple is NOT(a > b), and cmpnle is simply a > b.
558  static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b) { return _mm256_andnot_si256(cmplt(a, b), _mm256_setallone_si256()); }
559  static Vc_INTRINSIC VectorType Vc_CONST cmple (VTArg a, VTArg b) { return _mm256_andnot_si256(cmpgt(a, b), _mm256_setallone_si256()); }
560  static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b) { return cmpgt(a, b); }
561 
562 #undef SUFFIX
    // round: identity, integers are already whole numbers.
563  static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a) { return a; }
564  };
565 
// VectorHelper specialization for element type signed short. All operations use
// 128-bit (_mm_*) intrinsics on 16-bit elements.
// NOTE(review): listing line 567 is absent from this listing; it presumably holds the
// VectorType typedef (m128i, judging by the intrinsics used) — confirm in the header.
566  template<> struct VectorHelper<signed short> {
    // VTArg: const reference when VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN is defined,
    // const value otherwise.
568 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
569  typedef const VectorType & VTArg;
570 #else
571  typedef const VectorType VTArg;
572 #endif
573  typedef signed short EntryType;
574  typedef int ConcatType;
575 
576  static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b) { return _mm_or_si128(a, b); }
577  static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b) { return _mm_and_si128(a, b); }
578  static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b) { return _mm_xor_si128(a, b); }
579  static Vc_INTRINSIC VectorType Vc_CONST zero() { return _mm_setzero_si128(); }
    // notMaskedToZero: bitwise AND of `a` with the mask reinterpreted as integers.
580  static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask) { return _mm_and_si128(_mm_castps_si128(mask), a); }
581 
    // SUFFIX selects the epi16 intrinsics for the CAT(...) expressions below.
582 #define SUFFIX epi16
583  static Vc_INTRINSIC VectorType Vc_CONST one() { return CAT(_mm_setone_, SUFFIX)(); }
584 
585  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift) {
586  return CAT(_mm_slli_, SUFFIX)(a, shift);
587  }
    // shiftRight uses the arithmetic shift (srai), which replicates the sign bit.
588  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift) {
589  return CAT(_mm_srai_, SUFFIX)(a, shift);
590  }
    // set(a): broadcast. The 8-argument set forwards to _mm_set_epi16, which takes
    // its arguments from the highest element down to the lowest.
591  static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }
592  static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c, const EntryType d,
593  const EntryType e, const EntryType f, const EntryType g, const EntryType h) {
594  return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);
595  }
596 
    // fma: plain multiply followed by add, written back into v1.
597  static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3) {
598  v1 = add(mul(v1, v2), v3);
599  }
600 
601  static Vc_INTRINSIC VectorType Vc_CONST abs(VTArg a) { return _mm_abs_epi16(a); }
    // Element-wise multiply keeps the low 16 bits of each product.
602  static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm_mullo_epi16(a, b); }
603  static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm_min_epi16(a, b); }
604  static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm_max_epi16(a, b); }
605 
    // Horizontal reductions over the eight 16-bit elements. Each one folds
    // 8 -> 4 (swap 64-bit halves), 4 -> 2 (swap 32-bit pairs in the low half) and
    // 2 -> 1 (broadcast element 1 in the low half); the result is the lowest element,
    // narrowed to EntryType on return.
606  static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a) {
607  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
608  VectorType a = min(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
609  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
610  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
611  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
612  }
613  static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a) {
614  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
615  VectorType a = max(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
616  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
617  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
618  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
619  }
620  static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a) {
621  VectorType a = mul(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
622  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
623  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
624  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
625  }
626  static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a) {
627  VectorType a = add(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
628  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
629  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
630  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
631  }
632 
633  static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm_add_epi16(a, b); }
634  static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm_sub_epi16(a, b); }
635  static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b) { return _mm_cmpeq_epi16(a, b); }
636  static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b) { return _mm_cmplt_epi16(a, b); }
637  static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b) { return _mm_cmpgt_epi16(a, b); }
    // The negated comparisons invert a base comparison: andnot(x, all-ones) is ~x.
    // cmple is NOT(a > b), and cmpnle is simply a > b.
638  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b) { m128i x = cmpeq(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
639  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b) { m128i x = cmplt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
640  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (VTArg a, VTArg b) { m128i x = cmpgt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
641  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b) { return cmpgt(a, b); }
642 #undef SUFFIX
    // round: identity, integers are already whole numbers.
643  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) { return a; }
644  };
645 
646  template<> struct VectorHelper<unsigned short> {
648 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
649  typedef const VectorType & VTArg;
650 #else
651  typedef const VectorType VTArg;
652 #endif
653  typedef unsigned short EntryType;
654  typedef unsigned int ConcatType;
655 
656  static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b) { return _mm_or_si128(a, b); }
657  static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b) { return _mm_and_si128(a, b); }
658  static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b) { return _mm_xor_si128(a, b); }
659  static Vc_INTRINSIC VectorType Vc_CONST zero() { return _mm_setzero_si128(); }
660  static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask) { return _mm_and_si128(_mm_castps_si128(mask), a); }
661  static Vc_INTRINSIC VectorType Vc_CONST one() { return _mm_setone_epu16(); }
662 
663  static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm_mullo_epi16(a, b); }
664  static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm_min_epu16(a, b); }
665  static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm_max_epu16(a, b); }
666 
667 #define SUFFIX epi16
668  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift) {
669  return CAT(_mm_slli_, SUFFIX)(a, shift);
670  }
671  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift) {
672  return CAT(_mm_srli_, SUFFIX)(a, shift);
673  }
674  static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a) {
675  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
676  VectorType a = min(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
677  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
678  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
679  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
680  }
681  static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a) {
682  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
683  VectorType a = max(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
684  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
685  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
686  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
687  }
688  static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a) {
689  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
690  VectorType a = mul(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
691  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
692  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
693  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
694  }
695  static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a) {
696  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
697  VectorType a = add(_a, _mm_shuffle_epi32(_a, _MM_SHUFFLE(1, 0, 3, 2)));
698  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
699  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
700  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
701  }
702  static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }
703  static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c,
704  const EntryType d, const EntryType e, const EntryType f,
705  const EntryType g, const EntryType h) {
706  return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);
707  }
708  static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3) { v1 = add(mul(v1, v2), v3); }
709 
710  static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm_add_epi16(a, b); }
711  static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm_sub_epi16(a, b); }
712  static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b) { return _mm_cmpeq_epi16(a, b); }
713  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b) { return _mm_andnot_si128(cmpeq(a, b), _mm_setallone_si128()); }
714 
715 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
716  static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b) { return _mm_cmplt_epu16(a, b); }
717  static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b) { return _mm_cmpgt_epu16(a, b); }
718 #else
719  static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b) { return _mm_cmplt_epi16(a, b); }
720  static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b) { return _mm_cmpgt_epi16(a, b); }
721 #endif
722  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b) { return _mm_andnot_si128(cmplt(a, b), _mm_setallone_si128()); }
723  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (VTArg a, VTArg b) { return _mm_andnot_si128(cmpgt(a, b), _mm_setallone_si128()); }
724  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b) { return cmpgt(a, b); }
725 #undef SUFFIX
726  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) { return a; }
727  };
728 #undef OP1
729 #undef OP
730 #undef OP_
731 #undef OPx
732 #undef OPcmp
733 
734 template<> struct VectorHelper<char>
735 {
737 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
738  typedef const VectorType & VTArg;
739 #else
740  typedef const VectorType VTArg;
741 #endif
742  typedef char EntryType;
743  typedef short ConcatType;
744 };
745 
746 template<> struct VectorHelper<unsigned char>
747 {
749 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
750  typedef const VectorType & VTArg;
751 #else
752  typedef const VectorType VTArg;
753 #endif
754  typedef unsigned char EntryType;
755  typedef unsigned short ConcatType;
756 };
757 
758 } // namespace AVX
759 } // namespace Vc
760 } // namespace ROOT
761 
762 #include "vectorhelper.tcc"
763 #include "undomacros.h"
764 
765 #endif // AVX_VECTORHELPER_H
static Vc_INTRINSIC VectorType Vc_CONST cmple(VTArg a, VTArg b)
Definition: vectorhelper.h:559
VectorTypeHelper< signed short >::Type VectorType
Definition: vectorhelper.h:567
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
Definition: vectorhelper.h:643
OP_(or_) OP_(and_) OP_(xor_) static Vc_INTRINSIC VectorType Vc_CONST zero()
Definition: vectorhelper.h:421
static Vc_ALWAYS_INLINE Vc_CONST VectorType aaaa(VTArg x)
Definition: vectorhelper.h:80
const m256d param256d
Definition: intrinsics.h:129
static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b)
Definition: vectorhelper.h:604
#define OPcmp(op)
Definition: vectorhelper.h:219
VectorTypeHelper< unsigned short >::Type VectorType
Definition: vectorhelper.h:647
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
Definition: vectorhelper.h:403
static Vc_INTRINSIC m256 Vc_CONST _mm256_setabsmask_ps()
Definition: intrinsics.h:199
static Vc_INTRINSIC VectorType Vc_CONST cmple(VTArg a, VTArg b)
Definition: vectorhelper.h:477
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b)
Definition: vectorhelper.h:476
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x)
Definition: vectorhelper.h:367
#define OP0(name, code)
Definition: vectorhelper.h:55
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
Definition: vectorhelper.h:317
static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a)
Definition: vectorhelper.h:538
static Vc_INTRINSIC m256i Vc_CONST _mm256_andnot_si256(param256i x, param256i y)
Definition: intrinsics.h:384
static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a)
Definition: vectorhelper.h:563
static VectorType dddd(VTArg x)
Definition: vectorhelper.h:158
Small helper to encapsulate whether to return the value pointed to by the iterator or its address...
const Double_t * v1
Definition: TArcBall.cxx:33
static Vc_INTRINSIC VectorType Vc_CONST one()
Definition: vectorhelper.h:426
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a)
Definition: vectorhelper.h:695
static Vc_INTRINSIC VectorType Vc_CONST set(const int a)
Definition: vectorhelper.h:428
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
Definition: vectorhelper.h:532
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask)
Definition: vectorhelper.h:580
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a)
Definition: vectorhelper.h:726
static Vc_ALWAYS_INLINE VectorType one()
Definition: vectorhelper.h:247
static Vc_INTRINSIC_L T avx_cast(param128 v) Vc_INTRINSIC_R
Definition: casts.h:49
static Vc_INTRINSIC m128i _mm_cmplt_epu16(param128i a, param128i b)
Definition: intrinsics.h:243
static Vc_INTRINSIC m128i Vc_CONST _mm_setallone_si128()
Definition: intrinsics.h:165
static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b)
Definition: vectorhelper.h:657
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
Definition: vectorhelper.h:716
static void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:249
#define OP1(name, code)
Definition: vectorhelper.h:211
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg _a)
Definition: vectorhelper.h:626
#define SUFFIX
Definition: vectorhelper.h:667
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a)
Definition: vectorhelper.h:301
static Vc_INTRINSIC VectorType Vc_CONST one()
Definition: vectorhelper.h:661
double T(double x)
Definition: ChebyshevPol.h:34
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
Definition: vectorhelper.h:637
static void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:342
const m128 param128
Definition: intrinsics.h:125
static Vc_INTRINSIC VectorType Vc_CONST and_(VTArg a, VTArg b)
Definition: vectorhelper.h:577
static Vc_INTRINSIC VectorType Vc_CONST round(VTArg a)
Definition: vectorhelper.h:480
static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b)
Definition: vectorhelper.h:658
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
Definition: vectorhelper.h:585
TH1 * h
Definition: legend2.C:5
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
Definition: vectorhelper.h:535
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a)
Definition: vectorhelper.h:514
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a)
Definition: vectorhelper.h:520
VectorTypeHelper< unsigned char >::Type VectorType
Definition: vectorhelper.h:748
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param128 mask)
Definition: vectorhelper.h:660
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:339
static Vc_INTRINSIC VectorType Vc_CONST one()
Definition: vectorhelper.h:499
static Vc_ALWAYS_INLINE Vc_CONST VectorType bbbb(VTArg x)
Definition: vectorhelper.h:81
StreamingAndUnalignedFlag
Definition: global.h:317
#define MINMAX
Definition: vectorhelper.h:226
#define Vc_PURE_L
Definition: macros.h:137
TArc * a
Definition: textangle.C:12
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
Definition: vectorhelper.h:671
#define Vc_INTRINSIC
Definition: macros.h:139
static VectorType cccc(VTArg x)
Definition: vectorhelper.h:195
Vc_INTRINSIC Vc_CONST m128 hi128(param256 v)
Definition: casts.h:118
static Vc_ALWAYS_INLINE VectorType set(const double a, const double b, const double c, const double d)
Definition: vectorhelper.h:243
#define OP_CAST_(op)
Definition: vectorhelper.h:221
static VectorType aaaa(VTArg x)
Definition: vectorhelper.h:155
static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b)
Definition: vectorhelper.h:634
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:703
static double A[]
static VectorType dacb(VTArg x)
Definition: vectorhelper.h:197
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
Definition: vectorhelper.h:636
VectorTypeHelper< char >::Type VectorType
Definition: vectorhelper.h:736
static Vc_INTRINSIC VectorType Vc_CONST cmplt(VTArg a, VTArg b)
Definition: vectorhelper.h:548
#define OP(op)
Definition: vectorhelper.h:213
Vc_INTRINSIC m256i Vc_CONST _mm256_blendv_epi8(param256i a0, param256i b0, param256i m0)
Definition: intrinsics.h:495
static VectorType dacb(VTArg x)
Definition: vectorhelper.h:116
static Vc_INTRINSIC m256d Vc_CONST _mm256_cmpunord_pd(param256d a, param256d b)
Definition: intrinsics.h:230
static VectorType badc(VTArg x)
Definition: vectorhelper.h:192
static Vc_INTRINSIC VectorType Vc_CONST zero()
Definition: vectorhelper.h:659
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
Definition: vectorhelper.h:438
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:433
Double_t x[n]
Definition: legend1.C:17
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a)
Definition: vectorhelper.h:702
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:708
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VTArg a, int shift)
Definition: vectorhelper.h:588
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VTArg x)
Definition: vectorhelper.h:288
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
Definition: vectorhelper.h:717
static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b)
Definition: vectorhelper.h:603
static Vc_INTRINSIC VectorType Vc_CONST cmpgt(VTArg a, VTArg b)
Definition: vectorhelper.h:551
#define CAT(a, b)
Definition: macros.h:281
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x)
Definition: vectorhelper.h:282
static Vc_INTRINSIC m128i Vc_CONST _mm_setone_epu16()
Definition: intrinsics.h:172
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
Definition: casts.h:123
static Vc_ALWAYS_INLINE Vector< T > abs(const Vector< T > &x)
Definition: vector.h:450
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(VTArg a, VTArg b)
Definition: vectorhelper.h:640
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b)
Definition: vectorhelper.h:638
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
Definition: vectorhelper.h:435
static Vc_INTRINSIC m256d Vc_CONST _mm256_setabsmask_pd()
Definition: intrinsics.h:198
static Vc_INTRINSIC m256i Vc_CONST _mm256_cmplt_epu32(param256i _a, param256i _b)
Definition: intrinsics.h:540
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a)
Definition: vectorhelper.h:306
static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b)
Definition: vectorhelper.h:635
static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b)
Definition: vectorhelper.h:656
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a)
Definition: vectorhelper.h:450
TH1F * h1
Definition: legend1.C:5
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(VTArg a, VTArg b)
Definition: vectorhelper.h:723
static Vc_INTRINSIC VectorType Vc_CONST zero()
Definition: vectorhelper.h:579
#define OP3(name, code)
Definition: vectorhelper.h:58
#define OP2(name, code)
Definition: vectorhelper.h:57
static Vc_ALWAYS_INLINE Vector< T > sqrt(const Vector< T > &x)
Definition: vector.h:448
static Vc_ALWAYS_INLINE Vc_CONST m256 concat(param256d a, param256d b)
Definition: vectorhelper.h:340
#define Vc_PURE
Definition: macros.h:136
#define AVX
Definition: global.h:90
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a)
Definition: vectorhelper.h:620
#define Vc_ALWAYS_INLINE_R
Definition: macros.h:132
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask)
Definition: vectorhelper.h:495
static Vc_INTRINSIC m256 Vc_CONST _mm256_cmpunord_ps(param256 a, param256 b)
Definition: intrinsics.h:241
static Vc_INTRINSIC VectorType Vc_CONST notMaskedToZero(VTArg a, param256 mask)
Definition: vectorhelper.h:423
static VectorType dacb(VTArg x)
Definition: vectorhelper.h:159
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a)
Definition: vectorhelper.h:291
static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b)
Definition: vectorhelper.h:560
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg _a)
Definition: vectorhelper.h:688
static Vc_INTRINSIC VectorType Vc_CONST set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d, const unsigned int e, const unsigned int f, const unsigned int g, const unsigned int h)
Definition: vectorhelper.h:539
StreamingAndAlignedFlag
Definition: global.h:314
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
Definition: vectorhelper.h:602
SVector< double, 2 > v
Definition: Dict.h:5
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a)
Definition: vectorhelper.h:296
static Vc_INTRINSIC VectorType Vc_CONST cmpnle(VTArg a, VTArg b)
Definition: vectorhelper.h:478
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a)
Definition: vectorhelper.h:373
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_INTRINSIC VectorType Vc_CONST zero()
Definition: vectorhelper.h:493
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a)
Definition: vectorhelper.h:378
static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a)
Definition: vectorhelper.h:606
static MINMAX Vc_INTRINSIC EntryType Vc_CONST min(VTArg a)
Definition: vectorhelper.h:502
static Vc_INTRINSIC VectorType Vc_CONST cmpeq(VTArg a, VTArg b)
Definition: vectorhelper.h:712
static VectorType dddd(VTArg x)
Definition: vectorhelper.h:196
TMarker * m
Definition: textangle.C:8
#define Vc_CONST
Definition: macros.h:133
static VectorType badc(VTArg x)
Definition: vectorhelper.h:114
static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b)
Definition: vectorhelper.h:664
static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x)
Definition: vectorhelper.h:285
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:597
static Vc_INTRINSIC EntryType Vc_CONST add(VTArg a)
Definition: vectorhelper.h:456
static VectorType bbbb(VTArg x)
Definition: vectorhelper.h:156
static Vc_ALWAYS_INLINE Vector< T > rsqrt(const Vector< T > &x)
Definition: vector.h:449
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b)
Definition: vectorhelper.h:639
#define OP_(op)
Definition: vectorhelper.h:215
static Vc_INTRINSIC VectorType Vc_CONST abs(VTArg a)
Definition: vectorhelper.h:601
static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b)
Definition: vectorhelper.h:711
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a)
Definition: vectorhelper.h:311
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) OP1(sqrt) OP1(rsqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VTArg x)
Definition: vectorhelper.h:358
static Vc_INTRINSIC VectorType Vc_CONST one()
Definition: vectorhelper.h:583
static Vc_INTRINSIC EntryType Vc_CONST min(VTArg _a)
Definition: vectorhelper.h:674
static Vc_INTRINSIC EntryType Vc_CONST mul(VTArg a)
Definition: vectorhelper.h:462
static Vc_INTRINSIC m128i _mm_cmpgt_epu16(param128i a, param128i b)
Definition: intrinsics.h:246
static Vc_ALWAYS_INLINE Vc_CONST VectorType cccc(VTArg x)
Definition: vectorhelper.h:82
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a)
Definition: vectorhelper.h:681
static Vc_INTRINSIC VectorType Vc_CONST xor_(VTArg a, VTArg b)
Definition: vectorhelper.h:578
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b)
Definition: vectorhelper.h:641
static VectorType badc(VTArg x)
Definition: vectorhelper.h:154
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VTArg x)
Definition: vectorhelper.h:273
static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b)
Definition: vectorhelper.h:665
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(VTArg a, VTArg b)
Definition: vectorhelper.h:713
double f(double x)
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
Definition: vectorhelper.h:469
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
Definition: vectorhelper.h:527
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:592
static Vc_INTRINSIC m256i Vc_CONST _mm256_cmpgt_epu32(param256i _a, param256i _b)
Definition: intrinsics.h:547
static Vc_INTRINSIC void fma(VectorType &v1, VTArg v2, VTArg v3)
Definition: vectorhelper.h:528
static Vc_ALWAYS_INLINE Vc_CONST VectorType dacb(VTArg x)
Definition: vectorhelper.h:84
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
Definition: vectorhelper.h:334
static Vc_INTRINSIC VectorType Vc_CONST set(const int a, const int b, const int c, const int d, const int e, const int f, const int g, const int h)
Definition: vectorhelper.h:429
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d, const float e, const float f, const float g, const float h)
Definition: vectorhelper.h:335
static Vc_ALWAYS_INLINE VectorType zero()
Definition: vectorhelper.h:246
#define Vc_ALWAYS_INLINE_L
Definition: macros.h:131
static Vc_ALWAYS_INLINE VectorType notMaskedToZero(VTArg a, param256 mask)
Definition: vectorhelper.h:241
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a)
Definition: vectorhelper.h:390
static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b)
Definition: vectorhelper.h:710
static Vc_INTRINSIC m256 Vc_CONST _mm256_setallone_ps()
Definition: intrinsics.h:186
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg _a)
Definition: vectorhelper.h:613
static Vc_INTRINSIC VectorType Vc_CONST set(const EntryType a)
Definition: vectorhelper.h:591
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:338
static Vc_INTRINSIC VectorType Vc_CONST cmpnlt(VTArg a, VTArg b)
Definition: vectorhelper.h:558
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(VTArg a, VTArg b)
Definition: vectorhelper.h:722
Vc_INTRINSIC Vc_CONST m256 exponent(param256 v)
Definition: vectorhelper.h:37
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(VTArg a, VTArg b)
Definition: vectorhelper.h:724
static Vc_ALWAYS_INLINE VectorType set(const double a)
Definition: vectorhelper.h:242
static Vc_INTRINSIC m256i Vc_CONST _mm256_and_si256(param256i x, param256i y)
Definition: intrinsics.h:381
static Vc_INTRINSIC m256d Vc_CONST _mm256_setallone_pd()
Definition: intrinsics.h:185
#define Vc_PURE_R
Definition: macros.h:138
static Vc_INTRINSIC m256 Vc_CONST _mm256_cmpord_ps(param256 a, param256 b)
Definition: intrinsics.h:240
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a)
Definition: vectorhelper.h:396
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VTArg a, int shift)
Definition: vectorhelper.h:668
static Vc_INTRINSIC VectorType Vc_CONST or_(VTArg a, VTArg b)
Definition: vectorhelper.h:576
static Vc_ALWAYS_INLINE Vc_CONST VectorType dddd(VTArg x)
Definition: vectorhelper.h:83
static VectorType cccc(VTArg x)
Definition: vectorhelper.h:157
static Vc_INTRINSIC m256i Vc_CONST _mm256_xor_si256(param256i x, param256i y)
Definition: intrinsics.h:390
AlignedFlag
Definition: global.h:308
static VectorType bbbb(VTArg x)
Definition: vectorhelper.h:194
Definition: casts.h:28
static VectorType aaaa(VTArg x)
Definition: vectorhelper.h:193
UnalignedFlag
Definition: global.h:311
static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b)
Definition: vectorhelper.h:663
static Vc_INTRINSIC m256d Vc_CONST _mm256_cmpord_pd(param256d a, param256d b)
Definition: intrinsics.h:229
static Vc_INTRINSIC m256i Vc_CONST _mm256_or_si256(param256i x, param256i y)
Definition: intrinsics.h:387
static Vc_ALWAYS_INLINE Vc_CONST VectorType badc(VTArg x)
Definition: vectorhelper.h:79
static Vc_INTRINSIC EntryType Vc_CONST max(VTArg a)
Definition: vectorhelper.h:508
const m256 param256
Definition: intrinsics.h:128
static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b)
Definition: vectorhelper.h:633
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VTArg a, param256 mask)
Definition: vectorhelper.h:333
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a)
Definition: vectorhelper.h:384
__m128d m128d
Definition: intrinsics.h:111
Vc_INTRINSIC Vc_CONST m128 lo128(param256 v)
Definition: casts.h:115
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x)
Definition: vectorhelper.h:370
static Vc_INTRINSIC m256i Vc_CONST _mm256_setallone_si256()
Definition: intrinsics.h:184