ROOT  6.06/09
Reference Guide
vectorhelper.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2011 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef SSE_VECTORHELPER_H
21 #define SSE_VECTORHELPER_H
22 
23 #include "types.h"
24 #include <limits>
25 #include "macros.h"
26 
27 namespace ROOT {
28 namespace Vc
29 {
30 namespace SSE
31 {
32 
33 namespace Internal
34 {
36 {
37  __m128i tmp = _mm_srli_epi32(_mm_castps_si128(v), 23);
38  tmp = _mm_sub_epi32(tmp, _mm_set1_epi32(0x7f));
39  return _mm_cvtepi32_ps(tmp);
40 }
42 {
43  __m128i tmp0 = _mm_srli_epi32(_mm_castps_si128(v[0]), 23);
44  __m128i tmp1 = _mm_srli_epi32(_mm_castps_si128(v[1]), 23);
45  tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x7f));
46  tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x7f));
47  return M256::create( _mm_cvtepi32_ps(tmp0), _mm_cvtepi32_ps(tmp1));
48 }
49 Vc_INTRINSIC Vc_CONST __m128d exponent(__m128d v)
50 {
51  __m128i tmp = _mm_srli_epi64(_mm_castpd_si128(v), 52);
52  tmp = _mm_sub_epi32(tmp, _mm_set1_epi32(0x3ff));
53  return _mm_cvtepi32_pd(_mm_shuffle_epi32(tmp, 0x08));
54 }
55 } // namespace Internal
56 
// Primary template of the sort helper. Only the static sort() entry point is
// declared here (it takes and returns a VectorType by value); its definition
// is not part of this section.
// NOTE(review): Size is presumably the number of entries to sort - confirm.
57  template<typename VectorType, unsigned int Size> struct SortHelper
58  {
59  static inline Vc_CONST_L VectorType sort(VectorType) Vc_CONST_R;
60  };
// Partial specialization for M256: sort() takes the vector by const reference
// and is tagged with the Vc_PURE_* attribute macros instead of Vc_CONST_*.
// Only the declaration is visible here.
61  template<unsigned int Size> struct SortHelper<M256, Size>
62  {
63  static inline Vc_PURE_L M256 sort(const M256 &) Vc_PURE_R;
64  };
65 
// Generators for the static one-line helpers of the specialization below:
// OP0 emits a function without arguments, OP2 / OP3 emit functions taking two
// or three VectorTypeArg parameters named a, b (and c). The `code` argument
// becomes the return expression. All three are undefined again right after
// the struct.
66 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }
67 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VectorTypeArg a, VectorTypeArg b) { return code; }
68 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(VectorTypeArg a, VectorTypeArg b, VectorTypeArg c) { return code; }
// Register-level helper for M256. The code below treats an M256 as two halves
// addressed as [0] and [1]; every operation is applied to both halves with the
// corresponding _ps intrinsic and the results are recombined with
// VectorType::create().
69  template<> struct VectorHelper<M256>
70  {
71  typedef M256 VectorType;
// Arguments are passed by const reference when the build flags by-value
// passing as broken, by const value otherwise.
72 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
73  typedef const VectorType &VectorTypeArg;
74 #else
75  typedef const VectorType VectorTypeArg;
76 #endif
// load/store are only declared here. The trailing tag parameter selects the
// overload (aligned, unaligned, streaming variants); the store overloads with
// an extra vector argument m take a mask.
// NOTE(review): exact masking semantics live in the definitions - confirm there.
77  template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const float *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;
78  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, AlignedFlag) Vc_ALWAYS_INLINE_R;
79  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
80  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
81  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
82  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, AlignedFlag) Vc_ALWAYS_INLINE_R;
83  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
84  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
85  static Vc_ALWAYS_INLINE_L void store(float *mem, VectorTypeArg x, VectorTypeArg m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
86
// Constants and bitwise operations, each built from two 128-bit operations.
// blend forwards to mm_blendv_ps, a helper defined outside this section.
87  OP0(allone, VectorType::create(_mm_setallone_ps(), _mm_setallone_ps()))
88  OP0(zero, VectorType::create(_mm_setzero_ps(), _mm_setzero_ps()))
89  OP2(or_, VectorType::create(_mm_or_ps(a[0], b[0]), _mm_or_ps(a[1], b[1])))
90  OP2(xor_, VectorType::create(_mm_xor_ps(a[0], b[0]), _mm_xor_ps(a[1], b[1])))
91  OP2(and_, VectorType::create(_mm_and_ps(a[0], b[0]), _mm_and_ps(a[1], b[1])))
92  OP2(andnot_, VectorType::create(_mm_andnot_ps(a[0], b[0]), _mm_andnot_ps(a[1], b[1])))
93  OP3(blend, VectorType::create(mm_blendv_ps(a[0], b[0], c[0]), mm_blendv_ps(a[1], b[1], c[1])))
94  };
95 #undef OP0
96 #undef OP2
97 #undef OP3
98 
// Second set of generator macros, used by the single-register
// specializations that follow. Unlike the M256 variants these take their
// arguments as plain `const VectorType` values. OP1 is defined here but not
// used by the specializations visible in this section.
99 #define OP0(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name() { return code; }
100 #define OP1(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a) { return code; }
101 #define OP2(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a, const VectorType b) { return code; }
102 #define OP3(name, code) static Vc_ALWAYS_INLINE Vc_CONST VectorType name(const VectorType a, const VectorType b, const VectorType c) { return code; }
103
// Register-level helper for _M128 (single-precision register): load/store
// declarations plus constants and bitwise operations mapped one-to-one onto
// the _ps intrinsics.
104  template<> struct VectorHelper<_M128>
105  {
106  typedef _M128 VectorType;
// Declarations only; the tag parameter selects the memory access flavour and
// the overloads with the extra argument m are the masked stores.
107  template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const float *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;
108  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;
109  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
110  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
111  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
112  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;
113  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
114  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
115  static Vc_ALWAYS_INLINE_L void store(float *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
116
// blend forwards to mm_blendv_ps, a helper defined outside this section.
117  OP0(allone, _mm_setallone_ps())
118  OP0(zero, _mm_setzero_ps())
119  OP2(or_, _mm_or_ps(a, b))
120  OP2(xor_, _mm_xor_ps(a, b))
121  OP2(and_, _mm_and_ps(a, b))
122  OP2(andnot_, _mm_andnot_ps(a, b))
123  OP3(blend, mm_blendv_ps(a, b, c))
124  };
125 
126 
// Register-level helper for _M128D (double-precision register). Same layout
// as the _M128 helper: load/store are declared for double pointers and the
// constants / bitwise operations map onto the _pd intrinsics.
127  template<> struct VectorHelper<_M128D>
128  {
129  typedef _M128D VectorType;
// Declarations only; definitions are not part of this section.
130  template<typename A> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const double *x, A) Vc_ALWAYS_INLINE_R Vc_PURE_R;
131  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;
132  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
133  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
134  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
135  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;
136  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
137  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
138  static Vc_ALWAYS_INLINE_L void store(double *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
139
// blend forwards to mm_blendv_pd, a helper defined outside this section.
140  OP0(allone, _mm_setallone_pd())
141  OP0(zero, _mm_setzero_pd())
142  OP2(or_, _mm_or_pd(a, b))
143  OP2(xor_, _mm_xor_pd(a, b))
144  OP2(and_, _mm_and_pd(a, b))
145  OP2(andnot_, _mm_andnot_pd(a, b))
146  OP3(blend, mm_blendv_pd(a, b, c))
147  };
148 
// Register-level helper for _M128I (integer register). load and store are
// templates over the element pointer type T and are overloaded per access
// tag instead of taking the tag as a template parameter.
149  template<> struct VectorHelper<_M128I>
150  {
151  typedef _M128I VectorType;
// Declarations only; definitions are not part of this section.
152  template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, AlignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;
153  template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, UnalignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;
154  template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;
155  template<typename T> static Vc_ALWAYS_INLINE_L Vc_PURE_L VectorType load(const T *x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R Vc_PURE_R;
156  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, AlignedFlag) Vc_ALWAYS_INLINE_R;
157  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, UnalignedFlag) Vc_ALWAYS_INLINE_R;
158  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
159  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
160  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, AlignedFlag) Vc_ALWAYS_INLINE_R;
161  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, UnalignedFlag) Vc_ALWAYS_INLINE_R;
162  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, StreamingAndAlignedFlag) Vc_ALWAYS_INLINE_R;
163  template<typename T> static Vc_ALWAYS_INLINE_L void store(T *mem, const VectorType x, const VectorType m, StreamingAndUnalignedFlag) Vc_ALWAYS_INLINE_R;
164
// Bitwise operations use the si128 intrinsics; blend forwards to
// mm_blendv_epi8, a helper defined outside this section.
165  OP0(allone, _mm_setallone_si128())
166  OP0(zero, _mm_setzero_si128())
167  OP2(or_, _mm_or_si128(a, b))
168  OP2(xor_, _mm_xor_si128(a, b))
169  OP2(and_, _mm_and_si128(a, b))
170  OP2(andnot_, _mm_andnot_si128(a, b))
171  OP3(blend, mm_blendv_epi8(a, b, c))
172  };
173 
// Remove every generator macro that was introduced for the raw-register
// specializations. OP0 belongs to the same family as OP1..OP3 and is removed
// with them so that it does not stay visible to the rest of the translation
// unit.
#undef OP0
#undef OP1
#undef OP2
#undef OP3
177 
// Generator macros for the per-entry-type helpers below. Each one relies on
// SUFFIX being defined to the intrinsic suffix that is current at the point
// of use (pd, ps, si128, epi32, ...) and on CAT from the project macros.
// NOTE(review): CAT presumably pastes its arguments after macro expansion - confirm.
//
// OP1(op): unary function `op` forwarding to _mm_<op>_<SUFFIX>.
178 #define OP1(op) \
179  static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a) { return CAT(_mm_##op##_, SUFFIX)(a); }
// OP(op): binary function `op` forwarding to _mm_<op>_<SUFFIX>.
180 #define OP(op) \
181  static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op##_ , SUFFIX)(a, b); }
// OP_(op): like OP, for names that already end in an underscore (or_, and_,
// xor_); no extra underscore is inserted before SUFFIX.
182 #define OP_(op) \
183  static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op , SUFFIX)(a, b); }
// OPx(op, op2): function named `op` that forwards to _mm_<op2>_<SUFFIX>.
184 #define OPx(op, op2) \
185  static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_##op2##_, SUFFIX)(a, b); }
// OPcmp(op): function named cmp<op> forwarding to _mm_cmp<op>_<SUFFIX>.
186 #define OPcmp(op) \
187  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmp##op(const VectorType a, const VectorType b) { return CAT(_mm_cmp##op##_, SUFFIX)(a, b); }
// OP_CAST_(op): binary function that casts both operands from <SUFFIX> to ps,
// applies _mm_<op>ps and casts the result back to <SUFFIX>.
188 #define OP_CAST_(op) \
189  static Vc_ALWAYS_INLINE Vc_CONST VectorType op(const VectorType a, const VectorType b) { return CAT(_mm_castps_, SUFFIX)( \
190  _mm_##op##ps(CAT(CAT(_mm_cast, SUFFIX), _ps)(a), \
191  CAT(CAT(_mm_cast, SUFFIX), _ps)(b))); \
192  }
// MINMAX: element-wise two-argument min and max via _mm_min_<SUFFIX> and
// _mm_max_<SUFFIX>.
193 #define MINMAX \
194  static Vc_ALWAYS_INLINE Vc_CONST VectorType min(VectorType a, VectorType b) { return CAT(_mm_min_, SUFFIX)(a, b); } \
195  static Vc_ALWAYS_INLINE Vc_CONST VectorType max(VectorType a, VectorType b) { return CAT(_mm_max_, SUFFIX)(a, b); }
196 
197  template<> struct VectorHelper<double> {
198  typedef _M128D VectorType;
199  typedef double EntryType;
200 #define SUFFIX pd
201 
202  OP_(or_) OP_(and_) OP_(xor_)
203  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_pd(mask), a); }
204  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a) { return CAT(_mm_set1_, SUFFIX)(a); }
205  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a, const double b) { return CAT(_mm_set_, SUFFIX)(a, b); }
206  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
207  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }// set(1.); }
208 
// Multiply-add on two doubles, in place: v1 = v1 * v2 + v3.
209 #ifdef VC_IMPL_FMA4
// FMA4 builds forward to a single _mm_macc_pd.
210  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {
211  v1 = _mm_macc_pd(v1, v2, v3);
212  }
213 #else
// Without FMA4 the product is assembled from partial products:
//  - h1 / h2 are v1 / v2 ANDed with c_general::highMaskDouble
//    (NOTE(review): presumably this keeps only the upper mantissa bits - confirm),
//  - l1 / l2 are the remainders v1 - h1 and v2 - h2,
//  - ll, lh and hh are the low*low, mixed and high*high products.
214  static inline void fma(VectorType &v1, VectorType v2, VectorType v3) {
215  VectorType h1 = _mm_and_pd(v1, _mm_load_pd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
216  VectorType h2 = _mm_and_pd(v2, _mm_load_pd(reinterpret_cast<const double *>(&c_general::highMaskDouble)));
// The empty asm statement marks h1 and h2 as read and written, which keeps
// the affected GCC versions from rewriting the subtractions below.
217 #if defined(VC_GCC) && VC_GCC < 0x40703
218  // GCC before 4.7.3 uses an incorrect optimization where it replaces the subtraction with an andnot
219  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54703
220  asm("":"+x"(h1), "+x"(h2));
221 #endif
222  const VectorType l1 = _mm_sub_pd(v1, h1);
223  const VectorType l2 = _mm_sub_pd(v2, h2);
224  const VectorType ll = mul(l1, l2);
225  const VectorType lh = add(mul(l1, h2), mul(h1, l2));
226  const VectorType hh = mul(h1, h2);
227  // ll < lh < hh for all entries is certain
// v3 is merged into the sum according to its magnitude relative to lh:
// b and c are lh and v3 swapped per entry by the mask lh_lt_v3.
// NOTE(review): assuming mm_blendv_pd picks its second argument where the
// mask is set, b is the smaller and c the larger of the two - confirm.
228  const VectorType lh_lt_v3 = cmplt(abs(lh), abs(v3)); // |lh| < |v3|
229  const VectorType b = mm_blendv_pd(v3, lh, lh_lt_v3);
230  const VectorType c = mm_blendv_pd(lh, v3, lh_lt_v3);
// Final sum is evaluated as (ll + b) + (c + hh).
231  v1 = add(add(ll, b), add(c, hh));
232  }
233 #endif
234 
235  OP(add) OP(sub) OP(mul)
236  OPcmp(eq) OPcmp(neq)
237  OPcmp(lt) OPcmp(nlt)
238  OPcmp(le) OPcmp(nle)
239 
240  OP1(sqrt)
241  static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VectorType x) {
242  return _mm_div_pd(one(), sqrt(x));
243  }
244  static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x) {
245  return _mm_div_pd(one(), x);
246  }
247  static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x) {
248  return _mm_cmpunord_pd(x, x);
249  }
250  static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x) {
251  return _mm_cmpord_pd(x, _mm_mul_pd(zero(), x));
252  }
253  static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a) {
254  return CAT(_mm_and_, SUFFIX)(a, _mm_setabsmask_pd());
255  }
256 
257  MINMAX
258  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
259  a = _mm_min_sd(a, _mm_unpackhi_pd(a, a));
260  return _mm_cvtsd_f64(a);
261  }
262  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
263  a = _mm_max_sd(a, _mm_unpackhi_pd(a, a));
264  return _mm_cvtsd_f64(a);
265  }
266  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
267  a = _mm_mul_sd(a, _mm_shuffle_pd(a, a, _MM_SHUFFLE2(0, 1)));
268  return _mm_cvtsd_f64(a);
269  }
270  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
271  a = _mm_add_sd(a, _mm_shuffle_pd(a, a, _MM_SHUFFLE2(0, 1)));
272  return _mm_cvtsd_f64(a);
273  }
274 #undef SUFFIX
// Rounds both entries to an integral value.
// With SSE4.1 this is a single _mm_round_pd using _MM_FROUND_NINT.
// The fallback converts to 32-bit integers and back, so it follows the
// current rounding mode (the line that would force round-to-nearest is
// deliberately left commented out as too slow) and it cannot represent
// entries outside the 32-bit integer range.
// NOTE(review): per the Intel documentation of the conversion, out-of-range,
// infinite and NaN entries come back as -2147483648.0 on this path - confirm
// that callers never rely on those inputs.
275  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) {
276 #ifdef VC_IMPL_SSE4_1
277  return _mm_round_pd(a, _MM_FROUND_NINT);
278 #else
279  //XXX: slow: _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
280  return _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));
281 #endif
282  }
283  };
284 
285  template<> struct VectorHelper<float> {
286  typedef float EntryType;
287  typedef _M128 VectorType;
288 #define SUFFIX ps
289 
290  OP_(or_) OP_(and_) OP_(xor_)
291  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(mask, a); }
292  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) { return CAT(_mm_set1_, SUFFIX)(a); }
293  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d) { return CAT(_mm_set_, SUFFIX)(a, b, c, d); }
294  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
295  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }// set(1.f); }
296  static Vc_ALWAYS_INLINE Vc_CONST _M128 concat(_M128D a, _M128D b) { return _mm_movelh_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b)); }
297 
298 #ifdef VC_IMPL_FMA4
299  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {
300  v1 = _mm_macc_ps(v1, v2, v3);
301  }
302 #else
303  static inline void fma(VectorType &v1, VectorType v2, VectorType v3) {
304  __m128d v1_0 = _mm_cvtps_pd(v1);
305  __m128d v1_1 = _mm_cvtps_pd(_mm_movehl_ps(v1, v1));
306  __m128d v2_0 = _mm_cvtps_pd(v2);
307  __m128d v2_1 = _mm_cvtps_pd(_mm_movehl_ps(v2, v2));
308  __m128d v3_0 = _mm_cvtps_pd(v3);
309  __m128d v3_1 = _mm_cvtps_pd(_mm_movehl_ps(v3, v3));
310  v1 = _mm_movelh_ps(
311  _mm_cvtpd_ps(_mm_add_pd(_mm_mul_pd(v1_0, v2_0), v3_0)),
312  _mm_cvtpd_ps(_mm_add_pd(_mm_mul_pd(v1_1, v2_1), v3_1)));
313  }
314 #endif
315 
316  OP(add) OP(sub) OP(mul)
317  OPcmp(eq) OPcmp(neq)
318  OPcmp(lt) OPcmp(nlt)
319  OPcmp(le) OPcmp(nle)
320 
321  OP1(sqrt) OP1(rsqrt)
322  static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x) {
323  return _mm_cmpunord_ps(x, x);
324  }
325  static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x) {
326  return _mm_cmpord_ps(x, _mm_mul_ps(zero(), x));
327  }
328  static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x) {
329  return _mm_rcp_ps(x);
330  }
331  static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a) {
332  return CAT(_mm_and_, SUFFIX)(a, _mm_setabsmask_ps());
333  }
334 
335  MINMAX
336  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
337  a = _mm_min_ps(a, _mm_movehl_ps(a, a)); // a = min(a0, a2), min(a1, a3), min(a2, a2), min(a3, a3)
338  a = _mm_min_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); // a = min(a0, a1), a1, a2, a3
339  return _mm_cvtss_f32(a);
340  }
341  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
342  a = _mm_max_ps(a, _mm_movehl_ps(a, a)); // a = max(a0, a2), max(a1, a3), max(a2, a2), max(a3, a3)
343  a = _mm_max_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); // a = max(a0, a1), a1, a2, a3
344  return _mm_cvtss_f32(a);
345  }
346  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
347  a = _mm_mul_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 2, 3)));
348  a = _mm_mul_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 0, 1)));
349  return _mm_cvtss_f32(a);
350  }
351  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
352  a = _mm_add_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 2, 3)));
353  a = _mm_add_ss(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 0, 1)));
354  return _mm_cvtss_f32(a);
355  }
356 #undef SUFFIX
357  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) {
358 #ifdef VC_IMPL_SSE4_1
359  return _mm_round_ps(a, _MM_FROUND_NINT);
360 #else
361  //XXX slow: _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
362  return _mm_cvtepi32_ps(_mm_cvtps_epi32(a));
363 #endif
364  }
365  };
366 
367  template<> struct VectorHelper<float8> {
368  typedef float EntryType;
369  typedef M256 VectorType;
370 #ifdef VC_PASSING_VECTOR_BY_VALUE_IS_BROKEN
371  typedef const VectorType &VectorTypeArg;
372 #else
373  typedef const VectorType VectorTypeArg;
374 #endif
375 
376  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) {
377  const _M128 x = _mm_set1_ps(a);
378  return VectorType::create(x, x);
379  }
380  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d) {
381  const _M128 x = _mm_set_ps(a, b, c, d);
382  return VectorType::create(x, x);
383  }
384  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d,
385  const float e, const float f, const float g, const float h) {
386  return VectorType::create(_mm_set_ps(a, b, c, d), _mm_set_ps(e, f, g, h));
387  }
388  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return VectorType::create(_mm_setzero_ps(), _mm_setzero_ps()); }
389  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return set(1.f); }
390 
391 #define REUSE_FLOAT_IMPL1(fun) \
392  static Vc_ALWAYS_INLINE Vc_CONST VectorType fun(VectorTypeArg x) { \
393  return VectorType::create(VectorHelper<float>::fun(x[0]), VectorHelper<float>::fun(x[1])); \
394  }
395 #define REUSE_FLOAT_IMPL2(fun) \
396  static Vc_ALWAYS_INLINE Vc_CONST VectorType fun(VectorTypeArg x, VectorTypeArg y) { \
397  return VectorType::create(VectorHelper<float>::fun(x[0], y[0]), VectorHelper<float>::fun(x[1], y[1])); \
398  }
402  REUSE_FLOAT_IMPL1(isNaN)
403  REUSE_FLOAT_IMPL1(isFinite)
406 
407  REUSE_FLOAT_IMPL2(and_)
408  REUSE_FLOAT_IMPL2(or_)
409  REUSE_FLOAT_IMPL2(xor_)
410  REUSE_FLOAT_IMPL2(notMaskedToZero)
411  REUSE_FLOAT_IMPL2(add)
412  REUSE_FLOAT_IMPL2(sub)
413  REUSE_FLOAT_IMPL2(mul)
414  REUSE_FLOAT_IMPL2(cmple)
415  REUSE_FLOAT_IMPL2(cmpnle)
416  REUSE_FLOAT_IMPL2(cmplt)
417  REUSE_FLOAT_IMPL2(cmpnlt)
418  REUSE_FLOAT_IMPL2(cmpeq)
419  REUSE_FLOAT_IMPL2(cmpneq)
422 
423  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorTypeArg a) {
425  }
426  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorTypeArg a) {
428  }
429  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorTypeArg a) {
431  }
432  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorTypeArg a) {
434  }
435 
436  static inline void fma(VectorType &a, VectorTypeArg b, VectorTypeArg c) {
437  VectorHelper<float>::fma(a[0], b[0], c[0]);
438  VectorHelper<float>::fma(a[1], b[1], c[1]);
439  }
440 #undef REUSE_FLOAT_IMPL2
441 #undef REUSE_FLOAT_IMPL1
442  };
443 
444  template<> struct VectorHelper<int> {
445  typedef int EntryType;
446  typedef _M128I VectorType;
447 #define SUFFIX si128
448 
449  OP_(or_) OP_(and_) OP_(xor_)
450  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
451  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }
452 #undef SUFFIX
453 #define SUFFIX epi32
454  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }
455 
456  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a) { return CAT(_mm_set1_, SUFFIX)(a); }
457  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a, const int b, const int c, const int d) { return CAT(_mm_set_, SUFFIX)(a, b, c, d); }
458 
459  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }
460 
461  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
462  return CAT(_mm_slli_, SUFFIX)(a, shift);
463  }
464  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
465  return CAT(_mm_srai_, SUFFIX)(a, shift);
466  }
467  static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a) { return mm_abs_epi32(a); }
468 
469  static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return mm_min_epi32(a, b); }
470  static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return mm_max_epi32(a, b); }
471  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
472  a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
473  // using lo_epi16 for speed here
474  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
475  return _mm_cvtsi128_si32(a);
476  }
477  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
478  a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
479  // using lo_epi16 for speed here
480  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
481  return _mm_cvtsi128_si32(a);
482  }
483  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
484  a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
485  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
486  return _mm_cvtsi128_si32(a);
487  }
488 #ifdef VC_IMPL_SSE4_1
489  static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(VectorType a, VectorType b) { return _mm_mullo_epi32(a, b); }
490  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
491  a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
492  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
493  return _mm_cvtsi128_si32(a);
494  }
495 #else
496  static inline Vc_CONST VectorType mul(const VectorType a, const VectorType b) {
497  const VectorType aShift = _mm_srli_si128(a, 4);
498  const VectorType ab02 = _mm_mul_epu32(a, b); // [a0 * b0, a2 * b2]
499  const VectorType bShift = _mm_srli_si128(b, 4);
500  const VectorType ab13 = _mm_mul_epu32(aShift, bShift); // [a1 * b1, a3 * b3]
501  return _mm_unpacklo_epi32(_mm_shuffle_epi32(ab02, 8), _mm_shuffle_epi32(ab13, 8));
502  }
503 #endif
504 
505  OP(add) OP(sub)
506  OPcmp(eq)
507  OPcmp(lt)
508  OPcmp(gt)
509  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { _M128I x = cmpeq(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
510  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { _M128I x = cmplt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
511  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { _M128I x = cmpgt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
512  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }
513 #undef SUFFIX
514  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }
515  };
516 
517  template<> struct VectorHelper<unsigned int> {
518  typedef unsigned int EntryType;
519  typedef _M128I VectorType;
520 #define SUFFIX si128
521  OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_)
522  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
523  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }
524 
525 #undef SUFFIX
526 #define SUFFIX epu32
527  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }
528 
529  static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return mm_min_epu32(a, b); }
530  static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return mm_max_epu32(a, b); }
531  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
532  a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
533  // using lo_epi16 for speed here
534  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
535  return _mm_cvtsi128_si32(a);
536  }
537  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
538  a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
539  // using lo_epi16 for speed here
540  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
541  return _mm_cvtsi128_si32(a);
542  }
543  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
544  a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
545  // using lo_epi16 for speed here
546  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
547  return _mm_cvtsi128_si32(a);
548  }
549  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
550  a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
551  // using lo_epi16 for speed here
552  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
553  return _mm_cvtsi128_si32(a);
554  }
555 
556  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }
557 
558  static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a, const VectorType b) {
559  return VectorHelper<int>::mul(a, b);
560  }
561 //X template<unsigned int b> static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a) {
562 //X switch (b) {
563 //X case 0: return zero();
564 //X case 1: return a;
565 //X case 2: return _mm_slli_epi32(a, 1);
566 //X case 4: return _mm_slli_epi32(a, 2);
567 //X case 8: return _mm_slli_epi32(a, 3);
568 //X case 16: return _mm_slli_epi32(a, 4);
569 //X case 32: return _mm_slli_epi32(a, 5);
570 //X case 64: return _mm_slli_epi32(a, 6);
571 //X case 128: return _mm_slli_epi32(a, 7);
572 //X case 256: return _mm_slli_epi32(a, 8);
573 //X case 512: return _mm_slli_epi32(a, 9);
574 //X case 1024: return _mm_slli_epi32(a, 10);
575 //X case 2048: return _mm_slli_epi32(a, 11);
576 //X }
577 //X return mul(a, set(b));
578 //X }
579 
580 #undef SUFFIX
581 #define SUFFIX epi32
582  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
583  return CAT(_mm_slli_, SUFFIX)(a, shift);
584  }
585  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
586  return CAT(_mm_srli_, SUFFIX)(a, shift);
587  }
588  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a) { return CAT(_mm_set1_, SUFFIX)(a); }
589  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d) { return CAT(_mm_set_, SUFFIX)(a, b, c, d); }
590 
591  OP(add) OP(sub)
592  OPcmp(eq)
593  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpeq(a, b), _mm_setallone_si128()); }
594 
// Ordering comparisons for unsigned 32-bit entries.
// Default path: forwards to _mm_cmplt_epu32 / _mm_cmpgt_epu32.
// NOTE(review): these are not standard SSE intrinsics; presumably they are
// provided by the project headers - confirm.
595 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
596  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b) {
597  return _mm_cmplt_epu32(a, b);
598  }
599  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b) {
600  return _mm_cmpgt_epu32(a, b);
601  }
602 #else
// Opt-in path: SUFFIX is epi32 at this point, so OPcmp expands to the signed
// _mm_cmplt_epi32 / _mm_cmpgt_epi32. Entries with the top bit set are then
// ordered as negative numbers, which is the incorrectness the macro name
// refers to.
603  OPcmp(lt)
604  OPcmp(gt)
605 #endif
606  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmplt(a, b), _mm_setallone_si128()); }
607  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpgt(a, b), _mm_setallone_si128()); }
608  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }
609 
610 #undef SUFFIX
611  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }
612  };
613 
614  template<> struct VectorHelper<signed short> {
615  typedef _M128I VectorType;
616  typedef signed short EntryType;
617 #define SUFFIX si128
618 
619  OP_(or_) OP_(and_) OP_(xor_)
620  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
621  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }
622  static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) { return _mm_packs_epi32(a, b); }
623  static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x) { return _mm_srai_epi32(_mm_unpacklo_epi16(x, x), 16); }
624  static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x) { return _mm_srai_epi32(_mm_unpackhi_epi16(x, x), 16); }
625 
626 #undef SUFFIX
627 #define SUFFIX epi16
628  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }
629 
630  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
631  return CAT(_mm_slli_, SUFFIX)(a, shift);
632  }
633  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
634  return CAT(_mm_srai_, SUFFIX)(a, shift);
635  }
636  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }
637  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d,
638  const EntryType e, const EntryType f, const EntryType g, const EntryType h) {
639  return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);
640  }
641 
642  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) {
643  v1 = add(mul(v1, v2), v3); }
644 
645  static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a) { return mm_abs_epi16(a); }
646 
647  OPx(mul, mullo)
648  OP(min) OP(max)
649  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
650  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
651  a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
652  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
653  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
654  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
655  }
656  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
657  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
658  a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
659  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
660  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
661  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
662  }
663  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
664  a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
665  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
666  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
667  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
668  }
669  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
670  a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
671  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
672  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
673  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
674  }
675 
676  OP(add) OP(sub)
677  OPcmp(eq)
678  OPcmp(lt)
679  OPcmp(gt)
680  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { _M128I x = cmpeq(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
681  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { _M128I x = cmplt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
682  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { _M128I x = cmpgt(a, b); return _mm_andnot_si128(x, _mm_setallone_si128()); }
683  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }
684 #undef SUFFIX
685  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }
686  };
687 
688  template<> struct VectorHelper<unsigned short> {
689  typedef _M128I VectorType;
690  typedef unsigned short EntryType;
691 #define SUFFIX si128
692  OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_)
693  static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return CAT(_mm_setzero_, SUFFIX)(); }
694  static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask) { return CAT(_mm_and_, SUFFIX)(_mm_castps_si128(mask), a); }
695 #ifdef VC_IMPL_SSE4_1
696  static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) { return _mm_packus_epi32(a, b); }
697 #else
698  // XXX too bad, but this is broken without SSE 4.1
699  static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b) { return _mm_packs_epi32(a, b); }
700 #endif
701  static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x) { return _mm_srli_epi32(_mm_unpacklo_epi16(x, x), 16); }
702  static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x) { return _mm_srli_epi32(_mm_unpackhi_epi16(x, x), 16); }
703 
704 #undef SUFFIX
705 #define SUFFIX epu16
706  static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return CAT(_mm_setone_, SUFFIX)(); }
707 
708 //X template<unsigned int b> static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a) {
709 //X switch (b) {
710 //X case 0: return zero();
711 //X case 1: return a;
712 //X case 2: return _mm_slli_epi16(a, 1);
713 //X case 4: return _mm_slli_epi16(a, 2);
714 //X case 8: return _mm_slli_epi16(a, 3);
715 //X case 16: return _mm_slli_epi16(a, 4);
716 //X case 32: return _mm_slli_epi16(a, 5);
717 //X case 64: return _mm_slli_epi16(a, 6);
718 //X case 128: return _mm_slli_epi16(a, 7);
719 //X case 256: return _mm_slli_epi16(a, 8);
720 //X case 512: return _mm_slli_epi16(a, 9);
721 //X case 1024: return _mm_slli_epi16(a, 10);
722 //X case 2048: return _mm_slli_epi16(a, 11);
723 //X }
724 //X return mul(a, set(b));
725 //X }
726 #if !defined(USE_INCORRECT_UNSIGNED_COMPARE) || VC_IMPL_SSE4_1
727  static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b) { return CAT(mm_min_, SUFFIX)(a, b); }
728  static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b) { return CAT(mm_max_, SUFFIX)(a, b); }
729 #endif
730 #undef SUFFIX
731 #define SUFFIX epi16
732  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift) {
733  return CAT(_mm_slli_, SUFFIX)(a, shift);
734  }
735  static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift) {
736  return CAT(_mm_srli_, SUFFIX)(a, shift);
737  }
738 
739  static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3) { v1 = add(mul(v1, v2), v3); }
740 
741  OPx(mul, mullo) // should work correctly for all values
742 #if defined(USE_INCORRECT_UNSIGNED_COMPARE) && !defined(VC_IMPL_SSE4_1)
743  OP(min) OP(max) // XXX breaks for values with MSB set
744 #endif
745  static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a) {
746  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
747  a = min(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
748  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
749  a = min(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
750  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
751  }
752  static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a) {
753  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
754  a = max(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
755  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
756  a = max(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
757  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
758  }
759  static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a) {
760  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
761  a = mul(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
762  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
763  a = mul(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
764  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
765  }
766  static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a) {
767  // reminder: _MM_SHUFFLE(3, 2, 1, 0) means "no change"
768  a = add(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2)));
769  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 0, 3, 2)));
770  a = add(a, _mm_shufflelo_epi16(a, _MM_SHUFFLE(1, 1, 1, 1)));
771  return _mm_cvtsi128_si32(a); // & 0xffff is implicit
772  }
773  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a) { return CAT(_mm_set1_, SUFFIX)(a); }
774  static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c,
775  const EntryType d, const EntryType e, const EntryType f,
776  const EntryType g, const EntryType h) {
777  return CAT(_mm_set_, SUFFIX)(a, b, c, d, e, f, g, h);
778  }
779 
780  OP(add) OP(sub)
781  OPcmp(eq)
782  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpneq(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpeq(a, b), _mm_setallone_si128()); }
783 
784 #ifndef USE_INCORRECT_UNSIGNED_COMPARE
785  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b) {
786  return _mm_cmplt_epu16(a, b);
787  }
788  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b) {
789  return _mm_cmpgt_epu16(a, b);
790  }
791 #else
792  OPcmp(lt)
793  OPcmp(gt)
794 #endif
795  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b) { return _mm_andnot_si128(cmplt(a, b), _mm_setallone_si128()); }
796  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple (const VectorType a, const VectorType b) { return _mm_andnot_si128(cmpgt(a, b), _mm_setallone_si128()); }
797  static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b) { return cmpgt(a, b); }
798 #undef SUFFIX
799  static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a) { return a; }
800  };
// Remove this header's member-generating helper macros so they do not leak
// into code that includes it. OP_CAST_ is used by the unsigned
// specializations and must be dropped together with its siblings.
#undef OP1
#undef OP
#undef OP_
#undef OP_CAST_
#undef OPx
#undef OPcmp
806 
807 } // namespace SSE
808 } // namespace Vc
809 } // namespace ROOT
810 
811 #include "vectorhelper.tcc"
812 #include "undomacros.h"
813 
814 #endif // SSE_VECTORHELPER_H
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:797
static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:645
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:523
static Vc_ALWAYS_INLINE Vc_CONST VectorType mul(const VectorType a, const VectorType b)
Definition: vectorhelper.h:558
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:346
static Vc_CONST_L VectorType sort(VectorType) Vc_CONST_R
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:692
static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b)
Definition: vectorhelper.h:622
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d)
Definition: vectorhelper.h:293
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:275
Vc_INTRINSIC Vc_CONST __m128 exponent(__m128 v)
Definition: vectorhelper.h:35
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x)
Definition: vectorhelper.h:701
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:599
OP_CAST_(or_) OP_CAST_(and_) OP_CAST_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:521
Small helper to encapsulate whether to return the value pointed to by the iterator or its address...
const Double_t * v1
Definition: TArcBall.cxx:33
#define Vc_CONST_L
Definition: macros.h:134
static Vc_ALWAYS_INLINE Vc_CONST _M128I concat(_M128I a, _M128I b)
Definition: vectorhelper.h:699
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > sqrt(const Vector< T > &x)
Definition: vector.h:522
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
#define OP0(name, code)
Definition: vectorhelper.h:99
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:774
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:336
OPx(mul, mullo) static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:741
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:537
OP_(or_) OP_(and_) OP_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:619
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:266
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:461
const char * Size
Definition: TXMLSetup.cxx:56
static Vc_ALWAYS_INLINE Vc_PURE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:502
double T(double x)
Definition: ChebyshevPol.h:34
#define OP(op)
Definition: vectorhelper.h:180
static void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:214
TH1 * h
Definition: legend2.C:5
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:785
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:766
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x)
Definition: vectorhelper.h:624
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:469
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x)
Definition: vectorhelper.h:244
#define MINMAX
Definition: vectorhelper.h:193
static void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:303
static Vc_INTRINSIC __m128d Vc_CONST _mm_setallone_pd()
Definition: intrinsics.h:83
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:295
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a, const EntryType b, const EntryType c, const EntryType d, const EntryType e, const EntryType f, const EntryType g, const EntryType h)
Definition: vectorhelper.h:637
StreamingAndUnalignedFlag
Definition: global.h:317
#define Vc_PURE_L
Definition: macros.h:137
TArc * a
Definition: textangle.C:12
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a)
Definition: vectorhelper.h:636
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:585
static Vc_INTRINSIC Vc_CONST M256 create(_M128 a, _M128 b)
Definition: types.h:70
static void fma(VectorType &a, VectorTypeArg b, VectorTypeArg c)
Definition: vectorhelper.h:436
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorTypeArg a)
Definition: vectorhelper.h:426
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:388
#define Vc_INTRINSIC
Definition: macros.h:139
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:735
static double A[]
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
Definition: vectorhelper.h:376
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:510
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:270
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:663
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:556
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmplt_epu16(__m128i a, __m128i b)
Definition: intrinsics.h:109
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand0(_M128I x)
Definition: vectorhelper.h:623
static Vc_INTRINSIC __m128i Vc_CONST mm_max_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:432
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorTypeArg a)
Definition: vectorhelper.h:429
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) OP1(sqrt) OP1(rsqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x)
Definition: vectorhelper.h:316
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:294
#define OP_CAST_(op)
Definition: vectorhelper.h:188
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > abs(const Vector< T > &x)
Definition: vector.h:524
static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:471
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:262
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:530
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:459
static Vc_INTRINSIC __m128i Vc_CONST _mm_setallone_si128()
Definition: intrinsics.h:82
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:351
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:633
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:732
#define CAT(a, b)
Definition: macros.h:281
static Vc_ALWAYS_INLINE Vc_CONST _M128I expand1(_M128I x)
Definition: vectorhelper.h:702
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
Definition: casts.h:123
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:728
#define OP_(op)
Definition: vectorhelper.h:182
static Vc_INTRINSIC __m128i Vc_CONST mm_abs_epi32(__m128i a)
Definition: intrinsics.h:179
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:331
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:451
static Vc_INTRINSIC __m128i mm_blendv_epi8(__m128i a, __m128i b, __m128i c)
Definition: intrinsics.h:282
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmplt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:596
TH1F * h1
Definition: legend1.C:5
#define SUFFIX
Definition: vectorhelper.h:731
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a)
Definition: vectorhelper.h:456
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const EntryType a)
Definition: vectorhelper.h:773
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > round(const Vector< T > &x)
Definition: vector.h:526
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:207
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmpgt_epu16(__m128i a, __m128i b)
Definition: intrinsics.h:111
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x)
Definition: vectorhelper.h:325
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:543
static Vc_INTRINSIC __m128i Vc_CONST mm_abs_epi16(__m128i a)
Definition: intrinsics.h:175
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorTypeArg a)
Definition: vectorhelper.h:432
static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VectorType a)
Definition: vectorhelper.h:759
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a, const double b)
Definition: vectorhelper.h:205
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:483
#define VC_ALIGNED_PARAMETER(_Type)
Definition: macros.h:368
StreamingAndAlignedFlag
Definition: global.h:314
#define OPcmp(op)
Definition: vectorhelper.h:186
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:611
SVector< double, 2 > v
Definition: Dict.h:5
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:727
static Vc_ALWAYS_INLINE Vc_CONST VectorType isFinite(VectorType x)
Definition: vectorhelper.h:250
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:796
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a, const unsigned int b, const unsigned int c, const unsigned int d)
Definition: vectorhelper.h:589
static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VectorType x)
Definition: vectorhelper.h:328
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:477
#define Vc_CONST
Definition: macros.h:133
static Vc_INTRINSIC __m128d mm_blendv_pd(__m128d a, __m128d b, __m128d c)
Definition: intrinsics.h:276
#define OP1(name, code)
Definition: vectorhelper.h:178
#define _M128
Definition: macros.h:27
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmplt_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:113
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > rsqrt(const Vector< T > &x)
Definition: vector.h:523
static MINMAX Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:258
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:628
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:752
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:357
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d, const float e, const float f, const float g, const float h)
Definition: vectorhelper.h:384
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:608
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > reciprocal(const Vector< T > &x)
Definition: vector.h:525
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpgt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:788
static Vc_ALWAYS_INLINE Vc_PURE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:508
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const unsigned int a)
Definition: vectorhelper.h:588
static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:531
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:682
static Vc_INTRINSIC __m128i Vc_CONST mm_min_epi32(__m128i a, __m128i b)
Definition: intrinsics.h:447
static Vc_CONST VectorType mul(const VectorType a, const VectorType b)
Definition: vectorhelper.h:496
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:511
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d)
Definition: vectorhelper.h:380
double f(double x)
#define OP2(name, code)
Definition: vectorhelper.h:101
#define Vc_CONST_R
Definition: macros.h:135
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:549
#define Vc_ALWAYS_INLINE_L
Definition: macros.h:131
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const int a, const int b, const int c, const int d)
Definition: vectorhelper.h:457
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:685
static Vc_INTRINSIC Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:467
static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(const VectorType a)
Definition: vectorhelper.h:253
static Vc_INTRINSIC __m128i Vc_CONST mm_min_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:441
#define OP3(name, code)
Definition: vectorhelper.h:102
static Vc_INTRINSIC __m128d Vc_CONST _mm_setabsmask_pd()
Definition: intrinsics.h:96
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:681
static Vc_INTRINSIC Vc_CONST VectorType min(const VectorType a, const VectorType b)
Definition: vectorhelper.h:529
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:656
#define SSE
Definition: global.h:84
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VectorType a)
Definition: vectorhelper.h:341
OP(add) OP(sub) OP(mul) OPcmp(eq) OPcmp(neq) OPcmp(lt) OPcmp(nlt) OPcmp(le) OPcmp(nle) static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VectorType x)
Definition: vectorhelper.h:235
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const double a)
Definition: vectorhelper.h:204
#define Vc_PURE_R
Definition: macros.h:138
static Vc_INTRINSIC __m128 mm_blendv_ps(__m128 a, __m128 b, __m128 c)
Definition: intrinsics.h:279
OPx(mul, mullo) OP(min) OP(max) static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VectorType a)
Definition: vectorhelper.h:647
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:795
static Vc_INTRINSIC Vc_CONST VectorType max(const VectorType a, const VectorType b)
Definition: vectorhelper.h:470
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmple(const VectorType a, const VectorType b)
Definition: vectorhelper.h:607
OP_(or_) OP_(and_) OP_(xor_) static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:449
static Vc_ALWAYS_INLINE Vc_CONST VectorType isNaN(VectorType x)
Definition: vectorhelper.h:247
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftRight(VectorType a, int shift)
Definition: vectorhelper.h:464
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnlt(const VectorType a, const VectorType b)
Definition: vectorhelper.h:606
static Vc_INTRINSIC __m128i Vc_CONST mm_max_epi32(__m128i a, __m128i b)
Definition: intrinsics.h:423
AlignedFlag
Definition: global.h:308
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:514
static Vc_INTRINSIC __m128i Vc_CONST _mm_cmpgt_epu32(__m128i a, __m128i b)
Definition: intrinsics.h:115
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:739
Definition: casts.h:28
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:621
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:512
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:630
UnalignedFlag
Definition: global.h:311
static Vc_ALWAYS_INLINE Vc_CONST VectorType zero()
Definition: vectorhelper.h:206
static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VectorType a)
Definition: vectorhelper.h:669
static Vc_ALWAYS_INLINE void fma(VectorType &v1, VectorType v2, VectorType v3)
Definition: vectorhelper.h:642
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:389
static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VectorType a, _M128 mask)
Definition: vectorhelper.h:694
static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VectorType a)
Definition: vectorhelper.h:799
static Vc_ALWAYS_INLINE Vc_CONST VectorType cmpnle(const VectorType a, const VectorType b)
Definition: vectorhelper.h:683
static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a)
Definition: vectorhelper.h:292
static Vc_ALWAYS_INLINE Vc_CONST VectorType shiftLeft(VectorType a, int shift)
Definition: vectorhelper.h:582
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:706
static Vc_INTRINSIC __m128 Vc_CONST _mm_setabsmask_ps()
Definition: intrinsics.h:97
static Vc_ALWAYS_INLINE Vc_CONST _M128 concat(_M128D a, _M128D b)
Definition: vectorhelper.h:296
#define REUSE_FLOAT_IMPL1(fun)
Definition: vectorhelper.h:391
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:454
void fma()
#define REUSE_FLOAT_IMPL2(fun)
Definition: vectorhelper.h:395
static Vc_ALWAYS_INLINE Vc_CONST VectorType one()
Definition: vectorhelper.h:527