template<unsigned int VectorSize> class Mask
#if defined VC_MSVC && defined _WIN32
Vc_ALWAYS_INLINE Mask(const Mask<VectorSize / 2> *a)
    : k(_mm_castsi128_ps(_mm_packs_epi16(a[0].dataI(), a[1].dataI()))) {}
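// A minimal stand-alone sketch of the pack trick used by the constructor above (illustrative
// only, not part of mask.h): each 16-bit mask entry is 0x0000 or 0xFFFF, and _mm_packs_epi16
// packs with signed saturation, so -1 stays -1 (0xFF) and 0 stays 0, yielding 16 valid
// 8-bit entries. The helper name is hypothetical.
#include <emmintrin.h>
static inline __m128i narrowMasks_sketch(__m128i lo, __m128i hi)
{
    // combine two 8 x 16-bit masks into one 16 x 8-bit mask
    return _mm_packs_epi16(lo, hi);
}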
    return _mm_movemask_epi8(dataI()) == 0xffff; // isFull(): every mask byte set
    return _mm_movemask_epi8(dataI()) == 0x0000; // isEmpty(): no mask byte set
    const int tmp = _mm_movemask_epi8(dataI()); // isMix(): some, but not all, bytes set
    return tmp != 0 && (tmp ^ 0xffff) != 0;
#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
#ifdef VC_COMPILE_BENCHMARKS
    const _long bit = __builtin_ctzl(mask);
    __asm__("btr %1,%0" : "+r"(mask) : "r"(bit));
#elif defined(_WIN64)
    _BitScanForward64(&bit, mask);
    _bittestandreset64(&mask, bit);
#elif defined(_WIN32)
    _BitScanForward(&bit, mask);
    _bittestandreset(&mask, bit);
#error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de"
#define Vc_foreach_bit(_it_, _mask_) \
    for (Vc::SSE::ForeachHelper Vc__make_unique(foreach_bit_obj)((_mask_).toInt()); Vc__make_unique(foreach_bit_obj).outer(); ) \
        for (_it_ = Vc__make_unique(foreach_bit_obj).next(); Vc__make_unique(foreach_bit_obj).inner(); Vc__make_unique(foreach_bit_obj).noBreak())
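// Usage sketch for Vc_foreach_bit (illustrative only, not part of mask.h). It assumes Vc's
// SSE float_v mask type; the iteration variable receives the index of each set mask entry.
// The helper name is hypothetical.
#include <Vc/Vc>
static inline int sumOfSetIndexes_sketch(const Vc::SSE::float_v::Mask &m)
{
    int i;
    int sum = 0;
    Vc_foreach_bit(i, m) { // body runs once per set bit, with i = index of that bit
        sum += i;
    }
    return sum;
}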
    return _mm_movemask_epi8(dataI());
    k = _mm_unpacklo_ps(x.data(), x.data());
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
    tmp = _mm_unpacklo_epi16(tmp, tmp);
    k = _mm_castsi128_ps(_mm_unpacklo_epi32(tmp, tmp));
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), _mm_setzero_si128()));
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(x.dataI(), x.dataI()));
    k = _mm_castsi128_ps(_mm_unpacklo_epi16(tmp, tmp));
    k = _mm_castsi128_ps(_mm_packs_epi16(tmp, tmp));
    k = _mm_castsi128_ps(_mm_packs_epi16(x.dataI(), x.dataI()));
    k = _mm_castsi128_ps(_mm_unpacklo_epi8(x.dataI(), x.dataI()));
    // expand(): split this mask into two Mask<VectorSize/2> halves with widened entries
    x[0].k = _mm_unpacklo_ps(data(), data());
    x[1].k = _mm_unpackhi_ps(data(), data());
    x[0].k = _mm_castsi128_ps(_mm_unpacklo_epi16(dataI(), dataI()));
    x[1].k = _mm_castsi128_ps(_mm_unpackhi_epi16(dataI(), dataI()));
    x[0].k = _mm_castsi128_ps(_mm_unpacklo_epi8 (dataI(), dataI()));
    x[1].k = _mm_castsi128_ps(_mm_unpackhi_epi8 (dataI(), dataI()));
    int mask = _mm_movemask_pd(dataD()); // count() for the 2-entry mask: movemask_pd yields two bits
    return (mask & 1) + (mask >> 1);
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(_mm_movemask_ps(data()));
    _M128I x = _mm_srli_epi32(dataI(), 31); // fallback: move each sign bit to bit 0
    x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
    return _mm_cvtsi128_si32(x); // horizontal sum of the four 0/1 lanes
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(_mm_movemask_epi8(dataI())) / 2;
    _M128I x = _mm_srli_epi16(dataI(), 15);
    x = _mm_add_epi16(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi16(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)));
    return _mm_extract_epi16(x, 0);
    int tmp = _mm_movemask_epi8(dataI());
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(tmp);
    tmp = (tmp & 0x5555) + ((tmp >> 1) & 0x5555); // sum adjacent bit pairs
    tmp = (tmp & 0x3333) + ((tmp >> 2) & 0x3333); // sum adjacent 2-bit fields
    tmp = (tmp & 0x0f0f) + ((tmp >> 4) & 0x0f0f); // sum adjacent nibbles
    return (tmp & 0x00ff) + ((tmp >> 8) & 0x00ff); // sum the two bytes
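// The count() variants above all return the number of set mask entries, via POPCNT where
// available and via the horizontal-add fallbacks shown otherwise. A usage sketch (illustrative
// only; assumes Vc's SSE short_v with 8 entries, helper name hypothetical):
#include <Vc/Vc>
static inline int count_sketch()
{
    Vc::SSE::short_v x(Vc::IndexesFromZero);        // entries 0 1 2 3 4 5 6 7
    return (x < Vc::SSE::short_v(3)).count();       // entries 0, 1, 2 match -> returns 3
}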
#if defined VC_MSVC && defined _WIN32
    k[0] = _mm_setzero_ps();
    k[1] = _mm_setzero_ps();
    k[0] = _mm_castsi128_ps(_mm_unpacklo_epi16(a.dataI(), a.dataI()));
    k[1] = _mm_castsi128_ps(_mm_unpackhi_epi16(a.dataI(), a.dataI()));
    // Float8Mask binary &, &&, |, ||, ^ apply the corresponding *_ps intrinsic to both halves
    r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
    r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
    r.k[0] = _mm_and_ps(k[0], rhs.k[0]);
    r.k[1] = _mm_and_ps(k[1], rhs.k[1]);
    r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
    r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
    r.k[0] = _mm_or_ps(k[0], rhs.k[0]);
    r.k[1] = _mm_or_ps(k[1], rhs.k[1]);
    r.k[0] = _mm_xor_ps(k[0], rhs.k[0]);
    r.k[1] = _mm_xor_ps(k[1], rhs.k[1]);
    k[0] = _mm_and_ps(k[0], rhs.k[0]); // operator&=
    k[1] = _mm_and_ps(k[1], rhs.k[1]);
    k[0] = _mm_or_ps (k[0], rhs.k[0]); // operator|=
    k[1] = _mm_or_ps (k[1], rhs.k[1]);
    k[0] = _mm_xor_ps(k[0], rhs.k[0]); // operator^=
    k[1] = _mm_xor_ps(k[1], rhs.k[1]);
    const _M128 tmp = _mm_and_ps(k[0], k[1]); // isFull(): both halves must have all entries set
    return _mm_movemask_ps(tmp) == 0xf;
    const _M128 tmp = _mm_or_ps(k[0], k[1]); // isEmpty(): no entry set in either half
    return _mm_testz_si128(_mm_castps_si128(tmp), _mm_castps_si128(tmp)); // SSE4.1 ptest path
    return _mm_movemask_ps(tmp) == 0x0; // movemask fallback
    __m128i tmp = _mm_castps_si128(_mm_xor_ps(k[0], k[1])); // isMix(), ptest path
    return !_mm_testz_si128(tmp, tmp) ||
    const int tmp = _mm_movemask_ps(k[0]) + _mm_movemask_ps(k[1]); // movemask fallback
    return tmp > 0x0 && tmp < (0xf + 0xf);
#ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
    return (_mm_movemask_ps(k[1]) << 4) + _mm_movemask_ps(k[0]); // toInt(): one bit per entry
    return (toInt() & (1 << index)) != 0; // operator[]
#ifdef VC_IMPL_POPCNT
    return _mm_popcnt_u32(toInt()); // count() via POPCNT
    _M128I x = _mm_add_epi32(_mm_srli_epi32(_mm_castps_si128(k[0]), 31),
                             _mm_srli_epi32(_mm_castps_si128(k[1]), 31));
    x = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(0, 1, 2, 3)));
    x = _mm_add_epi32(x, _mm_shufflelo_epi16(x, _MM_SHUFFLE(1, 0, 3, 2)));
    return _mm_cvtsi128_si32(x);
#ifdef VC_COMPILE_BENCHMARKS
    const int mask = toInt(); // firstOne(): index of the lowest set mask bit
    _BitScanForward(&bit, mask);
    __asm__("bsf %1,%0" : "=&r"(bit) : "r"(mask));
    const int mask = toInt();
    _BitScanForward(&bit, mask);
    __asm__("bsf %1,%0" : "=&r"(bit) : "r"(mask));
template<unsigned int VectorSize>
Vc_ALWAYS_INLINE Mask<VectorSize>::Mask(const Float8Mask &m)
    : k(_mm_castsi128_ps(_mm_packs_epi32(_mm_castps_si128(m.data()[0]), _mm_castps_si128(m.data()[1])))) {}
template<unsigned int Size> void operator& (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator| (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator^ (const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator& (const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator| (const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator^ (const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int LSize, unsigned int RSize> void operator&&(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
template<unsigned int LSize, unsigned int RSize> void operator||(const Mask<LSize> &lhs, const Mask<RSize> &rhs);
template<unsigned int Size> void operator&&(const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator||(const Mask<Size> &lhs, const Float8Mask &rhs);
template<unsigned int Size> void operator&&(const Float8Mask &rhs, const Mask<Size> &lhs);
template<unsigned int Size> void operator||(const Float8Mask &rhs, const Mask<Size> &lhs);
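// Note on the declarations above: the operator&/|/^ and operator&&/|| overloads that mix a
// Mask<Size> with a Float8Mask, or two Masks with different size parameters, return void and
// have no definition in this header. The apparent intent is that selecting one of these
// overloads produces an unusable void expression, so combining masks of mismatched entry
// counts fails to compile rather than silently yielding a wrong mask.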