ROOT  6.06/09
Reference Guide
mask.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef VC_AVX_MASK_H
21 #define VC_AVX_MASK_H
22 
23 #include "intrinsics.h"
24 #include "../common/bitscanintrinsics.h"
25 #include "macros.h"
26 
27 namespace ROOT {
28 namespace Vc
29 {
30 namespace AVX
31 {
32 
33 template<unsigned int VectorSize> class Mask<VectorSize, 32u>
34 {
35  friend class Mask<4u, 32u>; // double_v
36  friend class Mask<8u, 32u>; // float_v, (u)int_v
37  friend class Mask<8u, 16u>; // (u)short_v
38  friend class Mask<16u, 16u>; // (u)char_v
39  public:
41 
42  // abstracts the way Masks are passed to functions, it can easily be changed to const ref here
43 #if defined VC_MSVC && defined _WIN32
44  typedef const Mask<VectorSize, 32u> &AsArg;
45 #else
47 #endif
48 
51  Vc_ALWAYS_INLINE Mask(param256d x) : k(_mm256_castpd_ps(x)) {}
52  Vc_ALWAYS_INLINE Mask(param256i x) : k(_mm256_castsi256_ps(x)) {}
53 #ifdef VC_UNCONDITIONAL_AVX2_INTRINSICS
54  Vc_ALWAYS_INLINE Mask(__m256 x) : k(x) {}
55  Vc_ALWAYS_INLINE Mask(__m256d x) : k(_mm256_castpd_ps(x)) {}
56  Vc_ALWAYS_INLINE Mask(__m256i x) : k(_mm256_castsi256_ps(x)) {}
57 #endif
58  Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerZero::ZEnum) : k(_mm256_setzero_ps()) {}
60  Vc_ALWAYS_INLINE explicit Mask(bool b) : k(b ? _mm256_setallone_ps() : m256(_mm256_setzero_ps())) {}
61  Vc_ALWAYS_INLINE Mask(const Mask &rhs) : k(rhs.k) {}
63  _mm_unpacklo_epi16(rhs.dataI(), rhs.dataI()),
64  _mm_unpackhi_epi16(rhs.dataI(), rhs.dataI())))) {}
67 
// Equality compares the per-entry sign bits, the same bits isFull()/isEmpty() inspect.
// _mm256_testc_ps(k, rhs.k) only reports whether the sign bits of (~k & rhs.k) are all
// clear, i.e. whether rhs is a subset of *this, so it also reported a full mask as equal
// to an empty one. The XOR of both operands has a sign bit set exactly where they
// disagree, and _mm256_testz_ps(diff, diff) returns 1 only when no such bit is set.
// operator!= is the exact negation of operator==.
68  Vc_ALWAYS_INLINE bool operator==(const Mask &rhs) const { const m256 diff = _mm256_xor_ps(k, rhs.k); return 0 != _mm256_testz_ps(diff, diff); }
69  Vc_ALWAYS_INLINE bool operator!=(const Mask &rhs) const { const m256 diff = _mm256_xor_ps(k, rhs.k); return 0 == _mm256_testz_ps(diff, diff); }
70 
71  Vc_ALWAYS_INLINE Mask operator!() const { return _mm256_andnot_ps(data(), _mm256_setallone_ps()); }
72 
73  Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { k = _mm256_and_ps(k, rhs.k); return *this; }
74  Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { k = _mm256_or_ps (k, rhs.k); return *this; }
75  Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { k = _mm256_xor_ps(k, rhs.k); return *this; }
76 
77  // no need for expression template optimizations because cmp(n)eq for floats are not bitwise
78  // compares
79  Vc_ALWAYS_INLINE bool isFull () const { return 0 != _mm256_testc_ps(k, _mm256_setallone_ps()); }
80  Vc_ALWAYS_INLINE bool isEmpty() const { return 0 != _mm256_testz_ps(k, k); }
81  Vc_ALWAYS_INLINE bool isMix () const { return 0 != _mm256_testnzc_ps(k, _mm256_setallone_ps()); }
82 
83 #ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
84  Vc_ALWAYS_INLINE operator bool() const { return isFull(); }
85 #endif
86 
87  Vc_ALWAYS_INLINE_L Vc_PURE_L int shiftMask() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
88  Vc_ALWAYS_INLINE_L Vc_PURE_L int toInt() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
89 
90  Vc_ALWAYS_INLINE m256 data () const { return k; }
91  Vc_ALWAYS_INLINE m256i dataI() const { return _mm256_castps_si256(k); }
92  Vc_ALWAYS_INLINE m256d dataD() const { return _mm256_castps_pd(k); }
93 
94  Vc_ALWAYS_INLINE_L Vc_PURE_L bool operator[](int index) const Vc_ALWAYS_INLINE_R Vc_PURE_R;
95 
96  Vc_ALWAYS_INLINE_L Vc_PURE_L int count() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
97  Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
98 
99  private:
100 #ifdef VC_COMPILE_BENCHMARKS
101  public:
102 #endif
104 };
105 
106 template<unsigned int VectorSize> class Mask<VectorSize, 16u>
107 {
108  friend class Mask<4u, 32u>; // double_v
109  friend class Mask<8u, 32u>; // float_v, (u)int_v
110  friend class Mask<8u, 16u>; // (u)short_v
111  friend class Mask<16u, 16u>; // (u)char_v
112  public:
114 
115  // abstracts the way Masks are passed to functions, it can easily be changed to const ref here
116 #if defined VC_MSVC && defined _WIN32
117  typedef const Mask<VectorSize, 16u> &AsArg;
118 #else
120 #endif
121 
124  Vc_ALWAYS_INLINE Mask(param128d x) : k(_mm_castpd_ps(x)) {}
125  Vc_ALWAYS_INLINE Mask(param128i x) : k(_mm_castsi128_ps(x)) {}
126 #ifdef VC_UNCONDITIONAL_AVX2_INTRINSICS
127  Vc_ALWAYS_INLINE Mask(__m128 x) : k(x) {}
128  Vc_ALWAYS_INLINE Mask(__m128d x) : k(_mm_castpd_ps(x)) {}
129  Vc_ALWAYS_INLINE Mask(__m128i x) : k(_mm_castsi128_ps(x)) {}
130 #endif
131  Vc_ALWAYS_INLINE explicit Mask(VectorSpecialInitializerZero::ZEnum) : k(_mm_setzero_ps()) {}
133  Vc_ALWAYS_INLINE explicit Mask(bool b) : k(b ? _mm_setallone_ps() : m128(_mm_setzero_ps())) {}
134  Vc_ALWAYS_INLINE Mask(const Mask &rhs) : k(rhs.k) {}
136  _mm_packs_epi32(avx_cast<m128i>(rhs.data()), _mm256_extractf128_si256(rhs.dataI(), 1)))) {}
138  _mm_packs_epi16(a[0].dataI(), a[1].dataI()))) {}
139 
// Equality is a bitwise comparison of the two 128-bit mask registers.
// _mm_testc_si128(a, b) returns 1 when (~a & b) is all zero, i.e. when b is a subset of
// a, so the previous test also reported a full mask as equal to an empty one. The XOR of
// both operands is zero exactly when they are identical, and _mm_testz_si128(diff, diff)
// returns 1 only in that case. operator!= is the exact negation of operator==.
140  Vc_ALWAYS_INLINE bool operator==(const Mask &rhs) const { const m128i diff = avx_cast<m128i>(_mm_xor_ps(k, rhs.k)); return 0 != _mm_testz_si128(diff, diff); }
141  Vc_ALWAYS_INLINE bool operator!=(const Mask &rhs) const { const m128i diff = avx_cast<m128i>(_mm_xor_ps(k, rhs.k)); return 0 == _mm_testz_si128(diff, diff); }
142 
143  Vc_ALWAYS_INLINE Mask operator!() const { return _mm_andnot_ps(data(), _mm_setallone_ps()); }
144 
145  Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { k = _mm_and_ps(k, rhs.k); return *this; }
146  Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { k = _mm_or_ps (k, rhs.k); return *this; }
147  Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { k = _mm_xor_ps(k, rhs.k); return *this; }
148 
149  // TODO: use expression templates to optimize (v1 == v2).isFull() and friends
150  Vc_ALWAYS_INLINE bool isFull () const { return 0 != _mm_testc_si128(dataI(), _mm_setallone_si128()); }
151  Vc_ALWAYS_INLINE bool isEmpty() const { return 0 != _mm_testz_si128(dataI(), dataI()); }
152  Vc_ALWAYS_INLINE bool isMix () const { return 0 != _mm_testnzc_si128(dataI(), _mm_setallone_si128()); }
153 
154 #ifndef VC_NO_AUTOMATIC_BOOL_FROM_MASK
155  Vc_ALWAYS_INLINE operator bool() const { return isFull(); }
156 #endif
157 
158  Vc_ALWAYS_INLINE_L Vc_PURE_L int shiftMask() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
159  Vc_ALWAYS_INLINE_L Vc_PURE_L int toInt() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
160 
161  Vc_ALWAYS_INLINE m128 data () const { return k; }
162  Vc_ALWAYS_INLINE m128i dataI() const { return avx_cast<m128i>(k); }
163  Vc_ALWAYS_INLINE m128d dataD() const { return avx_cast<m128d>(k); }
164 
165  Vc_ALWAYS_INLINE_L Vc_PURE_L bool operator[](int index) const Vc_ALWAYS_INLINE_R Vc_PURE_R;
166 
167  Vc_ALWAYS_INLINE_L Vc_PURE_L int count() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
168  Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
169 
170  private:
171 #ifdef VC_COMPILE_BENCHMARKS
172  public:
173 #endif
175 };
176 
// Iteration state used by the Vc_foreach_bit macro.
//   mask       - the bits that still have to be visited
//   brk        - toggled by inner() so that the macro's inner for-loop runs exactly once
//   outerBreak - set by next() and cleared by noBreak(); noBreak() is the inner loop's
//                increment expression, which a `break` in the loop body skips, so the
//                flag stays set after an early exit and outer() then ends the iteration
177 struct ForeachHelper
178 {
179  size_t mask;
180  bool brk;
181  bool outerBreak;
182  Vc_ALWAYS_INLINE ForeachHelper(size_t _mask) : mask(_mask), brk(false), outerBreak(false) {}
// true while bits remain and the loop body has not been left early
183  Vc_ALWAYS_INLINE bool outer() const { return mask != 0 && !outerBreak; }
// alternates true/false on successive calls, starting with true
184  Vc_ALWAYS_INLINE bool inner() { return (brk = !brk); }
185  Vc_ALWAYS_INLINE void noBreak() { outerBreak = false; }
// Returns the index of the lowest set bit of mask and clears that bit.
// The macro calls this only after outer() has seen mask != 0.
186  Vc_ALWAYS_INLINE size_t next() {
187  outerBreak = true;
188 #ifdef VC_GNU_ASM
189  const size_t bit = __builtin_ctzl(mask);
190  __asm__("btr %1,%0" : "+r"(mask) : "r"(bit));
191 #else
192 #ifdef VC_MSVC
193 #pragma warning(suppress : 4267) // conversion from 'size_t' to 'unsigned long', possible loss of data
194 #endif
195  const size_t bit = _bit_scan_forward(mask);
// shift in size_t: a plain `1 << bit` is computed as int and is only correct for bit < 31
196  mask &= ~(static_cast<size_t>(1) << bit);
197 #endif
198  return bit;
199  }
200 };
201 
// Vc_foreach_bit(_it_, _mask_) <statement>
// Executes <statement> once for every set bit of (_mask_).toInt(), with the bit's index
// assigned to _it_ (ForeachHelper::next() hands out the lowest remaining bit each time).
// The outer for-loop owns a uniquely named ForeachHelper and keeps going while its
// outer() is true; the inner for-loop fetches the next index, runs the statement exactly
// once (inner() alternates true/false) and then calls noBreak(). A `break` inside the
// statement skips noBreak(), which makes outer() return false and ends the whole loop.
202 #define Vc_foreach_bit(_it_, _mask_) \
203  for (Vc::AVX::ForeachHelper Vc__make_unique(foreach_bit_obj)((_mask_).toInt()); Vc__make_unique(foreach_bit_obj).outer(); ) \
204  for (_it_ = Vc__make_unique(foreach_bit_obj).next(); Vc__make_unique(foreach_bit_obj).inner(); Vc__make_unique(foreach_bit_obj).noBreak())
205 
206 // Operators
// Overload set that gives the bitwise AND / OR / XOR float intrinsics one common name per
// operation. The mask operator templates in this file call Intrinsics::and_/or_/xor_ with
// the result of Mask::data(), so the argument type (256-bit or 128-bit register) selects
// the matching intrinsic.
207 namespace Intrinsics
208 {
// 256-bit operands
209  static Vc_ALWAYS_INLINE Vc_PURE m256 and_(param256 a, param256 b) { return _mm256_and_ps(a, b); }
210  static Vc_ALWAYS_INLINE Vc_PURE m256 or_(param256 a, param256 b) { return _mm256_or_ps(a, b); }
211  static Vc_ALWAYS_INLINE Vc_PURE m256 xor_(param256 a, param256 b) { return _mm256_xor_ps(a, b); }
212 
// 128-bit operands
213  static Vc_ALWAYS_INLINE Vc_PURE m128 and_(param128 a, param128 b) { return _mm_and_ps(a, b); }
214  static Vc_ALWAYS_INLINE Vc_PURE m128 or_(param128 a, param128 b) { return _mm_or_ps(a, b); }
215  static Vc_ALWAYS_INLINE Vc_PURE m128 xor_(param128 a, param128 b) { return _mm_xor_ps(a, b); }
216 } // namespace Intrinsics
217 
// Bitwise operators on masks.
// The first group accepts any two Mask specializations, returns void and is only declared
// in this file, so an expression mixing masks with different Width parameters produces no
// usable value. The second group requires both operands to share the same Width (the
// entry counts LSize/RSize may differ) and returns a mask of the left operand's type,
// built from the Intrinsics overload that matches the register type returned by data().
218 // binary and/or/xor cannot work with one operand larger than the other
219 template<unsigned int LSize, unsigned int RSize, size_t LWidth, size_t RWidth> void operator&(const Mask<LSize, LWidth> &l, const Mask<RSize, RWidth> &r);
220 template<unsigned int LSize, unsigned int RSize, size_t LWidth, size_t RWidth> void operator|(const Mask<LSize, LWidth> &l, const Mask<RSize, RWidth> &r);
221 template<unsigned int LSize, unsigned int RSize, size_t LWidth, size_t RWidth> void operator^(const Mask<LSize, LWidth> &l, const Mask<RSize, RWidth> &r);
222 
223 // let binary and/or/xor work for any combination of masks (as long as they have the same sizeof)
224 template<unsigned int LSize, unsigned int RSize, size_t Width> Vc_ALWAYS_INLINE Vc_PURE Mask<LSize, Width> operator&(const Mask<LSize, Width> &l, const Mask<RSize, Width> &r) { return Intrinsics::and_(l.data(), r.data()); }
225 template<unsigned int LSize, unsigned int RSize, size_t Width> Vc_ALWAYS_INLINE Vc_PURE Mask<LSize, Width> operator|(const Mask<LSize, Width> &l, const Mask<RSize, Width> &r) { return Intrinsics:: or_(l.data(), r.data()); }
226 template<unsigned int LSize, unsigned int RSize, size_t Width> Vc_ALWAYS_INLINE Vc_PURE Mask<LSize, Width> operator^(const Mask<LSize, Width> &l, const Mask<RSize, Width> &r) { return Intrinsics::xor_(l.data(), r.data()); }
227 
// Logical operators on masks. Because && and || are overloaded here, both operands are
// always evaluated (no short-circuiting) and the result is a mask, not a bool.
//  - any two Mask specializations: void-returning declarations without a body in this file
//  - same Size, different Width: rhs is converted to the left operand's mask type and the
//    same-type overload below is applied
//  - same Size and Width: element-wise AND / OR through the Intrinsics overloads
228 // disable logical and/or for incompatible masks
229 template<unsigned int LSize, unsigned int RSize, size_t LWidth, size_t RWidth> void operator&&(const Mask<LSize, LWidth> &lhs, const Mask<RSize, RWidth> &rhs);
230 template<unsigned int LSize, unsigned int RSize, size_t LWidth, size_t RWidth> void operator||(const Mask<LSize, LWidth> &lhs, const Mask<RSize, RWidth> &rhs);
231 
232 // logical and/or for compatible masks
233 template<unsigned int Size, size_t LWidth, size_t RWidth> Vc_ALWAYS_INLINE Vc_PURE Mask<Size, LWidth> operator&&(const Mask<Size, LWidth> &lhs, const Mask<Size, RWidth> &rhs) { return lhs && static_cast<Mask<Size, LWidth> >(rhs); }
234 template<unsigned int Size, size_t LWidth, size_t RWidth> Vc_ALWAYS_INLINE Vc_PURE Mask<Size, LWidth> operator||(const Mask<Size, LWidth> &lhs, const Mask<Size, RWidth> &rhs) { return lhs || static_cast<Mask<Size, LWidth> >(rhs); }
235 
236 template<unsigned int Size, size_t Width> Vc_ALWAYS_INLINE Vc_PURE Mask<Size, Width> operator&&(const Mask<Size, Width> &lhs, const Mask<Size, Width> &rhs) { return Intrinsics::and_(lhs.data(), rhs.data()); }
237 template<unsigned int Size, size_t Width> Vc_ALWAYS_INLINE Vc_PURE Mask<Size, Width> operator||(const Mask<Size, Width> &lhs, const Mask<Size, Width> &rhs) { return Intrinsics::or_ (lhs.data(), rhs.data()); }
238 
239 } // namespace AVX
240 } // namespace Vc
241 } // namespace ROOT
242 
243 #include "mask.tcc"
244 #include "undomacros.h"
245 
246 #endif // VC_AVX_MASK_H
Vc_ALWAYS_INLINE Mask & operator&=(const Mask &rhs)
Definition: mask.h:145
const m256d param256d
Definition: intrinsics.h:129
Vc_ALWAYS_INLINE Mask(param256i x)
Definition: mask.h:52
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerZero::ZEnum)
Definition: mask.h:58
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
void operator^(const Mask< LSize, LWidth > &l, const Mask< RSize, RWidth > &r)
static Vc_INTRINSIC_L T avx_cast(param128 v) Vc_INTRINSIC_R
Definition: casts.h:49
static Vc_INTRINSIC m128i Vc_CONST _mm_setallone_si128()
Definition: intrinsics.h:165
Vc_ALWAYS_INLINE Mask & operator&=(const Mask &rhs)
Definition: mask.h:73
const m128 param128
Definition: intrinsics.h:125
Vc_ALWAYS_INLINE Mask & operator|=(const Mask &rhs)
Definition: mask.h:146
#define FREE_STORE_OPERATORS_ALIGNED(alignment)
Definition: macros.h:165
Vc_ALWAYS_INLINE size_t next()
Definition: mask.h:186
Vc_ALWAYS_INLINE m128d dataD() const
Definition: mask.h:163
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerOne::OEnum)
Definition: mask.h:132
Vc_ALWAYS_INLINE Mask(const Mask &rhs)
Definition: mask.h:134
__m128i m128i
Definition: intrinsics.h:112
__m256d m256d
Definition: intrinsics.h:114
#define Vc_PURE_L
Definition: macros.h:137
TArc * a
Definition: textangle.C:12
Vc_ALWAYS_INLINE Mask(param256d x)
Definition: mask.h:51
Vc_ALWAYS_INLINE Mask(bool b)
Definition: mask.h:133
Vc_ALWAYS_INLINE Mask(const Mask &rhs)
Definition: mask.h:61
ClassImp(TIterator) Bool_t TIterator return false
Compare two iterator objects.
Definition: TIterator.cxx:20
Mask< VectorSize, 16u > AsArg
Definition: mask.h:119
Vc_ALWAYS_INLINE bool isEmpty() const
Definition: mask.h:80
void operator&(const Mask< LSize, LWidth > &l, const Mask< RSize, RWidth > &r)
Double_t x[n]
Definition: legend1.C:17
Vc_ALWAYS_INLINE void noBreak()
Definition: mask.h:185
Vc_ALWAYS_INLINE Mask operator!() const
Definition: mask.h:143
const m256i param256i
Definition: intrinsics.h:130
Vc_ALWAYS_INLINE ForeachHelper(size_t _mask)
Definition: mask.h:182
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
Definition: casts.h:123
void operator|(const Mask< LSize, LWidth > &l, const Mask< RSize, RWidth > &r)
static Vc_ALWAYS_INLINE Vc_PURE m256 xor_(param256 a, param256 b)
Definition: mask.h:211
Vc_ALWAYS_INLINE bool operator==(const Mask &rhs) const
Definition: mask.h:68
Vc_ALWAYS_INLINE m256i dataI() const
Definition: mask.h:91
Vc_ALWAYS_INLINE Mask(const Mask< VectorSize, 32u > &rhs)
Definition: mask.h:135
Vc_ALWAYS_INLINE Mask(bool b)
Definition: mask.h:60
#define Vc_PURE
Definition: macros.h:136
#define AVX
Definition: global.h:90
#define Vc_ALWAYS_INLINE_R
Definition: macros.h:132
Vc_ALWAYS_INLINE Mask(param256 x)
Definition: mask.h:50
Vc_ALWAYS_INLINE Mask(const Mask< VectorSize, 16u > &rhs)
Definition: mask.h:62
Vc_ALWAYS_INLINE Mask & operator^=(const Mask &rhs)
Definition: mask.h:147
ROOT::R::TRInterface & r
Definition: Object.C:4
Vc_ALWAYS_INLINE bool operator==(const Mask &rhs) const
Definition: mask.h:140
void operator&&(const Mask< LSize, LWidth > &lhs, const Mask< RSize, RWidth > &rhs)
Vc_ALWAYS_INLINE bool isFull() const
Definition: mask.h:150
__m256i m256i
Definition: intrinsics.h:115
TMarker * m
Definition: textangle.C:8
TLine * l
Definition: textangle.C:4
Vc_ALWAYS_INLINE Mask(param128d x)
Definition: mask.h:124
Mask< VectorSize, 32u > AsArg
Definition: mask.h:46
Vc_ALWAYS_INLINE m128i dataI() const
Definition: mask.h:162
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
Vc_ALWAYS_INLINE Mask(const Mask< VectorSize/2, 16u > *a)
Definition: mask.h:137
static Vc_ALWAYS_INLINE Vc_PURE m256 or_(param256 a, param256 b)
Definition: mask.h:210
static Vc_ALWAYS_INLINE Vc_PURE m256 and_(param256 a, param256 b)
Definition: mask.h:209
#define Vc_ALWAYS_INLINE_L
Definition: macros.h:131
Vc_ALWAYS_INLINE bool isEmpty() const
Definition: mask.h:151
Vc_ALWAYS_INLINE bool operator!=(const Mask &rhs) const
Definition: mask.h:141
Vc_ALWAYS_INLINE bool isMix() const
Definition: mask.h:81
const m128d param128d
Definition: intrinsics.h:126
static Vc_INTRINSIC m256 Vc_CONST _mm256_setallone_ps()
Definition: intrinsics.h:186
void operator||(const Mask< LSize, LWidth > &lhs, const Mask< RSize, RWidth > &rhs)
Vc_ALWAYS_INLINE bool inner()
Definition: mask.h:184
static Vc_INTRINSIC m128 Vc_CONST _mm_setallone_ps()
Definition: intrinsics.h:167
Vc_ALWAYS_INLINE Mask & operator|=(const Mask &rhs)
Definition: mask.h:74
#define Vc_PURE_R
Definition: macros.h:138
Vc_ALWAYS_INLINE Mask & operator^=(const Mask &rhs)
Definition: mask.h:75
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerZero::ZEnum)
Definition: mask.h:131
Vc_ALWAYS_INLINE m256d dataD() const
Definition: mask.h:92
Vc_ALWAYS_INLINE bool isFull() const
Definition: mask.h:79
Vc_ALWAYS_INLINE Mask(param128 x)
Definition: mask.h:123
Vc_ALWAYS_INLINE bool outer() const
Definition: mask.h:183
Vc_ALWAYS_INLINE bool operator!=(const Mask &rhs) const
Definition: mask.h:69
Definition: casts.h:28
Vc_ALWAYS_INLINE Mask(VectorSpecialInitializerOne::OEnum)
Definition: mask.h:59
const m256 param256
Definition: intrinsics.h:128
Vc_ALWAYS_INLINE Mask operator!() const
Definition: mask.h:71
Vc_ALWAYS_INLINE Mask(param128i x)
Definition: mask.h:125
__m128d m128d
Definition: intrinsics.h:111
Vc_ALWAYS_INLINE bool isMix() const
Definition: mask.h:152
const m128i param128i
Definition: intrinsics.h:127