// Fragment of a min/max (compare-exchange) network over the eight signed 16-bit lanes
// of x.
// NOTE(review): the enclosing function header and the declarations of x, y, lo and hi
// are not part of this listing -- presumably the body of a sort() helper; confirm
// against the original file.
//
// Stage 1: y is x with every pair of neighbouring lanes swapped (0<->1, 2<->3, ...).
35 y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
36 lo = _mm_min_epi16(x, y);
37 hi = _mm_max_epi16(x, y);
// Mask 0xaa takes the odd lanes from hi and the even lanes from lo, so each adjacent
// pair ends up with its smaller value in the even lane.
38 x = _mm_blend_epi16(lo, hi, 0xaa);
// Stage 2: y is x with the four lanes of each 64-bit half reversed.
41 y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)), _MM_SHUFFLE(0, 1, 2, 3));
42 lo = _mm_min_epi16(x, y);
43 hi = _mm_max_epi16(x, y);
// Mask 0xcc keeps the minima in lanes 0,1,4,5 and the maxima in lanes 2,3,6,7.
44 x = _mm_blend_epi16(lo, hi, 0xcc);
// y is x shifted down by one 16-bit lane, so lane i is compared with lane i+1.
45 y = _mm_srli_si128(x, 2);
46 lo = _mm_min_epi16(x, y);
47 hi = _mm_max_epi16(x, y);
// Even lanes keep min(x[i], x[i+1]); hi is shifted up by one lane first so that the
// odd lane i+1 receives max(x[i], x[i+1]).
48 x = _mm_blend_epi16(lo, _mm_slli_si128(hi, 2), 0xaa);
// Stage 3: swap the 64-bit halves of x, then reverse the low four lanes; the low half
// of y is therefore the reversed high half of x.
51 y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
52 y = _mm_shufflelo_epi16(y, _MM_SHUFFLE(0, 1, 2, 3));
53 lo = _mm_min_epi16(x, y);
54 hi = _mm_max_epi16(x, y);
// From here on only the low four lanes of lo and hi are consumed: interleave them as
// lo0, hi0, lo1, hi1, ...
56 x = _mm_unpacklo_epi16(lo, hi);
// Move the upper 64 bits of x down so the two halves are compared lane by lane.
57 y = _mm_srli_si128(x, 8);
58 lo = _mm_min_epi16(x, y);
59 hi = _mm_max_epi16(x, y);
// Same interleave / half-shift / min-max step once more.
61 x = _mm_unpacklo_epi16(lo, hi);
62 y = _mm_srli_si128(x, 8);
63 lo = _mm_min_epi16(x, y);
64 hi = _mm_max_epi16(x, y);
// The result is the final interleave of the low halves of lo and hi.
66 return _mm_unpacklo_epi16(lo, hi);
// Fragment of a min/max (compare-exchange) network over the eight unsigned 16-bit
// lanes of x; identical in shape to the signed variant but using the epu16 min/max.
// NOTE(review): the enclosing function header and the declarations of x, y, lo and hi
// are not part of this listing -- presumably the body of a sort() helper; confirm
// against the original file.
//
// Stage 1: y is x with every pair of neighbouring lanes swapped (0<->1, 2<->3, ...).
72 y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
73 lo = _mm_min_epu16(x, y);
74 hi = _mm_max_epu16(x, y);
// Mask 0xaa takes the odd lanes from hi and the even lanes from lo, so each adjacent
// pair ends up with its smaller value in the even lane.
75 x = _mm_blend_epi16(lo, hi, 0xaa);
// Stage 2: y is x with the four lanes of each 64-bit half reversed.
78 y = _mm_shufflelo_epi16(_mm_shufflehi_epi16(x, _MM_SHUFFLE(0, 1, 2, 3)), _MM_SHUFFLE(0, 1, 2, 3));
79 lo = _mm_min_epu16(x, y);
80 hi = _mm_max_epu16(x, y);
// Mask 0xcc keeps the minima in lanes 0,1,4,5 and the maxima in lanes 2,3,6,7.
81 x = _mm_blend_epi16(lo, hi, 0xcc);
// y is x shifted down by one 16-bit lane, so lane i is compared with lane i+1.
82 y = _mm_srli_si128(x, 2);
83 lo = _mm_min_epu16(x, y);
84 hi = _mm_max_epu16(x, y);
// Even lanes keep min(x[i], x[i+1]); hi is shifted up by one lane first so that the
// odd lane i+1 receives max(x[i], x[i+1]).
85 x = _mm_blend_epi16(lo, _mm_slli_si128(hi, 2), 0xaa);
// Stage 3: swap the 64-bit halves of x, then reverse the low four lanes; the low half
// of y is therefore the reversed high half of x.
88 y = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
89 y = _mm_shufflelo_epi16(y, _MM_SHUFFLE(0, 1, 2, 3));
90 lo = _mm_min_epu16(x, y);
91 hi = _mm_max_epu16(x, y);
// From here on only the low four lanes of lo and hi are consumed: interleave them as
// lo0, hi0, lo1, hi1, ...
93 x = _mm_unpacklo_epi16(lo, hi);
// Move the upper 64 bits of x down so the two halves are compared lane by lane.
94 y = _mm_srli_si128(x, 8);
95 lo = _mm_min_epu16(x, y);
96 hi = _mm_max_epu16(x, y);
// Same interleave / half-shift / min-max step once more.
98 x = _mm_unpacklo_epi16(lo, hi);
99 y = _mm_srli_si128(x, 8);
100 lo = _mm_min_epu16(x, y);
101 hi = _mm_max_epu16(x, y);
// The result is the final interleave of the low halves of lo and hi.
103 return _mm_unpacklo_epi16(lo, hi);
// Fragment of a min/max network over eight signed 32-bit values supplied as the two
// m128i registers hgfe and dcba; every exit path builds its result with concat().
// NOTE(review): the function header is not part of this listing, and Reg::permute,
// Reg::shuffle and concat are project helpers whose exact lane conventions are not
// visible here.
//
// Lane-wise minima and maxima of the two inputs.
111 m128i l = _mm_min_epi32(hgfe, dcba);
112 m128i h = _mm_max_epi32(hgfe, dcba);
// Interleave l and h: x receives their lower two lanes, y their upper two.
114 m128i x = _mm_unpacklo_epi32(l, h);
115 m128i y = _mm_unpackhi_epi32(l, h);
117 l = _mm_min_epi32(x, y);
118 h = _mm_max_epi32(x, y);
// Compare l against a permutation of h, and h against a permutation of l.
120 x = _mm_min_epi32(l, Reg::permute<X2, X2, X0, X0>(h));
121 y = _mm_max_epi32(h, Reg::permute<X3, X3, X1, X1>(l));
// Regroup x and y into the two registers a and b that the merge below works on.
123 m128i b = Reg::shuffle<Y0, Y1, X0, X1>(
y,
x);
124 m128i a = _mm_unpackhi_epi64(x, y);
// Shortcuts marked as unlikely: when element 2 of x >= element 1 of y, or element 0
// of x >= element 3 of y, a and b are concatenated directly and the merge is skipped.
// NOTE(review): presumably these detect inputs whose two halves do not overlap -- confirm.
128 if (
VC_IS_UNLIKELY(static_cast<int>(_mm_extract_epi32(x, 2)) >= static_cast<int>(_mm_extract_epi32(y, 1)))) {
129 return concat(Reg::permute<X0, X1, X2, X3>(b), a);
130 }
else if (
VC_IS_UNLIKELY(static_cast<int>(_mm_extract_epi32(x, 0)) >= static_cast<int>(_mm_extract_epi32(y, 3)))) {
131 return concat(a, Reg::permute<X0, X1, X2, X3>(b));
// NOTE(review): the brace closing the else-if branch does not appear in this listing.
// Merge: three rounds of lane-wise min/max, each followed by an interleave of l and h.
135 l = _mm_min_epi32(a, b);
136 h = _mm_max_epi32(a, b);
138 a = _mm_unpacklo_epi32(l, h);
139 b = _mm_unpackhi_epi32(l, h);
140 l = _mm_min_epi32(a, b);
141 h = _mm_max_epi32(a, b);
143 a = _mm_unpacklo_epi32(l, h);
144 b = _mm_unpackhi_epi32(l, h);
145 l = _mm_min_epi32(a, b);
146 h = _mm_max_epi32(a, b);
// The last interleave is done inline; its low and high parts are joined by concat().
148 return concat(_mm_unpacklo_epi32(l, h), _mm_unpackhi_epi32(l, h));
// Fragment of a min/max network over eight unsigned 32-bit values supplied as the two
// m128i registers hgfe and dcba; every exit path builds its result with concat().
// NOTE(review): the function header is not part of this listing, and Reg::permute,
// Reg::shuffle and concat are project helpers whose exact lane conventions are not
// visible here.
//
// Lane-wise minima and maxima of the two inputs.
156 m128i l = _mm_min_epu32(hgfe, dcba);
157 m128i h = _mm_max_epu32(hgfe, dcba);
// Interleave l and h: x receives their lower two lanes, y their upper two.
159 m128i x = _mm_unpacklo_epi32(l, h);
160 m128i y = _mm_unpackhi_epi32(l, h);
162 l = _mm_min_epu32(x, y);
163 h = _mm_max_epu32(x, y);
// Compare l against a permutation of h, and h against a permutation of l.
165 x = _mm_min_epu32(l, Reg::permute<X2, X2, X0, X0>(h));
166 y = _mm_max_epu32(h, Reg::permute<X3, X3, X1, X1>(l));
// Regroup x and y into the two registers a and b that the merge below works on.
168 m128i b = Reg::shuffle<Y0, Y1, X0, X1>(
y,
x);
169 m128i a = _mm_unpackhi_epi64(x, y);
// NOTE(review): as listed, the next two returns are unconditional, which would make
// the second return and the whole merge below unreachable. The signed 32-bit variant
// guards the same two returns with VC_IS_UNLIKELY(...) comparisons of extracted
// elements; the guarding lines appear to be missing from this listing -- confirm
// against the original file before relying on this fragment.
172 return concat(Reg::permute<X0, X1, X2, X3>(b), a);
174 return concat(a, Reg::permute<X0, X1, X2, X3>(b));
// Merge: three rounds of lane-wise min/max, each followed by an interleave of l and h.
178 l = _mm_min_epu32(a, b);
179 h = _mm_max_epu32(a, b);
181 a = _mm_unpacklo_epi32(l, h);
182 b = _mm_unpackhi_epi32(l, h);
183 l = _mm_min_epu32(a, b);
184 h = _mm_max_epu32(a, b);
186 a = _mm_unpacklo_epi32(l, h);
187 b = _mm_unpackhi_epi32(l, h);
188 l = _mm_min_epu32(a, b);
189 h = _mm_max_epu32(a, b);
// The last interleave is done inline; its low and high parts are joined by concat().
191 return concat(_mm_unpacklo_epi32(l, h), _mm_unpackhi_epi32(l, h));
// Fragment of a min/max network over eight single-precision values supplied as the
// two m128 registers hgfe and dcba; the result is built with concat().
// NOTE(review): the function header is not part of this listing, and Reg::permute,
// Reg::shuffle and concat are project helpers whose exact lane conventions are not
// visible here. Unlike the signed 32-bit integer variant, no early-return shortcut
// appears in this fragment.
//
// Lane-wise minima and maxima of the two inputs.
199 m128 l = _mm_min_ps(hgfe, dcba);
200 m128 h = _mm_max_ps(hgfe, dcba);
// Interleave l and h: x receives their lower two lanes, y their upper two.
202 m128 x = _mm_unpacklo_ps(l, h);
203 m128 y = _mm_unpackhi_ps(l, h);
205 l = _mm_min_ps(x, y);
206 h = _mm_max_ps(x, y);
// Compare l against a permutation of h, and h against a permutation of l.
208 x = _mm_min_ps(l, Reg::permute<X2, X2, X0, X0>(h));
209 y = _mm_max_ps(h, Reg::permute<X3, X3, X1, X1>(l));
// a = upper 64 bits of x followed by upper 64 bits of y; the casts only reinterpret
// the registers so the double-precision unpack can move 64-bit halves.
211 m128 a = _mm_castpd_ps(_mm_unpackhi_pd(_mm_castps_pd(x), _mm_castps_pd(y)));
212 m128 b = Reg::shuffle<Y0, Y1, X0, X1>(
y,
x);
// Merge: three rounds of lane-wise min/max, each followed by an interleave of l and h.
215 l = _mm_min_ps(a, b);
216 h = _mm_max_ps(a, b);
218 a = _mm_unpacklo_ps(l, h);
219 b = _mm_unpackhi_ps(l, h);
220 l = _mm_min_ps(a, b);
221 h = _mm_max_ps(a, b);
223 a = _mm_unpacklo_ps(l, h);
224 b = _mm_unpackhi_ps(l, h);
225 l = _mm_min_ps(a, b);
226 h = _mm_max_ps(a, b);
// The last interleave is done inline; its low and high parts are joined by concat().
228 return concat(_mm_unpacklo_ps(l, h), _mm_unpackhi_ps(l, h));
// Fragment of a min/max network over the eight doubles held in the two m256d
// registers x and y; the last statements write the result back into x and y.
// NOTE(review): the function header is not part of this listing (x and y are
// presumably in/out reference parameters), and Reg::permute / Reg::permute128 are
// project helpers whose exact lane conventions are not visible here.
//
// Lane-wise minima and maxima of the two inputs.
238 m256d l = _mm256_min_pd(x, y);
239 m256d h = _mm256_max_pd(x, y);
// Interleave l and h within each 128-bit lane, then compare again.
240 x = _mm256_unpacklo_pd(l, h);
241 y = _mm256_unpackhi_pd(l, h);
242 l = _mm256_min_pd(x, y);
243 h = _mm256_max_pd(x, y);
244 x = _mm256_unpacklo_pd(l, h);
// Note the operand order (h, l) here, unlike the (l, h) used by the other unpacks.
245 y = _mm256_unpackhi_pd(h, l);
246 l = _mm256_min_pd(x, y);
247 h = _mm256_max_pd(x, y);
// Rearrange h and l into the two registers a and b that the merge below works on.
248 m256d a = Reg::permute<X2, X3, X1, X0>(Reg::permute128<X0, X1>(
h,
h));
249 m256d b = Reg::permute<X2, X3, X1, X0>(
l);
// Merge. h must hold the lane-wise MAXIMA in every round: computing it with
// _mm256_min_pd (as before) makes l and h identical, so each following interleave
// duplicates the minima and discards the maxima.
255 l = _mm256_min_pd(a, b);
256 h = _mm256_max_pd(a, b);
258 x = _mm256_unpacklo_pd(l, h);
259 y = _mm256_unpackhi_pd(l, h);
260 l = _mm256_min_pd(x, y);
261 h = _mm256_max_pd(x, y);
// Exchange 128-bit halves between l and h before the last compare.
263 x = Reg::permute128<Y0, X0>(
l,
h);
264 y = Reg::permute128<Y1, X1>(
l,
h);
265 l = _mm256_min_pd(x, y);
266 h = _mm256_max_pd(x, y);
// Final interleave writes the result into x and y.
268 x = _mm256_unpacklo_pd(l, h);
269 y = _mm256_unpackhi_pd(l, h);
// Fragment operating on the two-lane double vectors l and h.
// NOTE(review): l and h are declared outside this listing, as is the statement that
// consumes the two expressions at the end; Reg::permute is a project helper.
//
// Interleave: h0_l0 = (l[0], h[0]) and h1_l1 = (l[1], h[1]).
303 m128d h0_l0 = _mm_unpacklo_pd(l, h);
304 m128d h1_l1 = _mm_unpackhi_pd(l, h);
// Lane-wise minima and maxima of the interleaved pairs.
305 l = _mm_min_pd(h0_l0, h1_l1);
306 h = _mm_max_pd(h0_l0, h1_l1);
// The two expressions below are arguments of a call whose opening is not part of
// this listing: l is compared against a permutation of h, and h against one of l.
308 _mm_min_pd(l, Reg::permute<X0, X0>(h)),
309 _mm_max_pd(h, Reg::permute<X1, X1>(l))
static VectorType sort(VTArg)
static Vc_INTRINSIC unsigned int Vc_CONST _mm_extract_epu32(param128i x, const int i)
Namespace for new ROOT classes and functions.
VectorTypeHelper< T >::Type VectorType
Vc_INTRINSIC Vc_CONST m128 hi128(param256 v)
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
#define VC_IS_UNLIKELY(x)
float type_of_call hi(const int &, const int &)
Vc_INTRINSIC Vc_CONST m128 lo128(param256 v)