ROOT  6.06/09
Reference Guide
sse_blend.cpp
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
#include "unittest.h"
#include <Vc/sse/intrinsics.h>
#include <cstring>
22 
23 namespace std
24 {
25 ostream &operator<<(ostream &out, const __m128i &v)
26 {
27  union {
28  __m128i v;
29  short m[8];
30  } x = { v };
31 
32  out << "[" << x.m[0];
33  for (int i = 1; i < 8; ++i) {
34  out << ", " << x.m[i];
35  }
36  return out << "]";
37 }
38 } // namespace std
39 
40 template<> inline bool unittest_compareHelper<__m128i, __m128i>(const __m128i &a, const __m128i &b)
41 {
42  return _mm_movemask_epi8(_mm_cmpeq_epi16(a, b)) == 0xffff;
43 }
44 
45 void blendpd()
46 {
47 #ifdef VC_IMPL_SSE4_1
48 #define blend _mm_blend_pd
49 #else
50 #define blend Vc::SSE::mm_blend_pd
51 #endif
52  __m128d a = _mm_set_pd(11, 10);
53  __m128d b = _mm_set_pd(21, 20);
54 
55  COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x0), a)), 0x3);
56  COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x1), _mm_set_pd(11, 20))), 0x3);
57  COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x2), _mm_set_pd(21, 10))), 0x3);
58  COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x3), b)), 0x3);
59 #undef blend
60 }
61 void blendps()
62 {
63 #ifdef VC_IMPL_SSE4_1
64 #define blend _mm_blend_ps
65 #else
66 #define blend Vc::SSE::mm_blend_ps
67 #endif
68  __m128 a = _mm_set_ps(13, 12, 11, 10);
69  __m128 b = _mm_set_ps(23, 22, 21, 20);
70 
71  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x0), a)), 0xf);
72  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x1), _mm_set_ps(13, 12, 11, 20))), 0xf);
73  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x2), _mm_set_ps(13, 12, 21, 10))), 0xf);
74  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x3), _mm_set_ps(13, 12, 21, 20))), 0xf);
75  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x4), _mm_set_ps(13, 22, 11, 10))), 0xf);
76  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x5), _mm_set_ps(13, 22, 11, 20))), 0xf);
77  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x6), _mm_set_ps(13, 22, 21, 10))), 0xf);
78  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x7), _mm_set_ps(13, 22, 21, 20))), 0xf);
79  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x8), _mm_set_ps(23, 12, 11, 10))), 0xf);
80  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x9), _mm_set_ps(23, 12, 11, 20))), 0xf);
81  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xa), _mm_set_ps(23, 12, 21, 10))), 0xf);
82  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xb), _mm_set_ps(23, 12, 21, 20))), 0xf);
83  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xc), _mm_set_ps(23, 22, 11, 10))), 0xf);
84  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xd), _mm_set_ps(23, 22, 11, 20))), 0xf);
85  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xe), _mm_set_ps(23, 22, 21, 10))), 0xf);
86  COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xf), b)), 0xf);
87 #undef blend
88 }
89 void blendepi16()
90 {
91 #ifdef VC_IMPL_SSE4_1
92 #define blend _mm_blend_epi16
93 #else
94 #define blend Vc::SSE::mm_blend_epi16
95 #endif
96  __m128i a = _mm_set_epi16(17, 16, 15, 14, 13, 12, 11, 10);
97  __m128i b = _mm_set_epi16(27, 26, 25, 24, 23, 22, 21, 20);
98 
99 #define CALL_2(_i, code) { enum { i = _i }; code } { enum { i = _i + 1 }; code }
100 #define CALL_4(_i, code) CALL_2(_i, code) CALL_2(_i + 2, code)
101 #define CALL_8(_i, code) CALL_4(_i, code) CALL_4(_i + 4, code)
102 #define CALL_16(_i, code) CALL_8(_i, code) CALL_8(_i + 8, code)
103 #define CALL_32(_i, code) CALL_16(_i, code) CALL_16(_i + 16, code)
104 #define CALL_64(_i, code) CALL_32(_i, code) CALL_32(_i + 32, code)
105 #define CALL_128(_i, code) CALL_64(_i, code) CALL_64(_i + 64, code)
106 #define CALL_256(code) CALL_128(0, code) CALL_128(128, code)
107 #define CALL_100(code) CALL_64(0, code) CALL_32(64, code) CALL_4(96, code)
108 
109  CALL_256(
110  short r[8];
111  for (int j = 0; j < 8; ++j) {
112  r[j] = j + ((((i >> j) & 1) == 0) ? 10 : 20);
113  }
114  __m128i reference = _mm_set_epi16(r[7], r[6], r[5], r[4], r[3], r[2], r[1], r[0]);
115  COMPARE_NOEQ(blend(a, b, i), reference);
116  )
117 #undef blend
118 }
119 
120 int main()
121 {
122  runTest(blendpd);
123  runTest(blendps);
125 }
void blendepi16()
Definition: sse_blend.cpp:89
bool unittest_compareHelper< __m128i, __m128i >(const __m128i &a, const __m128i &b)
Definition: sse_blend.cpp:40
TArc * a
Definition: textangle.C:12
STL namespace.
static const double x2[5]
Double_t x[n]
Definition: legend1.C:17
#define COMPARE(a, b)
Definition: unittest.h:509
static const double x4[22]
#define CALL_256(code)
char * out
Definition: TBase64.cxx:29
ROOT::R::TRInterface & r
Definition: Object.C:4
SVector< double, 2 > v
Definition: Dict.h:5
int main()
Definition: sse_blend.cpp:120
TMarker * m
Definition: textangle.C:8
#define blend
static const double x1[5]
void blendpd()
Definition: sse_blend.cpp:45
ROOT::R::TRInterface & operator<<(ROOT::R::TRInterface &r, TString code)
Definition: TRInterface.h:327
#define COMPARE_NOEQ(a, b)
Definition: unittest.h:512
void blendps()
Definition: sse_blend.cpp:61
#define runTest(name)
Definition: unittest.h:42
static const double x3[11]