ROOT  6.06/09
Reference Guide
math.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef VC_SSE_MATH_H
21 #define VC_SSE_MATH_H
22 
23 #include "const.h"
24 #include "macros.h"
25 
26 namespace ROOT {
27 namespace Vc
28 {
29 namespace SSE
30 {
31  /**
32  * splits \p v into exponent and mantissa, the sign is kept with the mantissa
33  *
34  * The return value will be in the range [0.5, 1.0[
35  * The \p e value will be an integer defining the power-of-two exponent
36  */
37  inline double_v frexp(const double_v &v, int_v *e) {
38  const __m128i exponentBits = Const<double>::exponentMask().dataI();
39  const __m128i exponentPart = _mm_and_si128(_mm_castpd_si128(v.data()), exponentBits);
40  *e = _mm_sub_epi32(_mm_srli_epi64(exponentPart, 52), _mm_set1_epi32(0x3fe));
41  const __m128d exponentMaximized = _mm_or_pd(v.data(), _mm_castsi128_pd(exponentBits));
42  double_v ret = _mm_and_pd(exponentMaximized, _mm_load_pd(reinterpret_cast<const double *>(&c_general::frexpMask[0])));
43  double_m zeroMask = v == double_v::Zero();
44  ret(isnan(v) || !isfinite(v) || zeroMask) = v;
45  e->setZero(zeroMask.data());
46  return ret;
47  }
48  inline float_v frexp(const float_v &v, int_v *e) {
49  const __m128i exponentBits = Const<float>::exponentMask().dataI();
50  const __m128i exponentPart = _mm_and_si128(_mm_castps_si128(v.data()), exponentBits);
51  *e = _mm_sub_epi32(_mm_srli_epi32(exponentPart, 23), _mm_set1_epi32(0x7e));
52  const __m128 exponentMaximized = _mm_or_ps(v.data(), _mm_castsi128_ps(exponentBits));
53  float_v ret = _mm_and_ps(exponentMaximized, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)));
54  ret(isnan(v) || !isfinite(v) || v == float_v::Zero()) = v;
55  e->setZero(v == float_v::Zero());
56  return ret;
57  }
58  inline sfloat_v frexp(const sfloat_v &v, short_v *e) {
59  const __m128i exponentBits = Const<float>::exponentMask().dataI();
60  const __m128i exponentPart0 = _mm_and_si128(_mm_castps_si128(v.data()[0]), exponentBits);
61  const __m128i exponentPart1 = _mm_and_si128(_mm_castps_si128(v.data()[1]), exponentBits);
62  *e = _mm_sub_epi16(_mm_packs_epi32(_mm_srli_epi32(exponentPart0, 23), _mm_srli_epi32(exponentPart1, 23)),
63  _mm_set1_epi16(0x7e));
64  const __m128 exponentMaximized0 = _mm_or_ps(v.data()[0], _mm_castsi128_ps(exponentBits));
65  const __m128 exponentMaximized1 = _mm_or_ps(v.data()[1], _mm_castsi128_ps(exponentBits));
66  sfloat_v ret = M256::create(
67  _mm_and_ps(exponentMaximized0, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu))),
68  _mm_and_ps(exponentMaximized1, _mm_castsi128_ps(_mm_set1_epi32(0xbf7fffffu)))
69  );
70  sfloat_m zeroMask = v == sfloat_v::Zero();
71  ret(isnan(v) || !isfinite(v) || zeroMask) = v;
72  e->setZero(static_cast<short_m>(zeroMask));
73  return ret;
74  }
75 
76  /* -> x * 2^e
77  * x == NaN -> NaN
78  * x == (-)inf -> (-)inf
79  */
81  int_v e = _e;
82  e.setZero((v == double_v::Zero()).dataI());
83  const __m128i exponentBits = _mm_slli_epi64(e.data(), 52);
84  return _mm_castsi128_pd(_mm_add_epi64(_mm_castpd_si128(v.data()), exponentBits));
85  }
87  int_v e = _e;
88  e.setZero(static_cast<int_m>(v == float_v::Zero()));
89  return (v.reinterpretCast<int_v>() + (e << 23)).reinterpretCast<float_v>();
90  }
92  short_v e = _e;
93  e.setZero(static_cast<short_m>(v == sfloat_v::Zero()));
94  e <<= (23 - 16);
95  const __m128i exponentBits0 = _mm_unpacklo_epi16(_mm_setzero_si128(), e.data());
96  const __m128i exponentBits1 = _mm_unpackhi_epi16(_mm_setzero_si128(), e.data());
97  return M256::create(_mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[0]), exponentBits0)),
98  _mm_castsi128_ps(_mm_add_epi32(_mm_castps_si128(v.data()[1]), exponentBits1)));
99  }
100 
101 #ifdef VC_IMPL_SSE4_1
102  inline double_v trunc(double_v::AsArg v) { return _mm_round_pd(v.data(), 0x3); }
103  inline float_v trunc(float_v::AsArg v) { return _mm_round_ps(v.data(), 0x3); }
104  inline sfloat_v trunc(sfloat_v::AsArg v) { return M256::create(_mm_round_ps(v.data()[0], 0x3),
105  _mm_round_ps(v.data()[1], 0x3)); }
106 
107  inline double_v floor(double_v::AsArg v) { return _mm_floor_pd(v.data()); }
108  inline float_v floor(float_v::AsArg v) { return _mm_floor_ps(v.data()); }
109  inline sfloat_v floor(sfloat_v::AsArg v) { return M256::create(_mm_floor_ps(v.data()[0]),
110  _mm_floor_ps(v.data()[1])); }
111 
112  inline double_v ceil(double_v::AsArg v) { return _mm_ceil_pd(v.data()); }
113  inline float_v ceil(float_v::AsArg v) { return _mm_ceil_ps(v.data()); }
114  inline sfloat_v ceil(sfloat_v::AsArg v) { return M256::create(_mm_ceil_ps(v.data()[0]),
115  _mm_ceil_ps(v.data()[1])); }
116 #else
117  static inline void floor_shift(float_v &v, float_v::AsArg e)
118  {
120  x <<= 23;
121  x >>= static_cast<int_v>(e);
122  v &= x.reinterpretCast<float_v>();
123  }
124 
125  static inline void floor_shift(sfloat_v &v, sfloat_v::AsArg e)
126  {
128  x <<= 23;
129  int_v y = x;
130  x >>= _mm_cvttps_epi32(e.data()[0]);
131  y >>= _mm_cvttps_epi32(e.data()[1]);
132  v.data()[0] = _mm_and_ps(v.data()[0], _mm_castsi128_ps(x.data()));
133  v.data()[1] = _mm_and_ps(v.data()[1], _mm_castsi128_ps(y.data()));
134  }
135 
136  static inline void floor_shift(double_v &v, double_v::AsArg e)
137  {
138  const long long initialMask = 0xfff0000000000000ull;
139  const uint_v shifts = static_cast<uint_v>(e);
140  union d_ll {
141  long long ll;
142  double d;
143  };
144  d_ll mask0 = { initialMask >> shifts[0] };
145  d_ll mask1 = { initialMask >> shifts[1] };
146  v &= double_v(_mm_setr_pd(mask0.d, mask1.d));
147  }
148 
149  template<typename T>
151  typedef Vector<T> V;
152  typedef typename V::Mask M;
153 
154  V v = _v;
155  V e = abs(v).exponent();
156  const M negativeExponent = e < 0;
157  e.setZero(negativeExponent);
158  //const M negativeInput = v < V::Zero();
159 
160  floor_shift(v, e);
161 
162  v.setZero(negativeExponent);
163  //v(negativeInput && _v != v) -= V::One();
164  return v;
165  }
166 
167  template<typename T>
169  typedef Vector<T> V;
170  typedef typename V::Mask M;
171 
172  V v = _v;
173  V e = abs(v).exponent();
174  const M negativeExponent = e < 0;
175  e.setZero(negativeExponent);
176  const M negativeInput = v < V::Zero();
177 
178  floor_shift(v, e);
179 
180  v.setZero(negativeExponent);
181  v(negativeInput && _v != v) -= V::One();
182  return v;
183  }
184 
185  template<typename T>
187  typedef Vector<T> V;
188  typedef typename V::Mask M;
189 
190  V v = _v;
191  V e = abs(v).exponent();
192  const M negativeExponent = e < 0;
193  e.setZero(negativeExponent);
194  const M positiveInput = v > V::Zero();
195 
196  floor_shift(v, e);
197 
198  v.setZero(negativeExponent);
199  v(positiveInput && _v != v) += V::One();
200  return v;
201  }
202 #endif
203 } // namespace SSE
204 } // namespace Vc
205 } // namespace ROOT
206 
207 #include "undomacros.h"
208 
209 #define VC__USE_NAMESPACE SSE
210 #include "../common/trigonometric.h"
211 #define VC__USE_NAMESPACE SSE
212 #include "../common/logarithm.h"
213 #define VC__USE_NAMESPACE SSE
214 #include "../common/exponential.h"
215 #undef VC__USE_NAMESPACE
216 
217 #endif // VC_SSE_MATH_H
static void floor_shift(float_v &v, float_v::AsArg e)
Definition: math.h:117
Vector< T > floor(VC_ALIGNED_PARAMETER(Vector< T >) _v)
Definition: math.h:168
double_v ldexp(double_v::AsArg v, int_v::AsArg _e)
Definition: math.h:80
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R
static Vc_ALWAYS_INLINE Vc_PURE Vector< T >::Mask isnan(const Vector< T > &x)
Definition: vector.h:529
Vc_ALWAYS_INLINE Vc_PURE _M128 data() const
Definition: mask.h:134
static Vc_INTRINSIC Vc_CONST M256 create(_M128 a, _M128 b)
Definition: types.h:70
Vector< T > ceil(VC_ALIGNED_PARAMETER(Vector< T >) _v)
Definition: math.h:186
Double_t x[n]
Definition: legend1.C:17
static Vc_INTRINSIC_L Vector Zero() Vc_INTRINSIC_R
static Vc_ALWAYS_INLINE Vc_PURE Vector< T > abs(const Vector< T > &x)
Definition: vector.h:524
static Vc_INTRINSIC __m128i Vc_CONST _mm_setallone_si128()
Definition: intrinsics.h:82
double_v frexp(const double_v &v, int_v *e)
splits v into exponent and mantissa, the sign is kept with the mantissa
Definition: math.h:37
Vc_ALWAYS_INLINE Vc_PURE V2 reinterpretCast() const
Definition: vector.h:373
#define VC_ALIGNED_PARAMETER(_Type)
Definition: macros.h:368
VECTOR_NAMESPACE::int_v int_v
Definition: vector.h:86
SVector< double, 2 > v
Definition: Dict.h:5
Vector< double > double_v
Definition: vector.h:480
static Vc_ALWAYS_INLINE Vc_CONST M exponentMask()
Definition: const.h:66
Vector< float8 > sfloat_v
Definition: vector.h:482
Double_t y[n]
Definition: legend1.C:17
VECTOR_NAMESPACE::float_v float_v
Definition: vector.h:84
#define SSE
Definition: global.h:84
Vector< T > trunc(VC_ALIGNED_PARAMETER(Vector< T >) _v)
Definition: math.h:150
Vc_ALWAYS_INLINE Vc_PURE _M128I dataI() const
Definition: mask.h:135
const Vector< T > AsArg
Definition: vector.h:164
static Vc_ALWAYS_INLINE Vc_PURE Vector< T >::Mask isfinite(const Vector< T > &x)
Definition: vector.h:528
Definition: casts.h:28
Vector< short > short_v
Definition: vector.h:485
Vector< float > float_v
Definition: vector.h:481
Vc_ALWAYS_INLINE Vc_PURE VectorType & data()
Definition: vector.h:386