ROOT  6.06/09
Reference Guide
math.h
Go to the documentation of this file.
1 /* This file is part of the Vc library.
2 
3  Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 */
19 
20 #ifndef VC_AVX_MATH_H
21 #define VC_AVX_MATH_H
22 
23 #include "const.h"
24 #include "limits.h"
25 #include "macros.h"
26 
27 namespace ROOT {
28 namespace Vc
29 {
30 namespace AVX
31 {
32  /**
33  * splits \p v into exponent and mantissa, the sign is kept with the mantissa
34  *
35  * The magnitude of the return value will be in the range [0.5, 1.0[ (the sign of \p v is preserved)
36  * The \p e value will be an integer defining the power-of-two exponent
37  */
39  const m256d exponentBits = Const<double>::exponentMask().dataD();
40  const m256d exponentPart = _mm256_and_pd(v.data(), exponentBits);
41  e->data() = _mm256_sub_epi32(_mm256_srli_epi64(avx_cast<m256i>(exponentPart), 52), _mm256_set1_epi32(0x3fe));
42  const m256d exponentMaximized = _mm256_or_pd(v.data(), exponentBits);
43  double_v ret = _mm256_and_pd(exponentMaximized, _mm256_broadcast_sd(reinterpret_cast<const double *>(&c_general::frexpMask)));
44  double_m zeroMask = v == double_v::Zero();
45  ret(isnan(v) || !isfinite(v) || zeroMask) = v;
46  e->setZero(zeroMask.data());
47  return ret;
48  }
49  inline float_v frexp(float_v::AsArg v, int_v *e) {
50  const m256 exponentBits = Const<float>::exponentMask().data();
51  const m256 exponentPart = _mm256_and_ps(v.data(), exponentBits);
52  e->data() = _mm256_sub_epi32(_mm256_srli_epi32(avx_cast<m256i>(exponentPart), 23), _mm256_set1_epi32(0x7e));
53  const m256 exponentMaximized = _mm256_or_ps(v.data(), exponentBits);
54  float_v ret = _mm256_and_ps(exponentMaximized, avx_cast<m256>(_mm256_set1_epi32(0xbf7fffffu)));
55  ret(isnan(v) || !isfinite(v) || v == float_v::Zero()) = v;
56  e->setZero(v == float_v::Zero());
57  return ret;
58  }
60  const m256 exponentBits = Const<float>::exponentMask().data();
61  const m256 exponentPart = _mm256_and_ps(v.data(), exponentBits);
62  e->data() = _mm_sub_epi16(_mm_packs_epi32(_mm_srli_epi32(avx_cast<m128i>(exponentPart), 23),
63  _mm_srli_epi32(avx_cast<m128i>(hi128(exponentPart)), 23)), _mm_set1_epi16(0x7e));
64  const m256 exponentMaximized = _mm256_or_ps(v.data(), exponentBits);
65  sfloat_v ret = _mm256_and_ps(exponentMaximized, avx_cast<m256>(_mm256_set1_epi32(0xbf7fffffu)));
66  ret(isnan(v) || !isfinite(v) || v == sfloat_v::Zero()) = v;
67  e->setZero(v == sfloat_v::Zero());
68  return ret;
69  }
70 
71  /* -> x * 2^e
72  * x == NaN -> NaN
73  * x == (-)inf -> (-)inf
74  */
76  int_v e = _e;
77  e.setZero((v == double_v::Zero()).dataI());
78  const m256i exponentBits = _mm256_slli_epi64(e.data(), 52);
79  return avx_cast<m256d>(_mm256_add_epi64(avx_cast<m256i>(v.data()), exponentBits));
80  }
82  int_v e = _e;
83  e.setZero(static_cast<int_m>(v == float_v::Zero()));
84  return (v.reinterpretCast<int_v>() + (e << 23)).reinterpretCast<float_v>();
85  }
87  short_v e = _e;
88  e.setZero(static_cast<short_m>(v == sfloat_v::Zero()));
89  e = e << (23 - 16);
90  const m256i exponentBits = concat(_mm_unpacklo_epi16(_mm_setzero_si128(), e.data()),
91  _mm_unpackhi_epi16(_mm_setzero_si128(), e.data()));
92  return (v.reinterpretCast<int_v>() + int_v(exponentBits)).reinterpretCast<sfloat_v>();
93  }
94 
95  static Vc_ALWAYS_INLINE float_v trunc( float_v::AsArg v) { return _mm256_round_ps(v.data(), 0x3); }
96  static Vc_ALWAYS_INLINE sfloat_v trunc(sfloat_v::AsArg v) { return _mm256_round_ps(v.data(), 0x3); }
97  static Vc_ALWAYS_INLINE double_v trunc(double_v::AsArg v) { return _mm256_round_pd(v.data(), 0x3); }
98 
99  static Vc_ALWAYS_INLINE float_v floor(float_v::AsArg v) { return _mm256_floor_ps(v.data()); }
100  static Vc_ALWAYS_INLINE sfloat_v floor(sfloat_v::AsArg v) { return _mm256_floor_ps(v.data()); }
101  static Vc_ALWAYS_INLINE double_v floor(double_v::AsArg v) { return _mm256_floor_pd(v.data()); }
102 
103  static Vc_ALWAYS_INLINE float_v ceil(float_v::AsArg v) { return _mm256_ceil_ps(v.data()); }
104  static Vc_ALWAYS_INLINE sfloat_v ceil(sfloat_v::AsArg v) { return _mm256_ceil_ps(v.data()); }
105  static Vc_ALWAYS_INLINE double_v ceil(double_v::AsArg v) { return _mm256_ceil_pd(v.data()); }
106 } // namespace AVX
107 } // namespace Vc
108 } // namespace ROOT
109 
110 #include "undomacros.h"
111 #define VC__USE_NAMESPACE AVX
112 #include "../common/trigonometric.h"
113 #define VC__USE_NAMESPACE AVX
114 #include "../common/logarithm.h"
115 #define VC__USE_NAMESPACE AVX
116 #include "../common/exponential.h"
117 #undef VC__USE_NAMESPACE
118 
119 #endif // VC_AVX_MATH_H
Vector< sfloat > sfloat_v
Definition: vector.h:418
VECTOR_NAMESPACE::sfloat_v sfloat_v
Definition: vector.h:82
double_v ldexp(double_v::AsArg v, int_v::AsArg _e)
Definition: math.h:75
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
static Vc_ALWAYS_INLINE float_v trunc(float_v::AsArg v)
Definition: math.h:95
static Vc_INTRINSIC_L T avx_cast(param128 v) Vc_INTRINSIC_R
Definition: casts.h:49
Vector< short > short_v
Definition: vector.h:421
__m256d m256d
Definition: intrinsics.h:114
Vc_INTRINSIC Vc_CONST m128 hi128(param256 v)
Definition: casts.h:118
static Vc_ALWAYS_INLINE Vector< T >::Mask isfinite(const Vector< T > &x)
Definition: vector.h:454
Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R
double_v frexp(double_v::AsArg v, int_v *e)
splits v into exponent and mantissa, the sign is kept with the mantissa
Definition: math.h:38
Vc_INTRINSIC Vc_CONST m256 concat(param128 a, param128 b)
Definition: casts.h:123
static Vc_ALWAYS_INLINE float_v floor(float_v::AsArg v)
Definition: math.h:99
static Vc_ALWAYS_INLINE float_v ceil(float_v::AsArg v)
Definition: math.h:103
static Vc_INTRINSIC_L Vc_CONST_L Vector Zero() Vc_INTRINSIC_R Vc_CONST_R
#define AVX
Definition: global.h:90
VECTOR_NAMESPACE::int_v int_v
Definition: vector.h:86
Vector< double > double_v
Definition: vector.h:416
SVector< double, 2 > v
Definition: Dict.h:5
__m256i m256i
Definition: intrinsics.h:115
static Vc_ALWAYS_INLINE Vector< T >::Mask isnan(const Vector< T > &x)
Definition: vector.h:455
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
Vector< T > AsArg
Definition: vector.h:67
static Vc_INTRINSIC m256i Vc_CONST _mm256_set1_epi32(int a)
Definition: intrinsics.h:157
VECTOR_NAMESPACE::float_v float_v
Definition: vector.h:84
Vector< int > int_v
Definition: vector.h:419
Definition: casts.h:28
static Vc_ALWAYS_INLINE Vc_CONST M exponentMask()
Definition: const.h:88
Vc_ALWAYS_INLINE VectorType & data()
Definition: vector.h:322