Loading [MathJax]/extensions/tex2jax.js
ROOT  6.06/09
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
trigonometric.cpp
Go to the documentation of this file.
1 /* This file is part of the Vc library. {{{
2 
3  Copyright (C) 2012 Matthias Kretz <kretz@kde.org>
4 
5  Vc is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as
7  published by the Free Software Foundation, either version 3 of
8  the License, or (at your option) any later version.
9 
10  Vc is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with Vc. If not, see <http://www.gnu.org/licenses/>.
17 
18 }}}*/
19 
20 #include <Vc/Vc>
21 #if defined(VC_IMPL_SSE) || defined(VC_IMPL_AVX)
22 #include <Vc/common/macros.h>
23 
24 namespace ROOT {
25 namespace Vc
26 {
27 namespace
28 {
29  using Vc::Vector;
30  using Vc::float_v;
31  using Vc::double_v;
32  using Vc::sfloat_v;
33 
34  template<typename T> static Vc_ALWAYS_INLINE Vector<T> cosSeries(const Vector<T> &x)
35  {
36  typedef Const<T> C;
37  const Vector<T> x2 = x * x;
38  return ((C::cosCoeff(2) * x2 +
39  C::cosCoeff(1)) * x2 +
40  C::cosCoeff(0)) * (x2 * x2)
41  - C::_1_2() * x2 + Vector<T>::One();
42  }
43  static Vc_ALWAYS_INLINE double_v cosSeries(const double_v &x)
44  {
45  typedef Const<double> C;
46  const double_v x2 = x * x;
47  return (((((C::cosCoeff(5) * x2 +
48  C::cosCoeff(4)) * x2 +
49  C::cosCoeff(3)) * x2 +
50  C::cosCoeff(2)) * x2 +
51  C::cosCoeff(1)) * x2 +
52  C::cosCoeff(0)) * (x2 * x2)
53  - C::_1_2() * x2 + double_v::One();
54  }
55  template<typename T> static Vc_ALWAYS_INLINE Vector<T> sinSeries(const Vector<T> &x)
56  {
57  typedef Const<T> C;
58  const Vector<T> x2 = x * x;
59  return ((C::sinCoeff(2) * x2 +
60  C::sinCoeff(1)) * x2 +
61  C::sinCoeff(0)) * (x2 * x)
62  + x;
63  }
64  static Vc_ALWAYS_INLINE double_v sinSeries(const double_v &x)
65  {
66  typedef Const<double> C;
67  const double_v x2 = x * x;
68  return (((((C::sinCoeff(5) * x2 +
69  C::sinCoeff(4)) * x2 +
70  C::sinCoeff(3)) * x2 +
71  C::sinCoeff(2)) * x2 +
72  C::sinCoeff(1)) * x2 +
73  C::sinCoeff(0)) * (x2 * x)
74  + x;
75  }
76  template<typename V> struct signed_integer { typedef int_v type; };
77  template<> struct signed_integer<sfloat_v> { typedef short_v type; };
78 
79  template<typename _T, typename IV> static Vc_ALWAYS_INLINE Vector<_T> foldInput(const Vector<_T> &_x, IV &quadrant)
80  {
81  typedef Vector<_T> V;
82  typedef Const<_T> C;
83 
84  const V x = abs(_x);
85 #if defined(VC_IMPL_FMA4) || defined(VC_IMPL_FMA)
86  quadrant = static_cast<IV>(x * C::_4_pi() + V::One()); // prefer the fma here
87  quadrant &= ~IV::One();
88 #else
89  quadrant = static_cast<IV>(x * C::_4_pi());
90  quadrant += quadrant & IV::One();
91 #endif
92  const V y = static_cast<V>(quadrant);
93  quadrant &= 7;
94 
95  return ((x - y * C::_pi_4_hi()) - y * C::_pi_4_rem1()) - y * C::_pi_4_rem2();
96  }
97  static Vc_ALWAYS_INLINE double_v foldInput(const double_v &_x, int_v &quadrant)
98  {
99  typedef double_v V;
100  typedef Const<double> C;
101 
102  const V x = abs(_x);
103  V y = trunc(x / C::_pi_4()); // * C::_4_pi() would work, but is >twice as imprecise
104  V z = y - trunc(y * C::_1_16()) * C::_16(); // y modulo 16
105  quadrant = static_cast<int_v>(z);
106  int_m mask = (quadrant & int_v::One()) != int_v::Zero();
107  ++quadrant(mask);
108  y(static_cast<double_m>(mask)) += V::One();
109  quadrant &= 7;
110 
111  // since y is an integer we don't need to split y into low and high parts until the integer
112  // requires more bits than there are zero bits at the end of _pi_4_hi (30 bits -> 1e9)
113  return ((x - y * C::_pi_4_hi()) - y * C::_pi_4_rem1()) - y * C::_pi_4_rem2();
114  }
115 } // anonymous namespace
116 
117 /*
118  * algorithm for sine and cosine:
119  *
120  * The result can be calculated with sine or cosine depending on the π/4 section the input is
121  * in.
122  * sine ≈ x + x³
123  * cosine ≈ 1 - x²
124  *
125  * sine:
126  * Map -x to x and invert the output
127  * Extend precision of x - n * π/4 by calculating
128  * ((x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
129  *
130  * Calculate Taylor series with tuned coefficients.
131  * Fix sign.
132  */
133 template<> template<typename _T> Vector<_T> Trigonometric<Vc::Internal::TrigonometricImplementation>::sin(const Vector<_T> &_x)
134 {
135  typedef Vector<_T> V;
136  typedef typename V::Mask M;
137  typedef typename signed_integer<V>::type IV;
138 
139  IV quadrant;
140  const V z = foldInput(_x, quadrant);
141  const M sign = (_x < V::Zero()) ^ static_cast<M>(quadrant > 3);
142  quadrant(quadrant > 3) -= 4;
143 
144  V y = sinSeries(z);
145  y(quadrant == IV::One() || quadrant == 2) = cosSeries(z);
146  y(sign) = -y;
147  return y;
148 }
149 
151 {
152  typedef double_v V;
153  typedef V::Mask M;
154 
155  int_v quadrant;
156  M sign = _x < V::Zero();
157  const V x = foldInput(_x, quadrant);
158  sign ^= static_cast<M>(quadrant > 3);
159  quadrant(quadrant > 3) -= 4;
160 
161  V y = sinSeries(x);
162  y(static_cast<M>(quadrant == int_v::One() || quadrant == 2)) = cosSeries(x);
163  y(sign) = -y;
164  return y;
165 }
166 template<> template<typename _T> Vector<_T> Trigonometric<Vc::Internal::TrigonometricImplementation>::cos(const Vector<_T> &_x) {
167  typedef Vector<_T> V;
168  typedef typename V::Mask M;
169  typedef typename signed_integer<V>::type IV;
170 
171  IV quadrant;
172  const V x = foldInput(_x, quadrant);
173  M sign = quadrant > 3;
174  quadrant(quadrant > 3) -= 4;
175  sign ^= quadrant > IV::One();
176 
177  V y = cosSeries(x);
178  y(quadrant == IV::One() || quadrant == 2) = sinSeries(x);
179  y(sign) = -y;
180  return y;
181 }
183 {
184  typedef double_v V;
185  typedef V::Mask M;
186 
187  int_v quadrant;
188  const V x = foldInput(_x, quadrant);
189  M sign = static_cast<M>(quadrant > 3);
190  quadrant(quadrant > 3) -= 4;
191  sign ^= static_cast<M>(quadrant > int_v::One());
192 
193  V y = cosSeries(x);
194  y(static_cast<M>(quadrant == int_v::One() || quadrant == 2)) = sinSeries(x);
195  y(sign) = -y;
196  return y;
197 }
198 template<> template<typename _T> void Trigonometric<Vc::Internal::TrigonometricImplementation>::sincos(const Vector<_T> &_x, Vector<_T> *_sin, Vector<_T> *_cos) {
199  typedef Vector<_T> V;
200  typedef typename V::Mask M;
201  typedef typename signed_integer<V>::type IV;
202 
203  IV quadrant;
204  const V x = foldInput(_x, quadrant);
205  M sign = static_cast<M>(quadrant > 3);
206  quadrant(quadrant > 3) -= 4;
207 
208  const V cos_s = cosSeries(x);
209  const V sin_s = sinSeries(x);
210 
211  V c = cos_s;
212  c(static_cast<M>(quadrant == IV::One() || quadrant == 2)) = sin_s;
213  c(sign ^ static_cast<M>(quadrant > IV::One())) = -c;
214  *_cos = c;
215 
216  V s = sin_s;
217  s(static_cast<M>(quadrant == IV::One() || quadrant == 2)) = cos_s;
218  s(sign ^ static_cast<M>(_x < V::Zero())) = -s;
219  *_sin = s;
220 }
221 template<> template<> void Trigonometric<Vc::Internal::TrigonometricImplementation>::sincos(const double_v &_x, double_v *_sin, double_v *_cos) {
222  typedef double_v V;
223  typedef V::Mask M;
224 
225  int_v quadrant;
226  const V x = foldInput(_x, quadrant);
227  M sign = static_cast<M>(quadrant > 3);
228  quadrant(quadrant > 3) -= 4;
229 
230  const V cos_s = cosSeries(x);
231  const V sin_s = sinSeries(x);
232 
233  V c = cos_s;
234  c(static_cast<M>(quadrant == int_v::One() || quadrant == 2)) = sin_s;
235  c(sign ^ static_cast<M>(quadrant > int_v::One())) = -c;
236  *_cos = c;
237 
238  V s = sin_s;
239  s(static_cast<M>(quadrant == int_v::One() || quadrant == 2)) = cos_s;
240  s(sign ^ static_cast<M>(_x < V::Zero())) = -s;
241  *_sin = s;
242 }
243 template<> template<typename _T> Vector<_T> Trigonometric<Vc::Internal::TrigonometricImplementation>::asin (const Vector<_T> &_x) {
244  typedef Const<_T> C;
245  typedef Vector<_T> V;
246  typedef typename V::Mask M;
247 
248  const M &negative = _x < V::Zero();
249 
250  const V &a = abs(_x);
251  const M outOfRange = a > V::One();
252  const M &small = a < C::smallAsinInput();
253  const M &gt_0_5 = a > C::_1_2();
254  V x = a;
255  V z = a * a;
256  z(gt_0_5) = (V::One() - a) * C::_1_2();
257  x(gt_0_5) = sqrt(z);
258  z = ((((C::asinCoeff0(0) * z
259  + C::asinCoeff0(1)) * z
260  + C::asinCoeff0(2)) * z
261  + C::asinCoeff0(3)) * z
262  + C::asinCoeff0(4)) * z * x
263  + x;
264  z(gt_0_5) = C::_pi_2() - (z + z);
265  z(small) = a;
266  z(negative) = -z;
267  z.setQnan(outOfRange);
268 
269  return z;
270 }
272  typedef Const<double> C;
273  typedef double_v V;
274  typedef V::Mask M;
275 
276  const M negative = _x < V::Zero();
277 
278  const V a = abs(_x);
279  const M outOfRange = a > V::One();
280  const M small = a < C::smallAsinInput();
281  const M large = a > C::largeAsinInput();
282 
283  V zz = V::One() - a;
284  const V r = (((C::asinCoeff0(0) * zz + C::asinCoeff0(1)) * zz + C::asinCoeff0(2)) * zz +
285  C::asinCoeff0(3)) * zz + C::asinCoeff0(4);
286  const V s = (((zz + C::asinCoeff1(0)) * zz + C::asinCoeff1(1)) * zz +
287  C::asinCoeff1(2)) * zz + C::asinCoeff1(3);
288  V sqrtzz = sqrt(zz + zz);
289  V z = C::_pi_4() - sqrtzz;
290  z -= sqrtzz * (zz * r / s) - C::_pi_2_rem();
291  z += C::_pi_4();
292 
293  V a2 = a * a;
294  const V p = ((((C::asinCoeff2(0) * a2 + C::asinCoeff2(1)) * a2 + C::asinCoeff2(2)) * a2 +
295  C::asinCoeff2(3)) * a2 + C::asinCoeff2(4)) * a2 + C::asinCoeff2(5);
296  const V q = ((((a2 + C::asinCoeff3(0)) * a2 + C::asinCoeff3(1)) * a2 +
297  C::asinCoeff3(2)) * a2 + C::asinCoeff3(3)) * a2 + C::asinCoeff3(4);
298  z(!large) = a * (a2 * p / q) + a;
299 
300  z(negative) = -z;
301  z(small) = _x;
302  z.setQnan(outOfRange);
303 
304  return z;
305 }
306 template<> template<typename _T> Vector<_T> Trigonometric<Vc::Internal::TrigonometricImplementation>::atan (const Vector<_T> &_x) {
307  typedef Const<_T> C;
308  typedef Vector<_T> V;
309  typedef typename V::Mask M;
310  V x = abs(_x);
311  const M &gt_tan_3pi_8 = x > C::atanThrsHi();
312  const M &gt_tan_pi_8 = x > C::atanThrsLo() && !gt_tan_3pi_8;
313  V y = V::Zero();
314  y(gt_tan_3pi_8) = C::_pi_2();
315  y(gt_tan_pi_8) = C::_pi_4();
316  x(gt_tan_3pi_8) = -V::One() / x;
317  x(gt_tan_pi_8) = (x - V::One()) / (x + V::One());
318  const V &x2 = x * x;
319  y += (((C::atanP(0) * x2
320  - C::atanP(1)) * x2
321  + C::atanP(2)) * x2
322  - C::atanP(3)) * x2 * x
323  + x;
324  y(_x < V::Zero()) = -y;
325  y.setQnan(isnan(_x));
326  return y;
327 }
329  typedef Const<double> C;
330  typedef double_v V;
331  typedef V::Mask M;
332 
333  M sign = _x < V::Zero();
334  V x = abs(_x);
335  M finite = isfinite(_x);
336  V ret = C::_pi_2();
337  V y = V::Zero();
338  const M large = x > C::atanThrsHi();
339  const M gt_06 = x > C::atanThrsLo();
340  V tmp = (x - V::One()) / (x + V::One());
341  tmp(large) = -V::One() / x;
342  x(gt_06) = tmp;
343  y(gt_06) = C::_pi_4();
344  y(large) = C::_pi_2();
345  V z = x * x;
346  const V p = (((C::atanP(0) * z + C::atanP(1)) * z + C::atanP(2)) * z + C::atanP(3)) * z + C::atanP(4);
347  const V q = ((((z + C::atanQ(0)) * z + C::atanQ(1)) * z + C::atanQ(2)) * z + C::atanQ(3)) * z + C::atanQ(4);
348  z = z * p / q;
349  z = x * z + x;
350  V morebits = C::_pi_2_rem();
351  morebits(!large) *= C::_1_2();
352  z(gt_06) += morebits;
353  ret(finite) = y + z;
354  ret(sign) = -ret;
355  ret.setQnan(isnan(_x));
356  return ret;
357 }
358 template<> template<typename _T> Vector<_T> Trigonometric<Vc::Internal::TrigonometricImplementation>::atan2(const Vector<_T> &y, const Vector<_T> &x) {
359  typedef Const<_T> C;
360  typedef Vector<_T> V;
361  typedef typename V::Mask M;
362 
363  const M xZero = x == V::Zero();
364  const M yZero = y == V::Zero();
365  const M xMinusZero = xZero && x.isNegative();
366  const M yNeg = y < V::Zero();
367  const M xInf = !isfinite(x);
368  const M yInf = !isfinite(y);
369 
370  V a = C::_pi().copySign(y);
371  a.setZero(x >= V::Zero());
372 
373  // setting x to any finite value will have atan(y/x) return sign(y/x)*pi/2, just in case x is inf
374  V _x = x;
375  _x(yInf) = V::One().copySign(x);
376 
377  a += atan(y / _x);
378 
379  // if x is +0 and y is +/-0 the result is +0
380  a.setZero(xZero && yZero);
381 
382  // for x = -0 we add/subtract pi to get the correct result
383  a(xMinusZero) += C::_pi().copySign(y);
384 
385  // atan2(-Y, +/-0) = -pi/2
386  a(xZero && yNeg) = -C::_pi_2();
387 
388  // if both inputs are inf the output is +/- (3)pi/4
389  a(xInf && yInf) += C::_pi_4().copySign(x ^ ~y);
390 
391  // correct the sign of y if the result is 0
392  a(a == V::Zero()) = a.copySign(y);
393 
394  // any NaN input will lead to NaN output
395  a.setQnan(isnan(y) || isnan(x));
396 
397  return a;
398 }
400  typedef Const<double> C;
401  typedef double_v V;
402  typedef V::Mask M;
403 
404  const M xZero = x == V::Zero();
405  const M yZero = y == V::Zero();
406  const M xMinusZero = xZero && x.isNegative();
407  const M yNeg = y < V::Zero();
408  const M xInf = !isfinite(x);
409  const M yInf = !isfinite(y);
410 
411  V a = V(C::_pi()).copySign(y);
412  a.setZero(x >= V::Zero());
413 
414  // setting x to any finite value will have atan(y/x) return sign(y/x)*pi/2, just in case x is inf
415  V _x = x;
416  _x(yInf) = V::One().copySign(x);
417 
418  a += atan(y / _x);
419 
420  // if x is +0 and y is +/-0 the result is +0
421  a.setZero(xZero && yZero);
422 
423  // for x = -0 we add/subtract pi to get the correct result
424  a(xMinusZero) += C::_pi().copySign(y);
425 
426  // atan2(-Y, +/-0) = -pi/2
427  a(xZero && yNeg) = -C::_pi_2();
428 
429  // if both inputs are inf the output is +/- (3)pi/4
430  a(xInf && yInf) += C::_pi_4().copySign(x ^ ~y);
431 
432  // correct the sign of y if the result is 0
433  a(a == V::Zero()) = a.copySign(y);
434 
435  // any NaN input will lead to NaN output
436  a.setQnan(isnan(y) || isnan(x));
437 
438  return a;
439 }
440 } // namespace Vc
441 } // namespace ROOT
442 
443 #include <Vc/common/undomacros.h>
444 
445 // instantiate the non-specialized template functions above
448 
451 
454 
457 
460 
463 #endif
VECTOR_NAMESPACE::sfloat_v sfloat_v
Definition: vector.h:82
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
static Vc_ALWAYS_INLINE float_v trunc(float_v::AsArg v)
Definition: math.h:95
TArc * a
Definition: textangle.C:12
static Vector< T > cos(const Vector< T > &_x)
static Vc_ALWAYS_INLINE Vector< T >::Mask isfinite(const Vector< T > &x)
Definition: vector.h:454
VECTOR_NAMESPACE::short_v short_v
Definition: vector.h:90
double sqrt(double)
static const double x2[5]
Double_t x[n]
Definition: legend1.C:17
static Vc_ALWAYS_INLINE Vector< T > abs(const Vector< T > &x)
Definition: vector.h:450
static Vector< T > asin(const Vector< T > &_x)
static Vector< T > atan(const Vector< T > &_x)
int isnan(double)
VECTOR_NAMESPACE::int_v int_v
Definition: vector.h:86
static double C[]
VECTOR_NAMESPACE::double_v double_v
Definition: vector.h:80
static void sincos(const Vector< T > &_x, Vector< T > *_sin, Vector< T > *_cos)
int_v::Mask int_m
Definition: vector.h:87
static Vector< T > atan2(const Vector< T > &y, const Vector< T > &x)
#define Vc_ALWAYS_INLINE
Definition: macros.h:130
int type
Definition: TGX11.cxx:120
Double_t y[n]
Definition: legend1.C:17
double atan(double)
VECTOR_NAMESPACE::float_v float_v
Definition: vector.h:84
Definition: casts.h:28
float * q
Definition: THbookFile.cxx:87
static Vector< T > sin(const Vector< T > &_x)