37 template<
typename T>
struct SincosReference
41 template<
typename T>
struct Reference
46 template<
typename T>
struct Array
50 Array() : size(0), data(0) {}
52 template<
typename T>
struct StaticDeleter
55 StaticDeleter(T *p) : ptr(p) {}
56 ~StaticDeleter() {
delete[] ptr; }
62 template<
typename T, Function F>
static inline const char *
filename();
81 static Array<SincosReference<T> > data;
83 FILE *
file = fopen(filename<T, Sincos>(),
"rb");
85 fseek(file, 0, SEEK_END);
86 const size_t size = ftell(file) /
sizeof(SincosReference<T>);
88 data.data =
new SincosReference<T>[size];
89 static StaticDeleter<SincosReference<T> > _cleanup(data.data);
90 data.size = fread(data.data,
sizeof(SincosReference<T>), size, file);
93 FAIL() <<
"the reference data " << filename<T, Sincos>() <<
" does not exist in the current working directory.";
99 template<
typename T, Function Fun>
102 static Array<Reference<T> > data;
103 if (data.data == 0) {
104 FILE *
file = fopen(filename<T, Fun>(),
"rb");
106 fseek(file, 0, SEEK_END);
107 const size_t size = ftell(file) /
sizeof(Reference<T>);
109 data.data =
new Reference<T>[size];
110 static StaticDeleter<Reference<T> > _cleanup(data.data);
111 data.size = fread(data.data,
sizeof(Reference<T>), size, file);
114 FAIL() <<
"the reference data " << filename<T, Fun>() <<
" does not exist in the current working directory.";
120 template<
typename T>
struct Denormals {
static T *data; };
121 template<>
float *Denormals<float >::data = 0;
122 template<>
double *Denormals<double>::data = 0;
130 for (
size_t i = 0; i <
V::Size; ++i) {
138 for (
int i = 0; i < 0x7fff; ++i) {
148 #if __cplusplus >= 201103
150 #elif defined(_ISOC99_SOURCE)
159 #if __cplusplus >= 201103
161 #elif defined(_ISOC99_SOURCE)
170 typedef typename V::EntryType
T;
171 typedef typename V::IndexType
I;
172 for (
size_t i = 0; i < 100000 /
V::Size; ++i) {
184 typedef typename V::EntryType
T;
185 typedef typename V::IndexType
I;
186 for (
size_t i = 0; i < 100000 /
V::Size; ++i) {
198 typedef typename V::EntryType
T;
199 typedef typename V::IndexType
I;
200 for (
size_t i = 0; i < 100000 /
V::Size; ++i) {
214 typedef typename V::EntryType
T;
215 for (
size_t i = 0; i < 100000 /
V::Size; ++i) {
226 typedef typename V::EntryType
T;
227 Array<Reference<T> > reference = referenceData<T, Log>();
228 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
230 for (
int j = 0; j <
V::Size; ++j) {
231 x[j] = reference.data[i + j].x;
232 ref[j] = reference.data[i + j].ref;
239 V
x(&Denormals<T>::data[i]);
245 #if (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600) || defined(_ISOC99_SOURCE) || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L)
246 static inline float my_log2(
float x) { return ::log2f(x); }
268 #if defined(VC_LOG_ILP) || defined(VC_LOG_ILP2)
273 #if (defined(VC_MSVC) || defined(__APPLE__)) && defined(VC_IMPL_Scalar)
278 typedef typename V::EntryType
T;
279 Array<Reference<T> > reference = referenceData<T, Log2>();
280 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
282 for (
int j = 0; j <
V::Size; ++j) {
283 x[j] = reference.data[i + j].x;
284 ref[j] = reference.data[i + j].ref;
291 V
x(&Denormals<T>::data[i]);
301 typedef typename V::EntryType
T;
302 Array<Reference<T> > reference = referenceData<T, Log10>();
303 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
305 for (
int j = 0; j <
V::Size; ++j) {
306 x[j] = reference.data[i + j].x;
307 ref[j] = reference.data[i + j].ref;
314 V
x(&Denormals<T>::data[i]);
322 typedef typename Vec::EntryType
T;
331 Vec b(&data[Vec::Size]);
332 Vec
c(&data[2 * Vec::Size]);
338 #define FillHelperMemory(code) \
339 typename V::Memory data; \
340 typename V::Memory reference; \
341 for (int ii = 0; ii < V::Size; ++ii) { \
342 const T i = static_cast<T>(ii); \
344 reference[ii] = code; \
345 } do {} while (false)
349 typedef typename V::EntryType
T;
359 typedef typename V::EntryType
T;
360 for (
size_t i = 0; i < 1024 /
V::Size; ++i) {
369 typedef typename V::EntryType
T;
372 Array<SincosReference<T> > reference = sincosReference<T>();
373 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
375 for (
int j = 0; j <
V::Size; ++j) {
376 x[j] = reference.data[i + j].x;
377 sref[j] = reference.data[i + j].s;
378 cref[j] = reference.data[i + j].c;
392 typedef typename V::EntryType
T;
395 Array<SincosReference<T> > reference = sincosReference<T>();
396 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
398 for (
int j = 0; j <
V::Size; ++j) {
399 x[j] = reference.data[i + j].x;
400 sref[j] = reference.data[i + j].s;
409 typedef typename V::EntryType
T;
412 Array<SincosReference<T> > reference = sincosReference<T>();
413 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
415 for (
int j = 0; j <
V::Size; ++j) {
416 x[j] = reference.data[i + j].x;
417 cref[j] = reference.data[i + j].c;
426 typedef typename V::EntryType
T;
429 Array<Reference<T> > reference = referenceData<T, Asin>();
430 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
432 for (
int j = 0; j <
V::Size; ++j) {
433 x[j] = reference.data[i + j].x;
434 ref[j] = reference.data[i + j].ref;
444 }
INF = { 0x7f800000 };
446 #if defined(__APPLE__) && defined(VC_IMPL_Scalar)
447 #define ATAN_COMPARE FUZZY_COMPARE
449 #define ATAN_COMPARE COMPARE
454 typedef typename V::EntryType
T;
460 V nan; nan.setQnan();
461 const V inf =
T(
INF.value);
466 #pragma warning(suppress: 4756) // overflow in constant arithmetic
471 Array<Reference<T> > reference = referenceData<T, Atan>();
472 for (
size_t i = 0; i +
V::Size - 1 < reference.size; i +=
V::Size) {
474 for (
int j = 0; j <
V::Size; ++j) {
475 x[j] = reference.data[i + j].x;
476 ref[j] = reference.data[i + j].ref;
485 typedef typename V::EntryType
T;
492 V nan; nan.setQnan();
493 const V inf =
T(
INF.value);
534 #ifndef _WIN32 // the Microsoft implementation of atan2 fails this test
545 for (
int xoffset = -100; xoffset < 54613; xoffset += 47 *
V::Size) {
546 for (
int yoffset = -100; yoffset < 54613; yoffset += 47 *
V::Size) {
549 const V b(reference);
551 const V x = (a + xoffset) *
T(0.15);
552 const V
y = (a + yoffset) *
T(0.15);
560 typedef typename Vec::EntryType
T;
565 const T scale =
T(0.1);
566 typename Vec::Memory data;
567 typename Vec::Memory reference;
569 const T i =
static_cast<T
>(
ii);
571 T tmp = (i +
offset) * scale;
572 reference[
ii] = one / tmp;
583 typedef typename V::EntryType
T;
592 typedef typename Vec::EntryType
T;
594 const Vec zero(
Zero);
602 typedef typename Vec::EntryType
T;
603 typedef typename Vec::IndexType
I;
604 typedef typename Vec::Mask M;
606 const Vec zero(
Zero);
609 const Vec inf = one / zero;
621 typedef typename Vec::EntryType
T;
629 for (
int i = 0; i < Count *
Vec::Size; ++i) {
630 data[i] = i * 0.25 - 2.0;
631 reference[i] =
std::floor(i * 0.25 - 2.0 + 0.5);
638 for (
int i = 0; i < Count; ++i) {
639 const Vec
a(&data[i * Vec::Size]);
640 const Vec ref(&reference[i * Vec::Size]);
648 typedef typename Vec::EntryType
T;
653 data[i] = i % (Vec::Size + 1) + one;
656 const Vec
a(&data[0]);
664 typedef typename Vec::EntryType
T;
669 data[i] = (i +
Vec::Size) % (Vec::Size + 1) + 1;
672 const Vec
a(&data[0]);
683 typedef typename Vec::EntryType
T;
686 _product *= (i %
Max) + 1;
688 const T product = _product;
691 for (
int i = 0; i < Vec::Size *
Vec::Size; ++i) {
695 const Vec
a(&data[0]);
703 typedef typename Vec::EntryType
T;
712 data[i] = (i + i /
Vec::Size) % Vec::Size + 1;
715 const Vec
a(&data[0]);
723 typedef typename V::EntryType
T;
726 input[ 0] =
T(0.25); expected[ 0] =
T(-2);
727 input[ 1] =
T( 1); expected[ 1] =
T( 0);
728 input[ 2] =
T( 2); expected[ 2] =
T( 1);
729 input[ 3] =
T( 3); expected[ 3] =
T( 1);
730 input[ 4] =
T( 4); expected[ 4] =
T( 2);
731 input[ 5] =
T( 0.5); expected[ 5] =
T(-1);
732 input[ 6] =
T( 6); expected[ 6] =
T( 2);
733 input[ 7] =
T( 7); expected[ 7] =
T( 2);
734 input[ 8] =
T( 8); expected[ 8] =
T( 3);
735 input[ 9] =
T( 9); expected[ 9] =
T( 3);
736 input[10] =
T( 10); expected[10] =
T( 3);
737 input[11] =
T( 11); expected[11] =
T( 3);
738 input[12] =
T( 12); expected[12] =
T( 3);
739 input[13] =
T( 13); expected[13] =
T( 3);
740 input[14] =
T( 14); expected[14] =
T( 3);
741 input[15] =
T( 15); expected[15] =
T( 3);
742 input[16] =
T( 16); expected[16] =
T( 4);
743 input[17] =
T( 17); expected[17] =
T( 4);
744 input[18] =
T( 18); expected[18] =
T( 4);
745 input[19] =
T( 19); expected[19] =
T( 4);
746 input[20] =
T( 20); expected[20] =
T( 4);
747 input[21] =
T( 21); expected[21] =
T( 4);
748 input[22] =
T( 22); expected[22] =
T( 4);
749 input[23] =
T( 23); expected[23] =
T( 4);
750 input[24] =
T( 24); expected[24] =
T( 4);
751 input[25] =
T( 25); expected[25] =
T( 4);
752 input[26] =
T( 26); expected[26] =
T( 4);
753 input[27] =
T( 27); expected[27] =
T( 4);
754 input[28] =
T( 28); expected[28] =
T( 4);
755 input[29] =
T( 29); expected[29] =
T( 4);
756 input[30] =
T( 32); expected[30] =
T( 5);
757 input[31] =
T( 31); expected[31] =
T( 4);
763 template<
typename T>
struct _ExponentVector {
typedef int_v Type; };
768 typedef typename V::EntryType
T;
769 typedef typename _ExponentVector<V>::Type ExpV;
773 input[ 0] =
T(0.25); expectedFraction[ 0] =
T(.5 ); expectedExponent[ 0] = -1;
774 input[ 1] =
T( 1); expectedFraction[ 1] =
T(.5 ); expectedExponent[ 1] = 1;
775 input[ 2] =
T( 0); expectedFraction[ 2] =
T(0. ); expectedExponent[ 2] = 0;
776 input[ 3] =
T( 3); expectedFraction[ 3] =
T(.75 ); expectedExponent[ 3] = 2;
777 input[ 4] =
T( 4); expectedFraction[ 4] =
T(.5 ); expectedExponent[ 4] = 3;
778 input[ 5] =
T( 0.5); expectedFraction[ 5] =
T(.5 ); expectedExponent[ 5] = 0;
779 input[ 6] =
T( 6); expectedFraction[ 6] =
T( 6./8. ); expectedExponent[ 6] = 3;
780 input[ 7] =
T( 7); expectedFraction[ 7] =
T( 7./8. ); expectedExponent[ 7] = 3;
781 input[ 8] =
T( 8); expectedFraction[ 8] =
T( 8./16.); expectedExponent[ 8] = 4;
782 input[ 9] =
T( 9); expectedFraction[ 9] =
T( 9./16.); expectedExponent[ 9] = 4;
783 input[10] =
T( 10); expectedFraction[10] =
T(10./16.); expectedExponent[10] = 4;
784 input[11] =
T( 11); expectedFraction[11] =
T(11./16.); expectedExponent[11] = 4;
785 input[12] =
T( 12); expectedFraction[12] =
T(12./16.); expectedExponent[12] = 4;
786 input[13] =
T( 13); expectedFraction[13] =
T(13./16.); expectedExponent[13] = 4;
787 input[14] =
T( 14); expectedFraction[14] =
T(14./16.); expectedExponent[14] = 4;
788 input[15] =
T( 15); expectedFraction[15] =
T(15./16.); expectedExponent[15] = 4;
789 input[16] =
T( 16); expectedFraction[16] =
T(16./32.); expectedExponent[16] = 5;
790 input[17] =
T( 17); expectedFraction[17] =
T(17./32.); expectedExponent[17] = 5;
791 input[18] =
T( 18); expectedFraction[18] =
T(18./32.); expectedExponent[18] = 5;
792 input[19] =
T( 19); expectedFraction[19] =
T(19./32.); expectedExponent[19] = 5;
793 input[20] =
T( 20); expectedFraction[20] =
T(20./32.); expectedExponent[20] = 5;
794 input[21] =
T( 21); expectedFraction[21] =
T(21./32.); expectedExponent[21] = 5;
795 input[22] =
T( 22); expectedFraction[22] =
T(22./32.); expectedExponent[22] = 5;
796 input[23] =
T( 23); expectedFraction[23] =
T(23./32.); expectedExponent[23] = 5;
797 input[24] =
T( 24); expectedFraction[24] =
T(24./32.); expectedExponent[24] = 5;
798 input[25] =
T( 25); expectedFraction[25] =
T(25./32.); expectedExponent[25] = 5;
799 input[26] =
T( 26); expectedFraction[26] =
T(26./32.); expectedExponent[26] = 5;
800 input[27] =
T( 27); expectedFraction[27] =
T(27./32.); expectedExponent[27] = 5;
801 input[28] =
T( 28); expectedFraction[28] =
T(28./32.); expectedExponent[28] = 5;
802 input[29] =
T( 29); expectedFraction[29] =
T(29./32.); expectedExponent[29] = 5;
803 input[30] =
T( 32); expectedFraction[30] =
T(32./64.); expectedExponent[30] = 6;
804 input[31] =
T( 31); expectedFraction[31] =
T(31./32.); expectedExponent[31] = 5;
810 for (
size_t j = 0; j <
V::Size; ++j) {
811 COMPARE(exp[j * 2], expectedExponent[i * V::Size + j]);
821 typedef typename V::EntryType
T;
822 typedef typename _ExponentVector<V>::Type ExpV;
823 for (
size_t i = 0; i < 1024 /
V::Size; ++i) {
827 COMPARE(
ldexp(m, e), v) <<
", m = " << m <<
", e = " << e;
834 typedef typename V::EntryType
T;
839 for (
size_t count = 0; count < 1024 /
V::Size; ++count) {
845 for (
int i = -10000; i <= 10000; ++i) {
846 const V i_v = V(
T(i));
847 const V diff = base + i_v * eps;
852 const V expectedDifference =
Vc::abs(i_v);
855 VERIFY(
Vc::abs(ulpDifference - expectedDifference) <= maxUncertainty)
856 <<
", base = " << base <<
", epsilon = " << eps <<
", diff = " << diff;
857 for (
int k = 0; k <
V::Size; ++k) {
858 VERIFY(
std::abs(ulpDifference[k] - expectedDifference[k]) <= maxUncertainty[k]);
864 int main(
int argc,
char **argv)
868 Denormals<float>::data = Vc::malloc<float, Vc::AlignOnVector>(
NDenormals);
869 Denormals<float>::data[0] = std::numeric_limits<float>::denorm_min();
871 Denormals<float>::data[i] = Denormals<float>::data[i - 1] * 2.173f;
873 Denormals<double>::data = Vc::malloc<double, Vc::AlignOnVector>(
NDenormals);
874 Denormals<double>::data[0] = std::numeric_limits<double>::denorm_min();
876 Denormals<double>::data[i] = Denormals<double>::data[i - 1] * 2.173;
919 runTest(testReduceMin<float_v>);
920 runTest(testReduceMin<sfloat_v>);
921 runTest(testReduceMin<double_v>);
923 runTest(testReduceMin<uint_v>);
924 runTest(testReduceMin<short_v>);
925 runTest(testReduceMin<ushort_v>);
927 runTest(testReduceMax<float_v>);
928 runTest(testReduceMax<sfloat_v>);
929 runTest(testReduceMax<double_v>);
931 runTest(testReduceMax<uint_v>);
932 runTest(testReduceMax<short_v>);
933 runTest(testReduceMax<ushort_v>);
935 runTest(testReduceProduct<float_v>);
936 runTest(testReduceProduct<sfloat_v>);
937 runTest(testReduceProduct<double_v>);
938 runTest(testReduceProduct<int_v>);
939 runTest(testReduceProduct<uint_v>);
940 runTest(testReduceProduct<short_v>);
941 runTest(testReduceProduct<ushort_v>);
943 runTest(testReduceSum<float_v>);
944 runTest(testReduceSum<sfloat_v>);
945 runTest(testReduceSum<double_v>);
947 runTest(testReduceSum<uint_v>);
948 runTest(testReduceSum<short_v>);
949 runTest(testReduceSum<ushort_v>);
VECTOR_NAMESPACE::sfloat_v sfloat_v
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
V apply_v(VC_ALIGNED_PARAMETER(V) x, typename V::EntryType(func)(typename V::EntryType))
#define FUZZY_COMPARE(a, b)
static Vc_ALWAYS_INLINE float_v trunc(float_v::AsArg v)
void initTest(int argc, char **argv)
const char * filename< float, Log2 >()
void setFuzzyness< double >(double fuzz)
const char * filename< float, Atan >()
const char * filename< double, Atan >()
static Vc_ALWAYS_INLINE Vector< T > reciprocal(const Vector< T > &x)
static const char * filename()
_VC_CONSTEXPR size_t vectorsCount() const
static Vc_ALWAYS_INLINE Vector< T >::Mask isfinite(const Vector< T > &x)
#define FillHelperMemory(code)
static Array< Reference< T > > referenceData()
int main(int argc, char **argv)
VECTOR_NAMESPACE::short_v short_v
const char * filename< double, Log2 >()
const char * filename< double, Asin >()
const char * filename< double, Log >()
static float my_log2(float x)
double_v frexp(double_v::AsArg v, int_v *e)
splits v into exponent and mantissa, the sign is kept with the mantissa
void setFuzzyness< float >(float fuzz)
static Vc_ALWAYS_INLINE Vector< T > abs(const Vector< T > &x)
#define testRealTypes(name)
Vc_ALWAYS_INLINE Vc_PURE VectorPointerHelper< V, AlignedFlag > vector(size_t i)
const char * filename< float, Log >()
#define VC_ALIGNED_PARAMETER(_Type)
VECTOR_NAMESPACE::int_v int_v
#define Vc_buildDouble(sign, mantissa, exponent)
double Pi()
Mathematical constants.
const char * filename< float, Log10 >()
static Vc_ALWAYS_INLINE Vector< T > round(const Vector< T > &x)
const char * filename< double, Sincos >()
static Vc_ALWAYS_INLINE Vector< T > rsqrt(const Vector< T > &x)
Type
enumeration specifying the integration types.
A helper class for fixed-size two-dimensional arrays.
const char * filename< double, Log10 >()
double atan2(double, double)
static Array< SincosReference< T > > sincosReference()
double func(double *x, double *p)
static float my_trunc(float x)
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Vc_INTRINSIC Vc_CONST m256 exponent(param256 v)
static T ulpDiffToReference(T val, T ref)
Short_t Max(Short_t a, Short_t b)
const char * filename< float, Sincos >()
const char * filename< float, Asin >()
double ldexp(double, int)