25template<
typename AFloat>
33 std::vector<AFloat> temp(nElements/nSteps + 1);
35 auto f = [&data, &temp, nElements, nSteps](
UInt_t workerID)
37 size_t iMax = std::min(workerID+nSteps, nElements);
38 size_t iWorker = workerID/nSteps;
39 for (
size_t i = workerID; i < iMax; ++i) {
40 temp[iWorker] +=
fabs(data[i]);
44 auto reduction = [](
const std::vector<AFloat> &
v )
46 return std::accumulate(
v.begin(),
v.end(),AFloat{});
58template<
typename AFloat>
64 AFloat *dataB =
B.GetRawDataPointer();
65 const AFloat *dataA =
A.GetRawDataPointer();
67 size_t nElements =
B.GetNoElements();
75 size_t iMax = std::min(workerID+nSteps, nElements);
76 for (
size_t i = workerID; i < iMax; ++i) {
77 AFloat sign = (dataA[i] < 0.0) ? -1.0 : 1.0;
83 if (nSteps < nElements) {
85 B.GetThreadExecutor().Foreach(
f,
ROOT::TSeqI(0,nElements, nSteps));
87 for (
size_t i = 0; i < nElements; i+=nSteps)
96template<
typename AFloat>
104 std::vector<AFloat> temp(nElements/nSteps + 1);
106 auto f = [&data, &temp, nElements, nSteps](
UInt_t workerID)
108 size_t iMax = std::min(workerID+nSteps, nElements);
109 size_t iWorker = workerID/nSteps;
111 for (
size_t i = workerID; i < iMax; ++i) {
112 temp[iWorker] += data[i] * data[i];
116 auto reduction = [](
const std::vector<AFloat> &
v )
118 return std::accumulate(
v.begin(),
v.end(),AFloat{});
130template<
typename AFloat>
136 AFloat *dataB =
B.GetRawDataPointer();
137 const AFloat *dataA =
A.GetRawDataPointer();
139 size_t nElements =
B.GetNoElements();
145 size_t iMax = std::min(workerID+nSteps, nElements);
146 for (
size_t i = workerID; i < iMax; ++i) {
152 if (nSteps < nElements) {
154 B.GetThreadExecutor().Foreach(
f,
ROOT::TSeqI(0,nElements, nSteps));
156 for (
size_t i = 0; i < nElements; i+=nSteps)
A pseudo container class which is a generator of indices.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
static size_t GetNWorkItems(size_t nelements)
static Executor & GetThreadExecutor()
size_t GetNoElements() const
static void AddL2RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
static Scalar_t L2Regularization(const TCpuMatrix< Scalar_t > &W)
static Scalar_t L1Regularization(const TCpuMatrix< Scalar_t > &W)
static void AddL1RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
Wrap TExecutor::Foreach.
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
Wrap Reduce function.
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
Compute the weight decay for regularization (L1 or L2).
Create variable transformations.