Logo ROOT   6.10/09
Reference Guide
LossFunctions.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12  /////////////////////////////////////////////////////////////////////
13  // Implementation of the loss functions for the multi-threaded CPU //
14  // implementation using Roots TThreadExecutor and BLAS. //
15  /////////////////////////////////////////////////////////////////////
16 
#include "TMVA/DNN/Architectures/Cpu.h"

19 namespace TMVA
20 {
21 namespace DNN
22 {
23 
24 //______________________________________________________________________________
25 template<typename AFloat>
28 {
29  const AFloat *dataY = Y.GetRawDataPointer();
30  const AFloat *dataOutput = output.GetRawDataPointer();
31  std::vector<AFloat> temp(Y.GetNElements());
32  AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
33 
34  auto f = [&dataY, &dataOutput, &temp](UInt_t workerID)
35  {
36  AFloat dy = dataY[workerID] - dataOutput[workerID];
37  temp[workerID] = dy * dy;
38  return 0;
39  };
40 
41  //auto reduction = [](AFloat sum1, AFloat sum2)
42  auto reduction = [](const std::vector<AFloat> & v )
43 
44  {
45  //return sum1 + sum2;
46  return std::accumulate(v.begin(),v.end(),AFloat{});
47  };
48 
50  return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
51 }
52 
53 //______________________________________________________________________________
54 template<typename AFloat>
56  TCpuMatrix<AFloat> & dY,
57  const TCpuMatrix<AFloat> & Y,
58  const TCpuMatrix<AFloat> & output)
59 {
60 
61  AFloat *dataDY = dY.GetRawDataPointer();
62  const AFloat *dataY = Y.GetRawDataPointer();
63  const AFloat *dataOutput = output.GetRawDataPointer();
64  AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
65 
66  auto f = [&dataDY, &dataY, &dataOutput, norm](UInt_t workerID)
67  {
68  dataDY[workerID] = - 2.0 * norm * (dataY[workerID] - dataOutput[workerID]);
69  return 0;
70  };
71 
73 }
74 
75 //______________________________________________________________________________
76 template<typename AFloat>
79 {
80  const AFloat *dataY = Y.GetRawDataPointer();
81  const AFloat *dataOutput = output.GetRawDataPointer();
82  std::vector<AFloat> temp(Y.GetNElements());
83  AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
84 
85  auto f = [&dataY, &dataOutput, &temp](UInt_t workerID)
86  {
87  AFloat y = dataY[workerID];
88  AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
89  temp[workerID] = - (y * log(sig) + (1.0 - y) * log(1.0 - sig));
90  return 0;
91  };
92 
93  // auto reduction = [](AFloat sum1, AFloat sum2)
94  // {
95  // return sum1 + sum2;
96  // };
97  auto reduction = [](const std::vector<AFloat> & v )
98  {
99  return std::accumulate(v.begin(),v.end(),AFloat{});
100  };
101 
102 
104  return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
105 }
106 
107 //______________________________________________________________________________
108 template<typename AFloat>
110  TCpuMatrix<AFloat> & dY,
111  const TCpuMatrix<AFloat> & Y,
112  const TCpuMatrix<AFloat> & output)
113 {
114  AFloat *dataDY = dY.GetRawDataPointer();
115  const AFloat *dataY = Y.GetRawDataPointer();
116  const AFloat *dataOutput = output.GetRawDataPointer();
117  AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
118 
119  auto f = [&dataDY, &dataY, &dataOutput, norm](UInt_t workerID)
120  {
121  AFloat y = dataY[workerID];
122  AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
123  dataDY[workerID] = norm * (sig - y);
124  return 0;
125  };
126 
128 }
129 
130 //______________________________________________________________________________
131 template<typename AFloat>
133  const TCpuMatrix<AFloat> &Y,
134  const TCpuMatrix<AFloat> &output)
135 {
136  const AFloat *dataY = Y.GetRawDataPointer();
137  const AFloat *dataOutput = output.GetRawDataPointer();
138  std::vector<AFloat> temp(Y.GetNrows());
139  size_t m = Y.GetNrows();
140  size_t n = Y.GetNcols();
141  AFloat norm = 1.0 / ((AFloat) m);
142 
143  auto f = [&dataY, &dataOutput, &temp, n, m](UInt_t workerID)
144  {
145  AFloat sum = 0.0;
146  for (size_t j = 0; j < n; j++) {
147  sum += exp(dataOutput[workerID + j * m]);
148  }
149  for (size_t j = 0; j < n; j++) {
150  temp[workerID] -=
151  dataY[workerID + j * m] * log(exp(dataOutput[workerID + j * m]) / sum);
152  }
153  return 0;
154  };
155 
156  // auto reduction = [](AFloat sum1, AFloat sum2)
157  // {
158  // return sum1 + sum2;
159  // };
160  auto reduction = [](const std::vector<AFloat> & v )
161  {
162  return std::accumulate(v.begin(),v.end(),AFloat{});
163  };
164 
166  return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
167 }
168 
169 //______________________________________________________________________________
170 template<typename AFloat>
172  TCpuMatrix<AFloat> & dY,
173  const TCpuMatrix<AFloat> & Y,
174  const TCpuMatrix<AFloat> & output)
175 {
176  AFloat *dataDY = dY.GetRawDataPointer();
177  const AFloat *dataY = Y.GetRawDataPointer();
178  const AFloat *dataOutput = output.GetRawDataPointer();
179  size_t m = Y.GetNrows();
180  size_t n = Y.GetNcols();
181  AFloat norm = 1.0 / ((AFloat) m);
182 
183  auto f = [&dataDY, &dataY, &dataOutput, norm, n, m](UInt_t workerID)
184  {
185  AFloat sum = 0.0;
186  AFloat sumY = 0.0;
187  for (size_t j = 0; j < n; j++) {
188  sum += exp(dataOutput[workerID + j * m]);
189  sumY += dataY[workerID + j * m];
190  }
191  for (size_t j = 0; j < n; j++) {
192  dataDY[workerID + j * m] =
193  norm * (exp(dataOutput[workerID + j * m]) / sum * sumY - dataY[workerID + j * m]);
194 
195  }
196  return 0;
197  };
198 
200 }
201 
202 } // namespace DNN
203 } // namespace TMVA
static long int sum(long int i)
Definition: Factory.cxx:2162
The TCpuMatrix class.
Definition: CpuMatrix.h:46
static Scalar_t MeanSquaredError(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
size_t GetNcols() const
Definition: CpuMatrix.h:94
static void CrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
size_t GetNElements() const
Definition: CpuMatrix.h:95
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
static Scalar_t SoftmaxCrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
SVector< double, 2 > v
Definition: Dict.h:5
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
static void MeanSquaredErrorGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:103
static void SoftmaxCrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
double f(double x)
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Double_t y[n]
Definition: legend1.C:17
ROOT::TThreadExecutor & GetThreadExecutor() const
Definition: CpuMatrix.h:106
Abstract ClassifierFactory template that handles arbitrary types.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
size_t GetNrows() const
Definition: CpuMatrix.h:93
double exp(double)
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
const Int_t n
Definition: legend1.C:16
static Scalar_t CrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
double log(double)