ROOT 6.10/09 Reference Guide
LossFunction.h
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                   *
 * Class  : LossFunction                                                           *
 * Web    : http://tmva.sourceforge.net                                            *
 *                                                                                 *
 * Description:                                                                    *
 *      LossFunction and associated classes                                        *
 *                                                                                 *
 * Authors (alphabetical):                                                         *
 *      Andreas Hoecker  <Andreas.Hocker@cern.ch>    - CERN, Switzerland           *
 *      Joerg Stelzer    <Joerg.Stelzer@cern.ch>     - CERN, Switzerland           *
 *      Peter Speckmayer <Peter.Speckmayer@cern.ch>  - CERN, Switzerland           *
 *      Jan Therhaag     <Jan.Therhaag@cern.ch>      - U of Bonn, Germany          *
 *      Helge Voss       <Helge.Voss@cern.ch>        - MPI-K Heidelberg, Germany   *
 *                                                                                 *
 * Copyright (c) 2005-2011:                                                        *
 *      CERN, Switzerland                                                          *
 *      U. of Victoria, Canada                                                     *
 *      MPI-K Heidelberg, Germany                                                  *
 *      U. of Bonn, Germany                                                        *
 *                                                                                 *
 * Redistribution and use in source and binary forms, with or without              *
 * modification, are permitted according to the terms listed in LICENSE            *
 * (http://tmva.sourceforge.net/license.txt)                                       *
 **********************************************************************************/

#ifndef ROOT_TMVA_LossFunction
#define ROOT_TMVA_LossFunction

//#include <iosfwd>
#include <vector>
#include <map>
#include "TMVA/Event.h"
#include "TMVA/Types.h"

namespace TMVA {

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Data structure used by LossFunction and LossFunctionBDT to calculate errors, targets, etc.
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LossFunctionEventInfo{

   public:
      LossFunctionEventInfo(){
         trueValue = 0.;
         predictedValue = 0.;
         weight = 0.;
      };
      LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_){
         trueValue = trueValue_;
         predictedValue = predictedValue_;
         weight = weight_;
      }
      ~LossFunctionEventInfo(){};

      Double_t trueValue;
      Double_t predictedValue;
      Double_t weight;
   };

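   // Illustrative note (not part of the original header): a LossFunctionEventInfo simply bundles
   // the true target value, the current model prediction, and the event weight, e.g.
   //
   //    LossFunctionEventInfo info(/*trueValue=*/1.2, /*predictedValue=*/0.9, /*weight=*/1.0);
   //    Double_t pointLoss = someLossFunction.CalculateLoss(info);   // someLossFunction: any concrete loss declared below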

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Loss Function interface defining base class for general error calculations in
   // regression/classification
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LossFunction {

   public:

      // constructors
      LossFunction(){};
      virtual ~LossFunction(){};

      // abstract methods that need to be implemented
      virtual Double_t CalculateLoss(LossFunctionEventInfo& e) = 0;
      virtual Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) = 0;
      virtual Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) = 0;

      virtual TString Name() = 0;
      virtual Int_t Id() = 0;
   };
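
   // Illustrative sketch (not part of the original header): a minimal concrete loss implementing
   // the LossFunction interface. The weighting and the sum/weighted-mean conventions shown here
   // are assumptions for illustration only; the losses actually shipped with TMVA are declared
   // below and defined in LossFunction.cxx.
   //
   //    class ExampleSquaredLoss : public virtual LossFunction {
   //    public:
   //       Double_t CalculateLoss(LossFunctionEventInfo& e) {
   //          Double_t res = e.trueValue - e.predictedValue;
   //          return e.weight*res*res;
   //       }
   //       Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) {
   //          Double_t sum = 0;
   //          for (auto& e : evs) sum += CalculateLoss(e);
   //          return sum;
   //       }
   //       Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) {
   //          Double_t sum = 0, sumOfWeights = 0;
   //          for (auto& e : evs) { sum += CalculateLoss(e); sumOfWeights += e.weight; }
   //          return sumOfWeights > 0 ? sum/sumOfWeights : 0;
   //       }
   //       TString Name() { return TString("ExampleSquared"); }
   //       Int_t Id()     { return -1; }   // hypothetical id, for illustration only
   //    };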

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Loss Function interface for boosted decision trees. Inherits from LossFunction
   ///////////////////////////////////////////////////////////////////////////////////////////////

   /* Must inherit LossFunction with the virtual keyword so that we only have to implement
    * the LossFunction interface once.
    *
    *                  LossFunction
    *                 /            \
    *  SomeLossFunction            LossFunctionBDT
    *                 \            /
    *                  \          /
    *               SomeLossFunctionBDT
    *
    * Without the virtual keyword, each of the two intermediate classes would carry its own
    * LossFunction subobject, and SomeLossFunctionBDT would have to implement the virtual
    * functions of LossFunction twice, once for each subobject. See the diagram below.
    *
    *     LossFunction            LossFunction
    *          |                       |
    *  SomeLossFunction          LossFunctionBDT
    *          \                      /
    *           \                    /
    *            SomeLossFunctionBDT
    *
    * Multiple inheritance is often frowned upon. To avoid it, we could make LossFunctionBDT
    * separate from LossFunction, but it really is a type of loss function. We could also fold
    * LossFunction into LossFunctionBDT, but either alternative makes it more convoluted to
    * compare the loss across different regression methods. Multiple inheritance seems justified
    * here: it is usually unproblematic for pure interfaces, and it gives the simplest code in
    * this case.
    */

   class LossFunctionBDT : public virtual LossFunction{

   public:

      // constructors
      LossFunctionBDT(){};
      virtual ~LossFunctionBDT(){};

      // abstract methods that need to be implemented
      virtual void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights) = 0;
      virtual void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) = 0;
      virtual Double_t Target(LossFunctionEventInfo& e) = 0;
      virtual Double_t Fit(std::vector<LossFunctionEventInfo>& evs) = 0;
   };
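
   // Illustrative sketch (not part of the original header): how a gradient-boosting loop might
   // use the LossFunctionBDT interface. The call sequence below is an assumption based on the
   // method names; the authoritative usage lives in TMVA::MethodBDT.
   //
   //    lossFunctionBDT->Init(evinfomap, boostWeights);          // set up initial predictions / boost weights
   //    for (UInt_t i = 0; i < nTrees; ++i) {
   //       // targets become the pseudo-residuals of the current model
   //       // (Target(e) gives the pseudo-residual for a single event)
   //       lossFunctionBDT->SetTargets(eventSample, evinfomap);
   //       // ... grow a regression tree on those targets ...
   //       // each terminal node takes the response that minimizes the loss in that node
   //       Double_t nodeResponse = lossFunctionBDT->Fit(eventsInLeaf);
   //    }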

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Huber loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class HuberLossFunction : public virtual LossFunction{

   public:
      HuberLossFunction();
      HuberLossFunction(Double_t quantile);
      ~HuberLossFunction();

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("Huber"); };
      Int_t Id(){ return 0; };

      // Functions needed beyond the interface
      void Init(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs);
      Double_t CalculateSumOfWeights(std::vector<LossFunctionEventInfo>& evs);
      void SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs);
      void SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs);

   protected:
      Double_t fQuantile;
      Double_t fTransitionPoint;
      Double_t fSumOfWeights;
   };
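
   // Illustrative sketch (assumed semantics, following the standard Huber loss): the loss is
   // quadratic for small residuals and linear for large ones. The transition point delta is
   // taken from a quantile (e.g. the fQuantile member) of the |residual| distribution, and the
   // event weight multiplies the pointwise loss:
   //
   //    Double_t huber(Double_t residual, Double_t delta, Double_t weight) {
   //       Double_t absRes = std::abs(residual);                 // needs <cmath>
   //       return (absRes <= delta) ? weight*0.5*residual*residual
   //                                : weight*delta*(absRes - 0.5*delta);
   //    }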

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Huber loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // HuberLossFunction functionality.
   class HuberLossFunctionBDT : public LossFunctionBDT, public HuberLossFunction{

   public:
      HuberLossFunctionBDT();
      HuberLossFunctionBDT(Double_t quantile):HuberLossFunction(quantile){};
      ~HuberLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);

   private:
      // some data fields
   };

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // LeastSquares loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LeastSquaresLossFunction : public virtual LossFunction{

   public:
      LeastSquaresLossFunction(){};
      ~LeastSquaresLossFunction(){};

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("LeastSquares"); };
      Int_t Id(){ return 1; };
   };
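
   // Illustrative note (assumed semantics): the pointwise least-squares loss is the weighted
   // squared residual,
   //
   //    loss = weight * (trueValue - predictedValue)^2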

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Least Squares loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // LeastSquaresLossFunction functionality.
   class LeastSquaresLossFunctionBDT : public LossFunctionBDT, public LeastSquaresLossFunction{

   public:
      LeastSquaresLossFunctionBDT(){};
      ~LeastSquaresLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
   };

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Absolute Deviation loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class AbsoluteDeviationLossFunction : public virtual LossFunction{

   public:
      AbsoluteDeviationLossFunction(){};
      ~AbsoluteDeviationLossFunction(){};

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("AbsoluteDeviation"); };
      Int_t Id(){ return 2; };
   };
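
   // Illustrative note (assumed semantics): the pointwise absolute-deviation loss is the
   // weighted absolute residual,
   //
   //    loss = weight * |trueValue - predictedValue|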

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Absolute Deviation loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // AbsoluteDeviationLossFunction functionality.
   class AbsoluteDeviationLossFunctionBDT : public LossFunctionBDT, public AbsoluteDeviationLossFunction{

   public:
      AbsoluteDeviationLossFunctionBDT(){};
      ~AbsoluteDeviationLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
   };
}

#endif
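
These loss functions are selected by name when booking a boosted decision tree for regression. Below is a minimal booking sketch, assuming the ROOT 6.10 TMVA Factory/DataLoader API and the MethodBDT option RegressionLossFunctionBDTG (verify the option name and defaults against your TMVA version); inputTree stands for the user's regression TTree:

   #include "TFile.h"
   #include "TTree.h"
   #include "TCut.h"
   #include "TMVA/Factory.h"
   #include "TMVA/DataLoader.h"
   #include "TMVA/Types.h"

   void bookRegressionBDT(TTree* inputTree) {
      // Output file for the training results
      TFile* outputFile = TFile::Open("TMVARegression.root", "RECREATE");
      TMVA::Factory factory("TMVARegression", outputFile, "!V:!Silent:AnalysisType=Regression");

      // Declare inputs, the regression target and the training tree
      TMVA::DataLoader loader("dataset");
      loader.AddVariable("var1", 'F');
      loader.AddTarget("target");
      loader.AddRegressionTree(inputTree);
      loader.PrepareTrainingAndTestTree(TCut(""), "SplitMode=Random:NormMode=NumEvents");

      // Choose the regression loss: Huber, LeastSquares or AbsoluteDeviation
      // (option name assumed from MethodBDT; check your TMVA version)
      factory.BookMethod(&loader, TMVA::Types::kBDT, "BDTG",
                         "NTrees=500:BoostType=Grad:RegressionLossFunctionBDTG=Huber");

      factory.TrainAllMethods();
      outputFile->Close();
   }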