Logo ROOT   6.08/07
Reference Guide
LossFunction.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : LossFunction *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * LossFunction and associated classes *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * U. of Victoria, Canada *
23  * MPI-K Heidelberg, Germany *
24  * U. of Bonn, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://mva.sourceforge.net/license.txt) *
29  **********************************************************************************/
30 
31 #ifndef ROOT_TMVA_LossFunction
32 #define ROOT_TMVA_LossFunction
33 
34 //#include <iosfwd>
35 #include <vector>
36 #include <map>
37 #include "TMVA/Event.h"
38 #ifndef ROOT_TMVA_Types
39 #include "TMVA/Types.h"
40 #endif
41 
42 namespace TMVA {
43 
44  ///////////////////////////////////////////////////////////////////////////////////////////////
45  // Data Structure used by LossFunction and LossFunctionBDT to calculate errors, targets, etc
46  ///////////////////////////////////////////////////////////////////////////////////////////////
47 
49 
50  public:
52  LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_){
53  trueValue = trueValue_;
54  predictedValue = predictedValue_;
55  weight = weight_;
56  }
58 
62  };
63 
64 
65  ///////////////////////////////////////////////////////////////////////////////////////////////
66  // Loss Function interface defining base class for general error calculations in
67  // regression/classification
68  ///////////////////////////////////////////////////////////////////////////////////////////////
69 
70  class LossFunction {
71 
72  public:
73 
74  // constructors
76  virtual ~LossFunction(){};
77 
78  // abstract methods that need to be implemented
79  virtual Double_t CalculateLoss(LossFunctionEventInfo& e) = 0;
80  virtual Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) = 0;
81  virtual Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) = 0;
82 
83  virtual TString Name() = 0;
84  virtual Int_t Id() = 0;
85  };
86 
87  ///////////////////////////////////////////////////////////////////////////////////////////////
88  // Loss Function interface for boosted decision trees. Inherits from LossFunction
89  ///////////////////////////////////////////////////////////////////////////////////////////////
90 
91  /* Must inherit LossFunction with the virtual keyword so that we only have to implement
92  * the LossFunction interface once.
93  *
94  *            LossFunction
95  *         /                 \
96  *SomeLossFunction      LossFunctionBDT
97  *         \                 /
98  *           \             /
99  *         SomeLossFunctionBDT
100  *
101  * Without the virtual keyword the two would point to their own LossFunction objects
102  * and SomeLossFunctionBDT would have to implement the virtual functions of LossFunction twice, once
103  * for each object. See diagram below.
104  *
105  *  LossFunction         LossFunction
106  *       |                     |
107  *SomeLossFunction      LossFunctionBDT
108  *         \                 /
109  *           \             /
110  *         SomeLossFunctionBDT
111  *
112  * Multiple inheritance is often frowned upon. To avoid this, we could make LossFunctionBDT separate
113  * from LossFunction, but it really is a type of loss function.
114  * We could also put LossFunction into LossFunctionBDT. In either of these scenarios, if you are doing
115  * different regression methods and want to compare the loss, this makes it more convoluted.
116  * I think that multiple inheritance seems justified in this case, but we could change it if it's a problem.
117  * Usually it isn't a big deal with interfaces and this results in the simplest code in this case.
118  */
119 
120  class LossFunctionBDT : public virtual LossFunction{
121 
122  public:
123 
124  // constructors
126  virtual ~LossFunctionBDT(){};
127 
128  // abstract methods that need to be implemented
129  virtual void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights) = 0;
130  virtual void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) = 0;
131  virtual Double_t Target(LossFunctionEventInfo& e) = 0;
132  virtual Double_t Fit(std::vector<LossFunctionEventInfo>& evs) = 0;
133  };
134 
135  ///////////////////////////////////////////////////////////////////////////////////////////////
136  // Huber loss function for regression error calculations
137  ///////////////////////////////////////////////////////////////////////////////////////////////
138 
139  class HuberLossFunction : public virtual LossFunction{
140 
141  public:
143  HuberLossFunction(Double_t quantile);
145 
146  // The LossFunction methods
147  Double_t CalculateLoss(LossFunctionEventInfo& e);
148  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
149  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);
150 
151  // We go ahead and implement the simple ones
152  TString Name(){ return TString("Huber"); };
153  Int_t Id(){ return 0; } ;
154 
155  // Functions needed beyond the interface
156  void Init(std::vector<LossFunctionEventInfo>& evs);
157  Double_t CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs);
158  Double_t CalculateSumOfWeights(std::vector<LossFunctionEventInfo>& evs);
159  void SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs);
160  void SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs);
161 
162  protected:
166  };
167 
168  ///////////////////////////////////////////////////////////////////////////////////////////////
169  // Huber loss function with boosted decision tree functionality
170  ///////////////////////////////////////////////////////////////////////////////////////////////
171 
172  // The bdt loss function implements the LossFunctionBDT interface and inherits the HuberLossFunction
173  // functionality.
175 
176  public:
180 
181  // The LossFunctionBDT methods
182  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
183  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
185  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
186 
187  private:
188  // some data fields
189  };
190 
191  ///////////////////////////////////////////////////////////////////////////////////////////////
192  // LeastSquares loss function for regression error calculations
193  ///////////////////////////////////////////////////////////////////////////////////////////////
194 
195  class LeastSquaresLossFunction : public virtual LossFunction{
196 
197  public:
200 
201  // The LossFunction methods
202  Double_t CalculateLoss(LossFunctionEventInfo& e);
203  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
204  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);
205 
206  // We go ahead and implement the simple ones
207  TString Name(){ return TString("LeastSquares"); };
208  Int_t Id(){ return 1; } ;
209  };
210 
211  ///////////////////////////////////////////////////////////////////////////////////////////////
212  // Least Squares loss function with boosted decision tree functionality
213  ///////////////////////////////////////////////////////////////////////////////////////////////
214 
215  // The bdt loss function implements the LossFunctionBDT interface and inherits the LeastSquaresLossFunction
216  // functionality.
218 
219  public:
222 
223  // The LossFunctionBDT methods
224  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
225  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
227  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
228  };
229 
230  ///////////////////////////////////////////////////////////////////////////////////////////////
231  // Absolute Deviation loss function for regression error calculations
232  ///////////////////////////////////////////////////////////////////////////////////////////////
233 
235 
236  public:
239 
240  // The LossFunction methods
241  Double_t CalculateLoss(LossFunctionEventInfo& e);
242  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
243  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);
244 
245  // We go ahead and implement the simple ones
246  TString Name(){ return TString("AbsoluteDeviation"); };
247  Int_t Id(){ return 2; } ;
248  };
249 
250  ///////////////////////////////////////////////////////////////////////////////////////////////
251  // Absolute Deviation loss function with boosted decision tree functionality
252  ///////////////////////////////////////////////////////////////////////////////////////////////
253 
254  // The bdt loss function implements the LossFunctionBDT interface and inherits the AbsoluteDeviationLossFunction
255  // functionality.
257 
258  public:
261 
262  // The LossFunctionBDT methods
263  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
264  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
266  Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
267  };
268 }
269 
270 #endif
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
const char * Name
Definition: TXMLSetup.cxx:67
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:119
HuberLossFunctionBDT(Double_t quantile)
Definition: LossFunction.h:178
virtual ~LossFunction()
Definition: LossFunction.h:76
double Double_t
Definition: RtypesCore.h:55
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition: HFitImpl.cxx:134
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_)
Definition: LossFunction.h:52
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~LossFunctionBDT()
Definition: LossFunction.h:126