Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataSetFactory.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSetFactory *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2006: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_DataSetFactory
30#define ROOT_TMVA_DataSetFactory
31
32//////////////////////////////////////////////////////////////////////////
33// //
34// DataSetFactory //
35// //
36// Class that contains all the data information //
37// //
38//////////////////////////////////////////////////////////////////////////
39
40#include <vector>
41#include <map>
42
43#include "TString.h"
44#include "TTree.h"
45#include "TCut.h"
46#include "TTreeFormula.h"
47#include "TMatrixDfwd.h"
48#include "TPrincipal.h"
49#include "TRandom3.h"
50
51#include "TMVA/Types.h"
52#include "TMVA/VariableInfo.h"
53#include "TMVA/Event.h"
54
55namespace TMVA {
56
57 class DataSet;
58 class DataSetInfo;
59 class DataInputHandler;
60 class TreeInfo;
61 class MsgLogger;
62
63 // =============== maybe move these elswhere (e.g. into the tools )
64
65 // =============== functors =======================
66
67 // delete-functor (to be used in e.g. for_each algorithm)
68 template<class T>
70 {
72 delete p;
73 return *this;
74 }
75 };
76
77 template<class T>
79 {
81 }
82
83
84 template< typename T >
85 class Increment {
87 public:
88 Increment( T start ) : value( start ){ }
90 return value++;
91 }
92 };
93
94
95
96 template <typename F>
97 class null_t
98 {
99 private:
100 // returns argF
101 public:
103 F operator()(const F& argF) const
104 {
105 return argF;
106 }
107 };
108
109 template <typename F>
110 inline null_t<F> null() {
111 return null_t<F>();
112 }
113
114
115
116 template <typename F, typename G, typename H>
117 class compose_binary_t : public std::binary_function<typename G::argument_type,
118 typename H::argument_type,
119 typename F::result_type>
120 {
121 private:
122 const F& f; // f(g(argG),h(argH))
123 const G& g;
124 const H& h;
125 public:
126 compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
127 {
128 }
129
130 typename F::result_type operator()(const typename G::argument_type& argG,
131 const typename H::argument_type& argH) const
132 {
133 return f(g(argG),h(argH));
134 }
135 };
136
137 template <typename F, typename G, typename H>
138 inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
139 return compose_binary_t<F,G,H>(_f,_g,_h);
140 }
141
142
143
144
145 template <typename F, typename G>
146 class compose_unary_t : public std::unary_function<typename G::argument_type,
147 typename F::result_type>
148 {
149 private:
150 const F& f; // f(g(argG))
151 const G& g;
152 public:
153 compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
154 {
155 }
156
157 typename F::result_type operator()(const typename G::argument_type& argG) const
158 {
159 return f(g(argG));
160 }
161 };
162
163 template <typename F, typename G>
164 inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
165 return compose_unary_t<F,G>(_f,_g);
166 }
167
168 // =============== functors =======================
169
170
171 // =========================================================
172
173
174 class DataSetFactory:public TObject {
175
176 typedef std::vector<Event* > EventVector;
177 typedef std::vector< EventVector > EventVectorOfClasses;
178 typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
179 typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
180
181 typedef std::vector< Double_t > ValuePerClass;
182 typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
183
185 public:
201 nEvBeforeCut(0),
202 nEvAfterCut(0),
204 nWeEvAfterCut(0),
205 nNegWeights(0),
206 varAvLength(0)
207 {}
208 ~EventStats() { delete[] varAvLength; }
210 };
211
212 typedef std::vector< int > NumberPerClass;
213 typedef std::vector< EventStats > EvtStatsPerClass;
214
215 public:
216
218
220
222 protected:
223
224
227
228 // ---------- new versions
229 void BuildEventVector ( DataSetInfo& dsi,
230 DataInputHandler& dataInput,
232 EvtStatsPerClass& eventCounts);
233
236 EvtStatsPerClass& eventCounts,
237 const TString& splitMode,
238 const TString& mixMode,
239 const TString& normMode,
240 UInt_t splitSeed);
241
242 void RenormEvents ( DataSetInfo& dsi,
244 const EvtStatsPerClass& eventCounts,
245 const TString& normMode );
246
247 void InitOptions ( DataSetInfo& dsi,
248 EvtStatsPerClass& eventsmap,
249 TString& normMode, UInt_t& splitSeed,
250 TString& splitMode, TString& mixMode);
251
252
253 // ------------------------
254
255 // auxiliary functions to compute correlations
256 TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
257 TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
258 void CalcMinMax ( DataSet*, DataSetInfo& dsi );
259
260 // resets branch addresses to current event
263 void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
264 Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
265
266 // verbosity
267 Bool_t Verbose() { return fVerbose; }
268
269 // data members
270
271 // verbosity
272 Bool_t fVerbose; // Verbosity
273 TString fVerboseLevel; // VerboseLevel
274
275 // Printing
276 Bool_t fCorrelations = kFALSE; // Whether to print correlations or not
277 Bool_t fComputeCorrelations = kFALSE; // Whether to force computation of correlations or not
278
279 Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
280
281 // the event
282 TTree* fCurrentTree; // the tree, events are currently read from
283 UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
284
285 // the formulas for reading the original tree
286 std::vector<TTreeFormula*> fInputFormulas; // input variables
287 std::vector<std::pair<TTreeFormula*, Int_t>> fInputTableFormulas; //! input variables expression for arrays
288 std::vector<TTreeFormula *> fTargetFormulas; // targets
289 std::vector<TTreeFormula*> fCutFormulas; // cuts
290 std::vector<TTreeFormula*> fWeightFormula; // weights
291 std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
292
293 MsgLogger* fLogger; //! message logger
294 MsgLogger& Log() const { return *fLogger; }
295 public:
297 };
298}
299
300#endif
const Bool_t kFALSE
Definition RtypesCore.h:92
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
#define ClassDef(name, id)
Definition Rtypes.h:325
Class that contains all the data information.
Class that contains all the data information.
std::vector< TTreeFormula * > fSpectatorFormulas
std::vector< TTreeFormula * > fWeightFormula
MsgLogger & Log() const
message logger
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, than create a DataSet with one Event which uses dynamic variables (pointers to variabl...
DataSetFactory()
constructor
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
std::vector< std::pair< TTreeFormula *, Int_t > > fInputTableFormulas
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors distributes events between kTraining/kTesting/kMaxTreeType
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::vector< int > NumberPerClass
std::vector< TTreeFormula * > fInputFormulas
std::vector< EventVector > EventVectorOfClasses
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
std::vector< Double_t > ValuePerClass
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< EventStats > EvtStatsPerClass
std::vector< TTreeFormula * > fTargetFormulas
input variables expression for arrays
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
std::vector< TTreeFormula * > fCutFormulas
void ResetBranchAndEventAddresses(TTree *)
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
std::vector< Event * > EventVector
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
ostringstream derivative to redirect and format output
Definition MsgLogger.h:59
compose_binary_t(const F &_f, const G &_g, const H &_h)
F::result_type operator()(const typename G::argument_type &argG, const typename H::argument_type &argH) const
F::result_type operator()(const typename G::argument_type &argG) const
compose_unary_t(const F &_f, const G &_g)
F operator()(const F &argF) const
Mother of all ROOT objects.
Definition TObject.h:37
Basic string class.
Definition TString.h:136
Used to pass a selection expression to the Tree drawing routine.
A TTree represents a columnar dataset.
Definition TTree.h:79
#define F(x, y, z)
#define G(x, y, z)
#define H(x, y, z)
create variable transformations
compose_unary_t< F, G > compose_unary(const F &_f, const G &_g)
DeleteFunctor_t< const T > DeleteFunctor()
null_t< F > null()
compose_binary_t< F, G, H > compose_binary(const F &_f, const G &_g, const H &_h)
DeleteFunctor_t & operator()(const T *p)