Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataSetFactory.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSetFactory *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2006: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_DataSetFactory
30#define ROOT_TMVA_DataSetFactory
31
32//////////////////////////////////////////////////////////////////////////
33// //
34// DataSetFactory //
35// //
36// Class that contains all the data information //
37// //
38//////////////////////////////////////////////////////////////////////////
39
40#include <vector>
41#include <map>
42
43#include "TString.h"
44#include "TTree.h"
45#include "TCut.h"
46#include "TTreeFormula.h"
47#include "TMatrixDfwd.h"
48#include "TPrincipal.h"
49#include "TRandom3.h"
50
51#include "TMVA/Types.h"
52#include "TMVA/VariableInfo.h"
53#include "TMVA/Event.h"
54
55namespace TMVA {
56
57 class DataSet;
58 class DataSetInfo;
59 class DataInputHandler;
60 class TreeInfo;
61 class MsgLogger;
62
63 // =============== maybe move these elswhere (e.g. into the tools )
64
65 // =============== functors =======================
66
67 // delete-functor (to be used in e.g. for_each algorithm)
68 template<class T>
70 {
72 delete p;
73 return *this;
74 }
75 };
76
77 template<class T>
79 {
81 }
82
83
84 template< typename T >
85 class Increment {
87 public:
88 Increment( T start ) : value( start ){ }
90 return value++;
91 }
92 };
93
94
95
96 template <typename F>
97 class null_t
98 {
99 private:
100 // returns argF
101 public:
103 F operator()(const F& argF) const
104 {
105 return argF;
106 }
107 };
108
109 template <typename F>
110 inline null_t<F> null() {
111 return null_t<F>();
112 }
113
114 // =========================================================
115
116 class DataSetFactory:public TObject {
117
118 typedef std::vector<Event* > EventVector;
119 typedef std::vector< EventVector > EventVectorOfClasses;
120 typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
121 typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
122
123 typedef std::vector< Double_t > ValuePerClass;
124 typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
125
127 public:
143 nEvBeforeCut(0),
144 nEvAfterCut(0),
146 nWeEvAfterCut(0),
147 nNegWeights(0),
148 varAvLength(0)
149 {}
150 ~EventStats() { delete[] varAvLength; }
152 };
153
154 typedef std::vector< int > NumberPerClass;
155 typedef std::vector< EventStats > EvtStatsPerClass;
156
157 public:
158
160
162
164 protected:
165
166
169
170 // ---------- new versions
171 void BuildEventVector ( DataSetInfo& dsi,
172 DataInputHandler& dataInput,
174 EvtStatsPerClass& eventCounts);
175
178 EvtStatsPerClass& eventCounts,
179 const TString& splitMode,
180 const TString& mixMode,
181 const TString& normMode,
182 UInt_t splitSeed);
183
184 void RenormEvents ( DataSetInfo& dsi,
186 const EvtStatsPerClass& eventCounts,
187 const TString& normMode );
188
189 void InitOptions ( DataSetInfo& dsi,
190 EvtStatsPerClass& eventsmap,
191 TString& normMode, UInt_t& splitSeed,
192 TString& splitMode, TString& mixMode);
193
194
195 // ------------------------
196
197 // auxiliary functions to compute correlations
198 TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
199 TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
200 void CalcMinMax ( DataSet*, DataSetInfo& dsi );
201
202 // resets branch addresses to current event
205 void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
206 Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
207
208 // verbosity
209 Bool_t Verbose() { return fVerbose; }
210
211 // data members
212
213 // verbosity
214 Bool_t fVerbose; // Verbosity
215 TString fVerboseLevel; // VerboseLevel
216
217 // Printing
218 Bool_t fCorrelations = kFALSE; // Whether to print correlations or not
219 Bool_t fComputeCorrelations = kFALSE; // Whether to force computation of correlations or not
220
221 Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
222
223 // the event
224 TTree* fCurrentTree; // the tree, events are currently read from
225 UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
226
227 // the formulas for reading the original tree
228 std::vector<TTreeFormula*> fInputFormulas; // input variables
229 std::vector<std::pair<TTreeFormula*, Int_t>> fInputTableFormulas; //! input variables expression for arrays
230 std::vector<TTreeFormula *> fTargetFormulas; // targets
231 std::vector<TTreeFormula*> fCutFormulas; // cuts
232 std::vector<TTreeFormula*> fWeightFormula; // weights
233 std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
234
235 MsgLogger* fLogger; //! message logger
236 MsgLogger& Log() const { return *fLogger; }
237 public:
239 };
240}
241
242#endif
const Bool_t kFALSE
Definition RtypesCore.h:101
bool Bool_t
Definition RtypesCore.h:63
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
#define ClassDef(name, id)
Definition Rtypes.h:325
Class that contains all the data information.
Class that contains all the data information.
std::vector< TTreeFormula * > fSpectatorFormulas
std::vector< TTreeFormula * > fWeightFormula
MsgLogger & Log() const
message logger
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, than create a DataSet with one Event which uses dynamic variables (pointers to variabl...
DataSetFactory()
constructor
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
std::vector< std::pair< TTreeFormula *, Int_t > > fInputTableFormulas
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors distributes events between kTraining/kTesting/kMaxTreeType
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::vector< int > NumberPerClass
std::vector< TTreeFormula * > fInputFormulas
std::vector< EventVector > EventVectorOfClasses
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
std::vector< Double_t > ValuePerClass
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< EventStats > EvtStatsPerClass
std::vector< TTreeFormula * > fTargetFormulas
input variables expression for arrays
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
std::vector< TTreeFormula * > fCutFormulas
void ResetBranchAndEventAddresses(TTree *)
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
std::vector< Event * > EventVector
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
F operator()(const F &argF) const
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:136
Used to pass a selection expression to the Tree drawing routine.
A TTree represents a columnar dataset.
Definition TTree.h:79
#define F(x, y, z)
create variable transformations
DeleteFunctor_t< const T > DeleteFunctor()
null_t< F > null()
DeleteFunctor_t & operator()(const T *p)