Logo ROOT   6.18/05
Reference Guide
DataSetInfo.h
Go to the documentation of this file.
1// // @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSetInfo *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - DESY, Germany *
16 * *
17 * Copyright (c) 2008-2011: *
18 * CERN, Switzerland *
19 * MPI-K Heidelberg, Germany *
20 * DESY Hamburg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef ROOT_TMVA_DataSetInfo
28#define ROOT_TMVA_DataSetInfo
29
30//////////////////////////////////////////////////////////////////////////
31// //
32// DataSetInfo //
33// //
34// Class that contains all the data information //
35// //
36//////////////////////////////////////////////////////////////////////////
37
38#include <iosfwd>
39
40#include "TObject.h"
41#include "TString.h"
42#include "TTree.h"
43#include "TCut.h"
44#include "TMatrixDfwd.h"
45
46#include "TMVA/Types.h"
47#include "TMVA/VariableInfo.h"
48#include "TMVA/ClassInfo.h"
49#include "TMVA/Event.h"
50
51class TH2;
52
53namespace TMVA {
54
55 class DataSet;
56 class VariableTransformBase;
57 class MsgLogger;
58 class DataSetManager;
59
// DataSetInfo: TMVA class that contains all the data information. As declared
// below, it holds the variable, target and spectator descriptions
// (std::vector<VariableInfo>), the list of classes (std::vector<ClassInfo*>),
// the DataSet pointer and the ROOT output directory.
//
// NOTE(review): every line of this listing carries its original header line
// number, and the numbering skips in places (96, 106, 122-125, 130-132, 139,
// 161, 166, 170, 175, 178, 182, 184, 199-200, 202-205, 221). Declarations on
// the skipped lines are not shown here; consult the real header before relying
// on this text as the complete class definition.
60 class DataSetInfo : public TObject {
61
62 public:
63
64 DataSetInfo(const TString& name = "Default"); // constructor; the name defaults to "Default"
65 virtual ~DataSetInfo(); // destructor (defined out of line)
66 // Returns the character data of fName.
67 virtual const char* GetName() const { return fName.Data(); }
68
69 // the data set
70 void ClearDataSet() const;
71 DataSet* GetDataSet() const; // returns data set
72
73 // ---
74 // the variable data
75 // ---
// Add a variable (can be a complex expression) to the set of variables used
// in the MV analysis. Two forms: from individual fields, or from an existing
// VariableInfo. Both return a reference to a VariableInfo.
76 VariableInfo& AddVariable( const TString& expression, const TString& title = "", const TString& unit = "",
77 Double_t min = 0, Double_t max = 0, char varType='F',
78 Bool_t normalized = kTRUE, void* external = 0 );
79 VariableInfo& AddVariable( const VariableInfo& varInfo );
80 // Targets: same two forms as AddVariable, but title/unit/min/max have no defaults.
81 VariableInfo& AddTarget ( const TString& expression, const TString& title, const TString& unit,
82 Double_t min, Double_t max, Bool_t normalized = kTRUE, void* external = 0 );
83 VariableInfo& AddTarget ( const VariableInfo& varInfo );
84 // Spectators: add a spectator (can be a complex expression) to the set of spectator variables.
85 VariableInfo& AddSpectator ( const TString& expression, const TString& title, const TString& unit,
86 Double_t min, Double_t max, char type = 'F', Bool_t normalized = kTRUE, void* external = 0 );
87 VariableInfo& AddSpectator ( const VariableInfo& varInfo );
88
89 ClassInfo* AddClass ( const TString& className );
90
91 // accessors
92
93 // general
// The *Infos() accessors expose the underlying vectors directly (mutable and
// const overloads). The indexed accessors use std::vector::at, so the index
// is bounds-checked.
94 std::vector<VariableInfo>& GetVariableInfos() { return fVariables; }
95 const std::vector<VariableInfo>& GetVariableInfos() const { return fVariables; }
97 const VariableInfo& GetVariableInfo( Int_t i ) const { return fVariables.at(i); }
98
99 std::vector<VariableInfo>& GetTargetInfos() { return fTargets; }
100 const std::vector<VariableInfo>& GetTargetInfos() const { return fTargets; }
101 VariableInfo& GetTargetInfo( Int_t i ) { return fTargets.at(i); }
102 const VariableInfo& GetTargetInfo( Int_t i ) const { return fTargets.at(i); }
103
104 std::vector<VariableInfo>& GetSpectatorInfos() { return fSpectators; }
105 const std::vector<VariableInfo>& GetSpectatorInfos() const { return fSpectators; }
107 const VariableInfo& GetSpectatorInfo( Int_t i ) const { return fSpectators.at(i); }
108
109 // Counts: the inline versions return the size of the corresponding vector as UInt_t.
110 UInt_t GetNVariables() const { return fVariables.size(); }
111 UInt_t GetNTargets() const { return fTargets.size(); }
112 UInt_t GetNSpectators(bool all=kTRUE) const; // defined out of line; meaning of `all` is not visible here
113
114 const TString& GetNormalization() const { return fNormalization; }
115 void SetNormalization( const TString& norm ) { fNormalization = norm; }
116 // Each setter below stores its argument in the same-named f... member (members declared on lines not shown).
117 void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights){fTrainingSumSignalWeights = trainingSumSignalWeights;}
118 void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights){fTrainingSumBackgrWeights = trainingSumBackgrWeights;}
119 void SetTestingSumSignalWeights (Double_t testingSumSignalWeights ){fTestingSumSignalWeights = testingSumSignalWeights ;}
120 void SetTestingSumBackgrWeights (Double_t testingSumBackgrWeights ){fTestingSumBackgrWeights = testingSumBackgrWeights ;}
121
126
127
128
129 // classification information
133 ClassInfo* GetClassInfo( Int_t clNum ) const; // lookup by class number
134 ClassInfo* GetClassInfo( const TString& name ) const; // lookup by class name
135 void PrintClasses() const;
136 UInt_t GetNClasses() const { return fClasses.size(); } // number of entries in fClasses
137 Bool_t IsSignal( const Event* ev ) const; // NOTE(review): presumably tests the event's class against fSignalClass -- confirm in the .cxx
138 std::vector<Float_t>* GetTargetsForMulticlass( const Event* ev );
140
141 // by variable
142 Int_t FindVarIndex( const TString& ) const; // find variable by name
143
144 // weights
// Returns a copy of the weight string of class i.
// NOTE(review): dereferences the GetClassInfo result without a null check --
// confirm GetClassInfo cannot return a null pointer for an invalid index.
145 const TString GetWeightExpression(Int_t i) const { return GetClassInfo(i)->GetWeight(); }
146 void SetWeightExpression( const TString& exp, const TString& className = "" ); // className defaults to the empty string
147
148 // cuts
// Both GetCut overloads forward to ClassInfo::GetCut and, like
// GetWeightExpression, dereference the GetClassInfo result unchecked.
149 const TCut& GetCut (Int_t i) const { return GetClassInfo(i)->GetCut(); }
150 const TCut& GetCut ( const TString& className ) const { return GetClassInfo(className)->GetCut(); }
151 void SetCut ( const TCut& cut, const TString& className );
152 void AddCut ( const TCut& cut, const TString& className );
153 Bool_t HasCuts() const;
154
155 std::vector<TString> GetListOfVariables() const; // returns list of variables
156
157 // correlation matrix
158 const TMatrixD* CorrelationMatrix ( const TString& className ) const;
159 void SetCorrelationMatrix ( const TString& className, TMatrixD* matrix );
160 void PrintCorrelationMatrix( const TString& className );
// NOTE(review): the next two lines are the tail of a declaration whose first
// line (161) is missing from this listing. The member index of this page lists
// "TH2* CreateCorrelationMatrixHist(const TMatrixD* m, const TString& hName,
// const TString& hTitle) const", which matches these parameters -- confirm
// against the real header.
162 const TString& hName,
163 const TString& hTitle ) const;
164
165 // options
167 const TString& GetSplitOptions() const { return fSplitOptions; }
168
169 // root dir
171 TDirectory* GetRootDir() const { return fOwnRootDir; }
172
173 void SetMsgType( EMsgType t ) const;
174
176 private:
177
// Stores the manager pointer in fDataSetManager. It is private, so only this
// class and the friend DataSetManager declared below can call it.
179 void SetDataSetManager( DataSetManager* dsm ) { fDataSetManager = dsm; } // DSMTEST
180 friend class DataSetManager; // DSMTEST (datasetmanager test)
181
183
185
186 TString fName; // name of the dataset info object
187
188 mutable DataSet* fDataSet; // dataset, owned by this datasetinfo object
189 mutable Bool_t fNeedsRebuilding; // flag if rebuilding of dataset is needed (after change of cuts, vars, etc.)
190
191 // expressions/formulas
192 std::vector<VariableInfo> fVariables; // list of variable expressions/internal names
193 std::vector<VariableInfo> fTargets; // list of targets expressions/internal names
194 std::vector<VariableInfo> fSpectators; // list of spectators expressions/internal names
195
196 // the classes
197 mutable std::vector<ClassInfo*> fClasses; // name and other infos of the classes
198
201
206
207
208
209 TDirectory* fOwnRootDir; // ROOT output dir
210 Bool_t fVerbose; // Verbosity
211
212 UInt_t fSignalClass; // index of the class with the name signal
213
// The "//->" and "//!" markers on the next two members are left exactly as
// written: ROOT's dictionary generator reads these comment prefixes.
214 std::vector<Float_t>* fTargetsForMulticlass;//-> all targets 0 except the one with index==classNumber
215
216 mutable MsgLogger* fLogger; //! message logger
217 MsgLogger& Log() const { return *fLogger; }
218
219 public:
220 // NOTE(review): line 221 is not shown in this listing; presumably the ROOT ClassDef macro -- confirm.
222 };
223}
224
225#endif
#define d(i)
Definition: RSha256.hxx:102
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassDef(name, id)
Definition: Rtypes.h:326
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
double exp(double)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Describe directory structure in memory.
Definition: TDirectory.h:34
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
const TCut & GetCut() const
Definition: ClassInfo.h:64
const TString & GetWeight() const
Definition: ClassInfo.h:63
Class that contains all the data information.
Definition: DataSetInfo.h:60
const TString GetWeightExpression(Int_t i) const
Definition: DataSetInfo.h:145
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:94
const std::vector< VariableInfo > & GetSpectatorInfos() const
Definition: DataSetInfo.h:105
Bool_t HasCuts() const
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
const std::vector< VariableInfo > & GetVariableInfos() const
Definition: DataSetInfo.h:95
UInt_t GetNSpectators(bool all=kTRUE) const
const VariableInfo & GetVariableInfo(Int_t i) const
Definition: DataSetInfo.h:97
void SetSplitOptions(const TString &so)
Definition: DataSetInfo.h:166
DataSetInfo(const DataSetInfo &)
Definition: DataSetInfo.h:182
TDirectory * fOwnRootDir
Definition: DataSetInfo.h:209
ClassInfo * AddClass(const TString &className)
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
const TString & GetNormalization() const
Definition: DataSetInfo.h:114
const TMatrixD * CorrelationMatrix(const TString &className) const
TString fNormalization
Definition: DataSetInfo.h:199
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:104
const VariableInfo & GetTargetInfo(Int_t i) const
Definition: DataSetInfo.h:102
TDirectory * GetRootDir() const
Definition: DataSetInfo.h:171
std::vector< ClassInfo * > fClasses
Definition: DataSetInfo.h:197
Double_t fTrainingSumBackgrWeights
Definition: DataSetInfo.h:203
void SetNormalization(const TString &norm)
Definition: DataSetInfo.h:115
Int_t GetTargetNameMaxLength() const
virtual ~DataSetInfo()
destructor
Definition: DataSetInfo.cxx:88
MsgLogger & Log() const
message logger
Definition: DataSetInfo.h:217
Double_t fTestingSumSignalWeights
Definition: DataSetInfo.h:204
Double_t GetTestingSumBackgrWeights()
void SetMsgType(EMsgType t) const
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
TString fSplitOptions
Definition: DataSetInfo.h:200
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:167
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
Bool_t fNeedsRebuilding
Definition: DataSetInfo.h:189
const TCut & GetCut(const TString &className) const
Definition: DataSetInfo.h:150
void SetTestingSumSignalWeights(Double_t testingSumSignalWeights)
Definition: DataSetInfo.h:119
std::vector< VariableInfo > fSpectators
Definition: DataSetInfo.h:194
void PrintCorrelationMatrix(TTree *theTree)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a target (can be a complex expression) to the set of targets used in the MV analysis
DataSet * GetDataSet() const
returns data set
DataSetInfo(const TString &name="Default")
constructor
Definition: DataSetInfo.cxx:61
MsgLogger * fLogger
Definition: DataSetInfo.h:216
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
UInt_t GetSignalClassIndex()
Definition: DataSetInfo.h:139
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analysis
const std::vector< VariableInfo > & GetTargetInfos() const
Definition: DataSetInfo.h:100
void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights)
Definition: DataSetInfo.h:117
std::vector< TString > GetListOfVariables() const
returns list of variables
DataSet * fDataSet
Definition: DataSetInfo.h:188
ClassInfo * GetClassInfo(Int_t clNum) const
void SetTestingSumBackgrWeights(Double_t testingSumBackgrWeights)
Definition: DataSetInfo.h:120
void SetDataSetManager(DataSetManager *dsm)
Definition: DataSetInfo.h:179
Double_t GetTrainingSumSignalWeights()
Double_t fTrainingSumSignalWeights
Definition: DataSetInfo.h:202
void PrintClasses() const
Int_t GetClassNameMaxLength() const
Double_t GetTrainingSumBackgrWeights()
void PrintCorrelationMatrix(const TString &className)
calculates the correlation matrices for signal and background, prints them to standard output,...
std::vector< VariableInfo > fTargets
Definition: DataSetInfo.h:193
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:149
const VariableInfo & GetSpectatorInfo(Int_t i) const
Definition: DataSetInfo.h:107
std::vector< Float_t > * fTargetsForMulticlass
Definition: DataSetInfo.h:214
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
Double_t GetTestingSumSignalWeights()
Int_t FindVarIndex(const TString &) const
find variable by name
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:96
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:99
void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights)
Definition: DataSetInfo.h:118
std::vector< VariableInfo > fVariables
Definition: DataSetInfo.h:192
void SetRootDir(TDirectory *d)
Definition: DataSetInfo.h:170
Int_t GetVariableNameMaxLength() const
Double_t fTestingSumBackgrWeights
Definition: DataSetInfo.h:205
Bool_t IsSignal(const Event *ev) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes if class name is specified, set only for this class if cla...
DataSetManager * GetDataSetManager()
Definition: DataSetInfo.h:175
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:101
TMVA::DataSetManager * fDataSetManager
Definition: DataSetInfo.h:178
VariableInfo & GetSpectatorInfo(Int_t i)
Definition: DataSetInfo.h:106
void AddCut(const TCut &cut, const TString &className)
add a cut for the classes
std::vector< Float_t > * GetTargetsForMulticlass(const Event *ev)
void SetCorrelationMatrix(const TString &className, TMatrixD *matrix)
void ClearDataSet() const
Class that contains all the data information.
Class that contains all the data information.
Definition: DataSet.h:69
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Class for type info of MVA input variable.
Definition: VariableInfo.h:47
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
A TTree represents a columnar dataset.
Definition: TTree.h:71
create variable transformations
auto * m
Definition: textangle.C:8