ROOT  6.06/09
Reference Guide
DataSetInfo.h
Go to the documentation of this file.
1 // // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSetInfo *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - DESY, Germany *
16  * *
17  * Copyright (c) 2008-2011: *
18  * CERN, Switzerland *
19  * MPI-K Heidelberg, Germany *
20  * DESY Hamburg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_DataSetInfo
28 #define ROOT_TMVA_DataSetInfo
29 
30 //////////////////////////////////////////////////////////////////////////
31 // //
32 // DataSetInfo //
33 // //
34 // Class that contains all the data information //
35 // //
36 //////////////////////////////////////////////////////////////////////////
37 
38 #include <iosfwd>
39 
40 #ifndef ROOT_TObject
41 #include "TObject.h"
42 #endif
43 #ifndef ROOT_TString
44 #include "TString.h"
45 #endif
46 #ifndef ROOT_TTree
47 #include "TTree.h"
48 #endif
49 #ifndef ROOT_TCut
50 #include "TCut.h"
51 #endif
52 #ifndef ROOT_TMatrixDfwd
53 #include "TMatrixDfwd.h"
54 #endif
55 
56 #ifndef ROOT_TMVA_Types
57 #include "TMVA/Types.h"
58 #endif
59 #ifndef ROOT_TMVA_VariableInfo
60 #include "TMVA/VariableInfo.h"
61 #endif
62 #ifndef ROOT_TMVA_ClassInfo
63 #include "TMVA/ClassInfo.h"
64 #endif
65 #ifndef ROOT_TMVA_Event
66 #include "TMVA/Event.h"
67 #endif
68 
69 class TH2;
70 
71 namespace TMVA {
72 
73  class DataSet;
74  class VariableTransformBase;
75  class MsgLogger;
76  class DataSetManager;
77 
// DataSetInfo: TObject-derived description of a dataset. It stores the variable,
// target and spectator definitions (std::vector<VariableInfo>) and the list of
// classes (std::vector<ClassInfo*>), and declares accessors for per-class cuts,
// weight expressions and correlation matrices.
// NOTE(review): this is a line-numbered listing with gaps in the numbering
// (e.g. 114, 124, 133, 177, 213-220); whatever was on those lines is not shown here.
78  class DataSetInfo : public TObject {
79 
80  public:
81 
82  DataSetInfo(const TString& name = "Default");
83  virtual ~DataSetInfo();
84 
// Returns the C string held by fName.
85  virtual const char* GetName() const { return fName.Data(); }
86 
87  // the data set
88  void ClearDataSet() const;
89  DataSet* GetDataSet() const;
90 
91  // ---
92  // the variable data
93  // ---
// Each Add* function has two overloads: one built from an expression string plus
// title/unit/range, and one taking an existing VariableInfo. All return a VariableInfo&.
94  VariableInfo& AddVariable( const TString& expression, const TString& title = "", const TString& unit = "",
95  Double_t min = 0, Double_t max = 0, char varType='F',
96  Bool_t normalized = kTRUE, void* external = 0 );
97  VariableInfo& AddVariable( const VariableInfo& varInfo );
98 
99  VariableInfo& AddTarget ( const TString& expression, const TString& title, const TString& unit,
100  Double_t min, Double_t max, Bool_t normalized = kTRUE, void* external = 0 );
101  VariableInfo& AddTarget ( const VariableInfo& varInfo );
102 
103  VariableInfo& AddSpectator ( const TString& expression, const TString& title, const TString& unit,
104  Double_t min, Double_t max, char type = 'F', Bool_t normalized = kTRUE, void* external = 0 );
105  VariableInfo& AddSpectator ( const VariableInfo& varInfo );
106 
107  ClassInfo* AddClass ( const TString& className );
108 
109  // accessors
110 
111  // general
// The Get*Infos() functions expose the underlying vectors by reference.
// The indexed Get*Info(i) functions go through std::vector::at(), i.e. the index is
// range-checked by the container rather than used unchecked.
112  std::vector<VariableInfo>& GetVariableInfos() { return fVariables; }
113  const std::vector<VariableInfo>& GetVariableInfos() const { return fVariables; }
115  const VariableInfo& GetVariableInfo( Int_t i ) const { return fVariables.at(i); }
116 
117  std::vector<VariableInfo>& GetTargetInfos() { return fTargets; }
118  const std::vector<VariableInfo>& GetTargetInfos() const { return fTargets; }
119  VariableInfo& GetTargetInfo( Int_t i ) { return fTargets.at(i); }
120  const VariableInfo& GetTargetInfo( Int_t i ) const { return fTargets.at(i); }
121 
122  std::vector<VariableInfo>& GetSpectatorInfos() { return fSpectators; }
123  const std::vector<VariableInfo>& GetSpectatorInfos() const { return fSpectators; }
125  const VariableInfo& GetSpectatorInfo( Int_t i ) const { return fSpectators.at(i); }
126 
127 
// Element counts; the containers' size() result is returned as UInt_t.
128  UInt_t GetNVariables() const { return fVariables.size(); }
129  UInt_t GetNTargets() const { return fTargets.size(); }
130  UInt_t GetNSpectators(bool all=kTRUE) const;
131 
132  const TString& GetNormalization() const { return fNormalization; }
134 
// Plain setters: each stores its argument in the member of the same name.
135  void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights){fTrainingSumSignalWeights = trainingSumSignalWeights;}
136  void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights){fTrainingSumBackgrWeights = trainingSumBackgrWeights;}
137  void SetTestingSumSignalWeights (Double_t testingSumSignalWeights ){fTestingSumSignalWeights = testingSumSignalWeights ;}
138  void SetTestingSumBackgrWeights (Double_t testingSumBackgrWeights ){fTestingSumBackgrWeights = testingSumBackgrWeights ;}
139 
144 
145 
146 
147  // classification information
149  ClassInfo* GetClassInfo( Int_t clNum ) const;
150  ClassInfo* GetClassInfo( const TString& name ) const;
151  void PrintClasses() const;
152  UInt_t GetNClasses() const { return fClasses.size(); }
153  Bool_t IsSignal( const Event* ev ) const;
154  std::vector<Float_t>* GetTargetsForMulticlass( const Event* ev );
156 
157  // by variable
158  Int_t FindVarIndex( const TString& ) const;
159 
160  // weights
// Returns the weight expression of class i by value (a TString copy).
// NOTE(review): the ClassInfo* from GetClassInfo(i) is dereferenced without a null
// check here and in both GetCut overloads below — confirm GetClassInfo never returns 0
// for the indices/names callers pass.
161  const TString GetWeightExpression(Int_t i) const { return GetClassInfo(i)->GetWeight(); }
162  void SetWeightExpression( const TString& exp, const TString& className = "" );
163 
164  // cuts
165  const TCut& GetCut (Int_t i) const { return GetClassInfo(i)->GetCut(); }
166  const TCut& GetCut ( const TString& className ) const { return GetClassInfo(className)->GetCut(); }
167  void SetCut ( const TCut& cut, const TString& className );
168  void AddCut ( const TCut& cut, const TString& className );
169  Bool_t HasCuts() const;
170 
171  std::vector<TString> GetListOfVariables() const;
172 
173  // correlation matrix
174  const TMatrixD* CorrelationMatrix ( const TString& className ) const;
175  void SetCorrelationMatrix ( const TString& className, TMatrixD* matrix );
176  void PrintCorrelationMatrix( const TString& className );
// NOTE(review): the next two lines are the tail of a declaration whose first line
// (numbered 177) is missing from this listing.
178  const TString& hName,
179  const TString& hTitle ) const;
180 
181  // options
183  const TString& GetSplitOptions() const { return fSplitOptions; }
184 
185  // root dir
187  TDirectory* GetRootDir() const { return fOwnRootDir; }
188 
189  void SetMsgType( EMsgType t ) const;
190 
191  private:
192 
// Private setter for fDataSetManager; DataSetManager is declared a friend on the next line.
194  void SetDataSetManager( DataSetManager* dsm ) { fDataSetManager = dsm; } // DSMTEST
195  friend class DataSetManager; // DSMTEST (datasetmanager test)
196 
// Private copy constructor with an empty body: it initialises only the TObject base
// and copies none of the data members.
197  DataSetInfo( const DataSetInfo& ) : TObject() {}
198 
199  void PrintCorrelationMatrix( TTree* theTree );
200 
// NOTE(review): Doxygen reads a `//!` comment as documenting the declaration that
// follows it; its after-member form is `//!<`. The descriptions below trail the member
// they describe — confirm whether the marker can be changed without affecting other tooling.
201  TString fName; //! name of the dataset info object
202 
203  mutable DataSet* fDataSet; //! dataset, owned by this datasetinfo object
204  mutable Bool_t fNeedsRebuilding; //! flag if rebuilding of dataset is needed (after change of cuts, vars, etc.)
205 
206  // expressions/formulas
207  std::vector<VariableInfo> fVariables; //! list of variable expressions/internal names
208  std::vector<VariableInfo> fTargets; //! list of targets expressions/internal names
209  std::vector<VariableInfo> fSpectators; //! list of spectators expressions/internal names
210 
211  // the classes
212  mutable std::vector<ClassInfo*> fClasses; //! name and other infos of the classes
213 
216 
221 
222 
223 
224  TDirectory* fOwnRootDir; //! ROOT output dir
225  Bool_t fVerbose; //! Verbosity
226 
227  UInt_t fSignalClass; //! index of the class with the name signal
228 
229  std::vector<Float_t>* fTargetsForMulticlass; //! all targets 0 except the one with index==classNumber
230 
231  mutable MsgLogger* fLogger; //! message logger
// Dereferences fLogger without a null check.
232  MsgLogger& Log() const { return *fLogger; }
233 
234 
235  };
236 }
237 
238 #endif
void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights)
Definition: DataSetInfo.h:136
static Vc_ALWAYS_INLINE int_v min(const int_v &x, const int_v &y)
Definition: vector.h:433
Bool_t fVerbose
Verbosity.
Definition: DataSetInfo.h:225
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a target (can be a complex expression) to the set of targets used in the MV analysis ...
const TString GetWeightExpression(Int_t i) const
Definition: DataSetInfo.h:161
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:122
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
Int_t GetClassNameMaxLength() const
UInt_t GetNClasses() const
Definition: DataSetInfo.h:152
const TCut & GetCut(const TString &className) const
Definition: DataSetInfo.h:166
std::vector< VariableInfo > fTargets
list of targets expressions/internal names
Definition: DataSetInfo.h:208
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights)
Definition: DataSetInfo.h:135
Basic string class.
Definition: TString.h:137
std::vector< ClassInfo * > fClasses
name and other infos of the classes
Definition: DataSetInfo.h:212
void SetTestingSumBackgrWeights(Double_t testingSumBackgrWeights)
Definition: DataSetInfo.h:138
const TString & GetWeight() const
Definition: ClassInfo.h:73
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< VariableInfo > fSpectators
list of spectators expressions/internal names
Definition: DataSetInfo.h:209
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
std::vector< VariableInfo > fVariables
list of variable expressions/internal names
Definition: DataSetInfo.h:207
void SetDataSetManager(DataSetManager *dsm)
Definition: DataSetInfo.h:194
TString fSplitOptions
Definition: DataSetInfo.h:215
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
Double_t GetTrainingSumSignalWeights()
DataSet * GetDataSet() const
returns data set
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
const char * Data() const
Definition: TString.h:349
DataSet * fDataSet
dataset, owned by this datasetinfo object
Definition: DataSetInfo.h:203
const VariableInfo & GetSpectatorInfo(Int_t i) const
Definition: DataSetInfo.h:125
Bool_t IsSignal(const Event *ev) const
virtual ~DataSetInfo()
destructor
Definition: DataSetInfo.cxx:90
Double_t fTrainingSumSignalWeights
Definition: DataSetInfo.h:217
TDirectory * GetRootDir() const
Definition: DataSetInfo.h:187
const TString & GetNormalization() const
Definition: DataSetInfo.h:132
void SetTestingSumSignalWeights(Double_t testingSumSignalWeights)
Definition: DataSetInfo.h:137
void PrintCorrelationMatrix(const TString &className)
calculates the correlation matrices for signal and background, prints them to standard output...
const VariableInfo & GetVariableInfo(Int_t i) const
Definition: DataSetInfo.h:115
Double_t fTestingSumSignalWeights
Definition: DataSetInfo.h:219
const std::vector< VariableInfo > & GetTargetInfos() const
Definition: DataSetInfo.h:118
DataSetInfo(const DataSetInfo &)
Definition: DataSetInfo.h:197
TString fNormalization
name and other infos of the classes
Definition: DataSetInfo.h:214
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:117
DataSetInfo(const TString &name="Default")
constructor
Definition: DataSetInfo.cxx:63
A specialized string object used for TTree selections.
Definition: TCut.h:27
void SetMsgType(EMsgType t) const
void SetCorrelationMatrix(const TString &className, TMatrixD *matrix)
void SetSplitOptions(const TString &so)
Definition: DataSetInfo.h:182
MsgLogger * fLogger
message logger
Definition: DataSetInfo.h:231
TDirectory * fOwnRootDir
Definition: DataSetInfo.h:224
Double_t fTrainingSumBackgrWeights
Definition: DataSetInfo.h:218
Double_t GetTestingSumSignalWeights()
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
const std::vector< VariableInfo > & GetVariableInfos() const
Definition: DataSetInfo.h:113
VariableInfo & GetTargetInfo(Int_t i)
Definition: DataSetInfo.h:119
ClassInfo * GetClassInfo(Int_t clNum) const
EMsgType
Definition: Types.h:61
const std::vector< VariableInfo > & GetSpectatorInfos() const
Definition: DataSetInfo.h:123
const TMatrixD * CorrelationMatrix(const TString &className) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes if class name is specified, set only for this class if cla...
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
UInt_t GetSignalClassIndex()
Definition: DataSetInfo.h:155
const TCut & GetCut(Int_t i) const
Definition: DataSetInfo.h:165
const TCut & GetCut() const
Definition: ClassInfo.h:74
void PrintClasses() const
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
const TString & GetSplitOptions() const
Definition: DataSetInfo.h:183
MsgLogger & Log() const
message logger
Definition: DataSetInfo.h:232
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:41
Double_t GetTrainingSumBackgrWeights()
int type
Definition: TGX11.cxx:120
VariableInfo & GetSpectatorInfo(Int_t i)
Definition: DataSetInfo.h:124
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:114
UInt_t fSignalClass
index of the class with the name signal
Definition: DataSetInfo.h:227
void ClearDataSet() const
ClassInfo * AddClass(const TString &className)
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
#define name(a, b)
Definition: linkTestLib0.cpp:5
Mother of all ROOT objects.
Definition: TObject.h:58
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< Float_t > * fTargetsForMulticlass
all targets 0 except the one with index==classNumber
Definition: DataSetInfo.h:229
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type= 'F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analys...
TMVA::DataSetManager * fDataSetManager
Definition: DataSetInfo.h:193
UInt_t GetNSpectators(bool all=kTRUE) const
Int_t FindVarIndex(const TString &) const
find variable by name
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
std::vector< TString > GetListOfVariables() const
returns list of variables
A TTree object has a header with a name and a title.
Definition: TTree.h:94
Bool_t fNeedsRebuilding
flag if rebuilding of dataset is needed (after change of cuts, vars, etc.)
Definition: DataSetInfo.h:204
Double_t fTestingSumBackgrWeights
Definition: DataSetInfo.h:220
double exp(double)
const VariableInfo & GetTargetInfo(Int_t i) const
Definition: DataSetInfo.h:120
const Bool_t kTRUE
Definition: Rtypes.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
Bool_t HasCuts() const
void SetNormalization(const TString &norm)
Definition: DataSetInfo.h:133
void SetRootDir(TDirectory *d)
Definition: DataSetInfo.h:186
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:112
Double_t GetTestingSumBackgrWeights()
std::vector< Float_t > * GetTargetsForMulticlass(const Event *ev)