Logo ROOT   6.07/09
Reference Guide
DataSet.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2006: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSet
30 #define ROOT_TMVA_DataSet
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSet //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <map>
42 #include <string>
43 
44 #ifndef ROOT_TObject
45 #include "TObject.h"
46 #endif
47 #ifndef ROOT_TNamed
48 #include "TNamed.h"
49 #endif
50 #ifndef ROOT_TString
51 #include "TString.h"
52 #endif
53 #ifndef ROOT_TTree
54 #include "TTree.h"
55 #endif
56 //#ifndef ROOT_TCut
57 //#include "TCut.h"
58 //#endif
59 //#ifndef ROOT_TMatrixDfwd
60 //#include "TMatrixDfwd.h"
61 //#endif
62 //#ifndef ROOT_TPrincipal
63 //#include "TPrincipal.h"
64 //#endif
65 #ifndef ROOT_TRandom3
66 #include "TRandom3.h"
67 #endif
68 
69 #ifndef ROOT_TMVA_Types
70 #include "TMVA/Types.h"
71 #endif
72 #ifndef ROOT_TMVA_VariableInfo
73 #include "TMVA/VariableInfo.h"
74 #endif
75 
76 namespace TMVA {
77 
78  class Event;
79  class DataSetInfo;
80  class MsgLogger;
81  class Results;
82 
 // DataSet: TNamed-derived container for the events of a TMVA data set. Events are
 // kept as one collection per tree type (training/testing/...); the class also
 // holds per-method Results, event-sampling state and per-class event counters
 // (see the member comments below).
83  class DataSet :public TNamed {
84 
85  public:
86  DataSet();
87  DataSet(const DataSetInfo&);
88  virtual ~DataSet();
89 
90  void AddEvent( Event *, Types::ETreeType );
91 
95 
96  // const getters
 // NOTE: the indexed GetEvent overloads are const but still update the current
 // event index (and, for the two-argument form, the current tree index) before
 // delegating to GetEvent(); the selection therefore persists after the call.
 // NOTE(review): this requires fCurrentEventIdx / fCurrentTreeIdx to be declared
 // mutable - their declarations are not visible in this listing; confirm.
97  const Event* GetEvent() const; // returns event without transformations
98  const Event* GetEvent ( Long64_t ievt ) const { fCurrentEventIdx = ievt; return GetEvent(); } // returns event without transformations
99  const Event* GetTrainingEvent( Long64_t ievt ) const { return GetEvent(ievt, Types::kTraining); }
100  const Event* GetTestEvent ( Long64_t ievt ) const { return GetEvent(ievt, Types::kTesting); }
101  const Event* GetEvent ( Long64_t ievt, Types::ETreeType type ) const
102  {
103  fCurrentTreeIdx = TreeIndex(type); fCurrentEventIdx = ievt; return GetEvent();
104  }
105 
106 
107 
108 
109  UInt_t GetNVariables() const;
110  UInt_t GetNTargets() const;
111  UInt_t GetNSpectators() const;
112 
 // selects the event that a subsequent GetEvent() call refers to
113  void SetCurrentEvent( Long64_t ievt ) const { fCurrentEventIdx = ievt; }
116 
117  void SetEventCollection( std::vector<Event*>*, Types::ETreeType, Bool_t deleteEvents = true );
 // with the default argument (Types::kMaxTreeType) the collection is chosen via TreeIndex(type)
118  const std::vector<Event*>& GetEventCollection( Types::ETreeType type = Types::kMaxTreeType ) const;
120 
125 
127 
128  Results* GetResults ( const TString &,
130  Types::EAnalysisType analysistype );
131  void DeleteResults ( const TString &,
132  Types::ETreeType type,
133  Types::EAnalysisType analysistype );
134 
 // no-op: the flag is accepted and ignored
135  void SetVerbose( Bool_t ) {}
136 
137  // sets the number of blocks to which the training set is divided,
138  // some of which are given to the Validation sample. As default they belong all to Training set.
139  void DivideTrainingSet( UInt_t blockNum );
140 
141  // sets a certain block from the original training set to belong to either Training or Validation set
142  void MoveTrainingBlock( Int_t blockInd,Types::ETreeType dest, Bool_t applyChanges = kTRUE );
143 
144  void IncrementNClassEvents( Int_t type, UInt_t classNumber );
145  Long64_t GetNClassEvents ( Int_t type, UInt_t classNumber );
146  void ClearNClassEvents ( Int_t type );
147 
148  TTree* GetTree( Types::ETreeType type );
149 
150  // accessors for random and importance sampling
151  void InitSampling( Float_t fraction, Float_t weight, UInt_t seed = 0 );
152  void EventResult( Bool_t successful, Long64_t evtNumber = -1 );
153  void CreateSampling() const;
154 
 // maps a tree type to the index used for the per-tree containers below
155  UInt_t TreeIndex(Types::ETreeType type) const;
156 
157  private:
158 
159  // private helper and data members
160  void DestroyCollection( Types::ETreeType type, Bool_t deleteEvents );
161 
 // NOTE(review): the trailing comments starting with "//->" and "//!" look like ROOT
 // dictionary I/O annotations (the class uses ClassDef) - keep them verbatim.
162  const DataSetInfo *fdsi; //-> datasetinfo that created this dataset
163 
164  std::vector< std::vector<Event*> > fEventCollection; // list of events for training/testing/...
165 
166  std::vector< std::map< TString, Results* > > fResults; //! [train/test/...][method-identifier]
167 
170 
171  // event sampling
172  std::vector<Char_t> fSampling; // random or importance sampling (not all events are taken) !! Bool_t are stored ( no std::vector<bool> taken for speed (performance) issues )
173  std::vector<Int_t> fSamplingNEvents; // number of events which should be sampled
174  std::vector<Float_t> fSamplingWeight; // weight change factor [weight is indicating if sampling is random (1.0) or importance (<1.0)]
175  mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList; // weights and indices for sampling
176  mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected; // selected events
177  TRandom3 *fSamplingRandom; //-> random generator for sampling
178 
179 
180  // further things
181  std::vector< std::vector<Long64_t> > fClassEvents; // number of events of class 0,1,2,... in training[0]
182  // and testing[1] (+validation, trainingoriginal)
183 
184  Bool_t fHasNegativeEventWeights; // true if at least one signal or bkg event has negative weight
185 
186  mutable MsgLogger* fLogger; //-> message logger
 // returns the logger by reference; dereferences fLogger without a null check
187  MsgLogger& Log() const { return *fLogger; }
188  std::vector<Char_t> fBlockBelongToTraining; // when dividing the dataset to blocks, sets whether
189  // the certain block is in the Training set or else
190  // in the validation set
191  // boolean are stored, taken std::vector<Char_t> for performance reasons (instead of std::vector<Bool_t>)
192  Long64_t fTrainingBlockSize; // block size into which the training dataset is divided
193 
196  public:
197 
198  ClassDef(DataSet,1);
199  };
200 }
201 
202 
203 //_______________________________________________________________________
205 {
206  switch (type) {
207  case Types::kMaxTreeType : return fCurrentTreeIdx;
208  case Types::kTraining : return 0;
209  case Types::kTesting : return 1;
210  case Types::kValidation : return 2;
211  case Types::kTrainingOriginal : return 3;
212  default : return fCurrentTreeIdx;
213  }
214 }
215 
216 //_______________________________________________________________________
218 {
219  switch (fCurrentTreeIdx) {
220  case 0: return Types::kTraining;
221  case 1: return Types::kTesting;
222  case 2: return Types::kValidation;
223  case 3: return Types::kTrainingOriginal;
224  }
225  return Types::kMaxTreeType;
226 }
227 
228 //_______________________________________________________________________
230 {
231  Int_t treeIdx = TreeIndex(type);
232  if (fSampling.size() > UInt_t(treeIdx) && fSampling.at(treeIdx)) {
233  return fSamplingSelected.at(treeIdx).size();
234  }
235  return GetEventCollection(type).size();
236 }
237 
238 //_______________________________________________________________________
// Returns a const reference to the event collection stored for the given tree type.
// The type is translated to a container index with TreeIndex(type) and the lookup
// uses the bounds-checked std::vector::at(), so an index beyond the size of
// fEventCollection raises std::out_of_range instead of reading past the end.
239 inline const std::vector<TMVA::Event*>& TMVA::DataSet::GetEventCollection( TMVA::Types::ETreeType type ) const
240 {
241  return fEventCollection.at(TreeIndex(type));
242 }
243 
244 
245 #endif
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition: DataSet.cxx:241
Long64_t GetNTestEvents() const
Definition: DataSet.h:94
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
Definition: TRandom3.h:29
long long Long64_t
Definition: RtypesCore.h:69
Long64_t fTrainingBlockSize
Definition: DataSet.h:192
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:176
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:250
float Float_t
Definition: RtypesCore.h:53
MsgLogger & Log() const
Definition: DataSet.h:187
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:175
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:164
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: DataSet.h:99
TRandom3 * fSamplingRandom
Definition: DataSet.h:177
EAnalysisType
Definition: Types.h:128
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:204
const Event * GetEvent(Long64_t ievt) const
Definition: DataSet.h:98
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:188
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:169
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:443
const Event * GetTestEvent(Long64_t ievt) const
Definition: DataSet.h:100
#define ClassDef(name, id)
Definition: Rtypes.h:254
Bool_t HasNegativeEventWeights() const
Definition: DataSet.h:126
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:33
virtual ~DataSet()
destructor
Definition: DataSet.cxx:132
TTree * GetTree(Types::ETreeType type)
create the test/training tree with all the variables, the weights, the classes, the targets...
Definition: DataSet.cxx:602
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:113
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:184
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:407
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:387
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:181
#define dest(otri, vertexptr)
Definition: triangle.c:1040
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:419
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
Definition: DataSet.cxx:337
unsigned int UInt_t
Definition: RtypesCore.h:42
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:363
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:198
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:217
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:259
const DataSetInfo * fdsi
Definition: DataSet.h:162
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:427
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:501
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:160
const Event * GetEvent() const
Definition: DataSet.cxx:211
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
std::vector< Char_t > fSampling
Definition: DataSet.h:172
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:565
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:174
Long64_t fCurrentEventIdx
Definition: DataSet.h:169
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:168
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:435
int type
Definition: TGX11.cxx:120
void SetVerbose(Bool_t)
Definition: DataSet.h:135
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:233
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:225
void ApplyTrainingBlockDivision()
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:173
const Event * GetEvent(Long64_t ievt, Types::ETreeType type) const
Definition: DataSet.h:101
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:177
MsgLogger * fLogger
Definition: DataSet.h:186
Abstract ClassifierFactory template that handles arbitrary types.
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:239
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
A TTree object has a header with a name and a title.
Definition: TTree.h:98
const TTree * GetEventCollectionAsTree()
const Bool_t kTRUE
Definition: Rtypes.h:91
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:451
std::vector< std::map< TString, Results * > > fResults
Definition: DataSet.h:166