Logo ROOT   6.18/05
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * Web : http://tmva.sourceforge.net *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TFile.h"
36#include "TTree.h"
37#include "TH2.h"
38#include "TMath.h"
39#include "TMatrixD.h"
40
41#include "TMVA/DataLoader.h"
42#include "TMVA/Config.h"
43#include "TMVA/CvSplit.h"
44#include "TMVA/Tools.h"
45#include "TMVA/IMethod.h"
46#include "TMVA/MethodBase.h"
48#include "TMVA/DataSetManager.h"
49#include "TMVA/DataSetInfo.h"
50#include "TMVA/MethodBoost.h"
51#include "TMVA/MethodCategory.h"
52
53#include "TMVA/VariableInfo.h"
60
62
63
64////////////////////////////////////////////////////////////////////////////////
65
67: Configurable( ),
68 fDataSetManager ( NULL ), //DSMTEST
69 fDataInputHandler ( new DataInputHandler ),
70 fTransformations ( "I" ),
71 fVerbose ( kFALSE ),
72 fDataAssignType ( kAssignEvents ),
73 fATreeEvent (0)
74{
76 SetName(thedlName.Data());
77 fLogger->SetSource("DataLoader");
78}
79
80////////////////////////////////////////////////////////////////////////////////
81
83{
84 // destructor
85
86 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
87 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
88
89 delete fDataInputHandler;
90
91 // destroy singletons
92 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
93 delete fDataSetManager; // DSMTEST
94
95 // problem with call of REGISTER_METHOD macro ...
96 // ClassifierDataLoader::DestroyInstance();
97 // Types::DestroyInstance();
98 //Tools::DestroyInstance();
99 //Config::DestroyInstance();
100}
101
102
103////////////////////////////////////////////////////////////////////////////////
104
106{
107 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
108}
109
110////////////////////////////////////////////////////////////////////////////////
111
113{
114 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
115
116 if (dsi!=0) return *dsi;
117
118 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
119}
120
121////////////////////////////////////////////////////////////////////////////////
122
124{
125 return DefaultDataSetInfo(); // DSMTEST
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Transforms the variables and return a new DataLoader with the transformed
130/// variables
131
133{
134 TString trOptions = "0";
135 TString trName = "None";
136 if (trafoDefinition.Contains("(")) {
137
138 // contains transformation parameters
139 Ssiz_t parStart = trafoDefinition.Index( "(" );
140 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
141
142 trName = trafoDefinition(0,parStart);
143 trOptions = trafoDefinition(parStart,parLen);
144 trOptions.Remove(parLen-1,1);
145 trOptions.Remove(0,1);
146 }
147 else
148 trName = trafoDefinition;
149
150 VarTransformHandler* handler = new VarTransformHandler(this);
151 // variance threshold variable transformation
152 if (trName == "VT") {
153
154 // find threshold value from given input
155 Double_t threshold = 0.0;
156 if (!trOptions.IsFloat()){
157 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
158 delete handler;
159 return this;
160 }
161 else
162 threshold = trOptions.Atof();
163 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
164 delete handler;
165 return transformedLoader;
166 }
167 else {
168 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
169 }
170 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
171 return this;
172}
173
174////////////////////////////////////////////////////////////////////////////////
175// the next functions are to assign events directly
176
177////////////////////////////////////////////////////////////////////////////////
178/// create the data assignment tree (for event-wise data assignment by user)
179
181{
182 TTree * assignTree = new TTree( name, name );
183 assignTree->SetDirectory(0);
184 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
185 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
186
187 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
188 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
189 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
190
191 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
192 // add variables
193 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
194 TString vname = vars[ivar].GetExpression();
195 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
196 }
197 // add targets
198 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
199 TString vname = tgts[itgt].GetExpression();
200 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
201 }
202 // add spectators
203 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
204 TString vname = spec[ispc].GetExpression();
205 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
206 }
207 return assignTree;
208}
209
210////////////////////////////////////////////////////////////////////////////////
211/// add signal training event
212
213void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
214{
215 AddEvent( "Signal", Types::kTraining, event, weight );
216}
217
218////////////////////////////////////////////////////////////////////////////////
219/// add signal testing event
220
221void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
222{
223 AddEvent( "Signal", Types::kTesting, event, weight );
224}
225
226////////////////////////////////////////////////////////////////////////////////
227/// add signal training event
228
229void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
230{
231 AddEvent( "Background", Types::kTraining, event, weight );
232}
233
234////////////////////////////////////////////////////////////////////////////////
235/// add signal training event
236
237void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
238{
239 AddEvent( "Background", Types::kTesting, event, weight );
240}
241
242////////////////////////////////////////////////////////////////////////////////
243/// add signal training event
244
245void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
246{
247 AddEvent( className, Types::kTraining, event, weight );
248}
249
250////////////////////////////////////////////////////////////////////////////////
251/// add signal test event
252
253void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
254{
255 AddEvent( className, Types::kTesting, event, weight );
256}
257
258////////////////////////////////////////////////////////////////////////////////
259/// add event
260/// vector event : the order of values is: variables + targets + spectators
261
263 const std::vector<Double_t>& event, Double_t weight )
264{
265 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
266 UInt_t clIndex = theClass->GetNumber();
267
268
269 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
270 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
271 fAnalysisType = Types::kMulticlass;
272
273
274 if (clIndex>=fTrainAssignTree.size()) {
275 fTrainAssignTree.resize(clIndex+1, 0);
276 fTestAssignTree.resize(clIndex+1, 0);
277 }
278
279 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
280 fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
281 fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
282 }
283
284 fATreeType = clIndex;
285 fATreeWeight = weight;
286 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
287
288 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
289 else fTestAssignTree[clIndex]->Fill();
290
291}
292
293////////////////////////////////////////////////////////////////////////////////
294///
295
297{
298 return fTrainAssignTree[clIndex]!=0;
299}
300
301////////////////////////////////////////////////////////////////////////////////
302/// assign event-wise local trees to data set
303
305{
306 UInt_t size = fTrainAssignTree.size();
307 for(UInt_t i=0; i<size; i++) {
308 if(!UserAssignEvents(i)) continue;
309 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
310 SetWeightExpression( "weight", className );
311 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
312 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
313 }
314}
315
316////////////////////////////////////////////////////////////////////////////////
317/// number of signal events (used to compute significance)
318
319void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
320 const TCut& cut, const TString& treetype )
321{
323 TString tmpTreeType = treetype; tmpTreeType.ToLower();
324 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
325 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
326 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
327 else {
328 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
329 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
330 }
331 AddTree( tree, className, weight, cut, tt );
332}
333
334////////////////////////////////////////////////////////////////////////////////
335
336void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
337 const TCut& cut, Types::ETreeType tt )
338{
339 if(!tree)
340 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
341
342 DefaultDataSetInfo().AddClass( className );
343
344 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
345 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
346 fAnalysisType = Types::kMulticlass;
347
348 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
349 << " with " << tree->GetEntries() << " events" << Endl;
350 DataInput().AddTree( tree, className, weight, cut, tt );
351}
352
353////////////////////////////////////////////////////////////////////////////////
354/// number of signal events (used to compute significance)
355
357{
358 AddTree( signal, "Signal", weight, TCut(""), treetype );
359}
360
361////////////////////////////////////////////////////////////////////////////////
362/// add signal tree from text file
363
365{
366 // create trees from these ascii files
367 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
368 signalTree->ReadFile( datFileS );
369
370 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
371 << datFileS << Endl;
372
373 // number of signal events (used to compute significance)
374 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
375}
376
377////////////////////////////////////////////////////////////////////////////////
378
379void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
380{
381 AddTree( signal, "Signal", weight, TCut(""), treetype );
382}
383
384////////////////////////////////////////////////////////////////////////////////
385/// number of signal events (used to compute significance)
386
388{
389 AddTree( signal, "Background", weight, TCut(""), treetype );
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// add background tree from text file
394
396{
397 // create trees from these ascii files
398 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
399 bkgTree->ReadFile( datFileB );
400
401 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
402 << datFileB << Endl;
403
404 // number of signal events (used to compute significance)
405 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
406}
407
408////////////////////////////////////////////////////////////////////////////////
409
410void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
411{
412 AddTree( signal, "Background", weight, TCut(""), treetype );
413}
414
415////////////////////////////////////////////////////////////////////////////////
416
418{
419 AddTree( tree, "Signal", weight );
420}
421
422////////////////////////////////////////////////////////////////////////////////
423
425{
426 AddTree( tree, "Background", weight );
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// set background tree
431
432void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
433{
434 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// define the input trees for signal and background; no cuts are applied
439
440void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
441 Double_t signalWeight, Double_t backgroundWeight )
442{
443 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
444 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
445}
446
447////////////////////////////////////////////////////////////////////////////////
448
449void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
450 Double_t signalWeight, Double_t backgroundWeight )
451{
452 DataInput().AddTree( datFileS, "Signal", signalWeight );
453 DataInput().AddTree( datFileB, "Background", backgroundWeight );
454}
455
456////////////////////////////////////////////////////////////////////////////////
457/// define the input trees for signal and background from single input tree,
458/// containing both signal and background events distinguished by the type
459/// identifiers: SigCut and BgCut
460
461void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
462{
463 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
464 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
465}
466
467////////////////////////////////////////////////////////////////////////////////
468/// user inserts discriminating variable in data set info
469
470void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
471 char type, Double_t min, Double_t max )
472{
473 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
474}
475
476////////////////////////////////////////////////////////////////////////////////
477/// user inserts discriminating variable in data set info
478
479void TMVA::DataLoader::AddVariable( const TString& expression, char type,
480 Double_t min, Double_t max )
481{
482 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
483}
484
485////////////////////////////////////////////////////////////////////////////////
486/// user inserts target in data set info
487
488void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
489 Double_t min, Double_t max )
490{
491 if( fAnalysisType == Types::kNoAnalysisType )
492 fAnalysisType = Types::kRegression;
493
494 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
495}
496
497////////////////////////////////////////////////////////////////////////////////
498/// user inserts target in data set info
499
500void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
501 Double_t min, Double_t max )
502{
503 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
504}
505
506////////////////////////////////////////////////////////////////////////////////
507/// default creation
508
510{
511 return AddDataSet( fName );
512}
513
514////////////////////////////////////////////////////////////////////////////////
515/// fill input variables in data set
516
517void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
518{
519 for (std::vector<TString>::iterator it=theVariables->begin();
520 it!=theVariables->end(); ++it) AddVariable(*it);
521}
522
523////////////////////////////////////////////////////////////////////////////////
524
526{
527 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
528}
529
530////////////////////////////////////////////////////////////////////////////////
531
533{
534 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
535}
536
537////////////////////////////////////////////////////////////////////////////////
538
539void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
540{
541 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
542 if (className=="") {
543 SetSignalWeightExpression(variable);
544 SetBackgroundWeightExpression(variable);
545 }
546 else DefaultDataSetInfo().SetWeightExpression( variable, className );
547}
548
549////////////////////////////////////////////////////////////////////////////////
550
551void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
552 SetCut( TCut(cut), className );
553}
554
555////////////////////////////////////////////////////////////////////////////////
556
557void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
558{
559 DefaultDataSetInfo().SetCut( cut, className );
560}
561
562////////////////////////////////////////////////////////////////////////////////
563
564void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
565{
566 AddCut( TCut(cut), className );
567}
568
569////////////////////////////////////////////////////////////////////////////////
570void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
571{
572 DefaultDataSetInfo().AddCut( cut, className );
573}
574
575////////////////////////////////////////////////////////////////////////////////
576/// prepare the training and test trees
577
579 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
580 const TString& otherOpt )
581{
582 SetInputTreesFromEventAssignTrees();
583
584 AddCut( cut );
585
586 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
587 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// prepare the training and test trees
592/// kept for backward compatibility
593
595{
596 SetInputTreesFromEventAssignTrees();
597
598 AddCut( cut );
599
600 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
601 Ntrain, Ntrain, Ntest, Ntest) );
602}
603
604////////////////////////////////////////////////////////////////////////////////
605/// prepare the training and test trees
606/// -> same cuts for signal and background
607
609{
610 SetInputTreesFromEventAssignTrees();
611
612 DefaultDataSetInfo().PrintClasses();
613 AddCut( cut );
614 DefaultDataSetInfo().SetSplitOptions( opt );
615}
616
617////////////////////////////////////////////////////////////////////////////////
618/// prepare the training and test trees
619
620void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
621{
622 // if event-wise data assignment, add local trees to dataset first
623 SetInputTreesFromEventAssignTrees();
624
625 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
626 AddCut( sigcut, "Signal" );
627 AddCut( bkgcut, "Background" );
628
629 DefaultDataSetInfo().SetSplitOptions( splitOpt );
630}
631
632////////////////////////////////////////////////////////////////////////////////
633/// Function required to split the training and testing datasets into a
634/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
635/// classes. The option to split the training dataset into a training set and
636/// a validation set is implemented but not currently used.
637
639{
640 s.MakeKFoldDataSet( DefaultDataSetInfo() );
641}
642
643////////////////////////////////////////////////////////////////////////////////
644/// Function for assigning the correct folds to the testing or training set.
645
647{
648 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
649}
650
651
652////////////////////////////////////////////////////////////////////////////////
653/// Recombines the dataset. The precise semantics depend on the actual split.
654///
655/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
656/// See documentation for each particular split for more information.
657///
658
660{
661 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
662}
663
664////////////////////////////////////////////////////////////////////////////////
665/// Copy method use in VI and CV
666
668{
670 DataLoaderCopy(des,this);
671 return des;
672}
673
674////////////////////////////////////////////////////////////////////////////////
675///Loading Dataset from DataInputHandler for subseed
676
678{
679 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
680 {
681 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
682 }
683
684 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
685 {
686 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
687 }
688}
689
690////////////////////////////////////////////////////////////////////////////////
691/// returns the correlation matrix of datasets
692
694{
695 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
696 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
697 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
698}
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
std::vector< TreeInfo >::const_iterator Send() const
std::vector< TreeInfo >::const_iterator Sbegin() const
std::vector< TreeInfo >::const_iterator Bbegin() const
std::vector< TreeInfo >::const_iterator Bend() const
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:184
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:180
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:424
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:356
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:105
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:500
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:304
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:245
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:432
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:221
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:509
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:237
DataSetManager * fDataSetManager
Definition: DataLoader.h:181
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:667
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:525
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:638
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:539
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:229
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:659
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:132
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:532
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:564
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:262
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:66
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:608
DataInputHandler & DataInput()
Definition: DataLoader.h:167
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:387
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:123
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:488
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:693
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:296
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:213
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:253
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:417
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:449
virtual ~DataLoader()
Definition: DataLoader.cxx:82
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:336
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:517
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:551
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:470
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:646
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kRegression
Definition: Types.h:129
@ kMaxTreeType
Definition: Types.h:146
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
const char * Data() const
Definition: TString.h:364
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A TTree represents a columnar dataset.
Definition: TTree.h:71
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8622
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7301
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1741
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:748
Definition: tree.py:1
auto * m
Definition: textangle.C:8
auto * tt
Definition: textangle.C:16