Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (see tmva/doc/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TTree.h"
36#include "TH2.h"
37#include "TMatrixD.h"
38
39#include "TMVA/DataLoader.h"
40#include "TMVA/Config.h"
41#include "TMVA/CvSplit.h"
42#include "TMVA/Tools.h"
43#include "TMVA/IMethod.h"
44#include "TMVA/MethodBase.h"
46#include "TMVA/DataSetManager.h"
47#include "TMVA/DataSetInfo.h"
48#include "TMVA/MethodBoost.h"
49#include "TMVA/MethodCategory.h"
50
51#include "TMVA/VariableInfo.h"
58
60
61////////////////////////////////////////////////////////////////////////////////
62/*** Create a data loader
63 \param[in] thedlName name of DataLoader object. This name will be used as the
64 top directory name where the training results
65 (weights, i.e .XML and .C files) will be stored.
66 The results will be stored by default in the `theDlName/weights`
67 directory and relative to the current directory. If the directory is not existing,
68 a new one will be created automatically.
69 For using a different location (i.e. a different path to the current directory) one
70 can set an absolute path location in `TMVA::gConfig()::GetIONames().fWeightFileDirPrefix`
71 For example, by setting
72~~~~~~~~~~~~~~~{.cpp}
73 TMVA::gConfig()::GetIONames().fWeightFileDirPrefix = "/tmp";
74 TMVA::gConfig()::GetIONames().fWeightFileDir = "myTrainingResults";
75~~~~~~~~~~~~~~~
76 The training results will be stored in the `/tmp/thedlName/myTrainingResults`
77 directory.
78**/
79
81: Configurable( ),
82 fDataSetManager ( NULL ), //DSMTEST
83 fDataInputHandler ( new DataInputHandler ),
84 fTransformations ( "I" ),
85 fVerbose ( kFALSE ),
86 fDataAssignType ( kAssignEvents ),
87 fATreeEvent (0)
88{
90 SetName(thedlName.Data());
91 fLogger->SetSource("DataLoader");
92}
93
94////////////////////////////////////////////////////////////////////////////////
95
97{
98 // destructor
99
100 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
101 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
102
103 delete fDataInputHandler;
104
105 // destroy singletons
106 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
107 delete fDataSetManager; // DSMTEST
108
109 // problem with call of REGISTER_METHOD macro ...
110 // ClassifierDataLoader::DestroyInstance();
111 // Types::DestroyInstance();
112 //Tools::DestroyInstance();
113 //Config::DestroyInstance();
114}
115
116
117////////////////////////////////////////////////////////////////////////////////
118
120{
121 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
122}
123
124////////////////////////////////////////////////////////////////////////////////
125
127{
128 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
129
130 if (dsi!=0) return *dsi;
131
132 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
133}
134
135////////////////////////////////////////////////////////////////////////////////
136
138{
139 return DefaultDataSetInfo(); // DSMTEST
140}
141
142////////////////////////////////////////////////////////////////////////////////
143/// Transforms the variables and return a new DataLoader with the transformed
144/// variables
145
147{
148 TString trOptions = "0";
149 TString trName = "None";
150 if (trafoDefinition.Contains("(")) {
151
152 // contains transformation parameters
153 Ssiz_t parStart = trafoDefinition.Index( "(" );
154 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
155
156 trName = trafoDefinition(0,parStart);
157 trOptions = trafoDefinition(parStart,parLen);
158 trOptions.Remove(parLen-1,1);
159 trOptions.Remove(0,1);
160 }
161 else
162 trName = trafoDefinition;
163
164 VarTransformHandler* handler = new VarTransformHandler(this);
165 // variance threshold variable transformation
166 if (trName == "VT") {
167
168 // find threshold value from given input
169 Double_t threshold = 0.0;
170 if (!trOptions.IsFloat()){
171 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
172 delete handler;
173 return this;
174 }
175 else
176 threshold = trOptions.Atof();
177 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
178 delete handler;
179 return transformedLoader;
180 }
181 else {
182 delete handler;
183 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
184 }
185 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
186 return this;
187}
188
189////////////////////////////////////////////////////////////////////////////////
190// the next functions are to assign events directly
191
192////////////////////////////////////////////////////////////////////////////////
193/// create the data assignment tree (for event-wise data assignment by user)
194
196{
197 TTree * assignTree = new TTree( name, name );
198 assignTree->SetDirectory(nullptr);
199 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
200 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
201
202 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
203 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
204 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
205
206 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
207 // add variables
208 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
209 TString vname = vars[ivar].GetExpression();
210 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
211 }
212 // add targets
213 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
214 TString vname = tgts[itgt].GetExpression();
215 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
216 }
217 // add spectators
218 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
219 TString vname = spec[ispc].GetExpression();
220 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
221 }
222 return assignTree;
223}
224
225////////////////////////////////////////////////////////////////////////////////
226/// add signal training event
227
228void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
229{
230 AddEvent( "Signal", Types::kTraining, event, weight );
231}
232
233////////////////////////////////////////////////////////////////////////////////
234/// add signal testing event
235
236void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
237{
238 AddEvent( "Signal", Types::kTesting, event, weight );
239}
240
241////////////////////////////////////////////////////////////////////////////////
242/// add signal training event
243
244void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
245{
246 AddEvent( "Background", Types::kTraining, event, weight );
247}
248
249////////////////////////////////////////////////////////////////////////////////
250/// add signal training event
251
252void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
253{
254 AddEvent( "Background", Types::kTesting, event, weight );
255}
256
257////////////////////////////////////////////////////////////////////////////////
258/// add signal training event
259
260void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
261{
262 AddEvent( className, Types::kTraining, event, weight );
263}
264
265////////////////////////////////////////////////////////////////////////////////
266/// add signal test event
267
268void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
269{
270 AddEvent( className, Types::kTesting, event, weight );
271}
272
273////////////////////////////////////////////////////////////////////////////////
274/// add event
275/// vector event : the order of values is: variables + targets + spectators
276
278 const std::vector<Double_t>& event, Double_t weight )
279{
280 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
281 UInt_t clIndex = theClass->GetNumber();
282
283
284 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
285 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
286 fAnalysisType = Types::kMulticlass;
287
288
289 if (clIndex>=fTrainAssignTree.size()) {
290 fTrainAssignTree.resize(clIndex+1, 0);
291 fTestAssignTree.resize(clIndex+1, 0);
292 }
293
294 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
295 fTrainAssignTree[clIndex] = CreateEventAssignTrees( TString::Format("TrainAssignTree_%s", className.Data()).Data() );
296 fTestAssignTree[clIndex] = CreateEventAssignTrees( TString::Format("TestAssignTree_%s", className.Data()).Data() );
297 }
298
299 fATreeType = clIndex;
300 fATreeWeight = weight;
301 if (event.size() > fATreeEvent.size()) {
302 Error("TMVA::DataLoader::AddEvent", "Number of variables defined through DataLoader::AddVariable (%zu) is inconsistent"
303 "with number of variables given to DataLoader::Add*Event (%zu)."
304 "Please check your variable definitions and statement ordering."
305 "This event will not be added.", fATreeEvent.size(), event.size());
306 return;
307 }
308 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
309
310 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
311 else fTestAssignTree[clIndex]->Fill();
312
313}
314
315////////////////////////////////////////////////////////////////////////////////
316///
317
319{
320 return fTrainAssignTree[clIndex]!=0;
321}
322
323////////////////////////////////////////////////////////////////////////////////
324/// assign event-wise local trees to data set
325
327{
328 UInt_t size = fTrainAssignTree.size();
329 for(UInt_t i=0; i<size; i++) {
330 if(!UserAssignEvents(i)) continue;
331 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
332 SetWeightExpression( "weight", className );
333 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
334 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
335 }
336}
337
338////////////////////////////////////////////////////////////////////////////////
339/// number of signal events (used to compute significance)
340
341void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
342 const TCut& cut, const TString& treetype )
343{
345 TString tmpTreeType = treetype; tmpTreeType.ToLower();
346 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
347 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
348 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
349 else {
350 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
351 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
352 }
353 AddTree( tree, className, weight, cut, tt );
354}
355
356////////////////////////////////////////////////////////////////////////////////
357
358void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
359 const TCut& cut, Types::ETreeType tt )
360{
361 if(!tree)
362 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
363
364 DefaultDataSetInfo().AddClass( className );
365
366 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
367 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
368 fAnalysisType = Types::kMulticlass;
369
370 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
371 << " with " << tree->GetEntries() << " events" << Endl;
372 DataInput().AddTree( tree, className, weight, cut, tt );
373}
374
375////////////////////////////////////////////////////////////////////////////////
376/// number of signal events (used to compute significance)
377
379{
380 AddTree( signal, "Signal", weight, TCut(""), treetype );
381}
382
383////////////////////////////////////////////////////////////////////////////////
384/// add signal tree from text file
385
387{
388 // create trees from these ascii files
389 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
390 signalTree->ReadFile( datFileS );
391
392 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
393 << datFileS << Endl;
394
395 // number of signal events (used to compute significance)
396 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
397}
398
399////////////////////////////////////////////////////////////////////////////////
400
401void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
402{
403 AddTree( signal, "Signal", weight, TCut(""), treetype );
404}
405
406////////////////////////////////////////////////////////////////////////////////
407/// number of signal events (used to compute significance)
408
410{
411 AddTree( signal, "Background", weight, TCut(""), treetype );
412}
413
414////////////////////////////////////////////////////////////////////////////////
415/// add background tree from text file
416
418{
419 // create trees from these ascii files
420 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
421 bkgTree->ReadFile( datFileB );
422
423 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
424 << datFileB << Endl;
425
426 // number of signal events (used to compute significance)
427 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
428}
429
430////////////////////////////////////////////////////////////////////////////////
431
432void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
433{
434 AddTree( signal, "Background", weight, TCut(""), treetype );
435}
436
437////////////////////////////////////////////////////////////////////////////////
438
440{
441 AddTree( tree, "Signal", weight );
442}
443
444////////////////////////////////////////////////////////////////////////////////
445
447{
448 AddTree( tree, "Background", weight );
449}
450
451////////////////////////////////////////////////////////////////////////////////
452/// set background tree
453
454void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
455{
456 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
457}
458
459////////////////////////////////////////////////////////////////////////////////
460/// define the input trees for signal and background; no cuts are applied
461
462void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
463 Double_t signalWeight, Double_t backgroundWeight )
464{
465 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
466 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
467}
468
469////////////////////////////////////////////////////////////////////////////////
470
471void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
472 Double_t signalWeight, Double_t backgroundWeight )
473{
474 DataInput().AddTree( datFileS, "Signal", signalWeight );
475 DataInput().AddTree( datFileB, "Background", backgroundWeight );
476}
477
478////////////////////////////////////////////////////////////////////////////////
479/// define the input trees for signal and background from single input tree,
480/// containing both signal and background events distinguished by the type
481/// identifiers: SigCut and BgCut
482
483void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
484{
485 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
486 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
487}
488
489////////////////////////////////////////////////////////////////////////////////
490/// user inserts discriminating variable in data set info
491
492void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
493 char type, Double_t min, Double_t max )
494{
495 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
496}
497
498////////////////////////////////////////////////////////////////////////////////
499/// user inserts discriminating variable in data set info
500
501void TMVA::DataLoader::AddVariable( const TString& expression, char type,
502 Double_t min, Double_t max )
503{
504 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
505}
506
507////////////////////////////////////////////////////////////////////////////////
508/// user inserts discriminating array of variables in data set info
509/// in case input tree provides an array of values
510
511void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
512 Double_t min, Double_t max)
513{
514 DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
515}
516////////////////////////////////////////////////////////////////////////////////
517/// user inserts target in data set info
518
519void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
520 Double_t min, Double_t max )
521{
522 if( fAnalysisType == Types::kNoAnalysisType )
523 fAnalysisType = Types::kRegression;
524
525 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
526}
527
528////////////////////////////////////////////////////////////////////////////////
529/// user inserts target in data set info
530
531void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
532 Double_t min, Double_t max )
533{
534 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
535}
536
537////////////////////////////////////////////////////////////////////////////////
538/// default creation
539
541{
542 return AddDataSet( fName );
543}
544
545////////////////////////////////////////////////////////////////////////////////
546/// fill input variables in data set
547
548void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
549{
550 for (std::vector<TString>::iterator it=theVariables->begin();
551 it!=theVariables->end(); ++it) AddVariable(*it);
552}
553
554////////////////////////////////////////////////////////////////////////////////
555
557{
558 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
559}
560
561////////////////////////////////////////////////////////////////////////////////
562
564{
565 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
566}
567
568////////////////////////////////////////////////////////////////////////////////
569
570void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
571{
572 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
573 if (className=="") {
574 SetSignalWeightExpression(variable);
575 SetBackgroundWeightExpression(variable);
576 }
577 else DefaultDataSetInfo().SetWeightExpression( variable, className );
578}
579
580////////////////////////////////////////////////////////////////////////////////
581
582void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
583 SetCut( TCut(cut), className );
584}
585
586////////////////////////////////////////////////////////////////////////////////
587
588void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
589{
590 DefaultDataSetInfo().SetCut( cut, className );
591}
592
593////////////////////////////////////////////////////////////////////////////////
594
595void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
596{
597 AddCut( TCut(cut), className );
598}
599
600////////////////////////////////////////////////////////////////////////////////
601void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
602{
603 DefaultDataSetInfo().AddCut( cut, className );
604}
605
606////////////////////////////////////////////////////////////////////////////////
607/// prepare the training and test trees
608
610 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
611 const TString& otherOpt )
612{
613 SetInputTreesFromEventAssignTrees();
614
615 AddCut( cut );
616
617 DefaultDataSetInfo().SetSplitOptions( TString::Format("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
618 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()).Data() );
619}
620
621////////////////////////////////////////////////////////////////////////////////
622/// prepare the training and test trees
623/// kept for backward compatibility
624
626{
627 SetInputTreesFromEventAssignTrees();
628
629 AddCut( cut );
630
631 DefaultDataSetInfo().SetSplitOptions( TString::Format("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
632 Ntrain, Ntrain, Ntest, Ntest).Data() );
633}
634
635////////////////////////////////////////////////////////////////////////////////
636/// prepare the training and test trees
637/// -> same cuts for signal and background
638
640{
641 SetInputTreesFromEventAssignTrees();
642
643 DefaultDataSetInfo().PrintClasses();
644 AddCut( cut );
645 DefaultDataSetInfo().SetSplitOptions( opt );
646}
647
648////////////////////////////////////////////////////////////////////////////////
649/// prepare the training and test trees
650
651void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
652{
653 // if event-wise data assignment, add local trees to dataset first
654 SetInputTreesFromEventAssignTrees();
655
656 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
657 AddCut( sigcut, "Signal" );
658 AddCut( bkgcut, "Background" );
659
660 DefaultDataSetInfo().SetSplitOptions( splitOpt );
661}
662
663////////////////////////////////////////////////////////////////////////////////
664/// Function required to split the training and testing datasets into a
665/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
666/// classes. The option to split the training dataset into a training set and
667/// a validation set is implemented but not currently used.
668
670{
671 s.MakeKFoldDataSet( DefaultDataSetInfo() );
672}
673
674////////////////////////////////////////////////////////////////////////////////
675/// Function for assigning the correct folds to the testing or training set.
676
678{
679 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
680}
681
682
683////////////////////////////////////////////////////////////////////////////////
684/// Recombines the dataset. The precise semantics depend on the actual split.
685///
686/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
687/// See documentation for each particular split for more information.
688///
689
691{
692 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
693}
694
695////////////////////////////////////////////////////////////////////////////////
696/// Copy method use in VI and CV
697
699{
701 DataLoaderCopy(des,this);
702 return des;
703}
704
705////////////////////////////////////////////////////////////////////////////////
706///Loading Dataset from DataInputHandler for subseed
707
709{
710 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
711 {
712 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
713 }
714
715 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
716 {
717 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
718 }
719}
720
721////////////////////////////////////////////////////////////////////////////////
722/// returns the correlation matrix of datasets
723
725{
726 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
727 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
728 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
729}
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
constexpr Bool_t kFALSE
Definition RtypesCore.h:94
#define ClassImp(name)
Definition Rtypes.h:382
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:185
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A specialized string object used for TTree selections.
Definition TCut.h:25
Service class for 2-D histogram classes.
Definition TH2.h:30
Class that contains all the information of a class.
Definition ClassInfo.h:49
UInt_t GetNumber() const
Definition ClassInfo.h:65
MsgLogger * fLogger
! message logger
virtual void RecombineKFoldDataSet(DataSetInfo &dsi, Types::ETreeType tt=Types::kTraining)
Definition CvSplit.cxx:114
virtual void MakeKFoldDataSet(DataSetInfo &dsi)=0
virtual void PrepareFoldDataSet(DataSetInfo &dsi, UInt_t foldNumber, Types::ETreeType tt)
Set training and test set vectors of dataset described by dsi.
Definition CvSplit.cxx:57
Class that contains all the data information.
DataInputHandler * fDataInputHandler
->
Definition DataLoader.h:189
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSetInfo & AddDataSet(DataSetInfo &)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
void SetTree(TTree *tree, const TString &className, Double_t weight)
deprecated
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
DataSetInfo & DefaultDataSetInfo()
default creation
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
DataSetManager * fDataSetManager
Definition DataLoader.h:186
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
void SetSignalWeightExpression(const TString &variable)
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void SetBackgroundWeightExpression(const TString &variable)
void AddCut(const TString &cut, const TString &className="")
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
DataLoader(TString thedlName="default")
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
DataSetInfo & GetDataSetInfo()
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Bool_t UserAssignEvents(UInt_t clIndex)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
void SetSignalTree(TTree *signal, Double_t weight=1.0)
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual ~DataLoader()
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void SetInputVariables(std::vector< TString > *theVariables)
deprecated
void SetCut(const TString &cut, const TString &className="")
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
void SetSource(const std::string &source)
Definition MsgLogger.h:68
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kRegression
Definition Types.h:128
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
Definition Types.h:145
@ kTraining
Definition Types.h:143
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition TNamed.cxx:140
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
Double_t Atof() const
Return floating-point value contained in string.
Definition TString.cxx:2054
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition TString.cxx:1858
const char * Data() const
Definition TString.h:376
TString & Remove(Ssiz_t pos)
Definition TString.h:685
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2378
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:632
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition TTree.cxx:8956
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition TTree.h:353
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition TTree.cxx:7554
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
TMarker m
Definition textangle.C:8
auto * tt
Definition textangle.C:16