68 fDataSetManager ( NULL ),
70 fTransformations (
"I" ),
72 fDataAssignType ( kAssignEvents ),
86 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
87 for (;trfIt != fDefaultTrfs.end(); ++trfIt)
delete (*trfIt);
89 delete fDataInputHandler;
93 delete fDataSetManager;
107 return fDataSetManager->AddDataSetInfo(dsi);
114 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName);
116 if (dsi!=0)
return *dsi;
118 return fDataSetManager->AddDataSetInfo(*(
new DataSetInfo(dsiName)));
125 return DefaultDataSetInfo();
136 if (trafoDefinition.
Contains(
"(")) {
140 Ssiz_t parLen = trafoDefinition.
Index(
")", parStart )-parStart+1;
142 trName = trafoDefinition(0,parStart);
143 trOptions = trafoDefinition(parStart,parLen);
144 trOptions.
Remove(parLen-1,1);
148 trName = trafoDefinition;
152 if (trName ==
"VT") {
157 Log() << kFATAL <<
" VT transformation must be passed a floating threshold value" <<
Endl;
162 threshold = trOptions.
Atof();
165 return transformedLoader;
168 Log() << kFATAL <<
"Incorrect transformation string provided, please check" <<
Endl;
170 Log() << kINFO <<
"No transformation applied, returning original loader" <<
Endl;
184 assignTree->
Branch(
"type", &fATreeType,
"ATreeType/I" );
185 assignTree->
Branch(
"weight", &fATreeWeight,
"ATreeWeight/F" );
187 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
188 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
189 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
191 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
193 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
194 TString vname = vars[ivar].GetExpression();
195 assignTree->
Branch( vname, &fATreeEvent[ivar], vname +
"/F" );
198 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
199 TString vname = tgts[itgt].GetExpression();
200 assignTree->
Branch( vname, &fATreeEvent[vars.size()+itgt], vname +
"/F" );
203 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
204 TString vname = spec[ispc].GetExpression();
205 assignTree->
Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname +
"/F" );
263 const std::vector<Double_t>& event,
Double_t weight )
265 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className);
274 if (clIndex>=fTrainAssignTree.size()) {
275 fTrainAssignTree.resize(clIndex+1, 0);
276 fTestAssignTree.resize(clIndex+1, 0);
279 if (fTrainAssignTree[clIndex]==0) {
280 fTrainAssignTree[clIndex] = CreateEventAssignTrees(
Form(
"TrainAssignTree_%s", className.
Data()) );
281 fTestAssignTree[clIndex] = CreateEventAssignTrees(
Form(
"TestAssignTree_%s", className.
Data()) );
284 fATreeType = clIndex;
285 fATreeWeight = weight;
286 for (
UInt_t ivar=0; ivar<
event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
289 else fTestAssignTree[clIndex]->Fill();
298 return fTrainAssignTree[clIndex]!=0;
306 UInt_t size = fTrainAssignTree.size();
307 for(
UInt_t i=0; i<size; i++) {
308 if(!UserAssignEvents(i))
continue;
309 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
310 SetWeightExpression(
"weight", className );
328 Log() << kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
329 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
331 AddTree(
tree, className, weight, cut,
tt );
340 Log() << kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
342 DefaultDataSetInfo().AddClass( className );
348 Log() << kINFO<<
"Add Tree " <<
tree->GetName() <<
" of type " << className
349 <<
" with " <<
tree->GetEntries() <<
" events" <<
Endl;
350 DataInput().AddTree(
tree, className, weight, cut,
tt );
358 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
367 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
370 Log() << kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \""
374 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
381 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
389 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
398 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
401 Log() << kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \""
405 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
412 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
419 AddTree(
tree,
"Signal", weight );
426 AddTree(
tree,
"Background", weight );
452 DataInput().AddTree( datFileS,
"Signal", signalWeight );
453 DataInput().AddTree( datFileB,
"Background", backgroundWeight );
473 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max,
type );
482 DefaultDataSetInfo().AddVariable( expression,
"",
"", min, max,
type );
494 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
503 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
511 return AddDataSet( fName );
519 for (std::vector<TString>::iterator it=theVariables->begin();
520 it!=theVariables->end(); ++it) AddVariable(*it);
527 DefaultDataSetInfo().SetWeightExpression(variable,
"Signal");
534 DefaultDataSetInfo().SetWeightExpression(variable,
"Background");
543 SetSignalWeightExpression(variable);
544 SetBackgroundWeightExpression(variable);
546 else DefaultDataSetInfo().SetWeightExpression( variable, className );
552 SetCut(
TCut(cut), className );
559 DefaultDataSetInfo().SetCut( cut, className );
566 AddCut(
TCut(cut), className );
572 DefaultDataSetInfo().AddCut( cut, className );
582 SetInputTreesFromEventAssignTrees();
586 DefaultDataSetInfo().SetSplitOptions(
Form(
"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
587 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
596 SetInputTreesFromEventAssignTrees();
600 DefaultDataSetInfo().SetSplitOptions(
Form(
"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
601 Ntrain, Ntrain, Ntest, Ntest) );
610 SetInputTreesFromEventAssignTrees();
612 DefaultDataSetInfo().PrintClasses();
614 DefaultDataSetInfo().SetSplitOptions( opt );
623 SetInputTreesFromEventAssignTrees();
626 AddCut( sigcut,
"Signal" );
627 AddCut( bkgcut,
"Background" );
629 DefaultDataSetInfo().SetSplitOptions( splitOpt );
640 s.MakeKFoldDataSet( DefaultDataSetInfo() );
648 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber,
tt );
661 s.RecombineKFoldDataSet( DefaultDataSetInfo(),
tt );
681 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
686 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
695 const TMatrixD *
m = DefaultDataSetInfo().CorrelationMatrix(className);
696 return DefaultDataSetInfo().CreateCorrelationMatrixHist(
m,
697 "CorrelationMatrix"+className,
"Correlation Matrix ("+className+
")");
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Service class for 2-Dim histogram classes.
Class that contains all the information of a class.
DataInputHandler * fDataInputHandler
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a signal tree to the data set (with optional event weight and tree type)
DataSetInfo & AddDataSet(DataSetInfo &)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add training event for the given class
void SetTree(TTree *tree, const TString &className, Double_t weight)
set tree for the given class
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
DataSetInfo & DefaultDataSetInfo()
default creation
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
DataSetManager * fDataSetManager
DataLoader * MakeCopy(TString name)
Copy method used in VI and CV.
void SetSignalWeightExpression(const TString &variable)
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void SetBackgroundWeightExpression(const TString &variable)
void AddCut(const TString &cut, const TString &className="")
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event; the order of values in the event vector is: variables + targets + spectators
DataLoader(TString thedlName="default")
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataInputHandler & DataInput()
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree to the data set (with optional event weight and tree type)
DataSetInfo & GetDataSetInfo()
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Bool_t UserAssignEvents(UInt_t clIndex)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add test event for the given class
void SetSignalTree(TTree *signal, Double_t weight=1.0)
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
void SetCut(const TString &cut, const TString &className="")
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Class that contains all the data information.
Class that contains all the data information.
void SetSource(const std::string &source)
virtual void SetName(const char *name)
Set the name of the TNamed.
void ToLower()
Change string to lower-case.
Double_t Atof() const
Return floating-point value contained in string.
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
const char * Data() const
TString & Remove(Ssiz_t pos)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
A TTree represents a columnar dataset.
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)