83 fDataSetManager (
NULL ),
85 fTransformations ( "
I" ),
87 fDataAssignType ( kAssignEvents ),
92 fLogger->SetSource(
"DataLoader");
101 std::vector<TMVA::VariableTransformBase*>::iterator trfIt =
fDefaultTrfs.begin();
102 for (;trfIt !=
fDefaultTrfs.end(); trfIt++)
delete (*trfIt);
129 if (dsi!=0)
return *dsi;
148 if (trafoDefinition.
Contains(
"(")) {
152 Ssiz_t parLen = trafoDefinition.
Index(
")", parStart )-parStart+1;
154 trName = trafoDefinition(0,parStart);
155 trOptions = trafoDefinition(parStart,parLen);
156 trOptions.
Remove(parLen-1,1);
160 trName = trafoDefinition;
164 if (trName ==
"VT") {
169 Log() <<
kFATAL <<
" VT transformation must be passed a floating threshold value" <<
Endl;
173 threshold = trOptions.
Atof();
175 return transformedLoader;
178 Log() <<
kFATAL <<
"Incorrect transformation string provided, please check" <<
Endl;
180 Log() <<
kINFO <<
"No transformation applied, returning original loader" <<
Endl;
202 for (
UInt_t ivar=0; ivar<vars.size(); ivar++) {
203 TString vname = vars[ivar].GetExpression();
207 for (
UInt_t itgt=0; itgt<tgts.size(); itgt++) {
208 TString vname = tgts[itgt].GetExpression();
212 for (
UInt_t ispc=0; ispc<spec.size(); ispc++) {
213 TString vname = spec[ispc].GetExpression();
214 assignTree->
Branch( vname, &
fATreeEvent[vars.size()+tgts.size()+ispc], vname +
"/F" );
263 const std::vector<Double_t>& event,
Double_t weight )
307 for(
UInt_t i=0; i<size; i++) {
327 Log() <<
kFATAL <<
"<AddTree> cannot interpret tree type: \"" << treetype
328 <<
"\" should be \"Training\" or \"Test\" or \"Training and Testing\"" <<
Endl;
330 AddTree( tree, className, weight, cut, tt );
338 Log() <<
kFATAL <<
"Tree does not exist (empty pointer)." <<
Endl;
346 Log() <<
kINFO<<
"Add Tree " << tree->
GetName() <<
" of type " << className
355 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
364 TTree* signalTree =
new TTree(
"TreeS",
"Tree (S)" );
367 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Signal file : \"" 371 AddTree( signalTree,
"Signal", weight,
TCut(
""), treetype );
377 AddTree( signal,
"Signal", weight,
TCut(
""), treetype );
384 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
392 TTree* bkgTree =
new TTree(
"TreeB",
"Tree (B)" );
395 Log() <<
kINFO <<
"Create TTree objects from ASCII input files ... \n- Background file : \"" 399 AddTree( bkgTree,
"Background", weight,
TCut(
""), treetype );
405 AddTree( signal,
"Background", weight,
TCut(
""), treetype );
411 AddTree( tree,
"Signal", weight );
417 AddTree( tree,
"Background", weight );
501 for (std::vector<TString>::iterator it=theVariables->begin();
562 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.
Data()) );
575 Ntrain, Ntrain, Ntest, Ntest) );
599 AddCut( sigcut,
"Signal" );
600 AddCut( bkgcut,
"Background" );
618 std::vector<Event*> TrainSigData;
619 std::vector<Event*> TrainBkgData;
620 std::vector<Event*> TestSigData;
621 std::vector<Event*> TestBkgData;
624 for(
UInt_t i=0; i<TrainingData.size(); ++i){
625 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Signal", 6)){ TrainSigData.push_back(TrainingData.at(i)); }
626 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TrainingData.at(i)->GetClass() )->
GetName(),
"Background", 10)){ TrainBkgData.push_back(TrainingData.at(i)); }
632 for(
UInt_t i=0; i<TestingData.size(); ++i){
633 if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Signal", 6)){ TestSigData.push_back(TestingData.at(i)); }
634 else if( strncmp(
DefaultDataSetInfo().GetClassInfo( TestingData.at(i)->GetClass() )->
GetName(),
"Background", 10)){ TestBkgData.push_back(TestingData.at(i)); }
643 std::vector<std::vector<Event*>> tempSigEvents =
SplitSets(TrainSigData,0,2);
644 std::vector<std::vector<Event*>> tempBkgEvents =
SplitSets(TrainBkgData,0,2);
665 std::vector<Event*>* tempTrain =
new std::vector<Event*>;
666 std::vector<Event*>* tempTest =
new std::vector<Event*>;
672 for(
UInt_t i=0; i<numFolds; ++i){
706 tempTrain->reserve(nTrain);
707 tempTest->reserve(nTest);
710 for(
UInt_t j=0; j<numFolds; ++j){
756 std::vector<std::vector<Event*>> tempSets;
757 tempSets.resize(numFolds);
765 if(inSet == foldSize*numFolds){
771 if(tempSets.at(s).size()<foldSize){
772 tempSets.at(s).push_back(oldSet.at(i));
799 des->
AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
804 des->
AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
814 "CorrelationMatrix"+className,
"Correlation Matrix ("+className+
")");
DataSetInfo * GetDataSetInfo(const TString &dsiName)
returns datasetinfo object for given name
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
DataSetManager * fDataSetManager
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
MsgLogger & Endl(MsgLogger &ml)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a target (can be a complex expression) to the set of targets used in the MV analysis
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
std::vector< VariableInfo > & GetSpectatorInfos()
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
std::vector< std::vector< TMVA::Event * > > fTrainBkgEvents
DataSetInfo & GetDataSetInfo()
Double_t Atof() const
Return floating-point value contained in string.
Double_t background(Double_t *x, Double_t *par)
TTree * CreateEventAssignTrees(const TString &name)
DataSetInfo & DefaultDataSetInfo()
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void ToLower()
Change string to lower-case.
void MakeKFoldDataSet(UInt_t numberFolds, bool validationSet=false)
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
void AddCut(const TCut &cut, const TString &className)
set the cut for the classes
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
DataInputHandler * fDataInputHandler
Types::EAnalysisType fAnalysisType
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
TH2 * GetCorrelationMatrix(const TString &className)
std::vector< std::vector< TMVA::Event * > > fTestBkgEvents
DataSet * GetDataSet() const
returns data set
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
const char * Data() const
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
void SetTree(TTree *tree, const TString &className, Double_t weight)
void PrepareFoldDataSet(UInt_t foldNumber, Types::ETreeType tt)
void SetInputVariables(std::vector< TString > *theVariables)
DataSetInfo & AddDataSet(DataSetInfo &)
std::vector< VariableInfo > & GetTargetInfos()
void AddCut(const TString &cut, const TString &className="")
A specialized string object used for TTree selections.
void SetInputTreesFromEventAssignTrees()
void SetSplitOptions(const TString &so)
DataInputHandler & DataInput()
Service class for 2-Dim histogram classes.
ClassInfo * GetClassInfo(Int_t clNum) const
const TMatrixD * CorrelationMatrix(const TString &className) const
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes; if a class name is specified, set it only for this class; if the class name is unknown, set it for all classes
DataSetInfo & AddDataSetInfo(DataSetInfo &dsi)
stores a copy of the dataset info object
char * Form(const char *fmt,...)
std::vector< TTree * > fTestAssignTree
Bool_t UserAssignEvents(UInt_t clIndex)
std::vector< Float_t > fATreeEvent
virtual const char * GetName() const
Returns name of object.
std::vector< std::vector< TMVA::Event * > > fTestSigEvents
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
void PrintClasses() const
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
DataLoader * MakeCopy(TString name)
TString & Remove(Ssiz_t pos)
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
void SetBackgroundWeightExpression(const TString &variable)
unsigned long long ULong64_t
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
static void DestroyInstance()
static function: destroy TMVA instance
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
ClassInfo * AddClass(const TString &className)
void SetSignalWeightExpression(const TString &variable)
std::vector< std::vector< TMVA::Event * > > fTrainSigEvents
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter= ' ')
Create or simply read branches from filename.
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Abstract ClassifierFactory template that handles arbitrary types.
std::vector< TTree * > fTrainAssignTree
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type= 'F', Bool_t normalized=kTRUE, void *external=0)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analysis
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
std::vector< std::vector< TMVA::Event * > > fValidBkgEvents
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
void SetSignalTree(TTree *signal, Double_t weight=1.0)
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
virtual Long64_t GetEntries() const
A TTree object has a header with a name and a title.
std::vector< std::vector< TMVA::Event * > > SplitSets(std::vector< TMVA::Event * > &oldSet, int seedNum, int numFolds)
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
std::vector< std::vector< TMVA::Event * > > fValidSigEvents
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
std::vector< VariableInfo > & GetVariableInfos()
void SetCut(const TString &cut, const TString &className="")
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)