62int TMVAClassification(
TString myMethodList =
"" )
77 std::map<std::string,int> Use;
87 Use[
"Likelihood"] = 1;
88 Use[
"LikelihoodD"] = 0;
89 Use[
"LikelihoodPCA"] = 1;
90 Use[
"LikelihoodKDE"] = 0;
91 Use[
"LikelihoodMIX"] = 0;
98 Use[
"PDEFoamBoost"] = 0;
105 Use[
"BoostedFisher"] = 0;
148 std::cout << std::endl;
149 std::cout <<
"==> Start TMVAClassification" << std::endl;
152 if (myMethodList !=
"") {
153 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
156 for (
UInt_t i=0; i<mlist.size(); i++) {
157 std::string regMethod(mlist[i]);
159 if (Use.find(regMethod) == Use.end()) {
160 std::cout <<
"Method \"" << regMethod <<
"\" not known in TMVA under this name. Choose among the following:" << std::endl;
161 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first <<
" ";
162 std::cout << std::endl;
176 TString fname =
"./tmva_class_example.root";
182 input =
TFile::Open(
"http://root.cern.ch/files/tmva_class_example.root",
"CACHEREAD");
185 std::cout <<
"ERROR: could not open data file" << std::endl;
188 std::cout <<
"--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
192 TTree *signalTree = (
TTree*)input->Get(
"TreeS");
193 TTree *background = (
TTree*)input->Get(
"TreeB");
196 TString outfileName(
"TMVA.root" );
210 "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
222 dataloader->
AddVariable(
"myvar1 := var1+var2",
'F' );
223 dataloader->
AddVariable(
"myvar2 := var1-var2",
"Expression 2",
"",
'F' );
224 dataloader->
AddVariable(
"var3",
"Variable 3",
"units",
'F' );
225 dataloader->
AddVariable(
"var4",
"Variable 4",
"units",
'F' );
231 dataloader->
AddSpectator(
"spec1 := var1*2",
"Spectator 1",
"units",
'F' );
232 dataloader->
AddSpectator(
"spec2 := var1*3",
"Spectator 2",
"units",
'F' );
304 "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
316 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
320 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
324 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
328 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
332 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
335 if (Use[
"Likelihood"])
337 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
340 if (Use[
"LikelihoodD"])
342 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
345 if (Use[
"LikelihoodPCA"])
347 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
350 if (Use[
"LikelihoodKDE"])
352 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
355 if (Use[
"LikelihoodMIX"])
357 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
366 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
370 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
374 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
379 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
381 if (Use[
"PDEFoamBoost"])
383 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
388 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
396 factory->
BookMethod( dataloader,
TMVA::Types::kLD,
"LD",
"H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
400 factory->
BookMethod( dataloader,
TMVA::Types::kFisher,
"Fisher",
"H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
407 if (Use[
"BoostedFisher"])
409 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );
414 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
418 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" );
422 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
426 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
430 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
434 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
438 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLP",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
441 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBFGS",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
444 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBNN",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );
448 if (Use[
"DNN_CPU"] or Use[
"DNN_GPU"]) {
450 TString layoutString (
"Layout=TANH|128,TANH|128,TANH|128,LINEAR");
454 TString trainingStrategyString = (
"TrainingStrategy=LearningRate=1e-2,Momentum=0.9,"
455 "ConvergenceSteps=20,BatchSize=100,TestRepetitions=1,"
456 "WeightDecay=1e-4,Regularization=None,"
457 "DropConfig=0.0+0.5+0.5+0.5");
460 TString dnnOptions (
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
461 "WeightInitialization=XAVIERUNIFORM");
462 dnnOptions.Append (
":"); dnnOptions.Append (layoutString);
463 dnnOptions.Append (
":"); dnnOptions.Append (trainingStrategyString);
466 if (Use[
"DNN_GPU"]) {
467 TString gpuOptions = dnnOptions +
":Architecture=GPU";
471 if (Use[
"DNN_CPU"]) {
472 TString cpuOptions = dnnOptions +
":Architecture=CPU";
483 factory->
BookMethod( dataloader,
TMVA::Types::kTMlpANN,
"TMlpANN",
"!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" );
492 "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
496 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
500 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
504 "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
508 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
513 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
542 std::cout <<
"==> Wrote root file: " << outputFile->
GetName() << std::endl;
543 std::cout <<
"==> TMVAClassification is done!" << std::endl;
553int main(
int argc,
char** argv )
557 for (
int i=1; i<argc; i++) {
559 if(regMethod==
"-b" || regMethod==
"--batch")
continue;
561 methodList += regMethod;
563 return TMVAClassification(methodList);
R__EXTERN TSystem * gSystem
A specialized string object used for TTree selections.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
void Close(Option_t *option="") override
Close a file.
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a signal tree to the data loader, with an optional weight and tree type.
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
void SetBackgroundWeightExpression(const TString &variable)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a background tree to the data loader, with an optional weight and tree type.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
This is the main MVA steering class.
void TrainAllMethods()
Iterates through all booked methods and calls training.
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding DataSet.
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
virtual const char * GetName() const
Returns name of object.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
A TTree represents a columnar dataset.
void TMVAGui(const char *fName="TMVA.root", TString dataset="")