62int TMVAClassification(
TString myMethodList =
"" )
77 std::map<std::string,int> Use;
87 Use[
"Likelihood"] = 1;
88 Use[
"LikelihoodD"] = 0;
89 Use[
"LikelihoodPCA"] = 1;
90 Use[
"LikelihoodKDE"] = 0;
91 Use[
"LikelihoodMIX"] = 0;
98 Use[
"PDEFoamBoost"] = 0;
105 Use[
"BoostedFisher"] = 0;
148 std::cout << std::endl;
149 std::cout <<
"==> Start TMVAClassification" << std::endl;
152 if (myMethodList !=
"") {
153 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
156 for (
UInt_t i=0; i<mlist.size(); i++) {
157 std::string regMethod(mlist[i]);
159 if (Use.find(regMethod) == Use.end()) {
160 std::cout <<
"Method \"" << regMethod <<
"\" not known in TMVA under this name. Choose among the following:" << std::endl;
161 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first <<
" ";
162 std::cout << std::endl;
176 TString fname =
"./tmva_class_example.root";
182 input =
TFile::Open(
"http://root.cern.ch/files/tmva_class_example.root",
"CACHEREAD");
185 std::cout <<
"ERROR: could not open data file" << std::endl;
188 std::cout <<
"--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
192 TTree *signalTree = (
TTree*)input->Get(
"TreeS");
193 TTree *background = (
TTree*)input->Get(
"TreeB");
196 TString outfileName(
"TMVA.root" );
210 "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
222 dataloader->
AddVariable(
"myvar1 := var1+var2",
'F' );
223 dataloader->
AddVariable(
"myvar2 := var1-var2",
"Expression 2",
"",
'F' );
224 dataloader->
AddVariable(
"var3",
"Variable 3",
"units",
'F' );
225 dataloader->
AddVariable(
"var4",
"Variable 4",
"units",
'F' );
231 dataloader->
AddSpectator(
"spec1 := var1*2",
"Spectator 1",
"units",
'F' );
232 dataloader->
AddSpectator(
"spec2 := var1*3",
"Spectator 2",
"units",
'F' );
304 "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
316 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
320 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
324 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
328 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
332 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
335 if (Use[
"Likelihood"])
337 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
340 if (Use[
"LikelihoodD"])
342 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
345 if (Use[
"LikelihoodPCA"])
347 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
350 if (Use[
"LikelihoodKDE"])
352 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
355 if (Use[
"LikelihoodMIX"])
357 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
366 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
370 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
374 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
379 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
381 if (Use[
"PDEFoamBoost"])
383 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
388 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
396 factory->
BookMethod( dataloader,
TMVA::Types::kLD,
"LD",
"H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
400 factory->
BookMethod( dataloader,
TMVA::Types::kFisher,
"Fisher",
"H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
407 if (Use[
"BoostedFisher"])
409 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );
414 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
418 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" );
422 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
426 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
430 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
434 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
438 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLP",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
441 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBFGS",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
444 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBNN",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );
448 if (Use[
"DNN_CPU"] or Use[
"DNN_GPU"]) {
450 TString layoutString (
"Layout=TANH|128,TANH|128,TANH|128,LINEAR");
453 TString training0(
"LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
454 "ConvergenceSteps=30,BatchSize=256,TestRepetitions=10,"
455 "WeightDecay=1e-4,Regularization=None,"
456 "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True");
457 TString training1(
"LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
458 "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
459 "WeightDecay=1e-4,Regularization=L2,"
460 "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
461 TString training2(
"LearningRate=1e-3,Momentum=0.0,Repetitions=1,"
462 "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10,"
463 "WeightDecay=1e-4,Regularization=L2,"
464 "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
465 TString trainingStrategyString (
"TrainingStrategy=");
466 trainingStrategyString += training0 +
"|" + training1 +
"|" + training2;
469 TString dnnOptions (
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
470 "WeightInitialization=XAVIERUNIFORM");
471 dnnOptions.Append (
":"); dnnOptions.Append (layoutString);
472 dnnOptions.Append (
":"); dnnOptions.Append (trainingStrategyString);
475 if (Use[
"DNN_GPU"]) {
476 TString gpuOptions = dnnOptions +
":Architecture=GPU";
480 if (Use[
"DNN_CPU"]) {
481 TString cpuOptions = dnnOptions +
":Architecture=CPU";
492 factory->
BookMethod( dataloader,
TMVA::Types::kTMlpANN,
"TMlpANN",
"!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" );
501 "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
505 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
509 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
513 "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
517 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
522 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
551 std::cout <<
"==> Wrote root file: " << outputFile->
GetName() << std::endl;
552 std::cout <<
"==> TMVAClassification is done!" << std::endl;
562int main(
int argc,
char** argv )
566 for (
int i=1; i<argc; i++) {
568 if(regMethod==
"-b" || regMethod==
"--batch")
continue;
570 methodList += regMethod;
572 return TMVAClassification(methodList);
R__EXTERN TSystem * gSystem
A specialized string object used for TTree selections.
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
virtual void Close(Option_t *option="")
Close a file.
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a signal tree to the data set, with an optional per-tree weight and tree type.
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator variable in data set info
void SetBackgroundWeightExpression(const TString &variable)
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a background tree to the data set, with an optional per-tree weight and tree type.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
This is the main MVA steering class.
void TrainAllMethods()
Iterates through all booked methods and calls training.
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding...
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods.
virtual const char * GetName() const
Returns name of object.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
A TTree represents a columnar dataset.
int main(int argc, char **argv)
void TMVAGui(const char *fName="TMVA.root", TString dataset="")