62 int TMVAClassification(
TString myMethodList =
"" )
77 std::map<std::string,int> Use;
87 Use[
"Likelihood"] = 1;
88 Use[
"LikelihoodD"] = 0;
89 Use[
"LikelihoodPCA"] = 1;
90 Use[
"LikelihoodKDE"] = 0;
91 Use[
"LikelihoodMIX"] = 0;
98 Use[
"PDEFoamBoost"] = 0;
105 Use[
"BoostedFisher"] = 0;
139 std::cout << std::endl;
140 std::cout <<
"==> Start TMVAClassification" << std::endl;
143 if (myMethodList !=
"") {
144 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
147 for (
UInt_t i=0; i<mlist.size(); i++) {
148 std::string regMethod(mlist[i]);
150 if (Use.find(regMethod) == Use.end()) {
151 std::cout <<
"Method \"" << regMethod <<
"\" not known in TMVA under this name. Choose among the following:" << std::endl;
152 for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first <<
" ";
153 std::cout << std::endl;
167 TString fname =
"./tmva_class_example.root";
173 input =
TFile::Open(
"http://root.cern.ch/files/tmva_class_example.root",
"CACHEREAD");
176 std::cout <<
"ERROR: could not open data file" << std::endl;
179 std::cout <<
"--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
183 TTree *signalTree = (
TTree*)input->Get(
"TreeS");
187 TString outfileName(
"TMVA.root" );
201 "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
213 dataloader->
AddVariable(
"myvar1 := var1+var2",
'F' );
214 dataloader->
AddVariable(
"myvar2 := var1-var2",
"Expression 2",
"",
'F' );
215 dataloader->
AddVariable(
"var3",
"Variable 3",
"units",
'F' );
216 dataloader->
AddVariable(
"var4",
"Variable 4",
"units",
'F' );
222 dataloader->
AddSpectator(
"spec1 := var1*2",
"Spectator 1",
"units",
'F' );
223 dataloader->
AddSpectator(
"spec2 := var1*3",
"Spectator 2",
"units",
'F' );
295 "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V" );
307 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
311 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
315 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
319 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
323 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
326 if (Use[
"Likelihood"])
328 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
331 if (Use[
"LikelihoodD"])
333 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
336 if (Use[
"LikelihoodPCA"])
338 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
341 if (Use[
"LikelihoodKDE"])
343 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
346 if (Use[
"LikelihoodMIX"])
348 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );
357 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
361 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
365 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );
370 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
372 if (Use[
"PDEFoamBoost"])
374 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );
379 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
387 factory->
BookMethod( dataloader,
TMVA::Types::kLD,
"LD",
"H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
391 factory->
BookMethod( dataloader,
TMVA::Types::kFisher,
"Fisher",
"H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
398 if (Use[
"BoostedFisher"])
400 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );
405 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
409 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" );
413 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
417 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
421 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
425 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
429 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLP",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
432 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBFGS",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
435 factory->
BookMethod( dataloader,
TMVA::Types::kMLP,
"MLPBNN",
"H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );
439 if (Use[
"DNN_CPU"] or Use[
"DNN_GPU"]) {
441 TString layoutString (
"Layout=TANH|128,TANH|128,TANH|128,LINEAR");
444 TString training0(
"LearningRate=1e-1,Momentum=0.9,Repetitions=1," 445 "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," 446 "WeightDecay=1e-4,Regularization=L2," 447 "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True");
448 TString training1(
"LearningRate=1e-2,Momentum=0.9,Repetitions=1," 449 "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," 450 "WeightDecay=1e-4,Regularization=L2," 451 "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
452 TString training2(
"LearningRate=1e-3,Momentum=0.0,Repetitions=1," 453 "ConvergenceSteps=20,BatchSize=256,TestRepetitions=10," 454 "WeightDecay=1e-4,Regularization=L2," 455 "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True");
456 TString trainingStrategyString (
"TrainingStrategy=");
457 trainingStrategyString += training0 +
"|" + training1 +
"|" + training2;
460 TString dnnOptions (
"!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" 461 "WeightInitialization=XAVIERUNIFORM");
462 dnnOptions.Append (
":"); dnnOptions.Append (layoutString);
463 dnnOptions.Append (
":"); dnnOptions.Append (trainingStrategyString);
466 if (Use[
"DNN_GPU"]) {
467 TString gpuOptions = dnnOptions +
":Architecture=GPU";
471 if (Use[
"DNN_CPU"]) {
472 TString cpuOptions = dnnOptions +
":Architecture=CPU";
483 factory->
BookMethod( dataloader,
TMVA::Types::kTMlpANN,
"TMlpANN",
"!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" );
492 "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
496 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
500 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
504 "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
508 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
513 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
542 std::cout <<
"==> Wrote root file: " << outputFile->
GetName() << std::endl;
543 std::cout <<
"==> TMVAClassification is done!" << std::endl;
553 int main(
int argc,
char** argv )
557 for (
int i=1; i<argc; i++) {
559 if(regMethod==
"-b" || regMethod==
"--batch")
continue;
561 methodList += regMethod;
563 return TMVAClassification(methodList);
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a background tree, with an optional weight and tree type.
virtual const char * GetName() const
Returns name of object.
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
MethodBase * BookMethod(DataLoader *loader, TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
void TMVAGui(const char *fName="TMVA.root", TString dataset="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
static Bool_t SetCacheFileDir(ROOT::Internal::TStringView cacheDir, Bool_t operateDisconnected=kTRUE, Bool_t forceCacheread=kFALSE)
void TrainAllMethods()
Iterates through all booked methods and calls training.
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
int main(int argc, char **argv)
A specialized string object used for TTree selections.
R__EXTERN TSystem * gSystem
void EvaluateAllMethods(void)
Iterates over all MVAs that have been booked, and calls their evaluation methods. ...
void TestAllMethods()
Evaluates all booked methods on the testing data and adds the output to the Results in the corresponding...
This is the main MVA steering class.
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void SetBackgroundWeightExpression(const TString &variable)
A TTree object has a header with a name and a title.
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Adds a signal tree, with an optional weight and tree type.
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator variable in data set info
virtual void Close(Option_t *option="")
Close a file.