ROOT 6.12/07
Reference Guide
TMVAMulticlass.C File Reference

Detailed Description

This macro provides a simple example for the training and testing of the TMVA multiclass classification.
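The macro books two multivariate methods by default (a gradient-boosted decision tree, BDTG, and a multi-layer perceptron, MLP), trains them on one signal class and three background classes, and writes the results to TMVAMulticlass.root. Assuming a standard ROOT installation, it can be run from the ROOT prompt in the directory containing the macro with

   .x TMVAMulticlass.C

or, to book only a subset of the known methods, with an explicit method list such as

   .x TMVAMulticlass.C("BDTG,MLP")

The log below is the output of one such run.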

Processing /mnt/build/workspace/root-makedoc-v612/rootspi/rdoc/src/v6-12-00-patches/tutorials/tmva/TMVAMulticlass.C...
==> Start TMVAMulticlass
Creating testdata....
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.14361
var2 = -0.822373
var3 = -0.395426
var4 = -0.529427
created tree: TreeS
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -1.42237
var3 = -1.39543
var4 = -2.02943
created tree: TreeB0
... event: 0 (2000)
... event: 1000 (2000)
======> EVENT:0
var1 = -1.54361
var2 = -0.822373
var3 = -0.395426
var4 = -2.02943
created tree: TreeB1
======> EVENT:0
var1 = 0.463304
var2 = 1.37192
var3 = -1.16769
var4 = -1.77551
created tree: TreeB2
created data file: tmva_example_multiple_background.root
created tmva_example_multiple_background.root for tests of the multiclass features
DataSetInfo : [dataset] : Added class "Signal"
: Add Tree TreeS of type Signal with 2000 events
DataSetInfo : [dataset] : Added class "bg0"
: Add Tree TreeB0 of type bg0 with 2000 events
DataSetInfo : [dataset] : Added class "bg1"
: Add Tree TreeB1 of type bg1 with 2000 events
DataSetInfo : [dataset] : Added class "bg2"
: Add Tree TreeB2 of type bg2 with 2000 events
: Dataset[dataset] : Class index : 0 name : Signal
: Dataset[dataset] : Class index : 1 name : bg0
: Dataset[dataset] : Class index : 2 name : bg1
: Dataset[dataset] : Class index : 3 name : bg2
Factory : Booking method: BDTG
:
: the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change
: to new default for GradBoost *Pray*
DataSetFactory : [dataset] : Number of events in input trees
:
:
:
:
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 1000
: Signal -- testing events : 1000
: Signal -- training and testing events: 2000
: bg0 -- training events : 1000
: bg0 -- testing events : 1000
: bg0 -- training and testing events: 2000
: bg1 -- training events : 1000
: bg1 -- testing events : 1000
: bg1 -- training and testing events: 2000
: bg2 -- training events : 1000
: bg2 -- testing events : 1000
: bg2 -- training and testing events: 2000
:
DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.380 +0.597 +0.819
: var2: +0.380 +1.000 +0.706 +0.744
: var3: +0.597 +0.706 +1.000 +0.853
: var4: +0.819 +0.744 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg0):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.406 +0.621 +0.837
: var2: +0.406 +1.000 +0.696 +0.727
: var3: +0.621 +0.696 +1.000 +0.853
: var4: +0.837 +0.727 +0.853 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg1):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.371 +0.602 +0.831
: var2: +0.371 +1.000 +0.699 +0.721
: var3: +0.602 +0.699 +1.000 +0.847
: var4: +0.831 +0.721 +0.847 +1.000
: ----------------------------------------
DataSetInfo : Correlation matrix (bg2):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 -0.660 +0.034 -0.012
: var2: -0.660 +1.000 +0.007 -0.004
: var3: +0.034 +0.007 +1.000 -0.037
: var4: -0.012 -0.004 -0.037 +1.000
: ----------------------------------------
DataSetFactory : [dataset] :
:
Factory : Booking method: MLP
:
MLP : Building Network.
: Initializing weights
Factory : Train all methods
Factory : [dataset] : Create Transformation "I" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "P" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "G" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
Factory : [dataset] : Create Transformation "D" with events from all classes.
:
: Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.060705 1.0014 [ -4.0592 3.5808 ]
: var2: 0.31440 1.0501 [ -3.6891 3.7877 ]
: var3: 0.12000 1.1225 [ -3.6148 4.5640 ]
: var4: -0.070020 1.2598 [ -4.8486 5.0412 ]
: -----------------------------------------------------------
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.096903 1.0000 [ -3.5985 2.9977 ]
: var2: 0.35671 1.0000 [ -3.3391 3.5408 ]
: var3: 0.070223 1.0000 [ -2.8950 3.1502 ]
: var4: -0.20167 1.0000 [ -3.2998 2.8753 ]
: -----------------------------------------------------------
: Preparing the Principal Component (PCA) transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 1.8262e-10 1.7916 [ -7.2781 7.8235 ]
: var2:-4.0762e-10 0.89644 [ -3.2734 2.6837 ]
: var3:-1.3316e-10 0.74817 [ -2.4103 2.7078 ]
: var4:-1.5119e-10 0.61596 [ -2.2644 1.5471 ]
: -----------------------------------------------------------
: Preparing the Gaussian transformation...
: Preparing the Decorrelation transformation...
TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.0081281 1.0000 [ -2.6178 6.0982 ]
: var2: 0.010257 1.0000 [ -2.8460 6.2789 ]
: var3: 0.0095035 1.0000 [ -3.0077 5.8864 ]
: var4: 0.0074780 1.0000 [ -3.0452 5.6560 ]
: -----------------------------------------------------------
: Ranking input variables (method unspecific)...
Factory : Train method: BDTG for Multiclass classification
:
: Training 1000 Decision Trees ... patience please
: Elapsed time for training with 4000 events: 5.72 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of BDTG on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 1.63 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_BDTG.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_BDTG.class.C
Factory : Training finished
:
Factory : Train method: MLP for Multiclass classification
:
: Training Network
:
: Elapsed time for training with 4000 events: 20.2 sec
: Dataset[dataset] : Create results for training
: Dataset[dataset] : Multiclass evaluation of MLP on training sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.0148 sec
: Creating multiclass response histograms...
: Creating xml weight file: dataset/weights/TMVAMulticlass_MLP.weights.xml
: Creating standalone class: dataset/weights/TMVAMulticlass_MLP.class.C
: Write special histos to file: TMVAMulticlass.root:/dataset/Method_MLP/MLP
Factory : Training finished
:
: Ranking input variables (method specific)...
BDTG : Ranking result (top variable is best ranked)
: --------------------------------------
: Rank : Variable : Variable Importance
: --------------------------------------
: 1 : var4 : 3.048e-01
: 2 : var2 : 2.446e-01
: 3 : var1 : 2.405e-01
: 4 : var3 : 2.101e-01
: --------------------------------------
MLP : Ranking result (top variable is best ranked)
: -----------------------------
: Rank : Variable : Importance
: -----------------------------
: 1 : var4 : 2.946e+01
: 2 : var1 : 1.697e+01
: 3 : var2 : 1.033e+01
: 4 : var3 : 6.599e+00
: -----------------------------
Factory : === Destroy and recreate all methods via weight files for testing ===
:
MLP : Building Network.
: Initializing weights
Factory : Test all methods
Factory : Test method: BDTG for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of BDTG on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.864 sec
: Creating multiclass response histograms...
Factory : Test method: MLP for Multiclass classification performance
:
: Dataset[dataset] : Create results for testing
: Dataset[dataset] : Multiclass evaluation of MLP on testing sample
: Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.0154 sec
: Creating multiclass response histograms...
Factory : Evaluate all methods
: Evaluate multiclass classification method: BDTG
TFHandler_BDTG : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
: Evaluate multiclass classification method: MLP
TFHandler_MLP : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.057094 1.0236 [ -3.6592 3.2749 ]
: var2: 0.31579 1.0607 [ -3.6952 3.7877 ]
: var3: 0.11645 1.1227 [ -4.5727 4.5640 ]
: var4: -0.079113 1.2819 [ -4.7970 4.2221 ]
: -----------------------------------------------------------
:
: 1-vs-rest performance metrics per class
: -------------------------------------------------------------------------------------------------------
:
: Considers the listed class as signal and the other classes
: as background, reporting the resulting binary performance.
: A score of 0.820 (0.850) means 0.820 was achieved on the
: test set and 0.850 on the training set.
:
: Dataset MVA Method ROC AUC Sig eff@B=0.01 Sig eff@B=0.10 Sig eff@B=0.30
: Name: / Class: test (train) test (train) test (train) test (train)
:
: dataset BDTG
: ------------------------------
: Signal 0.968 (0.979) 0.538 (0.618) 0.914 (0.950) 0.991 (0.997)
: bg0 0.912 (0.925) 0.235 (0.278) 0.730 (0.772) 0.929 (0.944)
: bg1 0.945 (0.956) 0.450 (0.448) 0.818 (0.869) 0.968 (0.974)
: bg2 0.974 (0.985) 0.595 (0.688) 0.939 (0.975) 0.995 (0.999)
:
: dataset MLP
: ------------------------------
: Signal 0.974 (0.977) 0.663 (0.623) 0.934 (0.938) 0.994 (0.994)
: bg0 0.934 (0.934) 0.340 (0.317) 0.787 (0.782) 0.955 (0.958)
: bg1 0.966 (0.967) 0.579 (0.485) 0.899 (0.910) 0.992 (0.993)
: bg2 0.980 (0.986) 0.647 (0.714) 0.954 (0.969) 0.997 (1.000)
:
: -------------------------------------------------------------------------------------------------------
:
:
: Confusion matrices for all methods
: -------------------------------------------------------------------------------------------------------
:
: Does a binary comparison between the two classes given by a
: particular row-column combination. In each case, the class
: given by the row is considered signal while the class given
: by the column index is considered background.
:
: === Showing confusion matrix for method : BDTG
: (Signal Efficiency for Background Efficiency 0.01%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.512 (0.398) 0.798 (0.668) 0.686 (0.473)
: bg0 0.232 (0.239) - 0.183 (0.162) 0.709 (0.659)
: bg1 0.848 (0.694) 0.305 (0.343) - 0.669 (0.582)
: bg2 0.691 (0.613) 0.689 (0.530) 0.672 (0.592) -
:
: (Signal Efficiency for Background Efficiency 0.10%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.892 (0.858) 0.996 (0.984) 0.955 (0.900)
: bg0 0.792 (0.757) - 0.636 (0.545) 0.925 (0.923)
: bg1 0.987 (0.979) 0.718 (0.659) - 0.898 (0.862)
: bg2 0.981 (0.955) 0.975 (0.952) 0.951 (0.896) -
:
: (Signal Efficiency for Background Efficiency 0.30%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.980 (0.965) 1.000 (0.999) 1.000 (0.998)
: bg0 0.958 (0.936) - 0.851 (0.832) 0.995 (0.986)
: bg1 1.000 (0.997) 0.915 (0.885) - 0.989 (0.984)
: bg2 0.999 (0.996) 0.999 (0.996) 0.999 (0.993) -
:
: === Showing confusion matrix for method : MLP
: (Signal Efficiency for Background Efficiency 0.01%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.533 (0.505) 0.965 (0.939) 0.668 (0.663)
: bg0 0.242 (0.377) - 0.224 (0.167) 0.661 (0.670)
: bg1 0.959 (0.912) 0.300 (0.331) - 0.712 (0.674)
: bg2 0.689 (0.656) 0.753 (0.642) 0.714 (0.639) -
:
: (Signal Efficiency for Background Efficiency 0.10%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.878 (0.881) 1.000 (0.997) 0.928 (0.885)
: bg0 0.758 (0.743) - 0.705 (0.676) 0.929 (0.943)
: bg1 0.998 (0.996) 0.766 (0.770) - 0.923 (0.913)
: bg2 0.965 (0.953) 0.984 (0.971) 0.950 (0.942) -
:
: (Signal Efficiency for Background Efficiency 0.30%)
: ---------------------------------------------------
: Signal bg0 bg1 bg2
: test (train) test (train) test (train) test (train)
: Signal - 0.974 (0.969) 1.000 (1.000) 0.998 (0.997)
: bg0 0.952 (0.944) - 0.903 (0.918) 0.996 (0.994)
: bg1 0.998 (0.999) 0.955 (0.940) - 0.998 (0.998)
: bg2 1.000 (0.996) 1.000 (0.998) 1.000 (0.998) -
:
: -------------------------------------------------------------------------------------------------------
:
Dataset:dataset : Created tree 'TestTree' with 4000 events
:
Dataset:dataset : Created tree 'TrainTree' with 4000 events
:
Factory : Thank you for using TMVA!
: For citation information, please visit: http://tmva.sf.net/citeTMVA.html
==> Wrote root file: TMVAMulticlass.root
==> TMVAMulticlass is done!
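The run above writes the trained classifiers to dataset/weights/TMVAMulticlass_BDTG.weights.xml and dataset/weights/TMVAMulticlass_MLP.weights.xml. The sketch below shows how such a multiclass classifier could later be applied to new events with TMVA::Reader (the TMVAMulticlassApplication.C tutorial covers this in full); the variable names match this tutorial, but the input file and tree (newdata.root, TreeS) are placeholders to be replaced by the user's own sample.

   #include <vector>
   #include <iostream>

   #include "TFile.h"
   #include "TTree.h"

   #include "TMVA/Tools.h"
   #include "TMVA/Reader.h"

   void TMVAMulticlassApp()   // hypothetical application macro
   {
      TMVA::Tools::Instance();

      // Declare the reader with the same variables, in the same order, as used for training
      TMVA::Reader reader( "!Color:!Silent" );
      Float_t var1, var2, var3, var4;
      reader.AddVariable( "var1", &var1 );
      reader.AddVariable( "var2", &var2 );
      reader.AddVariable( "var3", &var3 );
      reader.AddVariable( "var4", &var4 );

      // Book the classifier from the weight file written by the training run above
      reader.BookMVA( "BDTG", "dataset/weights/TMVAMulticlass_BDTG.weights.xml" );

      // Placeholder input sample: any tree containing the four variables
      TFile *input = TFile::Open( "newdata.root" );   // hypothetical file name
      TTree *tree  = (TTree*)input->Get( "TreeS" );   // hypothetical tree name
      tree->SetBranchAddress( "var1", &var1 );
      tree->SetBranchAddress( "var2", &var2 );
      tree->SetBranchAddress( "var3", &var3 );
      tree->SetBranchAddress( "var4", &var4 );

      for (Long64_t i = 0; i < tree->GetEntries(); i++) {
         tree->GetEntry(i);
         // One output per class, in the class order used during training
         // (Signal, bg0, bg1, bg2); the largest output is the predicted class
         const std::vector<Float_t> &response = reader.EvaluateMulticlass( "BDTG" );
         UInt_t best = 0;
         for (UInt_t c = 1; c < response.size(); c++)
            if (response[c] > response[best]) best = c;
         std::cout << "event " << i << " --> class index " << best << std::endl;
      }
      input->Close();
   }

The full source of the training macro follows.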
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TSystem.h"
#include "TROOT.h"

#include "TMVA/Tools.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/TMVAMultiClassGui.h"

using namespace TMVA;
void TMVAMulticlass( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   //
   //    TString tmva_dir(TString(gRootDir) + "/tmva");
   //    if(gSystem->Getenv("TMVASYS"))
   //       tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   //    gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   //    gROOT->ProcessLine(".L TMVAMultiClassGui.C");

   //---------------------------------------------------------------
   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]     = 1;
   Use["BDTG"]    = 1;
   Use["DNN_CPU"] = 0;
   Use["FDA_GA"]  = 0;
   Use["PDEFoam"] = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }
   // Create a new root output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");

   dataloader->AddVariable( "var1", 'F' );
   dataloader->AddVariable( "var2", "Variable 2", "", 'F' );
   dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
   dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      std::cout << "Creating testdata...." << std::endl;
      TString createDataMacro = gROOT->GetTutorialDir() + "/tmva/createData.C";
      gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   TTree *signalTree  = (TTree*)input->Get("TreeS");
   TTree *background0 = (TTree*)input->Get("TreeB0");
   TTree *background1 = (TTree*)input->Get("TreeB1");
   TTree *background2 = (TTree*)input->Get("TreeB2");

   gROOT->cd( outfileName+TString(":/") );
   dataloader->AddTree (signalTree,"Signal");
   dataloader->AddTree (background0,"bg0");
   dataloader->AddTree (background1,"bg1");
   dataloader->AddTree (background2,"bg2");

   dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );
   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");

   if (Use["MLP"]) // neural network
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
                           "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");

   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["DNN_CPU"]) {
      TString layoutString("Layout=TANH|100,TANH|50,TANH|10,LINEAR");
      TString training0("LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10,"
                        " BatchSize=256, TestRepetitions=10, Multithreading=True");
      TString training1("LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10,"
                        " BatchSize=256, TestRepetitions=7, Multithreading=True");
      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1;
      TString nnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                        "WeightInitialization=XAVIERUNIFORM:Architecture=CPU");
      nnOptions.Append(":");
      nnOptions.Append(layoutString);
      nnOptions.Append(":");
      nnOptions.Append(trainingStrategyString);
      factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN_CPU", nnOptions);
   }
   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAMulticlass is done!" << std::endl;

   delete factory;
   delete dataloader;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName );
}
int main( int argc, char** argv )
{
   // Select methods (don't look at this code - not of interest)
   TString methodList;
   for (int i=1; i<argc; i++) {
      TString regMethod(argv[i]);
      if(regMethod=="-b" || regMethod=="--batch") continue;
      if (!methodList.IsNull()) methodList += TString(",");
      methodList += regMethod;
   }
   TMVAMulticlass(methodList);
   return 0;
}
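When ROOT is not running in batch mode, the macro ends by opening the multiclass GUI on the output file. The same GUI can be reopened later on the stored results, for example from the ROOT prompt (assuming TMVAMulticlass.root is in the working directory):

   TMVA::TMVAMultiClassGui( "TMVAMulticlass.root" );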
Author
Andreas Hoecker

Definition in file TMVAMulticlass.C.