Logo ROOT   6.10/09
Reference Guide
TMVARegressionApplication.C File Reference

Detailed Description

View in nbviewer Open in SWAN This macro provides a simple example on how to use the trained regression MVAs within an analysis module

Processing /mnt/build/workspace/root-makedoc-v610/rootspi/rdoc/src/v6-10-00-patches/tutorials/tmva/TMVARegressionApplication.C...
==> Start TMVARegressionApplication
: Booking "BDTG method" of type "BDT" from dataset/weights/TMVARegression_BDTG.weights.xml.
<HEADER> DataSetInfo : [Default] : Added class "Regression"
: Booked classifier "BDTG" of type: "BDT"
: Booking "FDA_GA method" of type "FDA" from dataset/weights/TMVARegression_FDA_GA.weights.xml.
: User-defined formula string : "(0)+(1)*x0+(2)*x1"
: TFormula-compatible formula string: "[0]+[1]*[3]+[2]*[4]"
: Booked classifier "FDA_GA" of type: "FDA"
: Booking "KNN method" of type "KNN" from dataset/weights/TMVARegression_KNN.weights.xml.
: Creating kd-tree with 1000 events
: Computing scale factor for 1d distributions: (ifrac, bottom, top) = (80%, 10%, 90%)
<HEADER> ModulekNN : Optimizing tree for 2 variables with 1000 values
: <Fill> Class 1 has 1000 events
: Booked classifier "KNN" of type: "KNN"
: Booking "LD method" of type "LD" from dataset/weights/TMVARegression_LD.weights.xml.
: Booked classifier "LD" of type: "LD"
: Booking "MLP method" of type "MLP" from dataset/weights/TMVARegression_MLP.weights.xml.
<HEADER> MLP : Building Network.
: Initializing weights
: Booked classifier "MLP" of type: "MLP"
: Booking "PDEFoam method" of type "PDEFoam" from dataset/weights/TMVARegression_PDEFoam.weights.xml.
: Read foams from file: dataset/weights/TMVARegression_PDEFoam.weights_foams.root
: Booked classifier "PDEFoam" of type: "PDEFoam"
--- TMVARegressionApp : Using input file: ./files/tmva_reg_example.root
--- Select signal sample
--- Processing: 10000 events
--- ... Processing event: 0
--- ... Processing event: 1000
--- ... Processing event: 2000
--- ... Processing event: 3000
--- ... Processing event: 4000
--- ... Processing event: 5000
--- ... Processing event: 6000
--- ... Processing event: 7000
--- ... Processing event: 8000
--- ... Processing event: 9000
--- End of event loop: Real time 0:00:03, CP time 3.400
--- Created root file: "TMVARegApp.root" containing the MVA output histograms
==> TMVARegressionApplication is done!
#include <cstdlib>
#include <vector>
#include <iostream>
#include <map>
#include <string>
#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TSystem.h"
#include "TROOT.h"
#include "TStopwatch.h"
#include "TMVA/Tools.h"
#include "TMVA/Reader.h"
using namespace TMVA;
void TMVARegressionApplication( TString myMethodList = "" )
{
//---------------------------------------------------------------
// This loads the library
// Default MVA methods to be trained + tested
std::map<std::string,int> Use;
// --- Mutidimensional likelihood and Nearest-Neighbour methods
Use["PDERS"] = 0;
Use["PDEFoam"] = 1;
Use["KNN"] = 1;
//
// --- Linear Discriminant Analysis
Use["LD"] = 1;
//
// --- Function Discriminant analysis
Use["FDA_GA"] = 1;
Use["FDA_MC"] = 0;
Use["FDA_MT"] = 0;
Use["FDA_GAMT"] = 0;
//
// --- Neural Network
Use["MLP"] = 1;
Use["DNN_CPU"] = 0;
//
// --- Support Vector Machine
Use["SVM"] = 0;
//
// --- Boosted Decision Trees
Use["BDT"] = 0;
Use["BDTG"] = 1;
// ---------------------------------------------------------------
std::cout << std::endl;
std::cout << "==> Start TMVARegressionApplication" << std::endl;
// Select methods (don't look at this code - not of interest)
if (myMethodList != "") {
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
for (UInt_t i=0; i<mlist.size(); i++) {
std::string regMethod(mlist[i]);
if (Use.find(regMethod) == Use.end()) {
std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
std::cout << std::endl;
return;
}
Use[regMethod] = 1;
}
}
// --------------------------------------------------------------------------------------------------
// --- Create the Reader object
TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
// Create a set of variables and declare them to the reader
// - the variable names MUST corresponds in name and type to those given in the weight file(s) used
Float_t var1, var2;
reader->AddVariable( "var1", &var1 );
reader->AddVariable( "var2", &var2 );
// Spectator variables declared in the training have to be added to the reader, too
Float_t spec1,spec2;
reader->AddSpectator( "spec1:=var1*2", &spec1 );
reader->AddSpectator( "spec2:=var1*3", &spec2 );
// --- Book the MVA methods
TString dir = "dataset/weights/";
TString prefix = "TMVARegression";
// Book method(s)
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
if (it->second) {
TString methodName = it->first + " method";
TString weightfile = dir + prefix + "_" + TString(it->first) + ".weights.xml";
reader->BookMVA( methodName, weightfile );
}
}
// Book output histograms
TH1* hists[100];
Int_t nhists = -1;
for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
TH1* h = new TH1F( it->first.c_str(), TString(it->first) + " method", 100, -100, 600 );
if (it->second) hists[++nhists] = h;
}
nhists++;
// Prepare input tree (this must be replaced by your data source)
// in this example, there is a toy tree with signal and one with background events
// we'll later on use only the "signal" events for the test in this example.
//
TFile *input(0);
TString fname = "./tmva_reg_example.root";
if (!gSystem->AccessPathName( fname )) {
input = TFile::Open( fname ); // check if file in local directory exists
}
else {
input = TFile::Open("http://root.cern.ch/files/tmva_reg_example.root", "CACHEREAD"); // if not: download from ROOT server
}
if (!input) {
std::cout << "ERROR: could not open data file" << std::endl;
exit(1);
}
std::cout << "--- TMVARegressionApp : Using input file: " << input->GetName() << std::endl;
// --- Event loop
// Prepare the tree
// - here the variable names have to corresponds to your tree
// - you can use the same variables as above which is slightly faster,
// but of course you can use different ones and copy the values inside the event loop
//
TTree* theTree = (TTree*)input->Get("TreeR");
std::cout << "--- Select signal sample" << std::endl;
theTree->SetBranchAddress( "var1", &var1 );
theTree->SetBranchAddress( "var2", &var2 );
std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
sw.Start();
for (Long64_t ievt=0; ievt<theTree->GetEntries();ievt++) {
if (ievt%1000 == 0) {
std::cout << "--- ... Processing event: " << ievt << std::endl;
}
theTree->GetEntry(ievt);
// Retrieve the MVA target values (regression outputs) and fill into histograms
// NOTE: EvaluateRegression(..) returns a vector for multi-target regression
for (Int_t ih=0; ih<nhists; ih++) {
TString title = hists[ih]->GetTitle();
Float_t val = (reader->EvaluateRegression( title ))[0];
hists[ih]->Fill( val );
}
}
sw.Stop();
std::cout << "--- End of event loop: "; sw.Print();
// --- Write histograms
TFile *target = new TFile( "TMVARegApp.root","RECREATE" );
for (Int_t ih=0; ih<nhists; ih++) hists[ih]->Write();
target->Close();
std::cout << "--- Created root file: \"" << target->GetName()
<< "\" containing the MVA output histograms" << std::endl;
delete reader;
std::cout << "==> TMVARegressionApplication is done!" << std::endl << std::endl;
}
int main( int argc, char** argv )
{
// Select methods (don't look at this code - not of interest)
TString methodList;
for (int i=1; i<argc; i++) {
TString regMethod(argv[i]);
if(regMethod=="-b" || regMethod=="--batch") continue;
if (!methodList.IsNull()) methodList += TString(",");
methodList += regMethod;
}
TMVARegressionApplication(methodList);
return 0;
}
Author
Andreas Hoecker

Definition in file TMVARegressionApplication.C.