/// Books and trains a TMVA BDT classifier on the "TreeS" (signal) and "TreeB"
/// (background) trees, using the four input variables var1..var4.
///
/// \param filename Not referenced by any statement visible in this listing.
///                 Presumably the file that `data` is opened from -- TODO confirm
///                 against the complete macro.
///
/// NOTE(review): this listing appears to have lost several statements during
/// extraction. `output`, `data`, `dataloader` and `factory` are used below but
/// never declared here, and the first two lines of the body are only the tail
/// of a call. Restore the missing declarations from the original macro before
/// compiling.
void train(const std::string &filename)
{
// NOTE(review): trailing arguments of a call whose beginning is missing --
// presumably the TMVA::Factory construction that yields `factory`; confirm.
output,
"!V:!DrawProgressBar:AnalysisType=Classification");
// Fetch the signal tree by name from `data`; the result of Get() is cast to
// TTree* with a C-style cast and is not checked for null.
auto signal = (
TTree *)data->Get(
"TreeS");
// Same lookup for the background tree.
auto background = (
TTree *)data->Get(
"TreeB");
// Names of the input variables, registered with the dataloader in this order.
const std::vector<std::string> variables = {"var1", "var2", "var3", "var4"};
for (const auto &var : variables) {
dataloader->AddVariable(var);
}
// Register both trees; 1.0 is passed as the second argument for each
// (the per-tree weight in the TMVA DataLoader API -- verify).
dataloader->AddSignalTree(signal, 1.0);
dataloader->AddBackgroundTree(background, 1.0);
// Both arguments are empty strings, i.e. nothing is specified here for the
// train/test preparation.
dataloader->PrepareTrainingAndTestTree("", "");
// Book one method of type kBDT under the name "BDT"; the option string asks
// for NTrees=300 and MaxDepth=2.
factory->BookMethod(dataloader,
TMVA::Types::kBDT,
"BDT",
"!V:!H:NTrees=300:MaxDepth=2");
// Train every method booked on the factory (only "BDT" in the visible code).
factory->TrainAllMethods();
}
/// Macro entry point: trains a model via train(), loads the resulting XML
/// weight file into an RReader, and runs inference in three ways -- on a single
/// event, on a tensor of several events, and as a defined column "y" that is
/// histogrammed and drawn with a legend.
///
/// NOTE(review): this listing appears to have lost several statements during
/// extraction. `df` and `variables` are used below but never declared here;
/// restore their declarations from the original macro before compiling.
void tmva003_RReader()
{
// Train first so that the weight file loaded below exists.
const std::string filename = "http://root.cern.ch/files/tmva_class_example.root";
train(filename);
// Load the trained model from the XML weight file.
RReader model(
"tmva003_BDT/weights/tmva003_BDT.weights.xml");
// Single-event inference: four input values are passed in one braced list and
// only the first element of the returned result is printed.
auto prediction = model.Compute({0.5, 1.0, -0.2, 1.5});
std::cout << "Single-event inference: " << prediction[0] << "\n\n";
// Batch inference: restrict the dataframe with Range(3) and convert the
// columns listed in `variables` into a float tensor.
// NOTE(review): `df` and `variables` are not declared in the visible code.
auto df2 = df.Range(3);
auto x = AsTensor<float>(df2, variables);
// Compute() is called with the whole tensor at once.
auto y = model.Compute(
x);
std::cout <<
"RTensor input for inference on data of multiple events:\n" <<
x <<
"\n\n";
std::cout <<
"Prediction performed on multiple events: " <<
y <<
"\n\n";
// Helper that defines column "y" from the model response and books a
// 30-bin histogram of it over [-0.5, 0.5], named after `treename`.
// Everything is captured by reference.
// NOTE(review): as shown, `treename` only sets the histogram name and both
// calls read the same outer `df`; presumably the original builds a dataframe
// per tree inside the lambda -- confirm. The template argument 4 presumably
// matches the number of input variables.
auto make_histo = [&](const std::string &treename) {
auto df2 = df.Define("y", Compute<4, float>(model), variables);
return df2.Histo1D({treename.c_str(), ";BDT score;N_{Events}", 30, -0.5, 0.5}, "y");
};
auto sig = make_histo("TreeS");
auto bkg = make_histo("TreeB");
// Only the background line colour is set in the visible code.
bkg->SetLineColor(
kBlue);
sig->SetLineWidth(2);
bkg->SetLineWidth(2);
// Background is drawn first; signal is overlaid with the "SAME" option.
bkg->Draw("HIST");
sig->Draw("HIST SAME");
// Legend entries are added by name; "TreeS"/"TreeB" are the same strings
// used as histogram names in make_histo.
TLegend legend(0.7, 0.7, 0.89, 0.89);
legend.SetBorderSize(0);
legend.AddEntry("TreeS", "Signal", "l");
legend.AddEntry("TreeB", "Background", "l");
legend.Draw();
}
R__EXTERN TStyle * gStyle
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree, CSV files and other data formats.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
This class displays a legend box (TPaveText) containing several legend entries.
TMVA::Reader legacy interface.
This is the main MVA steering class.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mode.
A TTree represents a columnar dataset.
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
static void output(int code)
<HEADER> DataSetInfo : [tmva003_BDT] : Added class "Signal"
: Add Tree TreeS of type Signal with 6000 events
<HEADER> DataSetInfo : [tmva003_BDT] : Added class "Background"
: Add Tree TreeB of type Background with 6000 events
: Dataset[tmva003_BDT] : Class index : 0 name : Signal
: Dataset[tmva003_BDT] : Class index : 1 name : Background
<HEADER> Factory : Booking method: BDT
:
: Rebuilding Dataset tmva003_BDT
: Building event vectors for type 2 Signal
: Dataset[tmva003_BDT] : create input formulas for tree TreeS
: Building event vectors for type 2 Background
: Dataset[tmva003_BDT] : create input formulas for tree TreeB
<HEADER> DataSetFactory : [tmva003_BDT] : Number of events in input trees
:
:
: Dataset[tmva003_BDT] : Weight renormalisation mode: "EqualNumEvents": renormalises all event classes ...
: Dataset[tmva003_BDT] : such that the effective (weighted) number of events in each class is the same
: Dataset[tmva003_BDT] : (and equals the number of events (entries) given for class=0 )
: Dataset[tmva003_BDT] : ... i.e. such that Sum[i=1..N_j]{w_i} = N_classA, j=classA, classB, ...
: Dataset[tmva003_BDT] : ... (note that N_j is the sum of TRAINING events
: Dataset[tmva003_BDT] : ..... Testing events are not renormalised nor included in the renormalisation factor!)
: Number of training and testing events
: ---------------------------------------------------------------------------
: Signal -- training events : 3000
: Signal -- testing events : 3000
: Signal -- training and testing events: 6000
: Background -- training events : 3000
: Background -- testing events : 3000
: Background -- training and testing events: 6000
:
<HEADER> DataSetInfo : Correlation matrix (Signal):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.390 +0.594 +0.819
: var2: +0.390 +1.000 +0.684 +0.724
: var3: +0.594 +0.684 +1.000 +0.848
: var4: +0.819 +0.724 +0.848 +1.000
: ----------------------------------------
<HEADER> DataSetInfo : Correlation matrix (Background):
: ----------------------------------------
: var1 var2 var3 var4
: var1: +1.000 +0.854 +0.917 +0.965
: var2: +0.854 +1.000 +0.926 +0.934
: var3: +0.917 +0.926 +1.000 +0.972
: var4: +0.965 +0.934 +0.972 +1.000
: ----------------------------------------
<HEADER> DataSetFactory : [tmva003_BDT] :
:
<HEADER> Factory : Train all methods
<HEADER> Factory : [tmva003_BDT] : Create Transformation "I" with events from all classes.
:
<HEADER> : Transformation, Variable selection :
: Input : variable 'var1' <---> Output : variable 'var1'
: Input : variable 'var2' <---> Output : variable 'var2'
: Input : variable 'var3' <---> Output : variable 'var3'
: Input : variable 'var4' <---> Output : variable 'var4'
<HEADER> TFHandler_Factory : Variable Mean RMS [ Min Max ]
: -----------------------------------------------------------
: var1: 0.017312 1.6864 [ -5.8991 4.7639 ]
: var2: 0.0068952 1.5665 [ -5.2454 4.6508 ]
: var3: 0.0094455 1.7427 [ -5.3563 4.6430 ]
: var4: 0.16960 2.1719 [ -6.9675 4.9600 ]
: -----------------------------------------------------------
: Ranking input variables (method unspecific)...
<HEADER> IdTransformation : Ranking result (top variable is best ranked)
: -----------------------------
: Rank : Variable : Separation
: -----------------------------
: 1 : var4 : 3.564e-01
: 2 : var3 : 2.899e-01
: 3 : var1 : 2.792e-01
: 4 : var2 : 2.260e-01
: -----------------------------
<HEADER> Factory : Train method: BDT for Classification
:
<HEADER> BDT : #events: (reweighted) sig: 3000 bkg: 3000
: #events: (unweighted) sig: 3000 bkg: 3000
: Training 300 Decision Trees ... patience please
: Elapsed time for training with 6000 events: 0.524 sec
<HEADER> BDT : [tmva003_BDT] : Evaluation of BDT on training sample (6000 events)
: Elapsed time for evaluation of 6000 events: 0.0704 sec
: Creating xml weight file: tmva003_BDT/weights/tmva003_BDT.weights.xml
: Creating standalone class: tmva003_BDT/weights/tmva003_BDT.class.C
: TMVA.root:/tmva003_BDT/Method_BDT/BDT
<HEADER> Factory : Training finished
:
: Ranking input variables (method specific)...
<HEADER> BDT : Ranking result (top variable is best ranked)
: --------------------------------------
: Rank : Variable : Variable Importance
: --------------------------------------
: 1 : var4 : 3.940e-01
: 2 : var1 : 2.619e-01
: 3 : var2 : 1.849e-01
: 4 : var3 : 1.592e-01
: --------------------------------------
<HEADER> Factory : === Destroy and recreate all methods via weight files for testing ===
:
: Reading weight file: tmva003_BDT/weights/tmva003_BDT.weights.xml
Single-event inference: 0.233873
RTensor input for inference on data of multiple events:
{ { -1.14361, -0.822373, -0.495426, -0.629427 } { 2.14344, -0.0189228, 0.26703, 1.26749 } { -0.443913, 0.486827, 0.139535, 0.611483 } }
Prediction performed on multiple events: { 0.173541, -0.0540229, 0.266502 }