Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
VariableImportance.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata and Sergei Gleyzer
3
4/*! \class TMVA::VariableImportanceResult
5\ingroup TMVA
6*/
7
8/*! \class TMVA::VariableImportance
9\ingroup TMVA
10*/
11
13
14#include "TMVA/Config.h"
15#include "TMVA/DataSetInfo.h"
16#include "TMVA/Envelope.h"
17#include "TMVA/Factory.h"
18#include "TMVA/OptionMap.h"
19#include "TMVA/MethodBase.h"
20#include "TMVA/MethodCategory.h"
21#include "TMVA/MsgLogger.h"
22#include "TMVA/Types.h"
24
25#include "TAxis.h"
26#include "TCanvas.h"
27#include "TH1.h"
28#include "TRandom3.h"
29#include "TStyle.h"
30
31#include <bitset>
32#include <cmath>
33#include <memory>
34#include <utility>
35
36
37//number of bits for bitset
38#define NBITS 32
39
40////////////////////////////////////////////////////////////////////////////////
41
47
48////////////////////////////////////////////////////////////////////////////////
49
55
56////////////////////////////////////////////////////////////////////////////////
57
59{
62
63 MsgLogger fLogger("VariableImportance");
64 if(fType==VIType::kShort)
65 {
66 fLogger<<kINFO<<"Variable Importance Results (Short)"<<Endl;
67 }else if(fType==VIType::kAll)
68 {
69 fLogger<<kINFO<<"Variable Importance Results (All)"<<Endl;
70 }else{
71 fLogger<<kINFO<<"Variable Importance Results (Random)"<<Endl;
72 }
73
74 fImportanceValues.Print();
76}
77
78////////////////////////////////////////////////////////////////////////////////
79
81{
82 TCanvas *c=new TCanvas(name.Data());
83 fImportanceHist->Draw("");
84 fImportanceHist->GetXaxis()->SetTitle(" Variable Names ");
85 fImportanceHist->GetYaxis()->SetTitle(" Importance (%) ");
86 c->Draw();
87 return c;
88}
89
90////////////////////////////////////////////////////////////////////////////////
91
92TMVA::VariableImportance::VariableImportance(TMVA::DataLoader *dataloader):TMVA::Envelope("VariableImportance",dataloader,nullptr),fType(VIType::kShort)
93{
94 fClassifier=std::unique_ptr<Factory>(new TMVA::Factory("VariableImportance","!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
95}
96
97////////////////////////////////////////////////////////////////////////////////
98
103
104////////////////////////////////////////////////////////////////////////////////
105
107{
108
109 //NOTE: Put the type of VI Algorithm in the results Print
110 if(fType==VIType::kShort)
111 {
113 }else if(fType==VIType::kAll)
114 {
116 }else{
117 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
118 if(nbits<10)
119 Log()<<kERROR<<"Running variable importance with less that 10 varibales in Random mode "<<
120 "can to produce inconsisten results"<<Endl;
121 EvaluateImportanceRandom(pow(nbits,2));
122 }
123 fResults.fType = fType;
126 Log()<<kINFO<<"Evaluation done."<<Endl;
128}
129
130////////////////////////////////////////////////////////////////////////////////
131
133{
134 ULong_t sum=0;
135 for(ULong_t n=0;n<i;n++) sum+=pow(2,n);
136 return sum;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140
141TH1F* TMVA::VariableImportance::GetImportance(const UInt_t nbits,std::vector<Float_t> &importances,std::vector<TString> &varNames)
142{
143 TH1F *vihist = new TH1F("vihist", "", nbits, 0, nbits);
144
145 gStyle->SetOptStat(000000);
146
147 Float_t normalization = 0.0;
148 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
149
150 Float_t roc = 0.0;
151
152 gStyle->SetTitleXOffset(0.4);
153 gStyle->SetTitleXOffset(1.2);
154
155
156 for (UInt_t i = 1; i < nbits + 1; i++) {
157 roc = 100.0 * importances[i - 1] / normalization;
158 vihist->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
159 vihist->SetBinContent(i, roc);
160 }
161
162 vihist->LabelsOption("v >", "X");
163 vihist->SetBarWidth(0.97);
164 vihist->SetFillColor(TColor::GetColor("#006600"));
165
166 vihist->GetXaxis()->SetTitle(" Variable Names ");
167 vihist->GetXaxis()->SetTitleSize(0.045);
168 vihist->GetXaxis()->CenterTitle();
169 vihist->GetXaxis()->SetTitleOffset(1.24);
170
171 vihist->GetYaxis()->SetTitle(" Importance (%)");
172 vihist->GetYaxis()->SetTitleSize(0.045);
173 vihist->GetYaxis()->CenterTitle();
174 vihist->GetYaxis()->SetTitleOffset(1.24);
175
176 vihist->GetYaxis()->SetRangeUser(-7, 50);
177 vihist->SetDirectory(nullptr);
178
179 return vihist;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183
185{
186 for (auto &meth : fMethods) {
187 TString methodName = meth.GetValue<TString>("MethodName");
188 TString methodTitle = meth.GetValue<TString>("MethodTitle");
189 TString methodOptions = meth.GetValue<TString>("MethodOptions");
190
191 uint32_t x = 0;
192 uint32_t y = 0;
193 // getting number of variables and variable names from loader
194 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
195 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
196
197 ULong_t range = Sum(nbits);
198
199 // vector to save importances
200 std::vector<Float_t> importances(nbits);
201 for (UInt_t i = 0; i < nbits; i++)
202 importances[i] = 0;
203
204 Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed
205
206 x = range;
207
208 std::bitset<NBITS> xbitset(x);
209 if (x == 0)
210 Log() << kFATAL << "Error: need at least one variable."; // dataloader need at least one variable
211
212 // creating loader for seed
213 TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string());
214
215 // adding variables from seed
216 for (UInt_t index = 0; index < nbits; index++) {
217 if (xbitset[index])
218 seeddl->AddVariable(varNames[index], 'F');
219 }
220
221 // Loading Dataset
222 DataLoaderCopy(seeddl, fDataLoader.get());
223
224 // Booking Seed
225 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
226
227 // Train/Test/Evaluation
228 fClassifier->TrainAllMethods();
229 fClassifier->TestAllMethods();
230 fClassifier->EvaluateAllMethods();
231
232 // getting ROC
233 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
234
235 delete seeddl;
236 fClassifier->DeleteAllMethods();
237 fClassifier->fMethodsMap.clear();
238
239 for (uint32_t i = 0; i < NBITS; ++i) {
240 if (x & (1 << i)) {
241 y = x & ~(1 << i);
242 std::bitset<NBITS> ybitset(y);
243 //need at least one variable
244 //NOTE: if subssed is zero then is the special case
245 //that count in xbitset is 1
246 Double_t ny = log(x - y) / 0.693147;
247 if (y == 0) {
248 importances[ny] = SROC - 0.5;
249 continue;
250 }
251
252 //creating loader for subseed
253 TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string());
254 //adding variables from subseed
255 for (UInt_t index = 0; index < nbits; index++) {
256 if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F');
257 }
258
259 //Loading Dataset
260 DataLoaderCopy(subseeddl,fDataLoader.get());
261
262 //Booking SubSeed
263 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
264
265 //Train/Test/Evaluation
266 fClassifier->TrainAllMethods();
267 fClassifier->TestAllMethods();
268 fClassifier->EvaluateAllMethods();
269
270 //getting ROC
271 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
272 importances[ny] += SROC - SSROC;
273
274 delete subseeddl;
275 fClassifier->DeleteAllMethods();
276 fClassifier->fMethodsMap.clear();
277 }
278 }
279 Float_t normalization = 0.0;
280 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
281
282 for(UInt_t i=0;i<nbits;i++){
283 //adding values
284 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
285 //adding sufix
286 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
287 }
288 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
289 }
290}
291
292////////////////////////////////////////////////////////////////////////////////
293
295{
296 for (auto &meth : fMethods) {
297
298 TString methodName = meth.GetValue<TString>("MethodName");
299 TString methodTitle = meth.GetValue<TString>("MethodTitle");
300 TString methodOptions = meth.GetValue<TString>("MethodOptions");
301
302 TRandom3 *rangen = new TRandom3(0); // Random Gen.
303
304 uint32_t x = 0;
305 uint32_t y = 0;
306
307 // getting number of variables and variable names from loader
308 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
309 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
310
311 ULong_t range = pow(2, nbits);
312
313 // vector to save importances
314 std::vector<Float_t> importances(nbits);
315
316 for (UInt_t i = 0; i < nbits; i++)
317 importances[i] = 0;
318
319 Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed
320
321 x = range;
322
323 for (UInt_t n = 0; n < seeds; n++) {
324 x = rangen->Integer(range);
325
326 std::bitset<NBITS> xbitset(x);
327 if (x == 0)
328 continue; // dataloader need at least one variable
329
330 // creating loader for seed
331 TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string());
332
333 // adding variables from seed
334 for (UInt_t index = 0; index < nbits; index++) {
335 if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F');
336 }
337
338 //Loading Dataset
339 DataLoaderCopy(seeddl,fDataLoader.get());
340
341 //Booking Seed
342 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
343
344 //Train/Test/Evaluation
345 fClassifier->TrainAllMethods();
346 fClassifier->TestAllMethods();
347 fClassifier->EvaluateAllMethods();
348
349 //getting ROC
350 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
351
352 delete seeddl;
353 fClassifier->DeleteAllMethods();
354 fClassifier->fMethodsMap.clear();
355
356 for (uint32_t i = 0; i < 32; ++i) {
357 if (x & (1 << i)) {
358 y = x & ~(1 << i);
359 std::bitset<NBITS> ybitset(y);
360 //need at least one variable
361 //NOTE: if subssed is zero then is the special case
362 //that count in xbitset is 1
363 Double_t ny = log(x - y) / 0.693147;
364 if (y == 0) {
365 importances[ny] = SROC - 0.5;
366 continue;
367 }
368
369 //creating loader for subseed
370 TMVA::DataLoader *subseeddl = new TMVA::DataLoader(ybitset.to_string());
371 //adding variables from subseed
372 for (UInt_t index = 0; index < nbits; index++) {
373 if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F');
374 }
375
376 //Loading Dataset
377 DataLoaderCopy(subseeddl,fDataLoader.get());
378
379 //Booking SubSeed
380 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
381
382 //Train/Test/Evaluation
383 fClassifier->TrainAllMethods();
384 fClassifier->TestAllMethods();
385 fClassifier->EvaluateAllMethods();
386
387 //getting ROC
388 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
389 importances[ny] += SROC - SSROC;
390
391 delete subseeddl;
392 fClassifier->DeleteAllMethods();
393 fClassifier->fMethodsMap.clear();
394 }
395 }
396 }
397
398 Float_t normalization = 0.0;
399 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
400
401 for(UInt_t i=0;i<nbits;i++){
402 //adding values
403 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
404 //adding sufix
405 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
406 }
407 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
408 delete rangen;
409 }
410}
411
412////////////////////////////////////////////////////////////////////////////////
413
415{
416 for (auto &meth : fMethods) {
417 TString methodName = meth.GetValue<TString>("MethodName");
418 TString methodTitle = meth.GetValue<TString>("MethodTitle");
419 TString methodOptions = meth.GetValue<TString>("MethodOptions");
420
421 uint32_t x = 0;
422 uint32_t y = 0;
423
424 // getting number of variables and variable names from loader
425 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
426 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
427
428 ULong_t range = pow(2, nbits);
429
430 // vector to save importances
431 std::vector<Float_t> importances(nbits);
432
433 // vector to save ROC-Integral values
434 std::vector<Float_t> ROC(range);
435 ROC[0] = 0.5;
436 for (UInt_t i = 0; i < nbits; i++)
437 importances[i] = 0;
438
439 Float_t SROC, SSROC; // computed ROC value
440 for (x = 1; x < range; x++) {
441
442 std::bitset<NBITS> xbitset(x);
443 if (x == 0)
444 continue; // dataloader need at least one variable
445
446 // creating loader for seed
447 TMVA::DataLoader *seeddl = new TMVA::DataLoader(xbitset.to_string());
448
449 // adding variables from seed
450 for (UInt_t index = 0; index < nbits; index++) {
451 if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F');
452 }
453
454 DataLoaderCopy(seeddl,fDataLoader.get());
455
456 seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
457
458 //Booking Seed
459 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
460
461 //Train/Test/Evaluation
462 fClassifier->TrainAllMethods();
463 fClassifier->TestAllMethods();
464 fClassifier->EvaluateAllMethods();
465
466 //getting ROC
467 ROC[x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
468
469 delete seeddl;
470 fClassifier->DeleteAllMethods();
471 fClassifier->fMethodsMap.clear();
472 }
473
474
475 for ( x = 0; x <range ; x++)
476 {
477 SROC=ROC[x];
478 for (uint32_t i = 0; i < NBITS; ++i) {
479 if (x & (1 << i)) {
480 y = x & ~(1 << i);
481 std::bitset<NBITS> ybitset(y);
482
483 Float_t ny = log(x - y) / 0.693147;
484 if (y == 0) {
485 importances[ny] = SROC - 0.5;
486 continue;
487 }
488
489 //getting ROC
490 SSROC = ROC[y];
491 importances[ny] += SROC - SSROC;
492 }
493
494 }
495 }
496 Float_t normalization = 0.0;
497 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
498
499 for(UInt_t i=0;i<nbits;i++){
500 //adding values
501 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
502 //adding sufix
503 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
504 }
505 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
506 }
507}
#define c(i)
Definition RSha256.hxx:101
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
unsigned long ULong_t
Unsigned long integer 4 bytes (unsigned long). Size depends on architecture.
Definition RtypesCore.h:69
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
float Float_t
Float 4 bytes (float).
Definition RtypesCore.h:71
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
char name[80]
Definition TGX11.cxx:148
externTStyle * gStyle
Definition TStyle.h:442
#define NBITS
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
Definition TAttAxis.cxx:279
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
Definition TAttAxis.cxx:290
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition TAttFill.h:40
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
Definition TAxis.cxx:891
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
Definition TAxis.h:196
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates, that is,...
Definition TAxis.cxx:1090
The Canvas class.
Definition TCanvas.h:23
static Int_t GetColor(const char *hexcolor)
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
Definition TH1.cxx:9074
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Sort bins with labels or set option(s) to draw axis with labels.
Definition TH1.cxx:5464
TAxis * GetXaxis()
Definition TH1.h:571
TAxis * GetYaxis()
Definition TH1.h:572
virtual void SetBarWidth(Float_t width=0.5)
Set the width of bars as fraction of the bin width for drawing mode "B".
Definition TH1.h:613
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9356
void SetSilent(Bool_t s)
Definition Config.h:63
MsgLogger & Log() const
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
std::vector< OptionMap > fMethods
! Booked method information
Definition Envelope.h:46
std::shared_ptr< DataLoader > fDataLoader
! data
Definition Envelope.h:47
Envelope(const TString &name, DataLoader *dataloader=nullptr, TFile *file=nullptr, const TString options="")
Constructor for the initialization of Envelopes, differents Envelopes may needs differents constructo...
Definition Envelope.cxx:40
This is the main MVA steering class.
Definition Factory.h:80
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
static void EnableOutput()
Definition MsgLogger.cxx:67
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
void Evaluate() override
Virtual method to be implemented with your algorithm.
std::unique_ptr< Factory > fClassifier
void EvaluateImportanceRandom(UInt_t nseeds)
VariableImportanceResult fResults
VariableImportance(DataLoader *loader)
TH1F * GetImportance(const UInt_t nbits, std::vector< Float_t > &importances, std::vector< TString > &varNames)
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:173
Random number generator class based on M.
Definition TRandom3.h:27
Basic string class.
Definition TString.h:138
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Config & gConfig()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2338