Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
VariableImportance.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata and Sergei Gleyzer
3
4/*! \class TMVA::VariableImportanceResult
5\ingroup TMVA
6*/
7
8/*! \class TMVA::VariableImportance
9\ingroup TMVA
10*/
11
13
14#include "TMVA/Config.h"
15#include "TMVA/DataSetInfo.h"
16#include "TMVA/Envelope.h"
17#include "TMVA/Factory.h"
18#include "TMVA/OptionMap.h"
19#include "TMVA/MethodBase.h"
20#include "TMVA/MethodCategory.h"
21#include "TMVA/MsgLogger.h"
22#include "TMVA/Types.h"
24
25#include "TAxis.h"
26#include "TCanvas.h"
27#include "TH1.h"
28#include "TRandom3.h"
29#include "TStyle.h"
30
31#include <bitset>
32#include <cmath>
33#include <memory>
34#include <utility>
35
36
37// Width of the std::bitset masks that encode which variables belong to a seed
38#define NBITS 32
39
40////////////////////////////////////////////////////////////////////////////////
41
42TMVA::VariableImportanceResult::VariableImportanceResult():fImportanceValues("VariableImportance"),
43 fImportanceHist(nullptr)
44{
45
46}
47
48////////////////////////////////////////////////////////////////////////////////
49
51{
52 fImportanceValues = obj.fImportanceValues;
53 fImportanceHist = obj.fImportanceHist;
54}
55
56////////////////////////////////////////////////////////////////////////////////
57
59{
62
63 MsgLogger fLogger("VariableImportance");
64 if(fType==VIType::kShort)
65 {
66 fLogger<<kINFO<<"Variable Importance Results (Short)"<<Endl;
67 }else if(fType==VIType::kAll)
68 {
69 fLogger<<kINFO<<"Variable Importance Results (All)"<<Endl;
70 }else{
71 fLogger<<kINFO<<"Variable Importance Results (Random)"<<Endl;
72 }
73
74 fImportanceValues.Print();
76}
77
78////////////////////////////////////////////////////////////////////////////////
79
81{
82 TCanvas *c=new TCanvas(name.Data());
83 fImportanceHist->Draw("");
84 fImportanceHist->GetXaxis()->SetTitle(" Variable Names ");
85 fImportanceHist->GetYaxis()->SetTitle(" Importance (%) ");
86 c->Draw();
87 return c;
88}
89
90////////////////////////////////////////////////////////////////////////////////
91
93{
94 fClassifier=std::unique_ptr<Factory>(new TMVA::Factory("VariableImportance","!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
95}
96
97////////////////////////////////////////////////////////////////////////////////
98
100{
101 fClassifier=nullptr;
102}
103
104////////////////////////////////////////////////////////////////////////////////
105
107{
108
109 //NOTE: Put the type of VI Algorithm in the results Print
110 if(fType==VIType::kShort)
111 {
112 EvaluateImportanceShort();
113 }else if(fType==VIType::kAll)
114 {
115 EvaluateImportanceAll();
116 }else{
117 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
118 if(nbits<10)
119 Log()<<kERROR<<"Running variable importance with less that 10 varibales in Random mode "<<
120 "can to produce inconsisten results"<<Endl;
121 EvaluateImportanceRandom(pow(nbits,2));
122 }
123 fResults.fType = fType;
126 Log()<<kINFO<<"Evaluation done."<<Endl;
128}
129
130////////////////////////////////////////////////////////////////////////////////
131
133{
134 ULong_t sum=0;
135 for(ULong_t n=0;n<i;n++) sum+=pow(2,n);
136 return sum;
137}
138
139////////////////////////////////////////////////////////////////////////////////
140
141TH1F* TMVA::VariableImportance::GetImportance(const UInt_t nbits,std::vector<Float_t> &importances,std::vector<TString> &varNames)
142{
143 TH1F *vihist = new TH1F("vihist", "", nbits, 0, nbits);
144
145 gStyle->SetOptStat(000000);
146
148 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
149
150 Float_t roc = 0.0;
151
154
155
156 for (UInt_t i = 1; i < nbits + 1; i++) {
157 roc = 100.0 * importances[i - 1] / normalization;
158 vihist->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
159 vihist->SetBinContent(i, roc);
160 }
161
162 vihist->LabelsOption("v >", "X");
163 vihist->SetBarWidth(0.97);
164 vihist->SetFillColor(TColor::GetColor("#006600"));
165
166 vihist->GetXaxis()->SetTitle(" Variable Names ");
167 vihist->GetXaxis()->SetTitleSize(0.045);
168 vihist->GetXaxis()->CenterTitle();
169 vihist->GetXaxis()->SetTitleOffset(1.24);
170
171 vihist->GetYaxis()->SetTitle(" Importance (%)");
172 vihist->GetYaxis()->SetTitleSize(0.045);
173 vihist->GetYaxis()->CenterTitle();
174 vihist->GetYaxis()->SetTitleOffset(1.24);
175
176 vihist->GetYaxis()->SetRangeUser(-7, 50);
177 vihist->SetDirectory(nullptr);
178
179 return vihist;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183
185{
186 for (auto &meth : fMethods) {
187 TString methodName = meth.GetValue<TString>("MethodName");
188 TString methodTitle = meth.GetValue<TString>("MethodTitle");
189 TString methodOptions = meth.GetValue<TString>("MethodOptions");
190
191 uint32_t x = 0;
192 uint32_t y = 0;
193 // getting number of variables and variable names from loader
194 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
195 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
196
197 ULong_t range = Sum(nbits);
198
199 // vector to save importances
200 std::vector<Float_t> importances(nbits);
201 for (UInt_t i = 0; i < nbits; i++)
202 importances[i] = 0;
203
204 Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed
205
206 x = range;
207
208 std::bitset<NBITS> xbitset(x);
209 if (x == 0)
210 Log() << kFATAL << "Error: need at least one variable."; // dataloader need at least one variable
211
212 // creating loader for seed
214
215 // adding variables from seed
216 for (UInt_t index = 0; index < nbits; index++) {
217 if (xbitset[index])
218 seeddl->AddVariable(varNames[index], 'F');
219 }
220
221 // Loading Dataset
222 DataLoaderCopy(seeddl, fDataLoader.get());
223
224 // Booking Seed
225 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
226
227 // Train/Test/Evaluation
228 fClassifier->TrainAllMethods();
229 fClassifier->TestAllMethods();
230 fClassifier->EvaluateAllMethods();
231
232 // getting ROC
233 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
234
235 delete seeddl;
236 fClassifier->DeleteAllMethods();
237 fClassifier->fMethodsMap.clear();
238
239 for (uint32_t i = 0; i < NBITS; ++i) {
240 if (x & (1 << i)) {
241 y = x & ~(1 << i);
242 std::bitset<NBITS> ybitset(y);
243 //need at least one variable
244 //NOTE: if the subseed is zero, this is the special case
245 //where exactly one bit is set in xbitset
246 Double_t ny = log(x - y) / 0.693147;
247 if (y == 0) {
248 importances[ny] = SROC - 0.5;
249 continue;
250 }
251
252 //creating loader for subseed
254 //adding variables from subseed
255 for (UInt_t index = 0; index < nbits; index++) {
256 if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F');
257 }
258
259 //Loading Dataset
260 DataLoaderCopy(subseeddl,fDataLoader.get());
261
262 //Booking SubSeed
263 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
264
265 //Train/Test/Evaluation
266 fClassifier->TrainAllMethods();
267 fClassifier->TestAllMethods();
268 fClassifier->EvaluateAllMethods();
269
270 //getting ROC
271 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
272 importances[ny] += SROC - SSROC;
273
274 delete subseeddl;
275 fClassifier->DeleteAllMethods();
276 fClassifier->fMethodsMap.clear();
277 }
278 }
280 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
281
282 for(UInt_t i=0;i<nbits;i++){
283 //adding values
284 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
285 //adding suffix
286 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
287 }
288 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
289 }
290}
291
292////////////////////////////////////////////////////////////////////////////////
293
295{
296 for (auto &meth : fMethods) {
297
298 TString methodName = meth.GetValue<TString>("MethodName");
299 TString methodTitle = meth.GetValue<TString>("MethodTitle");
300 TString methodOptions = meth.GetValue<TString>("MethodOptions");
301
302 TRandom3 *rangen = new TRandom3(0); // Random Gen.
303
304 uint32_t x = 0;
305 uint32_t y = 0;
306
307 // getting number of variables and variable names from loader
308 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
309 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
310
311 ULong_t range = pow(2, nbits);
312
313 // vector to save importances
314 std::vector<Float_t> importances(nbits);
315
316 for (UInt_t i = 0; i < nbits; i++)
317 importances[i] = 0;
318
319 Float_t SROC, SSROC; // computed ROC value for every Seed and SubSeed
320
321 x = range;
322
323 for (UInt_t n = 0; n < seeds; n++) {
324 x = rangen->Integer(range);
325
326 std::bitset<NBITS> xbitset(x);
327 if (x == 0)
328 continue; // dataloader need at least one variable
329
330 // creating loader for seed
332
333 // adding variables from seed
334 for (UInt_t index = 0; index < nbits; index++) {
335 if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F');
336 }
337
338 //Loading Dataset
339 DataLoaderCopy(seeddl,fDataLoader.get());
340
341 //Booking Seed
342 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
343
344 //Train/Test/Evaluation
345 fClassifier->TrainAllMethods();
346 fClassifier->TestAllMethods();
347 fClassifier->EvaluateAllMethods();
348
349 //getting ROC
350 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
351
352 delete seeddl;
353 fClassifier->DeleteAllMethods();
354 fClassifier->fMethodsMap.clear();
355
356 for (uint32_t i = 0; i < 32; ++i) {
357 if (x & (1 << i)) {
358 y = x & ~(1 << i);
359 std::bitset<NBITS> ybitset(y);
360 //need at least one variable
361 //NOTE: if the subseed is zero, this is the special case
362 //where exactly one bit is set in xbitset
363 Double_t ny = log(x - y) / 0.693147;
364 if (y == 0) {
365 importances[ny] = SROC - 0.5;
366 continue;
367 }
368
369 //creating loader for subseed
371 //adding variables from subseed
372 for (UInt_t index = 0; index < nbits; index++) {
373 if (ybitset[index]) subseeddl->AddVariable(varNames[index], 'F');
374 }
375
376 //Loading Dataset
377 DataLoaderCopy(subseeddl,fDataLoader.get());
378
379 //Booking SubSeed
380 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
381
382 //Train/Test/Evaluation
383 fClassifier->TrainAllMethods();
384 fClassifier->TestAllMethods();
385 fClassifier->EvaluateAllMethods();
386
387 //getting ROC
388 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
389 importances[ny] += SROC - SSROC;
390
391 delete subseeddl;
392 fClassifier->DeleteAllMethods();
393 fClassifier->fMethodsMap.clear();
394 }
395 }
396 }
397
399 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
400
401 for(UInt_t i=0;i<nbits;i++){
402 //adding values
403 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
404 //adding suffix
405 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
406 }
407 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
408 delete rangen;
409 }
410}
411
412////////////////////////////////////////////////////////////////////////////////
413
415{
416 for (auto &meth : fMethods) {
417 TString methodName = meth.GetValue<TString>("MethodName");
418 TString methodTitle = meth.GetValue<TString>("MethodTitle");
419 TString methodOptions = meth.GetValue<TString>("MethodOptions");
420
421 uint32_t x = 0;
422 uint32_t y = 0;
423
424 // getting number of variables and variable names from loader
425 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
426 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
427
428 ULong_t range = pow(2, nbits);
429
430 // vector to save importances
431 std::vector<Float_t> importances(nbits);
432
433 // vector to save ROC-Integral values
434 std::vector<Float_t> ROC(range);
435 ROC[0] = 0.5;
436 for (UInt_t i = 0; i < nbits; i++)
437 importances[i] = 0;
438
439 Float_t SROC, SSROC; // computed ROC value
440 for (x = 1; x < range; x++) {
441
442 std::bitset<NBITS> xbitset(x);
443 if (x == 0)
444 continue; // dataloader need at least one variable
445
446 // creating loader for seed
448
449 // adding variables from seed
450 for (UInt_t index = 0; index < nbits; index++) {
451 if (xbitset[index]) seeddl->AddVariable(varNames[index], 'F');
452 }
453
454 DataLoaderCopy(seeddl,fDataLoader.get());
455
456 seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut("Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut("Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
457
458 //Booking Seed
459 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
460
461 //Train/Test/Evaluation
462 fClassifier->TrainAllMethods();
463 fClassifier->TestAllMethods();
464 fClassifier->EvaluateAllMethods();
465
466 //getting ROC
467 ROC[x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
468
469 delete seeddl;
470 fClassifier->DeleteAllMethods();
471 fClassifier->fMethodsMap.clear();
472 }
473
474
475 for ( x = 0; x <range ; x++)
476 {
477 SROC=ROC[x];
478 for (uint32_t i = 0; i < NBITS; ++i) {
479 if (x & (1 << i)) {
480 y = x & ~(1 << i);
481 std::bitset<NBITS> ybitset(y);
482
483 Float_t ny = log(x - y) / 0.693147;
484 if (y == 0) {
485 importances[ny] = SROC - 0.5;
486 continue;
487 }
488
489 //getting ROC
490 SSROC = ROC[y];
491 importances[ny] += SROC - SSROC;
492 }
493
494 }
495 }
497 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
498
499 for(UInt_t i=0;i<nbits;i++){
500 //adding values
501 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
502 //adding suffix
503 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+" % ";
504 }
505 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
506 }
507}
#define c(i)
Definition RSha256.hxx:101
unsigned long ULong_t
Unsigned long integer (unsigned long): 4 bytes on some platforms, 8 on others; the size depends on the architecture.
Definition RtypesCore.h:69
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
char name[80]
Definition TGX11.cxx:110
R__EXTERN TStyle * gStyle
Definition TStyle.h:442
#define NBITS
The Canvas class.
Definition TCanvas.h:23
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
Definition TColor.cxx:1927
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:879
void SetSilent(Bool_t s)
Definition Config.h:63
Abstract base class for all high level ml algorithms, you can book ml methods like BDT,...
Definition Envelope.h:44
This is the main MVA steering class.
Definition Factory.h:80
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
static void EnableOutput()
Definition MsgLogger.cxx:68
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
void Evaluate() override
Virtual method to be implemented with your algorithm.
std::unique_ptr< Factory > fClassifier
void EvaluateImportanceRandom(UInt_t nseeds)
VariableImportance(DataLoader *loader)
TH1F * GetImportance(const UInt_t nbits, std::vector< Float_t > &importances, std::vector< TString > &varNames)
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition TObject.cxx:656
Random number generator class based on the Mersenne Twister algorithm of M. Matsumoto and T. Nishimura.
Definition TRandom3.h:27
Basic string class.
Definition TString.h:138
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
Definition TStyle.cxx:1642
void SetTitleXOffset(Float_t offset=1)
Definition TStyle.h:413
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Config & gConfig()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2340