42 fImportanceHist(nullptr)
63 if(fType==VIType::kShort)
65 fLogger<<kINFO<<
"Variable Importance Results (Short)"<<
Endl;
66 }
else if(fType==VIType::kAll)
68 fLogger<<kINFO<<
"Variable Importance Results (All)"<<
Endl;
70 fLogger<<kINFO<<
"Variable Importance Results (Random)"<<
Endl;
73 fImportanceValues.
Print();
82 fImportanceHist->Draw(
"");
83 fImportanceHist->GetXaxis()->SetTitle(
" Variable Names ");
84 fImportanceHist->GetYaxis()->SetTitle(
" Importance (%) ");
93 fClassifier=std::unique_ptr<Factory>(
new TMVA::Factory(
"VariableImportance",
"!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
109 if(fType==VIType::kShort)
111 EvaluateImportanceShort();
112 }
else if(fType==VIType::kAll)
114 EvaluateImportanceAll();
116 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
118 Log()<<kERROR<<
"Running variable importance with less that 10 varibales in Random mode "<<
119 "can to produce inconsisten results"<<
Endl;
120 EvaluateImportanceRandom(pow(nbits,2));
122 fResults.fType = fType;
125 Log()<<kINFO<<
"Evaluation done."<<
Endl;
142 TH1F *vihist =
new TH1F(
"vihist",
"", nbits, 0, nbits);
147 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
155 for (
UInt_t i = 1; i < nbits + 1; i++) {
156 roc = 100.0 * importances[i - 1] / normalization;
185 for (
auto &meth : fMethods) {
193 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
194 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
199 std::vector<Float_t> importances(nbits);
200 for (
UInt_t i = 0; i < nbits; i++)
207 std::bitset<NBITS> xbitset(
x);
209 Log() << kFATAL <<
"Error: need at least one variable.";
215 for (
UInt_t index = 0; index < nbits; index++) {
224 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
227 fClassifier->TrainAllMethods();
228 fClassifier->TestAllMethods();
229 fClassifier->EvaluateAllMethods();
232 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
235 fClassifier->DeleteAllMethods();
236 fClassifier->fMethodsMap.clear();
238 for (uint32_t i = 0; i <
NBITS; ++i) {
241 std::bitset<NBITS> ybitset(
y);
247 importances[ny] = SROC - 0.5;
254 for (
UInt_t index = 0; index < nbits; index++) {
255 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
262 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
265 fClassifier->TrainAllMethods();
266 fClassifier->TestAllMethods();
267 fClassifier->EvaluateAllMethods();
270 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
271 importances[ny] += SROC - SSROC;
274 fClassifier->DeleteAllMethods();
275 fClassifier->fMethodsMap.clear();
279 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
281 for(
UInt_t i=0;i<nbits;i++){
283 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
285 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
287 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
295 for (
auto &meth : fMethods) {
307 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
308 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
313 std::vector<Float_t> importances(nbits);
315 for (
UInt_t i = 0; i < nbits; i++)
323 x = rangen->Integer(range);
325 std::bitset<NBITS> xbitset(
x);
333 for (
UInt_t index = 0; index < nbits; index++) {
334 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
341 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
344 fClassifier->TrainAllMethods();
345 fClassifier->TestAllMethods();
346 fClassifier->EvaluateAllMethods();
349 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
352 fClassifier->DeleteAllMethods();
353 fClassifier->fMethodsMap.clear();
355 for (uint32_t i = 0; i < 32; ++i) {
358 std::bitset<NBITS> ybitset(
y);
364 importances[ny] = SROC - 0.5;
371 for (
UInt_t index = 0; index < nbits; index++) {
372 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
379 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
382 fClassifier->TrainAllMethods();
383 fClassifier->TestAllMethods();
384 fClassifier->EvaluateAllMethods();
387 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
388 importances[ny] += SROC - SSROC;
391 fClassifier->DeleteAllMethods();
392 fClassifier->fMethodsMap.clear();
398 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
400 for(
UInt_t i=0;i<nbits;i++){
402 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
404 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
406 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
415 for (
auto &meth : fMethods) {
424 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
425 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
430 std::vector<Float_t> importances(nbits);
433 std::vector<Float_t> ROC(range);
435 for (
UInt_t i = 0; i < nbits; i++)
439 for (
x = 1;
x < range;
x++) {
441 std::bitset<NBITS> xbitset(
x);
449 for (
UInt_t index = 0; index < nbits; index++) {
450 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
455 seeddl->
PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut(
"Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut(
"Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
458 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
461 fClassifier->TrainAllMethods();
462 fClassifier->TestAllMethods();
463 fClassifier->EvaluateAllMethods();
466 ROC[
x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
469 fClassifier->DeleteAllMethods();
470 fClassifier->fMethodsMap.clear();
474 for (
x = 0;
x <range ;
x++)
477 for (uint32_t i = 0; i <
NBITS; ++i) {
480 std::bitset<NBITS> ybitset(
y);
484 importances[ny] = SROC - 0.5;
490 importances[ny] += SROC - SSROC;
496 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
498 for(
UInt_t i=0;i<nbits;i++){
500 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
502 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
504 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
R__EXTERN TStyle * gStyle
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates, that is,...
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
1-D histogram with a float per channel (see TH1 documentation)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Sort bins with labels or set option(s) to draw axis with labels.
TAxis * GetXaxis()
Get the X axis of the histogram.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
virtual void SetBarWidth(Float_t width=0.5)
Set the width of bars as fraction of the bin width for drawing mode "B".
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Abstract base class for all high level ml algorithms, you can book ml methods like BDT,...
This is the main MVA steering class.
ostringstream derivative to redirect and format output
static void EnableOutput()
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
VariableImportanceResult()
OptionMap fImportanceValues
std::unique_ptr< Factory > fClassifier
void EvaluateImportanceShort()
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
void EvaluateImportanceRandom(UInt_t nseeds)
VariableImportance(DataLoader *loader)
TH1F * GetImportance(const UInt_t nbits, std::vector< Float_t > &importances, std::vector< TString > &varNames)
void EvaluateImportanceAll()
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Random number generator class based on the Mersenne Twister generator of M. Matsumoto and T. Nishimura.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
static uint64_t sum(uint64_t i)