45 fImportanceHist(nullptr)
66 if(
fType==VIType::kShort)
68 fLogger<<kINFO<<
"Variable Importance Results (Short)"<<
Endl;
69 }
else if(
fType==VIType::kAll)
71 fLogger<<kINFO<<
"Variable Importance Results (All)"<<
Endl;
73 fLogger<<kINFO<<
"Variable Importance Results (Random)"<<
Endl;
76 fImportanceValues.
Print();
85 fImportanceHist->Draw(
"");
86 fImportanceHist->GetXaxis()->SetTitle(
" Variable Names ");
87 fImportanceHist->GetYaxis()->SetTitle(
" Importance (%) ");
96 fClassifier=std::unique_ptr<Factory>(
new TMVA::Factory(
"VariableImportance",
"!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
112 if(
fType==VIType::kShort)
114 EvaluateImportanceShort();
115 }
else if(
fType==VIType::kAll)
117 EvaluateImportanceAll();
119 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
121 Log()<<kERROR<<
"Running variable importance with less that 10 varibales in Random mode "<<
122 "can to produce inconsisten results"<<
Endl;
123 EvaluateImportanceRandom(
pow(nbits,2));
125 fResults.fType =
fType;
128 Log()<<kINFO<<
"Evaluation done."<<
Endl;
145 TH1F *vihist =
new TH1F(
"vihist",
"", nbits, 0, nbits);
150 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
158 for (
UInt_t i = 1; i < nbits + 1; i++) {
159 roc = 100.0 * importances[i - 1] / normalization;
188 for (
auto &meth : fMethods) {
196 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
197 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
202 std::vector<Float_t> importances(nbits);
203 for (
UInt_t i = 0; i < nbits; i++)
210 std::bitset<NBITS> xbitset(
x);
212 Log() << kFATAL <<
"Error: need at least one variable.";
218 for (
UInt_t index = 0; index < nbits; index++) {
227 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
230 fClassifier->TrainAllMethods();
231 fClassifier->TestAllMethods();
232 fClassifier->EvaluateAllMethods();
235 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
238 fClassifier->DeleteAllMethods();
239 fClassifier->fMethodsMap.clear();
241 for (uint32_t i = 0; i <
NBITS; ++i) {
244 std::bitset<NBITS> ybitset(
y);
250 importances[ny] = SROC - 0.5;
257 for (
UInt_t index = 0; index < nbits; index++) {
258 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
265 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
268 fClassifier->TrainAllMethods();
269 fClassifier->TestAllMethods();
270 fClassifier->EvaluateAllMethods();
273 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
274 importances[ny] += SROC - SSROC;
277 fClassifier->DeleteAllMethods();
278 fClassifier->fMethodsMap.clear();
282 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
284 for(
UInt_t i=0;i<nbits;i++){
286 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
288 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
290 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
298 for (
auto &meth : fMethods) {
310 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
311 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
316 std::vector<Float_t> importances(nbits);
319 for (
UInt_t i = 0; i < nbits; i++)
327 x = rangen->Integer(range);
329 std::bitset<NBITS> xbitset(
x);
337 for (
UInt_t index = 0; index < nbits; index++) {
338 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
345 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
348 fClassifier->TrainAllMethods();
349 fClassifier->TestAllMethods();
350 fClassifier->EvaluateAllMethods();
353 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
356 fClassifier->DeleteAllMethods();
357 fClassifier->fMethodsMap.clear();
359 for (uint32_t i = 0; i < 32; ++i) {
362 std::bitset<NBITS> ybitset(
y);
368 importances[ny] = SROC - 0.5;
369 importances_norm += importances[ny];
376 for (
UInt_t index = 0; index < nbits; index++) {
377 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
384 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
387 fClassifier->TrainAllMethods();
388 fClassifier->TestAllMethods();
389 fClassifier->EvaluateAllMethods();
392 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
393 importances[ny] += SROC - SSROC;
396 fClassifier->DeleteAllMethods();
397 fClassifier->fMethodsMap.clear();
403 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
405 for(
UInt_t i=0;i<nbits;i++){
407 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
409 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
411 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
420 for (
auto &meth : fMethods) {
429 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
430 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
435 std::vector<Float_t> importances(nbits);
438 std::vector<Float_t> ROC(range);
440 for (
UInt_t i = 0; i < nbits; i++)
444 for (
x = 1;
x < range;
x++) {
446 std::bitset<NBITS> xbitset(
x);
454 for (
UInt_t index = 0; index < nbits; index++) {
455 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
460 seeddl->
PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut(
"Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut(
"Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
463 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
466 fClassifier->TrainAllMethods();
467 fClassifier->TestAllMethods();
468 fClassifier->EvaluateAllMethods();
471 ROC[
x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
474 fClassifier->DeleteAllMethods();
475 fClassifier->fMethodsMap.clear();
479 for (
x = 0;
x <range ;
x++)
482 for (uint32_t i = 0; i <
NBITS; ++i) {
485 std::bitset<NBITS> ybitset(
y);
489 importances[ny] = SROC - 0.5;
495 importances[ny] += SROC - SSROC;
501 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
503 for(
UInt_t i=0;i<nbits;i++){
505 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
507 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
509 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
double pow(double, double)
R__EXTERN TStyle * gStyle
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title. Offset is a correction factor with respect to the "s...
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title. The size is expressed in percent of the pad width.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates).
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
1-D histogram with a float per channel (see TH1 documentation)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Set option(s) to draw axis with labels.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual void SetBarWidth(Float_t width=0.5)
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Abstract base class for all high level ml algorithms, you can book ml methods like BDT,...
This is the main MVA steering class.
ostringstream derivative to redirect and format output
static void EnableOutput()
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
VariableImportanceResult()
OptionMap fImportanceValues
std::unique_ptr< Factory > fClassifier
void EvaluateImportanceShort()
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
void EvaluateImportanceRandom(UInt_t nseeds)
VariableImportance(DataLoader *loader)
TH1F * GetImportance(const UInt_t nbits, std::vector< Float_t > &importances, std::vector< TString > &varNames)
void EvaluateImportanceAll()
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
T Sum(const RVec< T > &v)
Sum elements of an RVec.
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
static long int sum(long int i)