42 fImportanceHist(nullptr)
63 if(fType==VIType::kShort)
65 fLogger<<kINFO<<
"Variable Importance Results (Short)"<<
Endl;
66 }
else if(fType==VIType::kAll)
68 fLogger<<kINFO<<
"Variable Importance Results (All)"<<
Endl;
70 fLogger<<kINFO<<
"Variable Importance Results (Random)"<<
Endl;
73 fImportanceValues.
Print();
82 fImportanceHist->Draw(
"");
83 fImportanceHist->GetXaxis()->SetTitle(
" Variable Names ");
84 fImportanceHist->GetYaxis()->SetTitle(
" Importance (%) ");
93 fClassifier=std::unique_ptr<Factory>(
new TMVA::Factory(
"VariableImportance",
"!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
109 if(fType==VIType::kShort)
111 EvaluateImportanceShort();
112 }
else if(fType==VIType::kAll)
114 EvaluateImportanceAll();
116 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
118 Log()<<kERROR<<
"Running variable importance with less that 10 varibales in Random mode "<<
119 "can to produce inconsisten results"<<
Endl;
120 EvaluateImportanceRandom(
pow(nbits,2));
122 fResults.fType = fType;
125 Log()<<kINFO<<
"Evaluation done."<<
Endl;
142 TH1F *vihist =
new TH1F(
"vihist",
"", nbits, 0, nbits);
147 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
155 for (
UInt_t i = 1; i < nbits + 1; i++) {
156 roc = 100.0 * importances[i - 1] / normalization;
185 for (
auto &meth : fMethods) {
193 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
194 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
199 std::vector<Float_t> importances(nbits);
200 for (
UInt_t i = 0; i < nbits; i++)
207 std::bitset<NBITS> xbitset(
x);
209 Log() << kFATAL <<
"Error: need at least one variable.";
215 for (
UInt_t index = 0; index < nbits; index++) {
224 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
227 fClassifier->TrainAllMethods();
228 fClassifier->TestAllMethods();
229 fClassifier->EvaluateAllMethods();
232 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
235 fClassifier->DeleteAllMethods();
236 fClassifier->fMethodsMap.clear();
238 for (uint32_t i = 0; i <
NBITS; ++i) {
241 std::bitset<NBITS> ybitset(
y);
247 importances[ny] = SROC - 0.5;
254 for (
UInt_t index = 0; index < nbits; index++) {
255 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
262 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
265 fClassifier->TrainAllMethods();
266 fClassifier->TestAllMethods();
267 fClassifier->EvaluateAllMethods();
270 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
271 importances[ny] += SROC - SSROC;
274 fClassifier->DeleteAllMethods();
275 fClassifier->fMethodsMap.clear();
279 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
281 for(
UInt_t i=0;i<nbits;i++){
283 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
285 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
287 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
295 for (
auto &meth : fMethods) {
307 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
308 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
313 std::vector<Float_t> importances(nbits);
316 for (
UInt_t i = 0; i < nbits; i++)
324 x = rangen->Integer(range);
326 std::bitset<NBITS> xbitset(
x);
334 for (
UInt_t index = 0; index < nbits; index++) {
335 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
342 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
345 fClassifier->TrainAllMethods();
346 fClassifier->TestAllMethods();
347 fClassifier->EvaluateAllMethods();
350 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
353 fClassifier->DeleteAllMethods();
354 fClassifier->fMethodsMap.clear();
356 for (uint32_t i = 0; i < 32; ++i) {
359 std::bitset<NBITS> ybitset(
y);
365 importances[ny] = SROC - 0.5;
366 importances_norm += importances[ny];
373 for (
UInt_t index = 0; index < nbits; index++) {
374 if (ybitset[index]) subseeddl->
AddVariable(varNames[index],
'F');
381 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
384 fClassifier->TrainAllMethods();
385 fClassifier->TestAllMethods();
386 fClassifier->EvaluateAllMethods();
389 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
390 importances[ny] += SROC - SSROC;
393 fClassifier->DeleteAllMethods();
394 fClassifier->fMethodsMap.clear();
400 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
402 for(
UInt_t i=0;i<nbits;i++){
404 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
406 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
408 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
417 for (
auto &meth : fMethods) {
426 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
427 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
432 std::vector<Float_t> importances(nbits);
435 std::vector<Float_t> ROC(range);
437 for (
UInt_t i = 0; i < nbits; i++)
441 for (
x = 1;
x < range;
x++) {
443 std::bitset<NBITS> xbitset(
x);
451 for (
UInt_t index = 0; index < nbits; index++) {
452 if (xbitset[index]) seeddl->
AddVariable(varNames[index],
'F');
457 seeddl->
PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut(
"Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut(
"Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
460 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
463 fClassifier->TrainAllMethods();
464 fClassifier->TestAllMethods();
465 fClassifier->EvaluateAllMethods();
468 ROC[
x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
471 fClassifier->DeleteAllMethods();
472 fClassifier->fMethodsMap.clear();
476 for (
x = 0;
x <range ;
x++)
479 for (uint32_t i = 0; i <
NBITS; ++i) {
482 std::bitset<NBITS> ybitset(
y);
486 importances[ny] = SROC - 0.5;
492 importances[ny] += SROC - SSROC;
498 for (
UInt_t i = 0; i < nbits; i++) normalization += importances[i];
500 for(
UInt_t i=0;i<nbits;i++){
502 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
504 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<
TString>(varNames[i])+
" % ";
506 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
double pow(double, double)
R__EXTERN TStyle * gStyle
virtual void SetTitleOffset(Float_t offset=1)
Set distance between the axis and the axis title.
virtual void SetTitleSize(Float_t size=0.04)
Set size of axis title.
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
virtual void SetBinLabel(Int_t bin, const char *label)
Set label for bin.
void CenterTitle(Bool_t center=kTRUE)
Center axis title.
virtual void SetRangeUser(Double_t ufirst, Double_t ulast)
Set the viewing range for the axis from ufirst to ulast (in user coordinates, that is,...
static Int_t GetColor(const char *hexcolor)
Static method returning color number for color specified by hex color string of form: "#rrggbb",...
1-D histogram with a float per channel (see TH1 documentation)
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
virtual void LabelsOption(Option_t *option="h", Option_t *axis="X")
Sort bins with labels or set option(s) to draw axis with labels.
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual void SetBarWidth(Float_t width=0.5)
Set the width of bars as fraction of the bin width for drawing mode "B".
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
The user inserts a discriminating variable into the data set info.
Abstract base class for all high-level ML algorithms; you can book ML methods like BDT,...
This is the main MVA steering class.
ostringstream derivative to redirect and format output
static void EnableOutput()
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
VariableImportanceResult()
OptionMap fImportanceValues
std::unique_ptr< Factory > fClassifier
void EvaluateImportanceShort()
virtual void Evaluate()
Virtual method to be implemented with your algorithm.
void EvaluateImportanceRandom(UInt_t nseeds)
VariableImportance(DataLoader *loader)
TH1F * GetImportance(const UInt_t nbits, std::vector< Float_t > &importances, std::vector< TString > &varNames)
void EvaluateImportanceAll()
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
void SetOptStat(Int_t stat=1)
The type of information printed in the histogram statistics box can be selected via the parameter mod...
void SetTitleXOffset(Float_t offset=1)
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
static uint64_t sum(uint64_t i)