#include "TDirectory.h"
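ClassImp(TMVA::OptimizeConfigParameters)

//______________________________________________________________________________
// Constructor: stores the method to be tuned, the map of tuning parameters
// with their search intervals, the figure-of-merit (FOM) type and the
// optimization fit type, and sets up the message logger.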
TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method,
                                                         std::map<TString,TMVA::Interval*> tuneParameters,
                                                         TString fomType,
                                                         TString optimizationFitType)
   : fMethod(method),
     fTuneParameters(tuneParameters),
     fFOMType(fomType),
     fOptimizationFitType(optimizationFitType)
{
   std::string name = "OptimizeConfigParameters_";
   name += std::string(GetMethod()->GetName());
   fLogger = new MsgLogger(name);
   if (fMethod->DoRegression()){
      Log() << kFATAL << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimization"
            << " --> exit" << Endl;
   }
   Log() << kINFO << "Automatic optimisation of tuning parameters in "
         << GetMethod()->GetName() << " uses:" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++) {
      Log() << kINFO << it->first
            << " in range from: " << it->second->GetMin()
            << " to: "            << it->second->GetMax()
            << " in : "           << it->second->GetNbins() << " steps"
            << Endl;
   }
   Log() << kINFO << " using the options: " << fFOMType
         << " and " << fOptimizationFitType << Endl;
}
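//______________________________________________________________________________
// ~OptimizeConfigParameters(): per the class reference, the destructor stores
// the graph of the FOM versus the optimization iteration before cleaning up.
// The surviving lines below determine the y-range of that graph.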
   if (ymin > y[i]) ymin = y[i];
   if (ymax < y[i]) ymax = y[i];
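//______________________________________________________________________________
// std::map<TString,Double_t> TMVA::OptimizeConfigParameters::optimize()
// Dispatch to the chosen optimization strategy, log the tuned values and
// return them.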
   if      (fOptimizationFitType == "Scan" ) this->optimizeScan();
   else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
   else {
      Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
            << ", which is not (yet) implemented --> exit()" << Endl;
   }
   std::map<TString,Double_t>::iterator it;
   for (it=fTunedParameters.begin(); it!=fTunedParameters.end(); it++){
      Log() << kINFO << it->first << " = " << it->second << Endl;
   }

   return fTunedParameters;
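//______________________________________________________________________________
// std::vector<int> TMVA::OptimizeConfigParameters::GetScanIndices(int val, std::vector<int> base)
// Helper to scan through all combinations in the parameter space: interprets
// the single loop index "val" as mixed-radix digits, one index per parameter.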
   std::vector<int> indices;
   for (UInt_t i=0; i<base.size(); i++){
      indices.push_back(val % base[i]);
      val = int( floor( float(val)/float(base[i]) ) );
   }
   return indices;
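//______________________________________________________________________________
// void TMVA::OptimizeConfigParameters::optimizeScan()
// Do the actual optimization via a simple scan: evaluate the FOM for every
// grid point of the tuning-parameter intervals and remember the best one.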
   Double_t bestFOM=-1000000, currentFOM;

   std::map<TString,Double_t> currentParameters;
   std::map<TString,TMVA::Interval*>::iterator it;

   currentParameters.clear();
   fTunedParameters.clear();

   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      currentParameters.insert(std::pair<TString,Double_t>(it->first, it->second->GetMin()));
      fTunedParameters.insert (std::pair<TString,Double_t>(it->first, it->second->GetMin()));
   }
   std::vector< std::vector<Double_t> > v;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      std::vector<Double_t> tmp;
      for (Int_t k=0; k<it->second->GetNbins(); k++){
         tmp.push_back(it->second->GetElement(k));
      }
      v.push_back(tmp);
   }
   Int_t Ntot = 1;
   std::vector<int> Nindividual;
   for (UInt_t i=0; i<v.size(); i++) {
      Ntot *= v[i].size();
      Nindividual.push_back(v[i].size());
   }
   for (int i=0; i<Ntot; i++){
      UInt_t index = 0;
      std::vector<int> indices = GetScanIndices(i, Nindividual);
      for (it=fTuneParameters.begin(), index=0; index < indices.size(); index++, it++){
         currentParameters[it->first] = v[index][indices[index]];
      }
      for (std::map<TString,Double_t>::iterator it_print=currentParameters.begin();
           it_print!=currentParameters.end(); it_print++){
         Log() << kINFO << "  " << it_print->first << " = " << it_print->second << Endl;
      }
      GetMethod()->SetTuneParameters(currentParameters);
      // the transformations need to be calculated only once, for the first scan point
      if (i==0) GetMethod()->GetTransformationHandler().CalcTransformations(
                                         GetMethod()->Data()->GetEventCollection());
      currentFOM = GetFOM();
      Log() << kINFO << "FOM was found: " << currentFOM << "; current best is " << bestFOM << Endl;
      if (currentFOM > bestFOM) {
         bestFOM = currentFOM;
         for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
              iter != currentParameters.end(); iter++){
            fTunedParameters[iter->first] = iter->second;
         }
      }
   }
   GetMethod()->SetTuneParameters(fTunedParameters);
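//______________________________________________________________________________
// TMVA::OptimizeConfigParameters::optimizeFit()
// Optimize the tuning parameters with a fitter (Minuit or a genetic algorithm)
// rather than a full grid scan; a void signature is inferred from the call
// in optimize() above.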
   std::vector<TMVA::Interval*> ranges;  // intervals in which to search
   std::map<TString, TMVA::Interval*>::iterator it;
   std::vector<Double_t> pars;           // the fit parameters (start values)

   for (it=fTuneParameters.begin(); it != fTuneParameters.end(); it++){
      ranges.push_back(new TMVA::Interval(*(it->second)));
      pars.push_back( (it->second)->GetMean() );
   }
   FitterBase* fitter = NULL;

   if ( fOptimizationFitType == "Minuit" ) {
      TString opt = "";
      fitter = new MinuitFitter( *this,
                                 "FitterMinuit_BDTOptimize",
                                 ranges, opt );
   }
   else if ( fOptimizationFitType == "FitGA" ) {
      TString opt = "PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   }
   else {
      Log() << kWARNING << "You did not specify a valid OptimizationFitType;"
            << " will use the default (FitGA)" << Endl;
      TString opt = "PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   }
   fitter->CheckForUnusedOptions();
   fitter->Run(pars); // after the fit, "pars" holds the best parameter values

   for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];
   fTunedParameters.clear();
   Int_t jcount = 0;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      fTunedParameters.insert(std::pair<TString,Double_t>(it->first, pars[jcount++]));
   }

   GetMethod()->SetTuneParameters(fTunedParameters);
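//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::EstimatorFunction(std::vector<Double_t>& pars)
// Return the estimator (from the current FOM) for the fitting interface;
// parameter combinations that were already trained are served from a cache.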
   std::map< std::vector<Double_t>, Double_t >::const_iterator iter;
   iter = fAlreadyTrainedParCombination.find(pars);

   if (iter != fAlreadyTrainedParCombination.end()) {
      // this parameter combination has already been trained: return the cached estimator
      return iter->second;
   }
   std::map<TString,Double_t> currentParameters;
   Int_t icount = 0; // map the fit parameters onto the named tuning parameters

   std::map<TString, TMVA::Interval*>::iterator it;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      currentParameters[it->first] = pars[icount++];
   }

   GetMethod()->SetTuneParameters(currentParameters);
   GetMethod()->GetTransformationHandler().
      CalcTransformations(GetMethod()->Data()->GetEventCollection());
   Double_t currentFOM = GetFOM();

   fAlreadyTrainedParCombination.insert(std::make_pair(pars, -currentFOM));
   return -currentFOM; // the fitters minimize, hence the sign flip
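//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetFOM()
// Return the Figure of Merit (FOM) used in the parameter optimization process.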
   if (fMethod->DoRegression()){
      std::cout << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimisation"
                << " --> exit" << std::endl;
      exit(1);
   }
   Double_t fom = 0;
   if      (fFOMType == "Separation")        fom = GetSeparation();
   else if (fFOMType == "ROCIntegral")       fom = GetROCIntegral();
   else if (fFOMType == "SigEffAtBkgEff01")  fom = GetSigEffAtBkgEff(0.1);
   else if (fFOMType == "SigEffAtBkgEff001") fom = GetSigEffAtBkgEff(0.01);
   else if (fFOMType == "SigEffAtBkgEff002") fom = GetSigEffAtBkgEff(0.02);
   else if (fFOMType == "BkgRejAtSigEff05")  fom = GetBkgRejAtSigEff(0.5);
   else if (fFOMType == "BkgEffAtSigEff05")  fom = GetBkgEffAtSigEff(0.5);
   else {
      Log() << kFATAL << " ERROR: the Figure of Merit " << fFOMType
            << " specified for the parameter optimisation has not"
            << " been implemented yet --> exit " << Endl;
   }
   fFOMvsIter.push_back(fom);
   return fom;
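//______________________________________________________________________________
// void TMVA::OptimizeConfigParameters::GetMVADists()
// Fill the private histograms with the MVA distributions for signal/background.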
   if (fMvaSig)        fMvaSig->Delete();
   if (fMvaBkg)        fMvaBkg->Delete();
   if (fMvaSigFineBin) fMvaSigFineBin->Delete();
   if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();
   fMvaSig        = new TH1D("fMvaSig",       "", 100,    -1.5, 1.5);
   fMvaBkg        = new TH1D("fMvaBkg",       "", 100,    -1.5, 1.5);
   fMvaSigFineBin = new TH1D("fMvaSigFineBin","", 100000, -1.5, 1.5);
   fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","", 100000, -1.5, 1.5);
   const std::vector<Event*> events = fMethod->Data()->GetEventCollection(Types::kTesting);

   UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();
   for (UInt_t iev=0; iev < events.size(); iev++){
      if (events[iev]->GetClass() == signalClassNr) {
         fMvaSig       ->Fill(fMethod->GetMvaValue(events[iev]), events[iev]->GetWeight());
         fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]), events[iev]->GetWeight());
      } else {
         fMvaBkg       ->Fill(fMethod->GetMvaValue(events[iev]), events[iev]->GetWeight());
         fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]), events[iev]->GetWeight());
      }
   }
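//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetSeparation()
// Return the separation between the signal and background MVA output
// distributions.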
   std::cout << "Separation calculation via histograms (not PDFs) still seems to give strange results! Don't do that, check!" << std::endl;
   for (UInt_t i=0; i<nsteps; i++){
      // accumulate the separation integral 1/2 * (S-B)^2/(S+B) over the scan points
   }
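//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetROCIntegral()
// Calculate the area (integral) under the ROC curve as an overall quality
// measure of the classification.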
   Double_t integral = 0;
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
   }
   else {
      Double_t *cumulator = fMvaBkgFineBin->GetIntegral();
      Int_t nbins = fMvaSigFineBin->GetNbinsX();
      Double_t sigIntegral = 0; // true signal integral (GetIntegral() ignores bin widths)
      for (Int_t ibin=1; ibin<=nbins; ibin++)
         sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
      for (Int_t ibin=1; ibin<=nbins; ibin++) // skip under- and overflow bins
         integral += cumulator[ibin] * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin);
   }
   return integral;
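//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetSigEffAtBkgEff(Double_t bkgEff)
// Calculate the signal efficiency for a given background efficiency.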
   Double_t sigEff = 0;
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetSigEffAtBkgEff, unequal histograms for sig and bkg.." << std::endl;
   }
   else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
      Int_t nbins = fMvaBkgFineBin->GetNbinsX();
      Int_t ibin  = 0;
      // move the cut from the right until the requested background efficiency is reached
      while (bkgCumulator[nbins-ibin] > (1-bkgEff)) {
         sigEff = sigCumulator[nbins] - sigCumulator[nbins-ibin];
         ibin++;
      }
   }
   return sigEff;
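//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetBkgEffAtSigEff(Double_t sigEff)
// Calculate the background efficiency for a given signal efficiency.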
   Double_t bkgEff = 0;
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgEffAtSigEff, unequal histograms for sig and bkg.." << std::endl;
   }
   else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
      Int_t nbins = fMvaBkgFineBin->GetNbinsX();
      Int_t ibin  = 0;
      // move the cut from the right until the requested signal efficiency is reached
      while (sigCumulator[nbins] - sigCumulator[nbins-ibin] < sigEff) {
         bkgEff = bkgCumulator[nbins] - bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgEff;
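//______________________________________________________________________________
// Double_t TMVA::OptimizeConfigParameters::GetBkgRejAtSigEff(Double_t sigEff)
// Calculate the background rejection for a given signal efficiency.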
   Double_t bkgRej = 0;
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgRejAtSigEff, unequal histograms for sig and bkg.." << std::endl;
   }
   else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
      Int_t nbins = fMvaBkgFineBin->GetNbinsX();
      Int_t ibin  = 0;
      // move the cut from the right until the requested signal efficiency is reached
      while (sigCumulator[nbins] - sigCumulator[nbins-ibin] < sigEff) {
         bkgRej = bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgRej;
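//______________________________________________________________________________
// Usage sketch (not part of this file). In TMVA, this class is normally driven
// via MethodBase::OptimizeTuningParameters(fomType, fitType), which builds an
// OptimizeConfigParameters from the method's tunable parameters and calls
// optimize(). A minimal example, assuming "method" points to a booked MVA
// method (e.g. a BDT):
//
//    std::map<TString,Double_t> tuned =
//       method->OptimizeTuningParameters("ROCIntegral", "FitGA");
//    for (std::map<TString,Double_t>::iterator it = tuned.begin();
//         it != tuned.end(); it++)
//       std::cout << it->first << " = " << it->second << std::endl;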