147 fEpsilon ( 1.e3 * DBL_MIN ),
148 fTransformLikelihoodOutput(
kFALSE ),
152 fHistSig_smooth( 0 ),
153 fHistBgd_smooth( 0 ),
154 fDefaultPDFLik ( 0 ),
160 fAverageEvtPerBin( 0 ),
161 fAverageEvtPerBinVarS (0),
162 fAverageEvtPerBinVarB (0),
163 fKDEfineFactor ( 0 ),
164 fInterpolateString(0)
172 const TString& theWeightFile) :
174 fEpsilon ( 1.e3 * DBL_MIN ),
175 fTransformLikelihoodOutput(
kFALSE ),
179 fHistSig_smooth( 0 ),
180 fHistBgd_smooth( 0 ),
181 fDefaultPDFLik ( 0 ),
187 fAverageEvtPerBin( 0 ),
188 fAverageEvtPerBinVarS (0),
189 fAverageEvtPerBinVarB (0),
190 fKDEfineFactor ( 0 ),
191 fInterpolateString(0)
200 if (NULL != fDefaultPDFLik)
delete fDefaultPDFLik;
201 if (NULL != fHistSig)
delete fHistSig;
202 if (NULL != fHistBgd)
delete fHistBgd;
203 if (NULL != fHistSig_smooth)
delete fHistSig_smooth;
204 if (NULL != fHistBgd_smooth)
delete fHistBgd_smooth;
205 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
206 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
207 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
209 if (NULL != fPDFSig)
delete fPDFSig;
210 if (NULL != fPDFBgd)
delete fPDFBgd;
230 fHistSig =
new std::vector<TH1*> ( GetNvar(), (
TH1*)0 );
231 fHistBgd =
new std::vector<TH1*> ( GetNvar(), (
TH1*)0 );
232 fHistSig_smooth =
new std::vector<TH1*> ( GetNvar(), (
TH1*)0 );
233 fHistBgd_smooth =
new std::vector<TH1*> ( GetNvar(), (
TH1*)0 );
234 fPDFSig =
new std::vector<TMVA::PDF*>( GetNvar(), (
TMVA::PDF*)0 );
235 fPDFBgd =
new std::vector<TMVA::PDF*>( GetNvar(), (
TMVA::PDF*)0 );
245 DeclareOptionRef( fTransformLikelihoodOutput =
kFALSE,
"TransformOutput",
246 "Transform likelihood output by inverse sigmoid function" );
251 TString updatedOptions = GetOptions();
252 fDefaultPDFLik =
new PDF(
TString(GetName()) +
" PDF", updatedOptions );
253 fDefaultPDFLik->DeclareOptions();
254 fDefaultPDFLik->ParseOptions();
255 updatedOptions = fDefaultPDFLik->GetOptions();
256 for (
UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
257 (*fPDFSig)[ivar] =
new PDF(
TString::Format(
"%s PDF Sig[%d]", GetName(), ivar), updatedOptions,
259 (*fPDFSig)[ivar]->DeclareOptions();
260 (*fPDFSig)[ivar]->ParseOptions();
261 updatedOptions = (*fPDFSig)[ivar]->GetOptions();
262 (*fPDFBgd)[ivar] =
new PDF(
TString::Format(
"%s PDF Bkg[%d]", GetName(), ivar), updatedOptions,
264 (*fPDFBgd)[ivar]->DeclareOptions();
265 (*fPDFBgd)[ivar]->ParseOptions();
266 updatedOptions = (*fPDFBgd)[ivar]->GetOptions();
270 SetOptions( updatedOptions );
279 DeclareOptionRef( fNsmooth = 1,
"NSmooth",
280 "Number of smoothing iterations for the input histograms");
281 DeclareOptionRef( fAverageEvtPerBin = 50,
"NAvEvtPerBin",
282 "Average number of events per PDF bin");
283 DeclareOptionRef( fKDEfineFactor =1. ,
"KDEFineFactor",
284 "Fine tuning factor for Adaptive KDE: Factor to multiply the width of the kernel");
285 DeclareOptionRef( fBorderMethodString =
"None",
"KDEborder",
286 "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
287 DeclareOptionRef( fKDEiterString =
"Nonadaptive",
"KDEiter",
288 "Number of iterations (1=non-adaptive, 2=adaptive)" );
289 DeclareOptionRef( fKDEtypeString =
"Gauss",
"KDEtype",
290 "KDE kernel type (1=Gauss)" );
291 fAverageEvtPerBinVarS =
new Int_t[GetNvar()];
292 fAverageEvtPerBinVarB =
new Int_t[GetNvar()];
293 fNsmoothVarS =
new Int_t[GetNvar()];
294 fNsmoothVarB =
new Int_t[GetNvar()];
295 fInterpolateString =
new TString[GetNvar()];
296 for(
UInt_t i=0; i<GetNvar(); ++i) {
297 fAverageEvtPerBinVarS[i] = fAverageEvtPerBinVarB[i] = 0;
298 fNsmoothVarS[i] = fNsmoothVarB[i] = 0;
299 fInterpolateString[i] =
"";
301 DeclareOptionRef( fAverageEvtPerBinVarS, GetNvar(),
"NAvEvtPerBinSig",
302 "Average num of events per PDF bin and variable (signal)");
303 DeclareOptionRef( fAverageEvtPerBinVarB, GetNvar(),
"NAvEvtPerBinBkg",
304 "Average num of events per PDF bin and variable (background)");
305 DeclareOptionRef(fNsmoothVarS, GetNvar(),
"NSmoothSig",
306 "Number of smoothing iterations for the input histograms");
307 DeclareOptionRef(fNsmoothVarB, GetNvar(),
"NSmoothBkg",
308 "Number of smoothing iterations for the input histograms");
309 DeclareOptionRef(fInterpolateString, GetNvar(),
"PDFInterpol",
"Method of interpolating reference histograms (e.g. Spline2 or KDE)");
318 SetSignalReferenceCut( TransformLikelihoodOutput( 0.5, 0.5 ) );
320 fDefaultPDFLik->ProcessOptions();
321 for (
UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
322 (*fPDFBgd)[ivar]->ProcessOptions();
323 (*fPDFSig)[ivar]->ProcessOptions();
338 std::vector<Double_t>
xmin(nvar),
xmax(nvar);
339 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
xmin[ivar]=1e30;
xmax[ivar]=-1e30;}
341 UInt_t nevents=Data()->GetNEvents();
342 for (
UInt_t ievt=0; ievt<nevents; ievt++) {
345 const Event* origEv = Data()->GetEvent(ievt);
346 if (IgnoreEventsWithNegWeightsInTraining() && origEv->
GetWeight()<=0)
continue;
348 for (
int cls=0;cls<2;cls++){
349 GetTransformationHandler().SetTransformationReferenceClass(cls);
350 const Event* ev = GetTransformationHandler().Transform( origEv );
351 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
361 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
362 TString var = (*fInputVars)[ivar];
369 if (DataInfo().GetVariableInfo(ivar).GetVarType() ==
'I') {
374 Int_t nbins = ixmax - ixmin;
375 (*fHistSig)[ivar] =
new TH1F(GetMethodName()+
"_"+var +
"_sig", var +
" signal training", nbins, ixmin, ixmax );
376 (*fHistBgd)[ivar] =
new TH1F(GetMethodName()+
"_"+var +
"_bgd", var +
" background training", nbins, ixmin, ixmax );
379 UInt_t minNEvt =
TMath::Min(Data()->GetNEvtSigTrain(),Data()->GetNEvtBkgdTrain());
380 Int_t nbinsS = (*fPDFSig)[ivar]->GetHistNBins( minNEvt );
381 Int_t nbinsB = (*fPDFBgd)[ivar]->GetHistNBins( minNEvt );
383 (*fHistSig)[ivar] =
new TH1F(
TString::Format(
"%s_%s_%s_sig",DataInfo().GetName(),GetMethodName().Data(),var.
Data()),
384 TString::Format(
"%s_%s_%s signal training",DataInfo().GetName(),GetMethodName().Data(),var.
Data()),
386 (*fHistBgd)[ivar] =
new TH1F(
TString::Format(
"%s_%s_%s_bgd",DataInfo().GetName(),GetMethodName().Data(),var.
Data()),
387 TString::Format(
"%s_%s_%s background training",DataInfo().GetName(),GetMethodName().Data(),var.
Data()),
393 Log() << kINFO <<
"Filling reference histograms" <<
Endl;
396 for (
Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
400 const Event* origEv = Data()->GetEvent(ievt);
401 if (IgnoreEventsWithNegWeightsInTraining() && origEv->
GetWeight()<=0)
continue;
402 GetTransformationHandler().SetTransformationReferenceClass( origEv->
GetClass() );
403 const Event* ev = GetTransformationHandler().Transform( origEv );
409 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
415 if (
value >=(*fHistSig)[ivar]->GetXaxis()->GetXmax() ||
416 value <(*fHistSig)[ivar]->GetXaxis()->GetXmin()){
418 <<
"error in filling likelihood reference histograms var="
419 <<(*fInputVars)[ivar]
420 <<
", xmin="<<(*fHistSig)[ivar]->GetXaxis()->GetXmin()
422 <<
", xmax="<<(*fHistSig)[ivar]->GetXaxis()->GetXmax()
425 if (DataInfo().IsSignal(ev)) (*fHistSig)[ivar]->Fill(
value, weight );
426 else (*fHistBgd)[ivar]->Fill(
value, weight );
431 Log() << kINFO <<
"Building PDF out of reference histograms" <<
Endl;
432 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
436 (*fPDFSig)[ivar]->BuildPDF( (*fHistSig)[ivar] );
437 (*fPDFBgd)[ivar]->BuildPDF( (*fHistBgd)[ivar] );
439 (*fPDFSig)[ivar]->ValidatePDF( (*fHistSig)[ivar] );
440 (*fPDFBgd)[ivar]->ValidatePDF( (*fHistBgd)[ivar] );
443 if ((*fPDFSig)[ivar]->GetSmoothedHist() != 0) (*fHistSig_smooth)[ivar] = (*fPDFSig)[ivar]->GetSmoothedHist();
444 if ((*fPDFBgd)[ivar]->GetSmoothedHist() != 0) (*fHistBgd_smooth)[ivar] = (*fPDFBgd)[ivar]->GetSmoothedHist();
458 NoErrorCalc(err, errUpper);
467 GetTransformationHandler().SetTransformationReferenceClass( fSignalClass );
470 const Event* ev = GetEvent();
471 for (ivar=0; ivar<GetNvar(); ivar++) vs(ivar) = ev->
GetValue(ivar);
473 GetTransformationHandler().SetTransformationReferenceClass( fBackgroundClass );
477 for (ivar=0; ivar<GetNvar(); ivar++) vb(ivar) = ev->
GetValue(ivar);
481 for (ivar=0; ivar<GetNvar(); ivar++) {
484 if ((
Int_t)ivar == fDropVariable)
continue;
488 for (
UInt_t itype=0; itype < 2; itype++) {
491 if (
x[itype] >= (*fPDFSig)[ivar]->GetXmax())
x[itype] = (*fPDFSig)[ivar]->GetXmax() - 1.0e-10;
492 else if (
x[itype] < (*fPDFSig)[ivar]->GetXmin())
x[itype] = (*fPDFSig)[ivar]->GetXmin();
495 PDF* pdf = (itype == 0) ? (*fPDFSig)[ivar] : (*fPDFBgd)[ivar];
496 if (pdf == 0) Log() << kFATAL <<
"<GetMvaValue> Reference histograms don't exist" <<
Endl;
506 DataInfo().GetVariableInfo(ivar).GetVarType() ==
'N') {
523 if (itype == 0) ps *=
p;
529 return TransformLikelihoodOutput( ps, pb );
537 if (ps < fEpsilon) ps = fEpsilon;
538 if (pb < fEpsilon) pb = fEpsilon;
540 if (
r >= 1.0)
r = 1. - 1.e-15;
542 if (fTransformLikelihoodOutput) {
546 if (
r <= 0.0)
r = fEpsilon;
547 else if (
r >= 1.0)
r = 1. - 1.e-15;
564 if (fDefaultPDFLik != 0) {
565 o << prefix << std::endl << prefix <<
"#Default Likelihood PDF Options:" << std::endl << prefix << std::endl;
566 fDefaultPDFLik->WriteOptionsToStream( o, prefix );
568 for (
UInt_t ivar = 0; ivar < fPDFSig->size(); ivar++) {
569 if ((*fPDFSig)[ivar] != 0) {
570 o << prefix << std::endl << prefix <<
TString::Format(
"#Signal[%d] Likelihood PDF Options:",ivar) << std::endl << prefix << std::endl;
571 (*fPDFSig)[ivar]->WriteOptionsToStream( o, prefix );
573 if ((*fPDFBgd)[ivar] != 0) {
574 o << prefix << std::endl << prefix <<
"#Background[%d] Likelihood PDF Options:" << std::endl << prefix << std::endl;
575 (*fPDFBgd)[ivar]->WriteOptionsToStream( o, prefix );
589 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
590 if ( (*fPDFSig)[ivar]==0 || (*fPDFBgd)[ivar]==0 )
591 Log() << kFATAL <<
"Reference histograms for variable " << ivar
592 <<
" don't exist, can't write it to weight file" <<
Endl;
596 (*fPDFSig)[ivar]->AddXMLTo(pdfwrap);
600 (*fPDFBgd)[ivar]->AddXMLTo(pdfwrap);
610 if (fRanking)
delete fRanking;
611 fRanking =
new Ranking( GetName(),
"Delta Separation" );
614 for (
Int_t ivar=-1; ivar<(
Int_t)GetNvar(); ivar++) {
617 fDropVariable = ivar;
621 TH1* rS =
new TH1F( nameS, nameS, 80, 0, 1 );
622 TH1* rB =
new TH1F( nameB, nameB, 80, 0, 1 );
625 for (
Int_t ievt=0; ievt<Data()->GetNTrainingEvents(); ievt++) {
627 const Event* origEv = Data()->GetEvent(ievt);
628 GetTransformationHandler().SetTransformationReferenceClass( origEv->
GetClass() );
629 const Event* ev = GetTransformationHandler().Transform(Data()->GetEvent(ievt));
633 if (DataInfo().IsSignal(ev)) rS->
Fill( lk,
w );
634 else rB->
Fill( lk,
w );
639 if (ivar == -1) sepRef = sep;
646 if (ivar >= 0) fRanking->AddRank(
Rank( DataInfo().GetVariableInfo(ivar).GetInternalName(), sep ) );
660 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
661 (*fPDFSig)[ivar]->Write( pname + GetInputVar( ivar ) +
"_S" );
662 (*fPDFBgd)[ivar]->Write( pname + GetInputVar( ivar ) +
"_B" );
677 for (
UInt_t ivar=0; ivar<nvars; ivar++){
679 Log() << kDEBUG <<
"Reading signal and background PDF for variable: " << GetInputVar( ivar ) <<
Endl;
680 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
681 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
682 (*fPDFSig)[ivar] =
new PDF( GetInputVar( ivar ) +
" PDF Sig" );
683 (*fPDFBgd)[ivar] =
new PDF( GetInputVar( ivar ) +
" PDF Bkg" );
684 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
685 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
686 (*(*fPDFSig)[ivar]).ReadXML(pdfnode);
689 (*(*fPDFBgd)[ivar]).ReadXML(pdfnode);
704 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
705 Log() << kDEBUG <<
"Reading signal and background PDF for variable: " << GetInputVar( ivar ) <<
Endl;
706 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
707 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
708 (*fPDFSig)[ivar] =
new PDF(GetInputVar( ivar ) +
" PDF Sig" );
709 (*fPDFBgd)[ivar] =
new PDF(GetInputVar( ivar ) +
" PDF Bkg");
710 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
711 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
712 istr >> *(*fPDFSig)[ivar];
713 istr >> *(*fPDFBgd)[ivar];
726 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
738 Log() << kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() <<
Endl;
741 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
742 (*fHistSig)[ivar]->
Write();
743 (*fHistBgd)[ivar]->Write();
744 if ((*fHistSig_smooth)[ivar] != 0) (*fHistSig_smooth)[ivar]->Write();
745 if ((*fHistBgd_smooth)[ivar] != 0) (*fHistBgd_smooth)[ivar]->Write();
746 (*fPDFSig)[ivar]->GetPDFHist()->Write();
747 (*fPDFBgd)[ivar]->GetPDFHist()->Write();
749 if ((*fPDFSig)[ivar]->GetNSmoothHist() != 0) (*fPDFSig)[ivar]->GetNSmoothHist()->Write();
750 if ((*fPDFBgd)[ivar]->GetNSmoothHist() != 0) (*fPDFBgd)[ivar]->GetNSmoothHist()->Write();
753 Float_t xmin=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmin();
754 Float_t xmax=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmax();
755 TH1F* mm =
new TH1F( (*fInputVars)[ivar]+
"_additional_check",
756 (*fInputVars)[ivar]+
"_additional_check", 15000,
xmin,
xmax );
758 for (
Int_t bin=0; bin < 15000; bin++) {
765 TH1*
h[2] = { (*fHistSig)[ivar], (*fHistBgd)[ivar] };
766 for (
UInt_t i=0; i<2; i++) {
772 hclone->
Rebin( resFactor );
773 hclone->
Scale( 1.0/resFactor );
786 fout <<
"#include <math.h>" << std::endl;
787 fout <<
"#include <cstdlib>" << std::endl;
795 Int_t dp = fout.precision();
796 fout <<
" double fEpsilon;" << std::endl;
801 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
802 nbin[ivar]=(*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX();
803 if (nbin[ivar] > nbinMax) nbinMax=nbin[ivar];
806 fout <<
" static float fRefS[][" << nbinMax <<
"]; "
807 <<
"// signal reference vector [nvars][max_nbins]" << std::endl;
808 fout <<
" static float fRefB[][" << nbinMax <<
"]; "
809 <<
"// backgr reference vector [nvars][max_nbins]" << std::endl << std::endl;
810 fout <<
"// if a variable has its PDF encoded as a spline0 --> treat it like an Integer valued one" <<std::endl;
811 fout <<
" bool fHasDiscretPDF[" << GetNvar() <<
"]; "<< std::endl;
812 fout <<
" int fNbin[" << GetNvar() <<
"]; "
813 <<
"// number of bins (discrete variables may have less bins)" << std::endl;
814 fout <<
" double fHistMin[" << GetNvar() <<
"]; " << std::endl;
815 fout <<
" double fHistMax[" << GetNvar() <<
"]; " << std::endl;
817 fout <<
" double TransformLikelihoodOutput( double, double ) const;" << std::endl;
818 fout <<
"};" << std::endl;
819 fout <<
"" << std::endl;
820 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
821 fout <<
"{" << std::endl;
822 fout <<
" fEpsilon = " << fEpsilon <<
";" << std::endl;
823 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
824 fout <<
" fNbin[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() <<
";" << std::endl;
825 fout <<
" fHistMin[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmin() <<
";" << std::endl;
826 fout <<
" fHistMax[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmax() <<
";" << std::endl;
828 if ((((*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar] ||
829 (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar])
831 (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()) {
832 Log() << kFATAL <<
"<MakeClassSpecific> Mismatch in binning of variable "
833 <<
"\"" << GetOriginalVarName(ivar) <<
"\" of type: \'" << DataInfo().GetVariableInfo(ivar).GetVarType()
835 <<
"nxS = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() <<
", "
836 <<
"nxB = " << (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()
837 <<
" while we expect " << nbin[ivar]
841 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++){
843 fout <<
" fHasDiscretPDF[" << ivar <<
"] = true; " << std::endl;
845 fout <<
" fHasDiscretPDF[" << ivar <<
"] = false; " << std::endl;
848 fout <<
"}" << std::endl << std::endl;
850 fout <<
"inline double " << className
851 <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
852 fout <<
"{" << std::endl;
853 fout <<
" double ps(1), pb(1);" << std::endl;
854 fout <<
" std::vector<double> inputValuesSig = inputValues;" << std::endl;
855 fout <<
" std::vector<double> inputValuesBgd = inputValues;" << std::endl;
856 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
857 fout <<
" Transform(inputValuesSig,0);" << std::endl;
858 fout <<
" Transform(inputValuesBgd,1);" << std::endl;
860 fout <<
" for (size_t ivar = 0; ivar < GetNvar(); ivar++) {" << std::endl;
862 fout <<
" // dummy at present... will be used for variable transforms" << std::endl;
863 fout <<
" double x[2] = { inputValuesSig[ivar], inputValuesBgd[ivar] };" << std::endl;
865 fout <<
" for (int itype=0; itype < 2; itype++) {" << std::endl;
867 fout <<
" // interpolate linearly between adjacent bins" << std::endl;
868 fout <<
" // this is not useful for discrete variables (or forced Spline0)" << std::endl;
869 fout <<
" int bin = int((x[itype] - fHistMin[ivar])/(fHistMax[ivar] - fHistMin[ivar])*fNbin[ivar]) + 0;" << std::endl;
871 fout <<
" // since the test data sample is in general different from the training sample" << std::endl;
872 fout <<
" // it can happen that the min/max of the training sample are trespassed --> correct this" << std::endl;
873 fout <<
" if (bin < 0) {" << std::endl;
874 fout <<
" bin = 0;" << std::endl;
875 fout <<
" x[itype] = fHistMin[ivar];" << std::endl;
876 fout <<
" }" << std::endl;
877 fout <<
" else if (bin >= fNbin[ivar]) {" << std::endl;
878 fout <<
" bin = fNbin[ivar]-1;" << std::endl;
879 fout <<
" x[itype] = fHistMax[ivar];" << std::endl;
880 fout <<
" }" << std::endl;
882 fout <<
" // find corresponding histogram from cached indices" << std::endl;
883 fout <<
" float ref = (itype == 0) ? fRefS[ivar][bin] : fRefB[ivar][bin];" << std::endl;
885 fout <<
" // sanity check" << std::endl;
886 fout <<
" if (ref < 0) {" << std::endl;
887 fout <<
" std::cout << \"Fatal error in " << className
888 <<
": bin entry < 0 ==> abort\" << std::endl;" << std::endl;
889 fout <<
" std::exit(1);" << std::endl;
890 fout <<
" }" << std::endl;
892 fout <<
" double p = ref;" << std::endl;
894 fout <<
" if (GetType(ivar) != 'I' && !fHasDiscretPDF[ivar]) {" << std::endl;
895 fout <<
" float bincenter = (bin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << std::endl;
896 fout <<
" int nextbin = bin;" << std::endl;
897 fout <<
" if ((x[itype] > bincenter && bin != fNbin[ivar]-1) || bin == 0) " << std::endl;
898 fout <<
" nextbin++;" << std::endl;
899 fout <<
" else" << std::endl;
900 fout <<
" nextbin--; " << std::endl;
902 fout <<
" double refnext = (itype == 0) ? fRefS[ivar][nextbin] : fRefB[ivar][nextbin];" << std::endl;
903 fout <<
" float nextbincenter = (nextbin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << std::endl;
905 fout <<
" double dx = bincenter - nextbincenter;" << std::endl;
906 fout <<
" double dy = ref - refnext;" << std::endl;
907 fout <<
" p += (x[itype] - bincenter) * dy/dx;" << std::endl;
908 fout <<
" }" << std::endl;
910 fout <<
" if (p < fEpsilon) p = fEpsilon; // avoid zero response" << std::endl;
912 fout <<
" if (itype == 0) ps *= p;" << std::endl;
913 fout <<
" else pb *= p;" << std::endl;
914 fout <<
" } " << std::endl;
915 fout <<
" } " << std::endl;
917 fout <<
" // the likelihood ratio (transform it ?)" << std::endl;
918 fout <<
" return TransformLikelihoodOutput( ps, pb ); " << std::endl;
919 fout <<
"}" << std::endl << std::endl;
921 fout <<
"inline double " << className <<
"::TransformLikelihoodOutput( double ps, double pb ) const" << std::endl;
922 fout <<
"{" << std::endl;
923 fout <<
" // returns transformed or non-transformed output" << std::endl;
924 fout <<
" if (ps < fEpsilon) ps = fEpsilon;" << std::endl;
925 fout <<
" if (pb < fEpsilon) pb = fEpsilon;" << std::endl;
926 fout <<
" double r = ps/(ps + pb);" << std::endl;
927 fout <<
" if (r >= 1.0) r = 1. - 1.e-15;" << std::endl;
929 fout <<
" if (" << (fTransformLikelihoodOutput ?
"true" :
"false") <<
") {" << std::endl;
930 fout <<
" // inverse Fermi function" << std::endl;
932 fout <<
" // sanity check" << std::endl;
933 fout <<
" if (r <= 0.0) r = fEpsilon;" << std::endl;
934 fout <<
" else if (r >= 1.0) r = 1. - 1.e-15;" << std::endl;
936 fout <<
" double tau = 15.0;" << std::endl;
937 fout <<
" r = - log(1.0/r - 1.0)/tau;" << std::endl;
938 fout <<
" }" << std::endl;
940 fout <<
" return r;" << std::endl;
941 fout <<
"}" << std::endl;
944 fout <<
"// Clean up" << std::endl;
945 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
946 fout <<
"{" << std::endl;
947 fout <<
" // nothing to clear" << std::endl;
948 fout <<
"}" << std::endl << std::endl;
950 fout <<
"// signal map" << std::endl;
951 fout <<
"float " << className <<
"::fRefS[][" << nbinMax <<
"] = " << std::endl;
952 fout <<
"{ " << std::endl;
953 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
955 for (
Int_t ibin=1; ibin<=nbinMax; ibin++) {
956 if (ibin-1 < nbin[ivar])
957 fout << (*fPDFSig)[ivar]->GetPDFHist()->GetBinContent(ibin);
961 if (ibin < nbinMax) fout <<
", ";
963 fout <<
" }, " << std::endl;
965 fout <<
"}; " << std::endl;
968 fout <<
"// background map" << std::endl;
969 fout <<
"float " << className <<
"::fRefB[][" << nbinMax <<
"] = " << std::endl;
970 fout <<
"{ " << std::endl;
971 for (
UInt_t ivar=0; ivar<GetNvar(); ivar++) {
973 fout << std::setprecision(8);
974 for (
Int_t ibin=1; ibin<=nbinMax; ibin++) {
975 if (ibin-1 < nbin[ivar])
976 fout << (*fPDFBgd)[ivar]->GetPDFHist()->GetBinContent(ibin);
980 if (ibin < nbinMax) fout <<
", ";
982 fout <<
" }, " << std::endl;
984 fout <<
"}; " << std::endl;
986 fout << std::setprecision(dp);
1002 Log() <<
"The maximum-likelihood classifier models the data with probability " <<
Endl;
1003 Log() <<
"density functions (PDF) reproducing the signal and background" <<
Endl;
1004 Log() <<
"distributions of the input variables. Correlations among the " <<
Endl;
1005 Log() <<
"variables are ignored." <<
Endl;
1009 Log() <<
"Required for good performance are decorrelated input variables" <<
Endl;
1010 Log() <<
"(PCA transformation via the option \"VarTransform=Decorrelate\"" <<
Endl;
1011 Log() <<
"may be tried). Irreducible non-linear correlations may be reduced" <<
Endl;
1012 Log() <<
"by precombining strongly correlated input variables, or by simply" <<
Endl;
1013 Log() <<
"removing one of the variables." <<
Endl;
1017 Log() <<
"High fidelity PDF estimates are mandatory, i.e., sufficient training " <<
Endl;
1018 Log() <<
"statistics is required to populate the tails of the distributions" <<
Endl;
1019 Log() <<
"It would be a surprise if the default Spline or KDE kernel parameters" <<
Endl;
1020 Log() <<
"provide a satisfying fit to the data. The user is advised to properly" <<
Endl;
1021 Log() <<
"tune the events per bin and smooth options in the spline cases" <<
Endl;
1022 Log() <<
"individually per variable. If the KDE kernel is used, the adaptive" <<
Endl;
1023 Log() <<
"Gaussian kernel may lead to artefacts, so please always also try" <<
Endl;
1024 Log() <<
"the non-adaptive one." <<
Endl;
1025 Log() <<
"" <<
Endl;
1026 Log() <<
"All tuning parameters must be adjusted individually for each input" <<
Endl;
1027 Log() <<
"variable!" <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
TVectorT< Float_t > TVector
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
1-D histogram with a float per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
void SetTitle(const char *title) override
Change/set the title.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
virtual Int_t GetNbinsX() const
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
virtual TH1 * Rebin(Int_t ngroup=2, const char *newname="", const Double_t *xbins=nullptr)
Rebin this histogram.
void SetName(const char *name) override
Change the name of this histogram.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
void WriteOptionsToStream(std::ostream &o, const TString &prefix) const
write options to the output stream (e.g. when writing the MVA weight files)
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Virtual base Class for all MVA method.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Likelihood analysis ("non-parametric approach")
const Ranking * CreateRanking()
computes ranking of input variables
void Train()
create reference distributions (PDFs) from signal and background events: fill histograms and smooth t...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
The likelihood method can handle classification with 2 classes.
virtual void WriteOptionsToStream(std::ostream &o, const TString &prefix) const
write options to stream
void WriteMonitoringHistosToFile() const
write histograms and PDFs to file for monitoring purposes
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
virtual ~MethodLikelihood()
destructor
void Init()
default initialisation called by all constructors
void ReadWeightsFromStream(std::istream &istr)
read weight info from file; nothing to do for this method
MethodLikelihood(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
void GetHelpMessage() const
get help message text
void ReadWeightsFromXML(void *wghtnode)
read weights from XML
void MakeClassSpecificHeader(std::ostream &, const TString &="") const
write specific header of the classifier (mostly include files)
Double_t TransformLikelihoodOutput(Double_t ps, Double_t pb) const
returns transformed or non-transformed output
void ProcessOptions()
process user options; sets the reference cut value used to distinguish signal-like from background-like events
void WriteWeightsToStream(TFile &rf) const
write reference PDFs to ROOT file
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
returns the likelihood estimator for signal; fills a new Likelihood branch into the testTree
void AddWeightsXMLTo(void *parent) const
write weights to XML
void DeclareOptions()
define the options (their key words) that can be set in the option string
PDF wrapper for histograms; uses user-defined spline interpolation.
Ranking for variables in method (implementation)
Singleton class for Global types used by TMVA.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
const char * Data() const
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Int_t Nint(T x)
Round to nearest integer. Rounds half integers to the nearest even integer.
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.