49 #ifndef ROOT_TMVA_MsgLogger
52 #ifndef ROOT_TMVA_Tools
55 #ifndef ROOT_TMVA_Version
66 TMVA::VariableGaussTransform::VariableGaussTransform( DataSetInfo& dsi,
TString strcor )
67 : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
73 if (strcor==
"Uniform") {fFlatNotGauss =
kTRUE;
99 if (!IsEnabled() || IsCreated())
return kTRUE;
101 Log() <<
kINFO <<
"Preparing the Gaussian transformation..." <<
Endl;
103 UInt_t inputSize = fGet.size();
104 SetNVariables(inputSize);
106 if (inputSize > 200) {
107 Log() <<
kWARNING <<
"----------------------------------------------------------------------------"
110 <<
": More than 200 variables, I hope you have enough memory!!!!" <<
Endl;
111 Log() <<
kWARNING <<
"----------------------------------------------------------------------------"
116 GetCumulativeDist( events );
128 if (!IsCreated())
Log() <<
kFATAL <<
"Transformation not yet created" <<
Endl;
134 if (cls <0 || cls >= (
int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
138 UInt_t inputSize = fGet.size();
140 std::vector<Float_t> input(0);
141 std::vector<Float_t>
output(0);
143 std::vector<Char_t> mask;
144 GetInput( ev, input, mask );
146 std::vector<Char_t>::iterator itMask = mask.begin();
152 for (
UInt_t ivar=0; ivar<inputSize; ivar++) {
159 if (0 != fCumulativePDF[ivar][cls]) {
162 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
164 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
169 output.push_back( cumulant );
172 Double_t maxErfInvArgRange = 0.99999999;
182 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->
GetNVariables()) {
183 if (fTransformedEvent!=0) {
delete fTransformedEvent; fTransformedEvent = 0; }
184 fTransformedEvent =
new Event();
187 SetOutput( fTransformedEvent, output, mask, ev );
189 return fTransformedEvent;
197 if (!IsCreated())
Log() <<
kFATAL <<
"Transformation not yet created" <<
Endl;
203 if (cls <0 || cls >= (
int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
207 UInt_t inputSize = fGet.size();
209 std::vector<Float_t> input(0);
210 std::vector<Float_t>
output(0);
212 std::vector<Char_t> mask;
213 GetInput( ev, input, mask,
kTRUE );
215 std::vector<Char_t>::iterator itMask = mask.begin();
221 for (
UInt_t ivar=0; ivar<inputSize; ivar++) {
228 if (0 != fCumulativePDF[ivar][cls]) {
229 invCumulant = input.at(ivar);
233 invCumulant = (
TMath::Erf(invCumulant/1.414213562)+1)/2.
f;
237 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,
kTRUE);
239 Log() <<
kFATAL <<
"Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" <<
Endl;
241 output.push_back(invCumulant);
245 if (fBackTransformedEvent==0) fBackTransformedEvent =
new Event( *ev );
247 SetOutput( fBackTransformedEvent, output, mask, ev,
kTRUE );
249 return fBackTransformedEvent;
257 const UInt_t inputSize = fGet.size();
261 UInt_t nevt = events.size();
263 const UInt_t nClasses = GetNClasses();
264 UInt_t numDist = nClasses+1;
266 if (GetNClasses() == 1 ) numDist = nClasses;
270 std::list< TMVA::TMVAGaussPair > **listsForBinning =
new std::list<TMVA::TMVAGaussPair>* [numDist];
271 std::vector< Float_t > **vsForBinning =
new std::vector<Float_t>* [numDist];
272 for (
UInt_t i=0; i < numDist; i++) {
273 listsForBinning[i] =
new std::list<TMVA::TMVAGaussPair> [inputSize];
274 vsForBinning[i] =
new std::vector<Float_t> [inputSize];
275 nbins[i] =
new UInt_t[inputSize];
278 std::vector<Float_t> input;
279 std::vector<Char_t> mask;
285 for (
UInt_t i=0; i<numDist; i++) {
290 for (
UInt_t ievt=0; ievt < nevt; ievt++) {
291 const Event* ev= events[ievt];
294 sumOfWeights[cls] += eventWeight;
295 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
296 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
297 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
299 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
300 if( hasMaskedEntries ){
303 Log() <<
kFATAL <<
"Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." <<
Endl;
308 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
311 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(
TMVA::TMVAGaussPair(value,eventWeight));
316 for (
UInt_t icl=0; icl<numDist-1; icl++){
317 minWeight[numDist-1] =
TMath::Min(minWeight[icl],minWeight[numDist-1]);
318 maxWeight[numDist-1] =
TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
324 const UInt_t nbinsmax=2000;
326 for (
UInt_t icl=0; icl< numDist; icl++){
327 for (
UInt_t ivar=0; ivar<inputSize; ivar++) {
328 listsForBinning[icl][ivar].sort();
329 std::list< TMVA::TMVAGaussPair >::iterator it;
330 Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
331 sumPerBin=
TMath::Max(minWeight[icl]*nevmin,sumPerBin);
333 Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
336 vsForBinning[icl][ivar].push_back(ev_value-eps);
337 vsForBinning[icl][ivar].push_back(ev_value);
339 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); it++){
340 sum+= it->GetWeight();
341 if (sum >= sumPerBin) {
342 ev_value=it->GetValue();
343 if (ev_value>lastev_value) {
344 vsForBinning[icl][ivar].push_back(ev_value);
346 lastev_value=ev_value;
350 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
351 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
355 delete[] sumOfWeights;
360 fCumulativeDist.resize(inputSize);
361 for (
UInt_t icls = 0; icls < numDist; icls++) {
362 for (
UInt_t ivar=0; ivar < inputSize; ivar++){
365 for (
UInt_t k =0 ; k < nbins[icls][ivar]; k++){
366 binnings[k] = vsForBinning[icls][ivar][k];
368 fCumulativeDist[ivar].resize(numDist);
369 if (0 != fCumulativeDist[ivar][icls] ) {
370 delete fCumulativeDist[ivar][icls];
372 fCumulativeDist[ivar][icls] =
new TH1F(
Form(
"Cumulative_Var%d_cls%d",ivar,icls),
373 Form(
"Cumulative_Var%d_cls%d",ivar,icls),
374 nbins[icls][ivar] -1,
376 fCumulativeDist[ivar][icls]->SetDirectory(0);
382 for (
UInt_t i=0; i<numDist; i++) {
383 delete [] listsForBinning[numDist-i-1];
384 delete [] vsForBinning[numDist-i-1];
385 delete [] nbins[numDist-i-1];
387 delete [] listsForBinning;
388 delete [] vsForBinning;
392 std::vector<Int_t> ic(numDist);
393 for (
UInt_t ievt=0; ievt<nevt; ievt++) {
395 const Event* ev= events[ievt];
399 GetInput( ev, input, mask );
402 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
404 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
405 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
412 CleanUpCumulativeArrays(
"PDF");
416 fCumulativePDF.resize(inputSize);
417 for (
UInt_t ivar=0; ivar<inputSize; ivar++) {
419 for (
UInt_t icls=0; icls<numDist; icls++) {
420 (fCumulativeDist[ivar][icls])->Smooth();
423 for (
Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
424 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
425 if (val>0)
total += val;
427 for (
Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
428 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
429 if (val>0) sum += val;
430 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/
total);
433 fCumulativePDF[ivar].push_back(
new PDF(
Form(
"GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls],
PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,
kFALSE,
kFALSE));
442 Log() <<
kFATAL <<
"VariableGaussTransform::WriteTransformationToStream is obsolete" <<
Endl;
449 if (opt ==
"ALL" || opt ==
"PDF"){
450 for (
UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
451 for (
UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
452 if (0 != fCumulativePDF[ivar][icls])
delete fCumulativePDF[ivar][icls];
455 fCumulativePDF.clear();
457 if (opt ==
"ALL" || opt ==
"Dist"){
458 for (
UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
459 for (
UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
460 if (0 != fCumulativeDist[ivar][icls])
delete fCumulativeDist[ivar][icls];
463 fCumulativeDist.clear();
472 gTools().
AddAttr(trfxml,
"FlatOrGauss", (fFlatNotGauss?
"Flat":
"Gauss") );
476 UInt_t nvar = fGet.size();
477 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
482 if ( fCumulativePDF[ivar][0]==0 ||
483 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
484 Log() <<
kFATAL <<
"Cumulative histograms for variable " << ivar <<
" don't exist, can't write it to weight file" <<
Endl;
486 for (
UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
488 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
498 CleanUpCumulativeArrays();
503 if (FlatOrGauss ==
"Flat") fFlatNotGauss =
kTRUE;
504 else fFlatNotGauss =
kFALSE;
508 void* inpnode =
NULL;
514 void* varnode =
NULL;
526 TString varname, histname, classname;
529 if(
gTools().HasAttr(varnode,
"Name") )
540 fCumulativePDF.resize( ivar+1 );
541 fCumulativePDF[ivar].push_back(pdfToRead);
558 istr.getline(buf,512);
562 while (!(buf[0]==
'#'&& buf[1]==
'#')) {
564 while (*p==
' ' || *p==
'\t') p++;
565 if (*p==
'#' || *p==
'\0') {
566 istr.getline(buf,512);
569 std::stringstream sstr(buf);
572 if (strvar==
"CumulativeHistogram") {
574 TString devnullS(
""),hname(
"");
578 sstr >>
type >> ivar >> hname >> nbins >> fElementsperbin;
583 for (
Int_t ibin=0; ibin<nbins+1; ibin++) {
588 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
589 if(
type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(
type+1);
591 TH1F * histToRead = fCumulativeDist[ivar][
type];
592 if ( histToRead !=0 )
delete histToRead;
594 histToRead =
new TH1F( hname, hname, nbins, Binnings );
596 fCumulativeDist[ivar][
type]=histToRead;
606 fCumulativePDF.resize(ivar+1);
607 fCumulativePDF[ivar].resize(
type+1);
608 fCumulativePDF[ivar][
type] = pdf;
613 if (strvar==
"Uniform") {
614 sstr >> fFlatNotGauss;
615 istr.getline(buf,512);
619 istr.getline(buf,512);
623 UInt_t classIdx=(classname==
"signal")?0:1;
624 for(
UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
625 PDF* src = fCumulativePDF[ivar][classIdx];
669 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
672 if (x <= h->GetBinLowEdge(1)){
689 Log() <<
kINFO <<
"I do not know yet how to print this... look in the weight file " << cls <<
":" <<
Endl;
700 const UInt_t nvar = fGet.size();
701 UInt_t numDist = GetNClasses() + 1;
703 for (
UInt_t icls=0; icls<numDist; icls++) {
704 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
705 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
706 if (nbin > nBins) nBins=nbin;
713 fout <<
" int nvar;" << std::endl;
716 fout <<
" double cumulativeDist["<<nvar<<
"]["<<numDist<<
"]["<<nBins+1<<
"];"<<std::endl;
717 fout <<
" double X["<<nvar<<
"]["<<numDist<<
"]["<<nBins+1<<
"];"<<std::endl;
718 fout <<
" double xMin["<<nvar<<
"]["<<numDist<<
"];"<<std::endl;
719 fout <<
" double xMax["<<nvar<<
"]["<<numDist<<
"];"<<std::endl;
720 fout <<
" int nbins["<<nvar<<
"]["<<numDist<<
"];"<<std::endl;
724 fout <<
"#include \"math.h\"" << std::endl;
726 fout <<
"//_______________________________________________________________________" << std::endl;
727 fout <<
"inline void " << fcncName <<
"::InitTransform_"<<trCounter<<
"()" << std::endl;
728 fout <<
"{" << std::endl;
729 fout <<
" // Gauss/Uniform transformation, initialisation" << std::endl;
730 fout <<
" nvar=" << nvar <<
";" << std::endl;
731 for (
UInt_t icls=0; icls<numDist; icls++) {
732 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
733 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
734 fout <<
" nbins["<<ivar<<
"]["<<icls<<
"]="<<nbin<<
";"<<std::endl;
741 for (
UInt_t icls=0; icls<numDist; icls++) {
742 for (
UInt_t ivar=0; ivar<nvar; ivar++) {
748 Log() <<
kWARNING <<
"MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." <<
Endl;
750 }
catch( std::out_of_range
except ){
751 Log() <<
kWARNING <<
"MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar <<
")" <<
Endl;
756 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
757 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
761 for (
Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
762 fout <<
" cumulativeDist[" << ivar <<
"]["<< icls<<
"]["<<ibin<<
"]="<<
gTools().
StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<<
";"<<std::endl;
763 fout <<
" X[" << ivar <<
"]["<< icls<<
"]["<<ibin<<
"]="<<
gTools().
StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<<
";"<<std::endl;
768 fout <<
"}" << std::endl;
770 fout <<
"//_______________________________________________________________________" << std::endl;
771 fout <<
"inline void " << fcncName <<
"::Transform_"<<trCounter<<
"( std::vector<double>& iv, int clsIn) const" << std::endl;
772 fout <<
"{" << std::endl;
773 fout <<
" // Gauss/Uniform transformation" << std::endl;
774 fout <<
" int cls=clsIn;" << std::endl;
775 fout <<
" if (cls < 0 || cls > "<<GetNClasses()<<
") {"<< std::endl;
776 fout <<
" if ("<<GetNClasses()<<
" > 1 ) cls = "<<GetNClasses()<<
";"<< std::endl;
777 fout <<
" else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<
";"<< std::endl;
778 fout <<
" }"<< std::endl;
780 fout <<
" // copy the variables which are going to be transformed "<< std::endl;
782 fout <<
" static std::vector<double> dv; "<< std::endl;
783 fout <<
" dv.resize(nvar); "<< std::endl;
784 fout <<
" for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
785 fout <<
" "<< std::endl;
786 fout <<
" bool FlatNotGauss = "<< (fFlatNotGauss?
"true":
"false") <<
"; "<< std::endl;
787 fout <<
" double cumulant; "<< std::endl;
788 fout <<
" //const int nvar = "<<nvar<<
"; "<< std::endl;
789 fout <<
" for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
790 fout <<
" int nbin = nbins[ivar][cls]; "<< std::endl;
791 fout <<
" int ibin=0; "<< std::endl;
792 fout <<
" while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
793 fout <<
" "<< std::endl;
794 fout <<
" if (ibin<0) { ibin=0;} "<< std::endl;
795 fout <<
" if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
796 fout <<
" int nextbin = ibin; "<< std::endl;
797 fout <<
" if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
798 fout <<
" nextbin++; "<< std::endl;
799 fout <<
" else "<< std::endl;
800 fout <<
" nextbin--; "<< std::endl;
801 fout <<
" "<< std::endl;
802 fout <<
" double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
803 fout <<
" double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
804 fout <<
" cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
805 fout <<
" "<< std::endl;
806 fout <<
" "<< std::endl;
807 fout <<
" if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
808 fout <<
" if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
809 fout <<
" if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
810 fout <<
" else { "<< std::endl;
811 fout <<
" double maxErfInvArgRange = 0.99999999; "<< std::endl;
812 fout <<
" double arg = 2.0*cumulant - 1.0; "<< std::endl;
813 fout <<
" if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
814 fout <<
" if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
815 fout <<
" double inverf=0., stp=1. ; "<< std::endl;
816 fout <<
" while (stp >1.e-10){; "<< std::endl;
817 fout <<
" if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
818 fout <<
" else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
819 fout <<
" else inverf += stp; "<< std::endl;
820 fout <<
" } ; "<< std::endl;
821 fout <<
" //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
822 fout <<
" dv[ivar] = 1.414213562* inverf; "<< std::endl;
823 fout <<
" } "<< std::endl;
824 fout <<
" } "<< std::endl;
825 fout <<
" // copy the transformed variables back "<< std::endl;
826 fout <<
" for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
827 fout <<
"} "<< std::endl;
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1
MsgLogger & Endl(MsgLogger &ml)
void ReadXML(void *pdfnode)
XML file reading.
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
static Bool_t AddDirectoryStatus()
static function: cannot be inlined on Windows/NT
1-D histogram with a float per channel (see TH1 documentation)}
Short_t Min(Short_t a, Short_t b)
virtual Int_t GetNbinsX() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
virtual Double_t GetBinLowEdge(Int_t bin) const
return bin lower edge for 1D historam Better to use h1.GetXaxis().GetBinLowEdge(bin) ...
UInt_t GetNVariables() const
accessor to the number of variables
void Initialize(Bool_t useTMVAStyle=kTRUE)
Double_t Erf(Double_t x)
Computation of the error function erf(x).
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
char * Form(const char *fmt,...)
void Print(std::ostream &o) const
print method
static unsigned int total
#define TMVA_VERSION(a, b, c)
static const double x1[5]
static RooMathCoreReg dummy
Abstract ClassifierFactory template that handles arbitrary types.
Short_t Max(Short_t a, Short_t b)
const char * GetName() const
Returns name of object.
static void output(int code)