76 TMVA::MethodPDEFoam::MethodPDEFoam( const
TString& jobName,
81 MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption, theTargetDir )
92 , fMultiTargetRegression(kFALSE)
98 , fKernelEstimator(
NULL)
99 , fTargetSelectionStr("
Mean")
100 , fTargetSelection(
kMean)
101 , fFillFoamWithOrigWeights(kFALSE)
102 , fUseYesNoCell(kFALSE)
104 , fDTSeparation(
kFoam)
129 , fMultiTargetRegression(
kFALSE)
135 , fKernelEstimator(
NULL)
136 , fTargetSelectionStr(
"Mean")
137 , fTargetSelection(
kMean)
138 , fFillFoamWithOrigWeights(
kFALSE)
141 , fDTSeparation(
kFoam)
172 fnCells = fnActiveCells*2-1;
178 fFillFoamWithOrigWeights =
kFALSE;
181 fDTSeparation =
kFoam;
184 fKernelEstimator=
NULL;
185 fTargetSelection=
kMean;
188 fMultiTargetRegression =
kFALSE;
193 SetSignalReferenceCut( 0.0 );
195 SetSignalReferenceCut( 0.5 );
205 DeclareOptionRef( fSigBgSeparated =
kFALSE,
"SigBgSeparate",
"Separate foams for signal and background" );
206 DeclareOptionRef( fFrac = 0.001,
"TailCut",
"Fraction of outlier events that are excluded from the foam in each dimension" );
207 DeclareOptionRef( fVolFrac = 1./15.,
"VolFrac",
"Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
208 DeclareOptionRef( fnActiveCells = 500,
"nActiveCells",
"Maximum number of active cells to be created by the foam");
209 DeclareOptionRef( fnSampl = 2000,
"nSampl",
"Number of generated MC events per cell");
210 DeclareOptionRef( fnBin = 5,
"nBin",
"Number of bins in edge histograms");
211 DeclareOptionRef( fCompress =
kTRUE,
"Compress",
"Compress foam output file");
212 DeclareOptionRef( fMultiTargetRegression =
kFALSE,
"MultiTargetRegression",
"Do regression with multiple targets");
213 DeclareOptionRef( fNmin = 100,
"Nmin",
"Number of events in cell required to split cell");
214 DeclareOptionRef( fMaxDepth = 0,
"MaxDepth",
"Maximum depth of cell tree (0=unlimited)");
215 DeclareOptionRef( fFillFoamWithOrigWeights =
kFALSE,
"FillFoamWithOrigWeights",
"Fill foam with original or boost weights");
216 DeclareOptionRef( fUseYesNoCell =
kFALSE,
"UseYesNoCell",
"Return -1 or 1 for bkg or signal like events");
217 DeclareOptionRef( fDTLogic =
"None",
"DTLogic",
"Use decision tree algorithm to split cells");
219 AddPreDefVal(
TString(
"GiniIndex"));
220 AddPreDefVal(
TString(
"MisClassificationError"));
221 AddPreDefVal(
TString(
"CrossEntropy"));
222 AddPreDefVal(
TString(
"GiniIndexWithLaplace"));
223 AddPreDefVal(
TString(
"SdivSqrtSplusB"));
225 DeclareOptionRef( fKernelStr =
"None",
"Kernel",
"Kernel type used");
227 AddPreDefVal(
TString(
"Gauss"));
228 AddPreDefVal(
TString(
"LinNeighbors"));
229 DeclareOptionRef( fTargetSelectionStr =
"Mean",
"TargetSelection",
"Target selection method");
240 DeclareOptionRef(fCutNmin =
kTRUE,
"CutNmin",
"Requirement for minimal number of events in cell");
241 DeclareOptionRef(fPeekMax =
kTRUE,
"PeekMax",
"Peek cell with max. loss for the next split");
249 if (!(fFrac>=0. && fFrac<=1.)) {
250 Log() <<
kWARNING <<
"TailCut not in [0.,1] ==> using 0.001 instead" <<
Endl;
254 if (fnActiveCells < 1) {
255 Log() <<
kWARNING <<
"invalid number of active cells specified: "
256 << fnActiveCells <<
"; setting nActiveCells=2" <<
Endl;
259 fnCells = fnActiveCells*2-1;
262 if (fSigBgSeparated && fDTLogic !=
"None") {
263 Log() <<
kFATAL <<
"Decision tree logic works only for a single foam (SigBgSeparate=F)" <<
Endl;
267 if (fDTLogic ==
"None")
268 fDTSeparation =
kFoam;
269 else if (fDTLogic ==
"GiniIndex")
271 else if (fDTLogic ==
"MisClassificationError")
273 else if (fDTLogic ==
"CrossEntropy")
275 else if (fDTLogic ==
"GiniIndexWithLaplace")
277 else if (fDTLogic ==
"SdivSqrtSplusB")
280 Log() <<
kWARNING <<
"Unknown separation type: " << fDTLogic
281 <<
", setting to None" <<
Endl;
283 fDTSeparation =
kFoam;
286 if (fKernelStr ==
"None" ) fKernel = kNone;
287 else if (fKernelStr ==
"Gauss" ) fKernel = kGaus;
288 else if (fKernelStr ==
"LinNeighbors") fKernel = kLinN;
290 if (fTargetSelectionStr ==
"Mean" ) fTargetSelection =
kMean;
291 else fTargetSelection =
kMpv;
294 if (DoRegression() &&
Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
295 Log() <<
kWARNING <<
"Warning: number of targets > 1"
296 <<
" and MultiTargetRegression=F was set, this makes no sense!"
297 <<
" --> I'm setting MultiTargetRegression=T" <<
Endl;
298 fMultiTargetRegression =
kTRUE;
309 if (fKernelEstimator !=
NULL)
310 delete fKernelEstimator;
324 if (fMultiTargetRegression)
331 for (
UInt_t dim=0; dim<kDim; dim++) {
336 Log() <<
kDEBUG <<
"Number of training events: " <<
Data()->GetNTrainingEvents() <<
Endl;
337 Int_t nevoutside = (
Int_t)((
Data()->GetNTrainingEvents())*(fFrac));
338 Int_t rangehistbins = 10000;
342 for (
Long64_t i=0; i<(GetNEvents()); i++) {
343 const Event* ev = GetEvent(i);
344 for (
UInt_t dim=0; dim<kDim; dim++) {
346 if (fMultiTargetRegression) {
366 for (
UInt_t dim=0; dim<kDim; dim++) {
367 range_h[dim] =
new TH1F(
Form(
"range%i", dim),
"range", rangehistbins, xmin[dim], xmax[dim]);
371 for (
Long64_t i=0; i<GetNEvents(); i++) {
372 const Event* ev = GetEvent(i);
373 for (
UInt_t dim=0; dim<kDim; dim++) {
374 if (fMultiTargetRegression) {
386 for (
UInt_t dim=0; dim<kDim; dim++) {
387 for (
Int_t i=1; i<(rangehistbins+1); i++) {
388 if (range_h[dim]->Integral(0, i) > nevoutside) {
393 for (
Int_t i=rangehistbins; i>0; i--) {
394 if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
405 for (
UInt_t dim=0; dim<kDim; dim++) {
406 fXmin.push_back(xmin[dim]);
407 fXmax.push_back(xmax[dim]);
415 for (
UInt_t dim=0; dim<kDim; dim++)
427 Log() <<
kVERBOSE <<
"Calculate Xmin and Xmax for every dimension" <<
Endl;
434 if (DoRegression()) {
435 if (fMultiTargetRegression)
436 TrainMultiTargetRegression();
438 TrainMonoTargetRegression();
442 TrainMultiClassification();
444 if (DataInfo().GetNormalization() !=
"EQUALNUMEVENTS" ) {
445 Log() <<
kINFO <<
"NormMode=" << DataInfo().GetNormalization()
446 <<
" chosen. Note that only NormMode=EqualNumEvents"
447 <<
" ensures that Discriminant values correspond to"
448 <<
" signal probabilities." <<
Endl;
451 Log() <<
kDEBUG <<
"N_sig for training events: " <<
Data()->GetNEvtSigTrain() <<
Endl;
452 Log() <<
kDEBUG <<
"N_bg for training events: " <<
Data()->GetNEvtBkgdTrain() <<
Endl;
453 Log() <<
kDEBUG <<
"User normalization: " << DataInfo().GetNormalization().Data() <<
Endl;
456 TrainSeparatedClassification();
458 TrainUnifiedClassification();
463 for(
UInt_t i=0; i<fFoam.size(); i++) {
465 fFoam.at(i)->DeleteBinarySearchTree();
478 foamcaption[0] =
"SignalFoam";
479 foamcaption[1] =
"BgFoam";
481 for(
int i=0; i<2; i++) {
483 fFoam.push_back( InitFoam(foamcaption[i],
kSeparate) );
485 Log() <<
kVERBOSE <<
"Filling binary search tree of " << foamcaption[i]
486 <<
" with events" <<
Endl;
488 for (
Long64_t k=0; k<GetNEvents(); ++k) {
489 const Event* ev = GetEvent(k);
490 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
491 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
492 fFoam.back()->FillBinarySearchTree(ev);
495 Log() <<
kINFO <<
"Build up " << foamcaption[i] <<
Endl;
496 fFoam.back()->Create();
500 for (
Long64_t k=0; k<GetNEvents(); ++k) {
501 const Event* ev = GetEvent(k);
503 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
504 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
505 fFoam.back()->FillFoamCells(ev, weight);
516 fFoam.push_back( InitFoam(
"DiscrFoam",
kDiscr, fSignalClass) );
518 Log() <<
kVERBOSE <<
"Filling binary search tree of discriminator foam with events" <<
Endl;
520 for (
Long64_t k=0; k<GetNEvents(); ++k) {
521 const Event* ev = GetEvent(k);
522 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
523 fFoam.back()->FillBinarySearchTree(ev);
526 Log() <<
kINFO <<
"Build up discriminator foam" <<
Endl;
527 fFoam.back()->Create();
531 for (
Long64_t k=0; k<GetNEvents(); ++k) {
532 const Event* ev = GetEvent(k);
534 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
535 fFoam.back()->FillFoamCells(ev, weight);
540 fFoam.back()->Finalize();
552 for (
UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
554 fFoam.push_back( InitFoam(
Form(
"MultiClassFoam%u",iClass),
kMultiClass, iClass) );
556 Log() <<
kVERBOSE <<
"Filling binary search tree of multiclass foam "
557 << iClass <<
" with events" <<
Endl;
559 for (
Long64_t k=0; k<GetNEvents(); ++k) {
560 const Event* ev = GetEvent(k);
561 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
562 fFoam.back()->FillBinarySearchTree(ev);
565 Log() <<
kINFO <<
"Build up multiclass foam " << iClass <<
Endl;
566 fFoam.back()->Create();
571 for (
Long64_t k=0; k<GetNEvents(); ++k) {
572 const Event* ev = GetEvent(k);
574 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
575 fFoam.back()->FillFoamCells(ev, weight);
580 fFoam.back()->Finalize();
591 if (
Data()->GetNTargets() != 1) {
592 Log() <<
kFATAL <<
"Can't do mono-target regression with "
593 <<
Data()->GetNTargets() <<
" targets!" <<
Endl;
596 Log() <<
kDEBUG <<
"MethodPDEFoam: number of Targets: " <<
Data()->GetNTargets() <<
Endl;
598 fFoam.push_back( InitFoam(
"MonoTargetRegressionFoam",
kMonoTarget) );
602 for (
Long64_t k=0; k<GetNEvents(); ++k) {
603 const Event* ev = GetEvent(k);
604 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
605 fFoam.back()->FillBinarySearchTree(ev);
608 Log() <<
kINFO <<
"Build mono target regression foam" <<
Endl;
609 fFoam.back()->Create();
613 for (
Long64_t k=0; k<GetNEvents(); ++k) {
614 const Event* ev = GetEvent(k);
616 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
617 fFoam.back()->FillFoamCells(ev, weight);
622 fFoam.back()->Finalize();
636 Log() <<
kFATAL <<
"LinNeighbors kernel currently not supported"
637 <<
" for multi target regression" <<
Endl;
639 fFoam.push_back( InitFoam(
"MultiTargetRegressionFoam",
kMultiTarget) );
641 Log() <<
kVERBOSE <<
"Filling binary search tree of multi target regression foam with events"
644 for (
Long64_t k=0; k<GetNEvents(); ++k) {
648 std::vector<Float_t> targets(ev->
GetTargets());
650 for (
UInt_t i = 0; i < targets.size(); ++i)
651 ev->
SetVal(i+nVariables, targets.at(i));
653 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
654 fFoam.back()->FillBinarySearchTree(ev);
660 Log() <<
kINFO <<
"Build multi target regression foam" <<
Endl;
661 fFoam.back()->Create();
665 for (
Long64_t k=0; k<GetNEvents(); ++k) {
669 std::vector<Float_t> targets = ev->
GetTargets();
672 for (
UInt_t i = 0; i < targets.size(); ++i)
673 ev->
SetVal(i+nVariables, targets.at(i));
675 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
676 fFoam.back()->FillFoamCells(ev, weight);
705 const Event* ev = GetEvent();
708 if (fSigBgSeparated) {
709 std::vector<Float_t> xvec = ev->
GetValues();
713 density_sig = fFoam.at(0)->GetCellValue(xvec,
kValueDensity, fKernelEstimator);
714 density_bg = fFoam.at(1)->GetCellValue(xvec,
kValueDensity, fKernelEstimator);
717 if ( (density_sig+density_bg) > 0 )
718 discr = density_sig/(density_sig+density_bg);
724 discr = fFoam.at(0)->GetCellValue(ev->
GetValues(),
kValue, fKernelEstimator);
728 if (err || errUpper) {
729 const Double_t discr_error = CalculateMVAError();
730 if (err != 0) *err = discr_error;
731 if (errUpper != 0) *errUpper = discr_error;
735 return (discr < 0.5 ? -1 : 1);
751 const Event* ev = GetEvent();
754 if (fSigBgSeparated) {
755 const std::vector<Float_t>& xvec = ev->
GetValues();
757 const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec,
kValue, fKernelEstimator);
758 const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec,
kValue, fKernelEstimator);
764 if ((neventsS > 1e-10) || (neventsB > 1e-10)) {
766 mvaError =
TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
767 Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
786 std::vector<Float_t> xvec = ev->
GetValues();
788 if (fMulticlassReturnVal ==
NULL)
789 fMulticlassReturnVal =
new std::vector<Float_t>();
790 fMulticlassReturnVal->clear();
791 fMulticlassReturnVal->reserve(DataInfo().GetNClasses());
793 std::vector<Float_t> temp;
794 UInt_t nClasses = DataInfo().GetNClasses();
795 temp.reserve(nClasses);
796 for (
UInt_t iClass = 0; iClass < nClasses; ++iClass) {
797 temp.push_back(fFoam.at(iClass)->GetCellValue(xvec,
kValue, fKernelEstimator));
800 for (
UInt_t iClass = 0; iClass < nClasses; ++iClass) {
802 for (
UInt_t j = 0; j < nClasses; ++j) {
804 norm +=
exp(temp[j] - temp[iClass]);
806 fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
809 return *fMulticlassReturnVal;
820 fRanking =
new Ranking(GetName(),
"Variable Importance");
821 std::vector<Float_t> importance(GetNvar(), 0);
824 for (
UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
826 PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
827 std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
828 GetNCuts(root_cell, nCuts);
833 std::vector<Float_t> tmp_importance;
834 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
835 sumOfCuts += nCuts.at(ivar);
836 tmp_importance.push_back( nCuts.at(ivar) );
840 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
842 tmp_importance.at(ivar) /= sumOfCuts;
844 tmp_importance.at(ivar) = 0;
847 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
848 importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
853 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
854 fRanking->AddRank(
Rank(GetInputLabel(ivar), importance.at(ivar)));
878 GetNCuts(cell->
GetDau0(), nCuts);
880 GetNCuts(cell->
GetDau1(), nCuts);
893 UInt_t num_vars = GetNvar();
894 if (fMultiTargetRegression)
895 num_vars +=
Data()->GetNTargets();
897 for (
UInt_t idim=0; idim<num_vars; idim++) {
898 Log()<<
kDEBUG <<
"foam: SetXmin[dim="<<idim<<
"]: " << fXmin.at(idim) <<
Endl;
899 Log()<<
kDEBUG <<
"foam: SetXmax[dim="<<idim<<
"]: " << fXmax.at(idim) <<
Endl;
900 pdefoam->
SetXmin(idim, fXmin.at(idim));
901 pdefoam->
SetXmax(idim, fXmax.at(idim));
933 dim =
Data()->GetNTargets() +
Data()->GetNVariables();
938 std::vector<Double_t>
box;
939 for (
Int_t idim = 0; idim < dim; ++idim) {
940 box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac);
946 if (fDTSeparation ==
kFoam) {
976 switch (fDTSeparation) {
993 Log() <<
kFATAL <<
"Separation type " << fDTSeparation
994 <<
" currently not supported" <<
Endl;
1004 Log() <<
kFATAL <<
"Decision tree cell split algorithm is only"
1005 <<
" available for (multi) classification with a single"
1006 <<
" PDE-Foam (SigBgSeparate=F)" <<
Endl;
1012 else Log() <<
kFATAL <<
"PDEFoam pointer not set, exiting.." <<
Endl;
1015 fKernelEstimator = CreatePDEFoamKernel();
1035 SetXminXmax(pdefoam);
1045 if (fRegressionReturnVal == 0) fRegressionReturnVal =
new std::vector<Float_t>();
1046 fRegressionReturnVal->clear();
1047 fRegressionReturnVal->reserve(
Data()->GetNTargets());
1049 const Event* ev = GetEvent();
1050 std::vector<Float_t> vals = ev->
GetValues();
1053 Log() <<
kWARNING <<
"<GetRegressionValues> value vector is empty. " <<
Endl;
1056 if (fMultiTargetRegression) {
1058 std::map<Int_t, Float_t> xvec;
1059 for (
UInt_t i=0; i<vals.size(); ++i)
1060 xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
1062 std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec,
kValue );
1065 if (targets.size() !=
Data()->GetNTargets())
1066 Log() <<
kFATAL <<
"Something wrong with multi-target regression foam: "
1067 <<
"number of targets does not match the DataSet()" <<
Endl;
1068 for(
UInt_t i=0; i<targets.size(); i++)
1069 fRegressionReturnVal->push_back(targets.at(i));
1072 fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals,
kValue, fKernelEstimator));
1077 for (
UInt_t itgt = 0; itgt <
Data()->GetNTargets(); itgt++) {
1078 evT->
SetTarget(itgt, fRegressionReturnVal->at(itgt) );
1080 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
1081 fRegressionReturnVal->clear();
1082 for (
UInt_t itgt = 0; itgt <
Data()->GetNTargets(); itgt++) {
1083 fRegressionReturnVal->push_back( evT2->
GetTarget(itgt) );
1088 return (*fRegressionReturnVal);
1105 Log() <<
kFATAL <<
"Kernel: " << fKernel <<
" not supported!" <<
Endl;
1116 for (
UInt_t i=0; i<fFoam.size(); i++)
1117 if (fFoam.at(i))
delete fFoam.at(i);
1130 if (fKernelEstimator !=
NULL) {
1131 delete fKernelEstimator;
1132 fKernelEstimator =
NULL;
1147 gTools().
AddAttr( wght,
"SigBgSeparated", fSigBgSeparated );
1162 gTools().
AddAttr( wght,
"TargetSelection", TargetSelectionToUInt(fTargetSelection) );
1163 gTools().
AddAttr( wght,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1168 for (
UInt_t i=0; i<fXmin.size(); i++){
1174 for (
UInt_t i=0; i<fXmax.size(); i++){
1190 FillVariableNamesToFoam();
1192 TString rfname( GetWeightFileName() );
1200 TFile *rootFile = 0;
1201 if (fCompress) rootFile =
new TFile(rfname,
"RECREATE",
"foamfile", 9);
1202 else rootFile =
new TFile(rfname,
"RECREATE");
1205 for (
UInt_t i=0; i<fFoam.size(); ++i) {
1206 Log() <<
"writing foam " << fFoam.at(i)->GetFoamName().Data()
1207 <<
" to file" <<
Endl;
1208 fFoam.at(i)->
Write(fFoam.at(i)->GetFoamName().Data());
1212 Log() <<
kINFO <<
"Foams written to file: "
1221 istr >> fSigBgSeparated;
1223 istr >> fDiscrErrCut;
1235 Bool_t CutNmin, CutRMSmin;
1244 fKernel = UIntToKernel(ker);
1248 fTargetSelection = UIntToTargetSelection(ts);
1250 istr >> fFillFoamWithOrigWeights;
1251 istr >> fUseYesNoCell;
1257 if (fMultiTargetRegression)
1258 kDim +=
Data()->GetNTargets();
1259 fXmin.assign(kDim, 0);
1260 fXmax.assign(kDim, 0);
1263 for (
UInt_t i=0; i<kDim; i++)
1264 istr >> fXmin.at(i);
1265 for (
UInt_t i=0; i<kDim; i++)
1266 istr >> fXmax.at(i);
1269 ReadFoamsFromFile();
1277 gTools().
ReadAttr( wghtnode,
"SigBgSeparated", fSigBgSeparated );
1297 fKernel = UIntToKernel(ker);
1300 fTargetSelection = UIntToTargetSelection(ts);
1301 if (
gTools().HasAttr(wghtnode,
"FillFoamWithOrigWeights"))
1302 gTools().
ReadAttr( wghtnode,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1303 if (
gTools().HasAttr(wghtnode,
"UseYesNoCell"))
1310 if (fMultiTargetRegression)
1311 kDim +=
Data()->GetNTargets();
1312 fXmin.assign(kDim, 0);
1313 fXmax.assign(kDim, 0);
1317 for (
UInt_t counter=0; counter<kDim; counter++) {
1321 Log() <<
kFATAL <<
"dimension index out of range:" << i <<
Endl;
1326 void *xmax_wrap = xmin_wrap;
1327 for (
UInt_t counter=0; counter<kDim; counter++) {
1331 Log() <<
kFATAL <<
"dimension index out of range:" << i <<
Endl;
1340 ReadFoamsFromFile();
1343 if (fKernelEstimator !=
NULL)
1344 delete fKernelEstimator;
1345 fKernelEstimator = CreatePDEFoamKernel();
1369 Log() <<
kWARNING <<
"<ReadClonedFoamFromFile>: NULL pointer given" <<
Endl;
1381 Log() <<
kWARNING <<
"<ReadClonedFoamFromFile>: " << foamname
1382 <<
" could not be cloned!" <<
Endl;
1394 TString rfname( GetWeightFileName() );
1404 TFile *rootFile =
new TFile( rfname,
"READ" );
1408 if (DoRegression()) {
1409 if (fMultiTargetRegression)
1410 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MultiTargetRegressionFoam"));
1412 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MonoTargetRegressionFoam"));
1414 if (fSigBgSeparated) {
1415 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"SignalFoam"));
1416 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"BgFoam"));
1419 PDEFoam *foam = ReadClonedFoamFromFile(rootFile,
"DiscrFoam");
1421 fFoam.push_back(foam);
1424 for (
UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
1425 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
Form(
"MultiClassFoam%u",iClass)));
1436 for (
UInt_t i=0; i<fFoam.size(); ++i) {
1448 case 0:
return kNone;
1449 case 1:
return kGaus;
1450 case 2:
return kLinN;
1452 Log() <<
kWARNING <<
"<UIntToKernel>: unknown kernel number: " << iker <<
Endl;
1464 case 0:
return kMean;
1465 case 1:
return kMpv;
1467 Log() <<
kWARNING <<
"<UIntToTargetSelection>: unknown method TargetSelection: " << its <<
Endl;
1478 for (
UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
1479 for (
Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
1480 if(fMultiTargetRegression && (
UInt_t)idim>=DataInfo().GetNVariables())
1481 fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().
Data());
1483 fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().
Data());
1504 Log() <<
"PDE-Foam is a variation of the PDE-RS method using a self-adapting" <<
Endl;
1505 Log() <<
"binning method to divide the multi-dimensional variable space into a" <<
Endl;
1506 Log() <<
"finite number of hyper-rectangles (cells). The binning algorithm " <<
Endl;
1507 Log() <<
"adjusts the size and position of a predefined number of cells such" <<
Endl;
1508 Log() <<
"that the variance of the signal and background densities inside the " <<
Endl;
1509 Log() <<
"cells reaches a minimum" <<
Endl;
1513 Log() <<
"The PDEFoam classifier supports two different algorithms: " <<
Endl;
1515 Log() <<
" (1) Create one foam, which stores the signal over background" <<
Endl;
1516 Log() <<
" probability density. During foam buildup the variance of the" <<
Endl;
1517 Log() <<
" discriminant inside the cells is minimised." <<
Endl;
1519 Log() <<
" Booking option: SigBgSeparated=F" <<
Endl;
1521 Log() <<
" (2) Create two separate foams, one for the signal events and one for" <<
Endl;
1522 Log() <<
" background events. During foam buildup the variance of the" <<
Endl;
1523 Log() <<
" event density inside the cells is minimised separately for" <<
Endl;
1524 Log() <<
" signal and background." <<
Endl;
1526 Log() <<
" Booking option: SigBgSeparated=T" <<
Endl;
1528 Log() <<
"The following options can be set (the listed values are found to be a" <<
Endl;
1529 Log() <<
"good starting point for most applications):" <<
Endl;
1531 Log() <<
" SigBgSeparate False Separate Signal and Background" <<
Endl;
1532 Log() <<
" TailCut 0.001 Fraction of outlier events that excluded" <<
Endl;
1533 Log() <<
" from the foam in each dimension " <<
Endl;
1534 Log() <<
" VolFrac 0.0666 Volume fraction (used for density calculation" <<
Endl;
1535 Log() <<
" during foam build-up) " <<
Endl;
1536 Log() <<
" nActiveCells 500 Maximal number of active cells in final foam " <<
Endl;
1537 Log() <<
" nSampl 2000 Number of MC events per cell in foam build-up " <<
Endl;
1538 Log() <<
" nBin 5 Number of bins used in foam build-up " <<
Endl;
1539 Log() <<
" Nmin 100 Number of events in cell required to split cell" <<
Endl;
1540 Log() <<
" Kernel None Kernel type used (possible valuses are: None," <<
Endl;
1542 Log() <<
" Compress True Compress foam output file " <<
Endl;
1544 Log() <<
" Additional regression options:" <<
Endl;
1546 Log() <<
"MultiTargetRegression False Do regression with multiple targets " <<
Endl;
1547 Log() <<
" TargetSelection Mean Target selection method (possible valuses are: " <<
Endl;
1548 Log() <<
" Mean, Mpv)" <<
Endl;
1552 Log() <<
"The performance of the two implementations was found to be similar for" <<
Endl;
1553 Log() <<
"most examples studied. For the same number of cells per foam, the two-" <<
Endl;
1554 Log() <<
"foam option approximately doubles the amount of computer memory needed" <<
Endl;
1555 Log() <<
"during classification. For special cases where the event-density" <<
Endl;
1556 Log() <<
"distribution of signal and background events is very different, the" <<
Endl;
1557 Log() <<
"two-foam option was found to perform significantly better than the" <<
Endl;
1558 Log() <<
"option with only one foam." <<
Endl;
1560 Log() <<
"In order to gain better classification performance we recommend to set" <<
Endl;
1561 Log() <<
"the parameter \"nActiveCells\" to a high value." <<
Endl;
1563 Log() <<
"The parameter \"VolFrac\" specifies the size of the sampling volume" <<
Endl;
1564 Log() <<
"during foam buildup and should be tuned in order to achieve optimal" <<
Endl;
1565 Log() <<
"performance. A larger box leads to a reduced statistical uncertainty" <<
Endl;
1566 Log() <<
"for small training samples and to smoother sampling. A smaller box on" <<
Endl;
1567 Log() <<
"the other hand increases the sensitivity to statistical fluctuations" <<
Endl;
1568 Log() <<
"in the training samples, but for sufficiently large training samples" <<
Endl;
1569 Log() <<
"it will result in a more precise local estimate of the sampled" <<
Endl;
1570 Log() <<
"density. In general, higher dimensional problems require larger box" <<
Endl;
1571 Log() <<
"sizes, due to the reduced average number of events per box volume. The" <<
Endl;
1572 Log() <<
"default value of 0.0666 was optimised for an example with 5" <<
Endl;
1573 Log() <<
"observables and training samples of the order of 50000 signal and" <<
Endl;
1574 Log() <<
"background events each." <<
Endl;
1576 Log() <<
"Furthermore kernel weighting can be activated, which will lead to an" <<
Endl;
1577 Log() <<
"additional performance improvement. Note that Gauss weighting will" <<
Endl;
1578 Log() <<
"significantly increase the response time of the method. LinNeighbors" <<
Endl;
1579 Log() <<
"weighting performs a linear interpolation with direct neighbor cells" <<
Endl;
1580 Log() <<
"for each dimension and is much faster than Gauss weighting." <<
Endl;
1582 Log() <<
"The classification results were found to be rather insensitive to the" <<
Endl;
1583 Log() <<
"values of the parameters \"nSamples\" and \"nBin\"." <<
Endl;
void Train(void)
Train PDE-Foam depending on the set options.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual void Reset()
reset MethodPDEFoam:
MsgLogger & Endl(MsgLogger &ml)
void GetNCuts(PDEFoamCell *cell, std::vector< UInt_t > &nCuts)
Fill in 'nCuts' the number of cuts made in every foam dimension, starting at the root cell 'cell'...
PDEFoam * InitFoam(TString, EFoamType, UInt_t cls=0)
Create a new PDEFoam, set the PDEFoam options (nCells, nBin, Xmin, Xmax, etc.) and initialize the PDE...
TString & ReplaceAll(const TString &s1, const TString &s2)
void PrintCoefficients(void)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
1-D histogram with a float per channel (see TH1 documentation)
void SetXmin(Int_t idim, Double_t wmin)
set lower foam bound in dimension idim
ClassImp(TMVA::MethodPDEFoam) TMVA
init PDEFoam objects
void TrainUnifiedClassification(void)
Create only one unified foam (fFoam[0]) whose cells contain the average discriminator (N_sig)/(N_sig ...
void box(Int_t pat, Double_t x1, Double_t y1, Double_t x2, Double_t y2)
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not...
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
void GetHelpMessage() const
provide help message
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram. Better to use h1.GetXaxis().GetBinLowEdge(bin) ...
void ReadWeightsFromStream(std::istream &i)
read options and internal parameters
virtual ~MethodPDEFoam(void)
destructor
std::vector< std::vector< double > > Data
void SetMinType(EMsgType minType)
void SetVal(UInt_t ivar, Float_t val)
set variable ivar to val
void DeclareOptions()
Declare MethodPDEFoam options.
Double_t GetOriginalWeight() const
void SetXminXmax(TMVA::PDEFoam *)
Set Xmin, Xmax for every dimension in the given pdefoam object.
PDEFoam * ReadClonedFoamFromFile(TFile *, const TString &)
Reads a foam with name 'foamname' from file, and returns a clone of the foam.
void SetMaxDepth(UInt_t maxdepth)
void WriteFoamsToFile() const
Write PDEFoams to file.
std::vector< Float_t > & GetTargets()
void SetnSampl(Long_t nSampl)
void SetXmax(Int_t idim, Double_t wmax)
set upper foam bound in dimension idim
Double_t CalculateMVAError()
Calculate the error on the Mva value.
void Init(void)
default initialization called by all constructors
void CalcXminXmax()
Determine foam range [fXmin, fXmax] for all dimensions, such that a fraction of 'fFrac' events lie ou...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDEFoam can handle classification with multiple classes and regression with one or more regression-ta...
Double_t Mean(Long64_t n, const T *a, const Double_t *w=0)
EKernel UIntToKernel(UInt_t iker)
convert UInt_t to EKernel (used for reading weight files)
char * Form(const char *fmt,...)
ETargetSelection UIntToTargetSelection(UInt_t its)
convert UInt_t to ETargetSelection (used for reading weight files)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Return Mva-Value.
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
void TrainMultiTargetRegression(void)
Training one (multi target regression) foam, whose cells contain the average event density...
void SetDensity(PDEFoamDensityBase *dens)
MethodPDEFoam(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="PDEFoam", TDirectory *theTargetDir=0)
void SetDim(Int_t kDim)
Sets dimension of cubical space.
void FillVariableNamesToFoam() const
store the variable names in all foams
void TrainSeparatedClassification(void)
Creation of 2 separated foams: one for signal events, one for background events.
void ReadWeightsFromXML(void *wghtnode)
read PDEFoam variables from xml weight file
void TrainMultiClassification()
Create one unified foam (see TrainUnifiedClassification()) for each class, where the cells of foam i ...
Describe directory structure in memory.
void SetnCells(Long_t nCells)
PDEFoamKernelBase * CreatePDEFoamKernel()
create a pdefoam kernel estimator, depending on the current value of fKernel
void TrainMonoTargetRegression(void)
Training one (mono target regression) foam, whose cells contain the average 0th target.
Float_t GetTarget(UInt_t itgt) const
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the PDEFoam classifier.
#define REGISTER_METHOD(CLASS)
for example
const Ranking * CreateRanking()
Compute ranking of input variables from the number of cuts made in each PDEFoam dimension.
Abstract ClassifierFactory template that handles arbitrary types.
void DeleteFoams()
Deletes all trained foams.
std::vector< Float_t > & GetValues()
PDEFoamCell * GetDau1() const
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
void SetEvPerBin(Int_t EvPerBin)
virtual const std::vector< Float_t > & GetRegressionValues()
Return regression values for both multi- and mono-target regression.
Double_t Sqrt(Double_t x)
void ProcessOptions()
process user options
void MakeClassSpecific(std::ostream &, const TString &) const
write PDEFoam-specific classifier response NOT IMPLEMENTED YET!
double norm(double *x, double *p)
void AddWeightsXMLTo(void *parent) const
create XML output of PDEFoam method variables
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
void ReadFoamsFromFile()
read foams from file
virtual void Close(Option_t *option="")
Close a file.
PDEFoamCell * GetDau0() const