100 , fMultiTargetRegression(
kFALSE)
106 , fKernelEstimator(NULL)
107 , fTargetSelectionStr(
"Mean")
108 , fTargetSelection(kMean)
109 , fFillFoamWithOrigWeights(
kFALSE)
112 , fDTSeparation(kFoam)
124 const TString& theWeightFile) :
136 , fMultiTargetRegression(
kFALSE)
142 , fKernelEstimator(NULL)
143 , fTargetSelectionStr(
"Mean")
144 , fTargetSelection(kMean)
145 , fFillFoamWithOrigWeights(
kFALSE)
148 , fDTSeparation(kFoam)
179 fnCells = fnActiveCells*2-1;
185 fFillFoamWithOrigWeights =
kFALSE;
188 fDTSeparation = kFoam;
191 fKernelEstimator= NULL;
192 fTargetSelection= kMean;
195 fMultiTargetRegression =
kFALSE;
200 SetSignalReferenceCut( 0.0 );
202 SetSignalReferenceCut( 0.5 );
210 DeclareOptionRef( fSigBgSeparated =
kFALSE,
"SigBgSeparate",
"Separate foams for signal and background" );
211 DeclareOptionRef( fFrac = 0.001,
"TailCut",
"Fraction of outlier events that are excluded from the foam in each dimension" );
212 DeclareOptionRef( fVolFrac = 1./15.,
"VolFrac",
"Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
213 DeclareOptionRef( fnActiveCells = 500,
"nActiveCells",
"Maximum number of active cells to be created by the foam");
214 DeclareOptionRef( fnSampl = 2000,
"nSampl",
"Number of generated MC events per cell");
215 DeclareOptionRef( fnBin = 5,
"nBin",
"Number of bins in edge histograms");
216 DeclareOptionRef( fCompress =
kTRUE,
"Compress",
"Compress foam output file");
217 DeclareOptionRef( fMultiTargetRegression =
kFALSE,
"MultiTargetRegression",
"Do regression with multiple targets");
218 DeclareOptionRef( fNmin = 100,
"Nmin",
"Number of events in cell required to split cell");
219 DeclareOptionRef( fMaxDepth = 0,
"MaxDepth",
"Maximum depth of cell tree (0=unlimited)");
220 DeclareOptionRef( fFillFoamWithOrigWeights =
kFALSE,
"FillFoamWithOrigWeights",
"Fill foam with original or boost weights");
221 DeclareOptionRef( fUseYesNoCell =
kFALSE,
"UseYesNoCell",
"Return -1 or 1 for bkg or signal like events");
222 DeclareOptionRef( fDTLogic =
"None",
"DTLogic",
"Use decision tree algorithm to split cells");
224 AddPreDefVal(
TString(
"GiniIndex"));
225 AddPreDefVal(
TString(
"MisClassificationError"));
226 AddPreDefVal(
TString(
"CrossEntropy"));
227 AddPreDefVal(
TString(
"GiniIndexWithLaplace"));
228 AddPreDefVal(
TString(
"SdivSqrtSplusB"));
230 DeclareOptionRef( fKernelStr =
"None",
"Kernel",
"Kernel type used");
232 AddPreDefVal(
TString(
"Gauss"));
233 AddPreDefVal(
TString(
"LinNeighbors"));
234 DeclareOptionRef( fTargetSelectionStr =
"Mean",
"TargetSelection",
"Target selection method");
245 DeclareOptionRef(fCutNmin =
kTRUE,
"CutNmin",
"Requirement for minimal number of events in cell");
246 DeclareOptionRef(fPeekMax =
kTRUE,
"PeekMax",
"Peek cell with max. loss for the next split");
254 if (!(fFrac>=0. && fFrac<=1.)) {
255 Log() << kWARNING <<
"TailCut not in [0.,1] ==> using 0.001 instead" <<
Endl;
259 if (fnActiveCells < 1) {
260 Log() << kWARNING <<
"invalid number of active cells specified: "
261 << fnActiveCells <<
"; setting nActiveCells=2" <<
Endl;
264 fnCells = fnActiveCells*2-1;
267 if (fSigBgSeparated && fDTLogic !=
"None") {
268 Log() << kFATAL <<
"Decision tree logic works only for a single foam (SigBgSeparate=F)" <<
Endl;
272 if (fDTLogic ==
"None")
273 fDTSeparation = kFoam;
274 else if (fDTLogic ==
"GiniIndex")
275 fDTSeparation = kGiniIndex;
276 else if (fDTLogic ==
"MisClassificationError")
277 fDTSeparation = kMisClassificationError;
278 else if (fDTLogic ==
"CrossEntropy")
280 else if (fDTLogic ==
"GiniIndexWithLaplace")
281 fDTSeparation = kGiniIndexWithLaplace;
282 else if (fDTLogic ==
"SdivSqrtSplusB")
283 fDTSeparation = kSdivSqrtSplusB;
285 Log() << kWARNING <<
"Unknown separation type: " << fDTLogic
286 <<
", setting to None" <<
Endl;
288 fDTSeparation = kFoam;
291 if (fKernelStr ==
"None" ) fKernel = kNone;
292 else if (fKernelStr ==
"Gauss" ) fKernel = kGaus;
293 else if (fKernelStr ==
"LinNeighbors") fKernel = kLinN;
295 if (fTargetSelectionStr ==
"Mean" ) fTargetSelection = kMean;
296 else fTargetSelection = kMpv;
299 if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
300 Log() << kWARNING <<
"Warning: number of targets > 1"
301 <<
" and MultiTargetRegression=F was set, this makes no sense!"
302 <<
" --> I'm setting MultiTargetRegression=T" <<
Endl;
303 fMultiTargetRegression =
kTRUE;
314 if (fKernelEstimator != NULL)
315 delete fKernelEstimator;
327 UInt_t tDim = Data()->GetNTargets();
328 UInt_t vDim = Data()->GetNVariables();
329 if (fMultiTargetRegression)
336 for (
UInt_t dim=0; dim<kDim; dim++) {
341 Log() << kDEBUG <<
"Number of training events: " << Data()->GetNTrainingEvents() <<
Endl;
342 Int_t nevoutside = (
Int_t)((Data()->GetNTrainingEvents())*(fFrac));
343 Int_t rangehistbins = 10000;
347 for (
Long64_t i=0; i<(GetNEvents()); i++) {
348 const Event* ev = GetEvent(i);
349 for (
UInt_t dim=0; dim<kDim; dim++) {
351 if (fMultiTargetRegression) {
371 for (
UInt_t dim=0; dim<kDim; dim++) {
372 range_h[dim] =
new TH1F(
Form(
"range%i", dim),
"range", rangehistbins,
xmin[dim],
xmax[dim]);
376 for (
Long64_t i=0; i<GetNEvents(); i++) {
377 const Event* ev = GetEvent(i);
378 for (
UInt_t dim=0; dim<kDim; dim++) {
379 if (fMultiTargetRegression) {
391 for (
UInt_t dim=0; dim<kDim; dim++) {
392 for (
Int_t i=1; i<(rangehistbins+1); i++) {
393 if (range_h[dim]->Integral(0, i) > nevoutside) {
398 for (
Int_t i=rangehistbins; i>0; i--) {
399 if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
410 for (
UInt_t dim=0; dim<kDim; dim++) {
411 fXmin.push_back(
xmin[dim]);
412 fXmax.push_back(
xmax[dim]);
420 for (
UInt_t dim=0; dim<kDim; dim++)
432 Log() << kVERBOSE <<
"Calculate Xmin and Xmax for every dimension" <<
Endl;
439 if (DoRegression()) {
440 if (fMultiTargetRegression)
441 TrainMultiTargetRegression();
443 TrainMonoTargetRegression();
447 TrainMultiClassification();
449 if (DataInfo().GetNormalization() !=
"EQUALNUMEVENTS" ) {
450 Log() << kHEADER <<
"NormMode=" << DataInfo().GetNormalization()
451 <<
" chosen. Note that only NormMode=EqualNumEvents"
452 <<
" ensures that Discriminant values correspond to"
453 <<
" signal probabilities." <<
Endl;
456 Log() << kDEBUG <<
"N_sig for training events: " << Data()->GetNEvtSigTrain() <<
Endl;
457 Log() << kDEBUG <<
"N_bg for training events: " << Data()->GetNEvtBkgdTrain() <<
Endl;
458 Log() << kDEBUG <<
"User normalization: " << DataInfo().GetNormalization().Data() <<
Endl;
461 TrainSeparatedClassification();
463 TrainUnifiedClassification();
468 for(
UInt_t i=0; i<fFoam.size(); i++) {
470 fFoam.at(i)->DeleteBinarySearchTree();
484 foamcaption[0] =
"SignalFoam";
485 foamcaption[1] =
"BgFoam";
487 for(
int i=0; i<2; i++) {
489 fFoam.push_back( InitFoam(foamcaption[i], kSeparate) );
491 Log() << kVERBOSE <<
"Filling binary search tree of " << foamcaption[i]
492 <<
" with events" <<
Endl;
494 for (
Long64_t k=0; k<GetNEvents(); ++k) {
495 const Event* ev = GetEvent(k);
496 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
497 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
498 fFoam.back()->FillBinarySearchTree(ev);
501 Log() << kINFO <<
"Build up " << foamcaption[i] <<
Endl;
502 fFoam.back()->Create();
504 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
506 for (
Long64_t k=0; k<GetNEvents(); ++k) {
507 const Event* ev = GetEvent(k);
509 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
510 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
511 fFoam.back()->FillFoamCells(ev, weight);
522 fFoam.push_back( InitFoam(
"DiscrFoam", kDiscr, fSignalClass) );
524 Log() << kVERBOSE <<
"Filling binary search tree of discriminator foam with events" <<
Endl;
526 for (
Long64_t k=0; k<GetNEvents(); ++k) {
527 const Event* ev = GetEvent(k);
528 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
529 fFoam.back()->FillBinarySearchTree(ev);
532 Log() << kINFO <<
"Build up discriminator foam" <<
Endl;
533 fFoam.back()->Create();
535 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
537 for (
Long64_t k=0; k<GetNEvents(); ++k) {
538 const Event* ev = GetEvent(k);
540 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
541 fFoam.back()->FillFoamCells(ev, weight);
544 Log() << kVERBOSE <<
"Calculate cell discriminator"<<
Endl;
546 fFoam.back()->Finalize();
558 for (
UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
560 fFoam.push_back( InitFoam(
Form(
"MultiClassFoam%u",iClass), kMultiClass, iClass) );
562 Log() << kVERBOSE <<
"Filling binary search tree of multiclass foam "
563 << iClass <<
" with events" <<
Endl;
565 for (
Long64_t k=0; k<GetNEvents(); ++k) {
566 const Event* ev = GetEvent(k);
567 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
568 fFoam.back()->FillBinarySearchTree(ev);
571 Log() << kINFO <<
"Build up multiclass foam " << iClass <<
Endl;
572 fFoam.back()->Create();
574 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
577 for (
Long64_t k=0; k<GetNEvents(); ++k) {
578 const Event* ev = GetEvent(k);
580 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
581 fFoam.back()->FillFoamCells(ev, weight);
584 Log() << kVERBOSE <<
"Calculate cell discriminator"<<
Endl;
586 fFoam.back()->Finalize();
597 if (Data()->GetNTargets() != 1) {
598 Log() << kFATAL <<
"Can't do mono-target regression with "
599 << Data()->GetNTargets() <<
" targets!" <<
Endl;
602 Log() << kDEBUG <<
"MethodPDEFoam: number of Targets: " << Data()->GetNTargets() <<
Endl;
604 fFoam.push_back( InitFoam(
"MonoTargetRegressionFoam", kMonoTarget) );
606 Log() << kVERBOSE <<
"Filling binary search tree with events" <<
Endl;
608 for (
Long64_t k=0; k<GetNEvents(); ++k) {
609 const Event* ev = GetEvent(k);
610 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
611 fFoam.back()->FillBinarySearchTree(ev);
614 Log() << kINFO <<
"Build mono target regression foam" <<
Endl;
615 fFoam.back()->Create();
617 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
619 for (
Long64_t k=0; k<GetNEvents(); ++k) {
620 const Event* ev = GetEvent(k);
622 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
623 fFoam.back()->FillFoamCells(ev, weight);
626 Log() << kVERBOSE <<
"Calculate average cell targets"<<
Endl;
628 fFoam.back()->Finalize();
638 Log() << kDEBUG <<
"Number of variables: " << Data()->GetNVariables() <<
Endl;
639 Log() << kDEBUG <<
"Number of Targets: " << Data()->GetNTargets() <<
Endl;
640 Log() << kDEBUG <<
"Dimension of foam: " << Data()->GetNVariables()+Data()->GetNTargets() <<
Endl;
642 Log() << kFATAL <<
"LinNeighbors kernel currently not supported"
643 <<
" for multi target regression" <<
Endl;
645 fFoam.push_back( InitFoam(
"MultiTargetRegressionFoam", kMultiTarget) );
647 Log() << kVERBOSE <<
"Filling binary search tree of multi target regression foam with events"
650 for (
Long64_t k=0; k<GetNEvents(); ++k) {
654 std::vector<Float_t> targets(ev->
GetTargets());
656 for (
UInt_t i = 0; i < targets.size(); ++i)
657 ev->
SetVal(i+nVariables, targets.at(i));
659 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
660 fFoam.back()->FillBinarySearchTree(ev);
666 Log() << kINFO <<
"Build multi target regression foam" <<
Endl;
667 fFoam.back()->Create();
669 Log() << kVERBOSE <<
"Filling foam cells with events" <<
Endl;
671 for (
Long64_t k=0; k<GetNEvents(); ++k) {
675 std::vector<Float_t> targets = ev->
GetTargets();
678 for (
UInt_t i = 0; i < targets.size(); ++i)
679 ev->
SetVal(i+nVariables, targets.at(i));
681 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->
GetWeight()<=0))
682 fFoam.back()->FillFoamCells(ev, weight);
711 const Event* ev = GetEvent();
714 if (fSigBgSeparated) {
715 std::vector<Float_t> xvec = ev->
GetValues();
719 density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
720 density_bg = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
723 if ( (density_sig+density_bg) > 0 )
724 discr = density_sig/(density_sig+density_bg);
730 discr = fFoam.at(0)->GetCellValue(ev->
GetValues(), kValue, fKernelEstimator);
734 if (err || errUpper) {
735 const Double_t discr_error = CalculateMVAError();
736 if (err != 0) *err = discr_error;
737 if (errUpper != 0) *errUpper = discr_error;
741 return (discr < 0.5 ? -1 : 1);
757 const Event* ev = GetEvent();
760 if (fSigBgSeparated) {
761 const std::vector<Float_t>& xvec = ev->
GetValues();
763 const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
764 const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
770 if ((neventsS > 1
e-10) || (neventsB > 1
e-10)) {
772 mvaError =
TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
773 Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
779 mvaError = fFoam.at(0)->GetCellValue(ev->
GetValues(), kValueError, fKernelEstimator);
792 std::vector<Float_t> xvec = ev->
GetValues();
794 if (fMulticlassReturnVal == NULL)
795 fMulticlassReturnVal =
new std::vector<Float_t>();
796 fMulticlassReturnVal->clear();
797 fMulticlassReturnVal->reserve(DataInfo().GetNClasses());
799 std::vector<Float_t> temp;
800 UInt_t nClasses = DataInfo().GetNClasses();
801 temp.reserve(nClasses);
802 for (
UInt_t iClass = 0; iClass < nClasses; ++iClass) {
803 temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
806 for (
UInt_t iClass = 0; iClass < nClasses; ++iClass) {
808 for (
UInt_t j = 0; j < nClasses; ++j) {
810 norm +=
exp(temp[j] - temp[iClass]);
812 fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
815 return *fMulticlassReturnVal;
827 std::vector<Float_t> importance(GetNvar(), 0);
830 for (
UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
832 PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
833 std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
834 GetNCuts(root_cell, nCuts);
839 std::vector<Float_t> tmp_importance;
840 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
841 sumOfCuts += nCuts.at(ivar);
842 tmp_importance.push_back( nCuts.at(ivar) );
846 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
848 tmp_importance.at(ivar) /= sumOfCuts;
850 tmp_importance.at(ivar) = 0;
853 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
854 importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
859 for (
UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
860 fRanking->AddRank(
Rank(GetInputLabel(ivar), importance.at(ivar)));
878 if (cell == NULL || cell->
GetStat() == 1)
884 GetNCuts(cell->
GetDau0(), nCuts);
886 GetNCuts(cell->
GetDau1(), nCuts);
895 Log() << kFATAL <<
"Null pointer given!" <<
Endl;
899 UInt_t num_vars = GetNvar();
900 if (fMultiTargetRegression)
901 num_vars += Data()->GetNTargets();
903 for (
UInt_t idim=0; idim<num_vars; idim++) {
904 Log()<< kDEBUG <<
"foam: SetXmin[dim="<<idim<<
"]: " << fXmin.at(idim) <<
Endl;
905 Log()<< kDEBUG <<
"foam: SetXmax[dim="<<idim<<
"]: " << fXmax.at(idim) <<
Endl;
906 pdefoam->
SetXmin(idim, fXmin.at(idim));
907 pdefoam->
SetXmax(idim, fXmax.at(idim));
938 if (ft == kMultiTarget)
940 dim = Data()->GetNTargets() + Data()->GetNVariables();
945 std::vector<Double_t>
box;
946 for (
Int_t idim = 0; idim < dim; ++idim) {
947 box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac);
953 if (fDTSeparation == kFoam) {
974 Log() << kFATAL <<
"Unknown PDEFoam type!" <<
Endl;
983 switch (fDTSeparation) {
987 case kMisClassificationError:
993 case kGiniIndexWithLaplace:
996 case kSdivSqrtSplusB:
1000 Log() << kFATAL <<
"Separation type " << fDTSeparation
1001 <<
" currently not supported" <<
Endl;
1011 Log() << kFATAL <<
"Decision tree cell split algorithm is only"
1012 <<
" available for (multi) classification with a single"
1013 <<
" PDE-Foam (SigBgSeparate=F)" <<
Endl;
1019 else Log() << kFATAL <<
"PDEFoam pointer not set, exiting.." <<
Endl;
1022 fKernelEstimator = CreatePDEFoamKernel();
1042 SetXminXmax(pdefoam);
1052 if (fRegressionReturnVal == 0) fRegressionReturnVal =
new std::vector<Float_t>();
1053 fRegressionReturnVal->clear();
1054 fRegressionReturnVal->reserve(Data()->GetNTargets());
1056 const Event* ev = GetEvent();
1057 std::vector<Float_t> vals = ev->
GetValues();
1060 Log() << kWARNING <<
"<GetRegressionValues> value vector is empty. " <<
Endl;
1063 if (fMultiTargetRegression) {
1065 std::map<Int_t, Float_t> xvec;
1066 for (
UInt_t i=0; i<vals.size(); ++i)
1067 xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
1069 std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );
1072 if (targets.size() != Data()->GetNTargets())
1073 Log() << kFATAL <<
"Something wrong with multi-target regression foam: "
1074 <<
"number of targets does not match the DataSet()" <<
Endl;
1075 for(
UInt_t i=0; i<targets.size(); i++)
1076 fRegressionReturnVal->push_back(targets.at(i));
1079 fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));
1084 for (
UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1085 evT->
SetTarget(itgt, fRegressionReturnVal->at(itgt) );
1087 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
1088 fRegressionReturnVal->clear();
1089 for (
UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1090 fRegressionReturnVal->push_back( evT2->
GetTarget(itgt) );
1095 return (*fRegressionReturnVal);
1112 Log() << kFATAL <<
"Kernel: " << fKernel <<
" not supported!" <<
Endl;
1123 for (
UInt_t i=0; i<fFoam.size(); i++)
1124 if (fFoam.at(i))
delete fFoam.at(i);
1138 if (fKernelEstimator != NULL) {
1139 delete fKernelEstimator;
1140 fKernelEstimator = NULL;
1155 gTools().
AddAttr( wght,
"SigBgSeparated", fSigBgSeparated );
1170 gTools().
AddAttr( wght,
"TargetSelection", TargetSelectionToUInt(fTargetSelection) );
1171 gTools().
AddAttr( wght,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1176 for (
UInt_t i=0; i<fXmin.size(); i++){
1182 for (
UInt_t i=0; i<fXmax.size(); i++){
1198 FillVariableNamesToFoam();
1200 TString rfname( GetWeightFileName() );
1208 TFile *rootFile = 0;
1209 if (fCompress) rootFile =
new TFile(rfname,
"RECREATE",
"foamfile", 9);
1210 else rootFile =
new TFile(rfname,
"RECREATE");
1213 for (
UInt_t i=0; i<fFoam.size(); ++i) {
1214 Log() <<
"writing foam " << fFoam.at(i)->GetFoamName().Data()
1215 <<
" to file" <<
Endl;
1216 fFoam.at(i)->
Write(fFoam.at(i)->GetFoamName().Data());
1220 Log() << kINFO <<
"Foams written to file: "
1229 istr >> fSigBgSeparated;
1231 istr >> fDiscrErrCut;
1243 Bool_t CutNmin, CutRMSmin;
1252 fKernel = UIntToKernel(ker);
1256 fTargetSelection = UIntToTargetSelection(ts);
1258 istr >> fFillFoamWithOrigWeights;
1259 istr >> fUseYesNoCell;
1265 if (fMultiTargetRegression)
1266 kDim += Data()->GetNTargets();
1267 fXmin.assign(kDim, 0);
1268 fXmax.assign(kDim, 0);
1271 for (
UInt_t i=0; i<kDim; i++)
1272 istr >> fXmin.at(i);
1273 for (
UInt_t i=0; i<kDim; i++)
1274 istr >> fXmax.at(i);
1277 ReadFoamsFromFile();
1285 gTools().
ReadAttr( wghtnode,
"SigBgSeparated", fSigBgSeparated );
1305 fKernel = UIntToKernel(ker);
1308 fTargetSelection = UIntToTargetSelection(ts);
1309 if (
gTools().HasAttr(wghtnode,
"FillFoamWithOrigWeights"))
1310 gTools().
ReadAttr( wghtnode,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1311 if (
gTools().HasAttr(wghtnode,
"UseYesNoCell"))
1318 if (fMultiTargetRegression)
1319 kDim += Data()->GetNTargets();
1320 fXmin.assign(kDim, 0);
1321 fXmax.assign(kDim, 0);
1325 for (
UInt_t counter=0; counter<kDim; counter++) {
1329 Log() << kFATAL <<
"dimension index out of range:" << i <<
Endl;
1334 void *xmax_wrap = xmin_wrap;
1335 for (
UInt_t counter=0; counter<kDim; counter++) {
1339 Log() << kFATAL <<
"dimension index out of range:" << i <<
Endl;
1348 ReadFoamsFromFile();
1351 if (fKernelEstimator != NULL)
1352 delete fKernelEstimator;
1353 fKernelEstimator = CreatePDEFoamKernel();
1377 Log() << kWARNING <<
"<ReadClonedFoamFromFile>: NULL pointer given" <<
Endl;
1389 Log() << kWARNING <<
"<ReadClonedFoamFromFile>: " << foamname
1390 <<
" could not be cloned!" <<
Endl;
1402 TString rfname( GetWeightFileName() );
1410 Log() << kINFO <<
"Read foams from file: " <<
gTools().
Color(
"lightblue")
1412 TFile *rootFile =
new TFile( rfname,
"READ" );
1413 if (rootFile->
IsZombie())
Log() << kFATAL <<
"Cannot open file \"" << rfname <<
"\"" <<
Endl;
1416 if (DoRegression()) {
1417 if (fMultiTargetRegression)
1418 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MultiTargetRegressionFoam"));
1420 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MonoTargetRegressionFoam"));
1422 if (fSigBgSeparated) {
1423 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"SignalFoam"));
1424 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"BgFoam"));
1427 PDEFoam *foam = ReadClonedFoamFromFile(rootFile,
"DiscrFoam");
1429 fFoam.push_back(foam);
1432 for (
UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
1433 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
Form(
"MultiClassFoam%u",iClass)));
1444 for (
UInt_t i=0; i<fFoam.size(); ++i) {
1446 Log() << kFATAL <<
"Could not load foam!" <<
Endl;
1456 case 0:
return kNone;
1457 case 1:
return kGaus;
1458 case 2:
return kLinN;
1460 Log() << kWARNING <<
"<UIntToKernel>: unknown kernel number: " << iker <<
Endl;
1472 case 0:
return kMean;
1473 case 1:
return kMpv;
1475 Log() << kWARNING <<
"<UIntToTargetSelection>: unknown method TargetSelection: " << its <<
Endl;
1486 for (
UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
1487 for (
Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
1488 if(fMultiTargetRegression && (
UInt_t)idim>=DataInfo().GetNVariables())
1489 fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
1491 fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
1512 Log() <<
"PDE-Foam is a variation of the PDE-RS method using a self-adapting" <<
Endl;
1513 Log() <<
"binning method to divide the multi-dimensional variable space into a" <<
Endl;
1514 Log() <<
"finite number of hyper-rectangles (cells). The binning algorithm " <<
Endl;
1515 Log() <<
"adjusts the size and position of a predefined number of cells such" <<
Endl;
1516 Log() <<
"that the variance of the signal and background densities inside the " <<
Endl;
1517 Log() <<
"cells reaches a minimum" <<
Endl;
1521 Log() <<
"The PDEFoam classifier supports two different algorithms: " <<
Endl;
1523 Log() <<
" (1) Create one foam, which stores the signal over background" <<
Endl;
1524 Log() <<
" probability density. During foam buildup the variance of the" <<
Endl;
1525 Log() <<
" discriminant inside the cells is minimised." <<
Endl;
1527 Log() <<
" Booking option: SigBgSeparated=F" <<
Endl;
1529 Log() <<
" (2) Create two separate foams, one for the signal events and one for" <<
Endl;
1530 Log() <<
" background events. During foam buildup the variance of the" <<
Endl;
1531 Log() <<
" event density inside the cells is minimised separately for" <<
Endl;
1532 Log() <<
" signal and background." <<
Endl;
1534 Log() <<
" Booking option: SigBgSeparated=T" <<
Endl;
1536 Log() <<
"The following options can be set (the listed values are found to be a" <<
Endl;
1537 Log() <<
"good starting point for most applications):" <<
Endl;
1539 Log() <<
" SigBgSeparate False Separate Signal and Background" <<
Endl;
1540 Log() <<
" TailCut 0.001 Fraction of outlier events that excluded" <<
Endl;
1541 Log() <<
" from the foam in each dimension " <<
Endl;
1542 Log() <<
" VolFrac 0.0666 Volume fraction (used for density calculation" <<
Endl;
1543 Log() <<
" during foam build-up) " <<
Endl;
1544 Log() <<
" nActiveCells 500 Maximal number of active cells in final foam " <<
Endl;
1545 Log() <<
" nSampl 2000 Number of MC events per cell in foam build-up " <<
Endl;
1546 Log() <<
" nBin 5 Number of bins used in foam build-up " <<
Endl;
1547 Log() <<
" Nmin 100 Number of events in cell required to split cell" <<
Endl;
1548 Log() <<
" Kernel None Kernel type used (possible values are: None," <<
Endl;
1550 Log() <<
" Compress True Compress foam output file " <<
Endl;
1552 Log() <<
" Additional regression options:" <<
Endl;
1554 Log() <<
"MultiTargetRegression False Do regression with multiple targets " <<
Endl;
1555 Log() <<
" TargetSelection Mean Target selection method (possible values are: " <<
Endl;
1556 Log() <<
" Mean, Mpv)" <<
Endl;
1560 Log() <<
"The performance of the two implementations was found to be similar for" <<
Endl;
1561 Log() <<
"most examples studied. For the same number of cells per foam, the two-" <<
Endl;
1562 Log() <<
"foam option approximately doubles the amount of computer memory needed" <<
Endl;
1563 Log() <<
"during classification. For special cases where the event-density" <<
Endl;
1564 Log() <<
"distribution of signal and background events is very different, the" <<
Endl;
1565 Log() <<
"two-foam option was found to perform significantly better than the" <<
Endl;
1566 Log() <<
"option with only one foam." <<
Endl;
1568 Log() <<
"In order to gain better classification performance we recommend to set" <<
Endl;
1569 Log() <<
"the parameter \"nActiveCells\" to a high value." <<
Endl;
1571 Log() <<
"The parameter \"VolFrac\" specifies the size of the sampling volume" <<
Endl;
1572 Log() <<
"during foam buildup and should be tuned in order to achieve optimal" <<
Endl;
1573 Log() <<
"performance. A larger box leads to a reduced statistical uncertainty" <<
Endl;
1574 Log() <<
"for small training samples and to smoother sampling. A smaller box on" <<
Endl;
1575 Log() <<
"the other hand increases the sensitivity to statistical fluctuations" <<
Endl;
1576 Log() <<
"in the training samples, but for sufficiently large training samples" <<
Endl;
1577 Log() <<
"it will result in a more precise local estimate of the sampled" <<
Endl;
1578 Log() <<
"density. In general, higher dimensional problems require larger box" <<
Endl;
1579 Log() <<
"sizes, due to the reduced average number of events per box volume. The" <<
Endl;
1580 Log() <<
"default value of 0.0666 was optimised for an example with 5" <<
Endl;
1581 Log() <<
"observables and training samples of the order of 50000 signal and" <<
Endl;
1582 Log() <<
"background events each." <<
Endl;
1584 Log() <<
"Furthermore kernel weighting can be activated, which will lead to an" <<
Endl;
1585 Log() <<
"additional performance improvement. Note that Gauss weighting will" <<
Endl;
1586 Log() <<
"significantly increase the response time of the method. LinNeighbors" <<
Endl;
1587 Log() <<
"weighting performs a linear interpolation with direct neighbor cells" <<
Endl;
1588 Log() <<
"for each dimension and is much faster than Gauss weighting." <<
Endl;
1590 Log() <<
"The classification results were found to be rather insensitive to the" <<
Endl;
1591 Log() <<
"values of the parameters \"nSamples\" and \"nBin\"." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
char * Form(const char *fmt,...)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
virtual void Close(Option_t *option="")
Close a file.
1-D histogram with a float per channel (see TH1 documentation)
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
std::vector< Float_t > & GetTargets()
Double_t GetOriginalWeight() const
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
void SetVal(UInt_t ivar, Float_t val)
set variable ivar to val
std::vector< Float_t > & GetValues()
Float_t GetTarget(UInt_t itgt) const
Implementation of the GiniIndex With Laplace correction as separation criterion.
Implementation of the GiniIndex as separation criterion.
Virtual base class for all MVA methods.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
The PDEFoam method is an extension of the PDERS method, which divides the multi-dimensional phase spa...
const Ranking * CreateRanking()
Compute ranking of input variables from the number of cuts made in each PDEFoam dimension.
void Init(void)
default initialization called by all constructors
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDEFoam can handle classification with multiple classes and regression with one or more regression-ta...
void Train(void)
Train PDE-Foam depending on the set options.
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the PDEFoam classifier.
Double_t CalculateMVAError()
Calculate the error on the Mva value.
void PrintCoefficients(void)
void TrainMultiClassification()
Create one unified foam (see TrainUnifiedClassification()) for each class, where the cells of foam i ...
void TrainMultiTargetRegression(void)
Training one (multi target regression) foam, whose cells contain the average event density.
void ReadWeightsFromXML(void *wghtnode)
read PDEFoam variables from xml weight file
void DeleteFoams()
Deletes all trained foams.
void ReadWeightsFromStream(std::istream &i)
read options and internal parameters
virtual ~MethodPDEFoam(void)
destructor
void DeclareOptions()
Declare MethodPDEFoam options.
PDEFoam * InitFoam(TString, EFoamType, UInt_t cls=0)
Create a new PDEFoam, set the PDEFoam options (nCells, nBin, Xmin, Xmax, etc.) and initialize the PDE...
virtual const std::vector< Float_t > & GetRegressionValues()
Return regression values for both multi- and mono-target regression.
void FillVariableNamesToFoam() const
store the variable names in all foams
void TrainMonoTargetRegression(void)
Training one (mono target regression) foam, whose cells contain the average 0th target.
void TrainUnifiedClassification(void)
Create only one unified foam (fFoam[0]) whose cells contain the average discriminator (N_sig)/(N_sig ...
void ReadFoamsFromFile()
read foams from file
EKernel UIntToKernel(UInt_t iker)
convert UInt_t to EKernel (used for reading weight files)
PDEFoamKernelBase * CreatePDEFoamKernel()
create a pdefoam kernel estimator, depending on the current value of fKernel
void CalcXminXmax()
Determine foam range [fXmin, fXmax] for all dimensions, such that a fraction of 'fFrac' events lie ou...
void MakeClassSpecific(std::ostream &, const TString &) const
write PDEFoam-specific classifier response (NOT IMPLEMENTED YET!)
void GetNCuts(PDEFoamCell *cell, std::vector< UInt_t > &nCuts)
Fill in 'nCuts' the number of cuts made in every foam dimension, starting at the root cell 'cell'.
PDEFoam * ReadClonedFoamFromFile(TFile *, const TString &)
Reads a foam with name 'foamname' from file, and returns a clone of the foam.
MethodPDEFoam(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="PDEFoam")
init PDEFoam objects
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Return Mva-Value.
ETargetSelection UIntToTargetSelection(UInt_t its)
convert UInt_t to ETargetSelection (used for reading weight files)
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
void GetHelpMessage() const
provide help message
void TrainSeparatedClassification(void)
Creation of 2 separated foams: one for signal events, one for background events.
void SetXminXmax(TMVA::PDEFoam *)
Set Xmin, Xmax for every dimension in the given pdefoam object.
virtual void Reset()
reset MethodPDEFoam:
void WriteFoamsToFile() const
Write PDEFoams to file.
void AddWeightsXMLTo(void *parent) const
create XML output of PDEFoam method variables
void ProcessOptions()
process user options
Implementation of the MisClassificationError as separation criterion.
void SetMinType(EMsgType minType)
PDEFoamCell * GetDau1() const
PDEFoamCell * GetDau0() const
This is a concrete implementation of PDEFoam.
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
This is an abstract class, which provides an interface for a PDEFoam density estimator.
This is a concrete implementation of PDEFoam.
This PDEFoam variant stores in every cell the discriminant.
This is a concrete implementation of PDEFoam.
This PDEFoam variant stores in every cell the sum of event weights and the sum of the squared event w...
This class is the abstract kernel interface for PDEFoam.
This PDEFoam kernel estimates a cell value for a given event by weighting all cell values with a gaus...
This PDEFoam kernel estimates a cell value for a given event by weighting with cell values of the nea...
This class is a trivial PDEFoam kernel estimator.
This PDEFoam variant is used to estimate multiple targets by creating an event density foam (PDEFoamE...
This is a concrete implementation of PDEFoam.
This PDEFoam variant stores in every cell the average target fTarget (see the Constructor) as well as...
Implementation of PDEFoam.
void SetMaxDepth(UInt_t maxdepth)
void SetDensity(PDEFoamDensityBase *dens)
void SetXmax(Int_t idim, Double_t wmax)
set upper foam bound in dimension idim
void SetEvPerBin(Int_t EvPerBin)
void SetXmin(Int_t idim, Double_t wmin)
set lower foam bound in dimension idim
void SetnCells(Long_t nCells)
void SetnSampl(Long_t nSampl)
void SetDim(Int_t kDim)
Sets dimension of cubical space.
Ranking for variables in method (implementation)
Implementation of the SdivSqrtSplusB as separation criterion.
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Singleton class for Global types used by TMVA.
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
R__ALWAYS_INLINE Bool_t IsZombie() const
TString & ReplaceAll(const TString &s1, const TString &s2)
void box(Int_t pat, Double_t x1, Double_t y1, Double_t x2, Double_t y2)
std::string GetName(const std::string &scope_name)
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)