#include <unordered_map>

// Excerpt: member-initializer list of the standard constructor
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)

// Excerpt: member-initializer list of the constructor used when instantiating from a weight file
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
// Excerpt: DeclareOptions() -- define the configuration options understood by the BDT method
   DeclareOptionRef(fNTrees, "NTrees", "Number of trees in the forest");
   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50,"MaxDepth","Max depth of the decision tree allowed");
   }else{
      DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed");
   }

   TString tmp="5%"; if (DoRegression()) tmp="0.2%";
   DeclareOptionRef(fMinNodeSizeS=tmp, "MinNodeSize", "Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");

   DeclareOptionRef(fNCuts, "nCuts", "Number of grid points in variable range used in finding optimal cut in node splitting");

   DeclareOptionRef(fBoostType, "BoostType", "Boosting type for the trees in the forest (note: AdaCost is still experimental)");
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaCost"));
   AddPreDefVal(TString("Bagging"));
   AddPreDefVal(TString("AdaBoostR2"));
   if (DoRegression()) {
      fBoostType = "AdaBoostR2";
   }else{
      fBoostType = "AdaBoost";
   }
   DeclareOptionRef(fAdaBoostR2Loss="Quadratic", "AdaBoostR2Loss", "Type of Loss function in AdaBoostR2");
   AddPreDefVal(TString("Linear"));
   AddPreDefVal(TString("Quadratic"));
   AddPreDefVal(TString("Exponential"));

   DeclareOptionRef(fBaggedBoost=kFALSE, "UseBaggedBoost", "Use only a random subsample of all events for growing the trees in each boost iteration.");
   DeclareOptionRef(fShrinkage = 1.0, "Shrinkage", "Learning rate for BoostType=Grad algorithm");
   DeclareOptionRef(fAdaBoostBeta=.5, "AdaBoostBeta", "Learning rate for AdaBoost algorithm");
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees", "Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
   DeclareOptionRef(fUseNvars, "UseNvars", "Size of the subset of variables used with RandomisedTree option");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars", "Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
   DeclareOptionRef(fBaggedSampleFraction=.6, "BaggedSampleFraction", "Relative size of bagged event sample to original size of the data sample (used whenever bagging is used (i.e. UseBaggedBoost, Bagging,)" );

   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
   if (DoRegression()) {
      // ...
   }

   DeclareOptionRef(fNegWeightTreatment="InverseBoostNegWeights", "NegWeightTreatment", "How to treat events with negative weights in the BDT training (particular the boosting) : IgnoreInTraining;  Boost With inverse boostweight; Pair events with negative and positive weights in training sample and *annihilate* them (experimental!)");
   AddPreDefVal(TString("InverseBoostNegWeights"));
   AddPreDefVal(TString("IgnoreNegWeightsInTraining"));
   AddPreDefVal(TString("NoNegWeightsInTraining"));
   AddPreDefVal(TString("PairNegWeightsGlobal"));

   // cost matrix for the AdaCost boosting
   DeclareOptionRef(fCss=1.,    "Css",    "AdaCost: cost of true signal selected signal");
   DeclareOptionRef(fCts_sb=1., "Cts_sb", "AdaCost: cost of true signal selected bkg");
   DeclareOptionRef(fCtb_ss=1., "Ctb_ss", "AdaCost: cost of true bkg selected signal");
   DeclareOptionRef(fCbb=1.,    "Cbb",    "AdaCost: cost of true bkg selected bkg ");

   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");

   DeclareOptionRef(fSepTypeS, "SeparationType", "Separation criterion for node splitting");
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   AddPreDefVal(TString("RegressionVariance"));
   if (DoRegression()) {
      fSepTypeS = "RegressionVariance";
   }else{
      fSepTypeS = "GiniIndex";
   }

   DeclareOptionRef(fRegressionLossFunctionBDTGS = "Huber", "RegressionLossFunctionBDTG", "Loss function for BDTG regression.");
   AddPreDefVal(TString("Huber"));
   AddPreDefVal(TString("AbsoluteDeviation"));
   AddPreDefVal(TString("LeastSquares"));

   DeclareOptionRef(fHuberQuantile = 0.7, "HuberQuantile", "In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");
   DeclareOptionRef(fDoBoostMonitor=kFALSE, "DoBoostMonitor", "Create control plot with ROC integral vs tree number");

   DeclareOptionRef(fUseFisherCuts=kFALSE, "UseFisherCuts", "Use multivariate splits using the Fisher criterion");
   DeclareOptionRef(fMinLinCorrForFisher=.8, "MinLinCorrForFisher", "The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
   DeclareOptionRef(fUseExclusiveVars=kFALSE, "UseExclusiveVars", "Variables already used in fisher criterion are not anymore analysed individually for node splitting");

   DeclareOptionRef(fDoPreselection=kFALSE, "DoPreselection", "apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");

   DeclareOptionRef(fSigToBkgFraction=1, "SigToBkgFraction", "Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost)");

   DeclareOptionRef(fPruneMethodS, "PruneMethod", "Note: for BDTs use small trees (e.g. MaxDepth=3) and NoPruning:  Pruning: Method used for pruning (removal) of statistically insignificant branches ");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));

   DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength");

   DeclareOptionRef(fFValidationEvents=0.5, "PruningValFraction", "Fraction of events to use for optimizing automatic pruning.");

   DeclareOptionRef(fSkipNormalization=kFALSE, "SkipNormalization", "Skip normalization at initialization, to keep expectation value of BDT output according to the fraction of events");

   // deprecated options, still accepted for backward compatibility:
   DeclareOptionRef(fMinNodeEvents=0, "nEventsMin", "deprecated: Use MinNodeSize (in % of training events) instead");
   DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad", "deprecated: Use *UseBaggedBoost* instead:  Use only a random subsample of all events for growing the trees in each iteration.");
   DeclareOptionRef(fBaggedSampleFraction, "GradBaggingFraction", "deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE. ");
   DeclareOptionRef(fUseNTrainEvents, "UseNTrainEvents", "deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
   DeclareOptionRef(fNNodesMax, "NNodesMax", "deprecated: Use MaxDepth instead to limit the tree size" );

// Excerpt: DeclareCompatibilityOptions() -- obsolete options kept so that old configurations still parse
   DeclareOptionRef(fHistoricBool=kTRUE, "UseWeightedTrees",
                    "Use weighted trees or simple average in classification from the forest");
   DeclareOptionRef(fHistoricBool=kFALSE, "PruneBeforeBoost", "Flag to prune the tree before applying boosting algorithm");
   DeclareOptionRef(fHistoricBool=kFALSE, "RenormByClass", "Individually re-normalize each event class to the original size after boosting");
   AddPreDefVal(TString("NegWeightTreatment"),TString("IgnoreNegWeights"));
// Excerpt: ProcessOptions() -- translate the option strings into internal settings
   fSepTypeS.ToLower();
   // ...
   else if (fSepTypeS == "giniindex")          fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")       fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")     fSepType = new SdivSqrtSplusB();
   else if (fSepTypeS == "regressionvariance") fSepType = NULL;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option " << fSepTypeS << " called" << Endl;
   }

   if(!(fHuberQuantile >= 0.0 && fHuberQuantile <= 1.0)){
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, " << fHuberQuantile << ", does not match this criteria" << Endl;
   }

   fRegressionLossFunctionBDTGS.ToLower();
   if (fRegressionLossFunctionBDTGS == "huber") fRegressionLossFunctionBDTG = new HuberLossFunctionBDT(fHuberQuantile);
   // ...
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Regression Loss Function BDT option " << fRegressionLossFunctionBDTGS << " called" << Endl;
   }

   fPruneMethodS.ToLower();
   // ...
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown PruneMethod " << fPruneMethodS << " option called" << Endl;
   }

   // ... (automatic pruning strength determination)
         << "Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;

   if (fMinNodeEvents > 0){
      fMinNodeSize = Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set ** nEventsMin = " << fMinNodeEvents << " ** the min absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin="<<fMinNodeEvents<<"--> MinNodeSize="<<fMinNodeSize<<"%"
            << Endl;
      Log() << kWARNING << "Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended \n"
            << " *MinNodeSize* = " << fMinNodeSizeS << " option !!" << Endl ;
   } else {
      SetMinNodeSize(fMinNodeSizeS);
   }
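   // Worked example (hypothetical numbers): with 100000 training events and the deprecated
   // nEventsMin=500, the conversion above gives MinNodeSize = 500*100/100000 = 0.5 (%).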
   fAdaBoostR2Loss.ToLower();

   if (fBoostType=="Grad") {
      // ...
      if (fNegWeightTreatment=="InverseBoostNegWeights"){
         Log() << kINFO << "the option NegWeightTreatment=InverseBoostNegWeights does"
               << " not exist for BoostType=Grad" << Endl;
         Log() << kINFO << "--> change to new default NegWeightTreatment=Pray" << Endl;
         Log() << kDEBUG << "i.e. simply keep them as if which should work fine for Grad Boost" << Endl;
         fNegWeightTreatment="Pray";
         fNoNegWeightsInTraining=kFALSE;
      }
   } else if (fBoostType=="RealAdaBoost"){
      fBoostType = "AdaBoost";
      // ...
   } else if (fBoostType=="AdaCost"){
      // ...
   }

   if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
   if (fAutomatic && fFValidationEvents > 0.5) {
      Log() << kWARNING << "You have chosen to use more than half of your training sample "
            << "to optimize the automatic pruning algorithm. This is probably wasteful "
            << "and your overall results will be degraded. Are you sure you want this?"
            << Endl;
   }

   if (this->Data()->HasNegativeEventWeights()){
      Log() << kINFO << " You are using a Monte Carlo that has also negative weights. "
            << "That should in principle be fine as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS << " ("<< fMinNodeSize << "%)"
            << ", (or the deprecated equivalent nEventsMin) you can set this via the "
            << "BDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging!!! "
            << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
   }

   if (DoRegression()) {
      if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
         Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;
         // ...
      }
      if (fSepType != NULL){
         Log() << kWARNING << "Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" << Endl;
         // ...
      }
      // ...
      Log() << kWARNING << "Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" << Endl;
      // ...
      Log() << kWARNING << "Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " << Endl;
      Log() << kWARNING << "is not implemented for regression analysis ! " << Endl;
      Log() << kWARNING << "--> I switch to default nCuts = 20 and use standard node splitting" << Endl;
   }

   if (fRandomisedTrees){
      Log() << kINFO << " Randomised trees use no pruning" << Endl;
      // ...
   }

   if (fUseFisherCuts) {
      Log() << kWARNING << "When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" << Endl;
      Log() << " a more elaborate node splitting algorithm) is not implemented. " << Endl;
      // ...
   }

   // ...
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"
            << Endl;
      // ...

   fNegWeightTreatment.ToLower();
   if      (fNegWeightTreatment == "ignorenegweightsintraining") fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "nonegweightsintraining")     fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "inverseboostnegweights")     fInverseBoostNegWeights = kTRUE;
   else if (fNegWeightTreatment == "pairnegweightsglobal")       fPairNegWeightsGlobal   = kTRUE;
   else if (fNegWeightTreatment == "pray")  Log() << kDEBUG << "Yes, good luck with praying " << Endl;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment << " requested" << Endl;
   }

   if (fNegWeightTreatment == "pairnegweightsglobal")
      Log() << kWARNING << " you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " << Endl;
   // translation of the deprecated option NNodesMax into an equivalent MaxDepth
   // ...
      while (tmp < fNNodesMax){
         // ...
      }
      Log() << kWARNING << "You have specified a deprecated option *NNodesMax="<<fNNodesMax
            << "* \n this has been translated to MaxDepth="<<fMaxDepth<<Endl;

   if (fUseNTrainEvents>0){
      fBaggedSampleFraction = (Double_t) fUseNTrainEvents/Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have specified a deprecated option *UseNTrainEvents="<<fUseNTrainEvents
            << "* \n this has been translated to BaggedSampleFraction="<<fBaggedSampleFraction<<"(%)"<<Endl;
   }

   if (fBoostType=="Bagging") fBaggedBoost = kTRUE;
   if (fBaggedGradBoost){
      fBaggedBoost = kTRUE;
      Log() << kWARNING << "You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" << Endl;
   }

// Excerpt: SetMinNodeSize() -- sanity checks on the requested minimal node size
      Log() << kFATAL << "you have demanded a minimal node size of "
            // ...
            << " that somehow does not make sense " << Endl;
      // ...
      Log() << kFATAL << "I had problems reading the option MinNodeEvents, which "
            // ...

// Excerpt: Init() -- common initialisation with defaults for the BDT method
   fBoostType      = "AdaBoost";
   if(DataInfo().GetNClasses()!=0)
      // ...
   fBoostType      = "AdaBoostR2";
   fAdaBoostR2Loss = "Quadratic";
   if(DataInfo().GetNClasses()!=0)
      // ...
   fPruneMethodS   = "NoPruning";
   // ...
   fFValidationEvents = 0.5;
   fRandomisedTrees   = kFALSE;
   // ...
   fUsePoissonNvars   = kTRUE;
   // ...
   SetSignalReferenceCut( 0 );
// Excerpt: Reset() -- forget all training and reset the method
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
   // ...
   fBoostWeights.clear();
   if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=NULL; }
   fVariableImportance.clear();
   fLossFunctionEventInfo.clear();
   // ...
   if (Data()) Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   Log() << kDEBUG << " successfully(?) reset the method " << Endl;
// Excerpt: destructor and InitEventSample() -- build the internal training/validation samples
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];

   if (!HasTrainingTree()) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;

   if (fEventSample.size() > 0) {
      // reset all previously stored/accumulated boost weights in the event sample
      for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
   }
   // ...
   else fDoPreselection = kFALSE;
   // ...
         if (fDoPreselection){
            if (TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {
               // ...
            }
         }

         if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
            // ...
            Log() << kWARNING << " Note, you have events with negative event weight in the sample, but you've chosen to ignore them" << Endl;
            // ...
         }
         else if (event->GetWeight()==0){
            // ...
            Log() << "Events with weight == 0 are going to be simply ignored " << Endl;
            // ...
         }
         else {
            if (event->GetWeight() < 0) {
               fTrainWithNegWeights=kTRUE;
               // ...
               if (fPairNegWeightsGlobal){
                  Log() << kWARNING << "Events with negative event weights are found and "
                        << " will be removed prior to the actual BDT training by global "
                        << " pairing (and subsequent annihilation) with positive weight events"
                        << Endl;
               }
               else {
                  Log() << kWARNING << "Events with negative event weights are USED during "
                        << "the BDT training. This might cause problems with small node sizes "
                        << "or with the boosting. Please remove negative events from training "
                        << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
                        << "observe problems with the boosting"
                        << Endl;
               }
            }
            // ...
            if (ievt % imodulo == 0) fValidationSample.push_back( event );
            else                     fEventSample.push_back( event );
            // ...
            fEventSample.push_back(event);
         }

   Log() << kINFO << "<InitEventSample> Internally I use " << fEventSample.size()
         << " for Training and " << fValidationSample.size()
         << " for Pruning Validation (" << ((Float_t)fValidationSample.size())/((Float_t)fEventSample.size()+fValidationSample.size())*100.0
         << "% of training used for validation)" << Endl;
   if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();

   if (DoRegression()) {
      // ...
   }
   else if (DoMulticlass()) {
      // ...
   }
   else if (!fSkipNormalization) {
      Log() << kDEBUG << "\t<InitEventSample> For classification trees, "<< Endl;
      Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match "<< Endl;
      Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!"<< Endl;
      // ...
         if ((DataInfo().IsSignal(fEventSample[ievt])) ) {
            // ...
         }
      // ...
      Log() << kDEBUG << "\tre-normalise events such that Sig and Bkg have respective sum of weights = "
            << fSigToBkgFraction << Endl;
      Log() << kDEBUG << " \tsig->sig*"<<normSig << "ev. bkg->bkg*"<<normBkg << "ev." << Endl;
      Log() << kINFO << "#events: (unweighted) sig: "<< sumSig << " bkg: " << sumBkg << Endl;
      // ...
         if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);
         // ...
      // ...
      Log() << kINFO << "--> could not determine scaling factors as either there are " << Endl;
      Log() << kINFO << " no signal events (sumSigW="<<sumSigW<<") or no bkg ev. (sumBkgW="<<sumBkgW<<")"<<Endl;
   }

   fTrainSample = &fEventSample;
   // ... (when bagging is requested)
      GetBaggedSubSample(fEventSample);
      fTrainSample = &fSubSample;
// Excerpt: PreProcessNegativeEventWeights() -- pair negative-weight events with positive ones and annihilate them
      if (fEventSample[iev]->GetWeight() < 0) {
         // ...
      }
   // ...
      Log() << kINFO << "no negative event weights found .. no preprocessing necessary" << Endl;
   // ...
   Log() << kINFO << "found a total of " << totalNegWeights << " of negative event weights which I am going to try to pair with positive events to annihilate them" << Endl;
   Log() << kINFO << "found a total of " << totalPosWeights << " of events with positive weights" << Endl;

   for (Int_t i=0; i<2; i++){
      // ...
      std::cout << "<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant="
                // ...
                << " did you use the variables that are linear combinations or highly correlated?"
      // ...
      std::cout << "<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
                // ...
                << " did you use the variables that are linear combinations?"
      // ...
   }

   Log() << kINFO << "Found a total of " << totalNegWeights << " in negative weights out of " << fEventSample.size() << " training events " << Endl;
   // ...
         if (iClassID==fEventSample[iev]->GetClass() && fEventSample[iev]->GetWeight() > 0){
            // ...
         }
   // ...
         fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );
         // ...
         fEventSample[iMin]->SetBoostWeight( 0 );
      // ...
      }
      else Log() << kFATAL << "preprocessing didn't find event to pair with the negative weight ... probably a bug" << Endl;

   Log() << kINFO << "<Negative Event Pairing> took: " << timer.GetElapsedTime()
         // ...
   // ...
      if (fEventSample[iev]->GetWeight() < 0) {
         // ...
      }
      if (fEventSample[iev]->GetWeight() > 0) {
         // ...
      }
      if (fEventSample[iev]->GetClass() == fSignalClass){
         // ...
      }
   // ...
   for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
   // ...
   Log() << kINFO << " after PreProcessing, the Event sample is left with " << fEventSample.size()
         << " events (unweighted), all with positive weights, adding up to " << totalWeights << Endl;
// Excerpt: OptimizeTuningParameters() -- define the grid of BDT parameters to be tuned
   if (fBoostType=="AdaBoost"){
      // ...
   }
   else if (fBoostType=="Grad"){
      // ...
   }
   else if (fBoostType=="Bagging" && fRandomisedTrees){
      // ...
   }

   Log()<<kINFO << " the following BDT parameters will be tuned on the respective *grid*\n"<<Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   // ...
      Log() << kWARNING << it->first << Endl;
      std::ostringstream oss;
      // ...

// Excerpt: SetTuneParameters() -- apply the tuned parameters to the method
   std::map<TString,Double_t>::iterator it;
   // ...
      Log() << kWARNING << it->first << " = " << it->second << Endl;
      if      (it->first == "MaxDepth"       ) SetMaxDepth        ((Int_t)it->second);
      else if (it->first == "MinNodeSize"    ) SetMinNodeSize     (it->second);
      else if (it->first == "NTrees"         ) SetNTrees          ((Int_t)it->second);
      else if (it->first == "NodePurityLimit") SetNodePurityLimit (it->second);
      else if (it->first == "AdaBoostBeta"   ) SetAdaBoostBeta    (it->second);
      else if (it->first == "Shrinkage"      ) SetShrinkage       (it->second);
      else if (it->first == "UseNvars"       ) SetUseNvars        ((Int_t)it->second);
      else if (it->first == "BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
      else Log() << kFATAL << " SetParameter for " << it->first << " not yet implemented " << Endl;
// Excerpt: Train() -- the actual BDT training
   // ...
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"
            << Endl;
      // ...

   if (fInteractive && fInteractive->NotInitialized()){
      std::vector<TString> titles = {"Boost weight", "Error Fraction"};
      fInteractive->Init(titles);
   }
   fIPyMaxIter = fNTrees;
   fExitFromTraining = false;

   if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with BDT; "
                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\""
                             << Endl;

   Log() << kINFO << "Regression Loss Function: "<< fRegressionLossFunctionBDTG->Name() << Endl;

   Log() << kINFO << "Training "<< fNTrees << " Decision Trees ... patience please" << Endl;

   Log() << kDEBUG << "Training with maximal depth = " <<fMaxDepth
         << ", MinNodeEvents=" << fMinNodeEvents
         << ", NTrees="<<fNTrees
         << ", NodePurityLimit="<<fNodePurityLimit
         << ", AdaBoostBeta="<<fAdaBoostBeta
         << Endl;
   // booking of monitoring histograms and the monitoring ntuple
   if (DoRegression()) {
      // ...
      hname="Boost event weights distribution";
      // ...
   }
   // ...
   if(!DoMulticlass()){
      // ...
      h->SetXTitle("boost weight");
      // ...
   }

   if (fDoBoostMonitor){
      TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
      // ...
   }

   h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees);
   h->SetXTitle("#tree");
   h->SetYTitle("boost weight");
   results->Store(h, "BoostWeightsVsTree");

   h = new TH1F("ErrFractHist","error fraction vs tree number",fNTrees,0,fNTrees);
   h->SetXTitle("#tree");
   h->SetYTitle("error fraction");
   // ...

   fMonitorNtuple= new TTree("MonitorNtuple","BDT variables");
   fMonitorNtuple->Branch("iTree",&fITree,"iTree/I");
   fMonitorNtuple->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorNtuple->Branch("errorFraction",&fErrorFraction,"errorFraction/D");
   if(fBoostType=="Grad"){
      InitGradBoost(fEventSample);
   }
   // ... (loop over the trees)
      if (fExitFromTraining) break;
      fIPyCurrentIter = itree;
      // ...
      if (DoMulticlass()) {
         if (fBoostType!="Grad"){
            Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
                  << "Please change boost option accordingly (BoostType=Grad)." << Endl;
         }
         // ...
            fForest.push_back( new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
                                                 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
                                                 /* ... */ ));
            fForest.back()->SetNVars(GetNvar());
            if (fUseFisherCuts) {
               fForest.back()->SetUseFisherCuts();
               fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
               fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
            }
            // ...
            Double_t bw = this->Boost(*fTrainSample, fForest.back(),i);
            // ...
               fBoostWeights.push_back(bw);
            // ...
               fBoostWeights.push_back(0);
               Log() << kWARNING << "stopped boosting at itree="<<itree << Endl;
            // ...
      }
      else {
         // ... (single tree per boosting step)
                                          fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
         // ...
         fForest.push_back(dt);
         fForest.back()->SetNVars(GetNvar());
         if (fUseFisherCuts) {
            fForest.back()->SetUseFisherCuts();
            fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
            fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
         }
         // ...
         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad") {
            // ... (pruning on the validation sample)
            fForest.back()->SetPruneMethod(fPruneMethod);
            fForest.back()->SetPruneStrength(fPruneStrength);
            // ...
         }
         // ...
         Double_t bw = this->Boost(*fTrainSample, fForest.back());
         // ...
            fBoostWeights.push_back(bw);
         // ...
            fBoostWeights.push_back(0);
            Log() << kWARNING << "stopped boosting at itree="<<itree << Endl;
         // ...
         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad"){
            fForest.back()->CleanTree();
         }
      }
      // ...
      fInteractive->AddPoint(itree, fBoostWeight, fErrorFraction);
      fMonitorNtuple->Fill();
      if (fDoBoostMonitor){
         if (! DoRegression() ){
            // ...
               ) BoostMonitor(itree);
         }
      }
   // end of the tree loop

   Log() << kDEBUG << "\t<Train> elapsed time: " << timer.GetElapsedTime()
         // ...
   Log() << kDEBUG << "\t<Train> average number of nodes (w/o pruning) : "
         // ...
   Log() << kDEBUG << "\t<Train> average number of nodes before/after pruning : "
         // ...

   Log() << kDEBUG << "Now I delete the private data sample"<< Endl;
   for (UInt_t i=0; i<fEventSample.size(); i++)      delete fEventSample[i];
   for (UInt_t i=0; i<fValidationSample.size(); i++) delete fValidationSample[i];
   fEventSample.clear();
   fValidationSample.clear();

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
   return 2.0/(1.0+exp(-2.0*sum))-1;
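   // Note on the return value above: 2/(1+exp(-2*sum)) - 1 is identical to tanh(sum),
   // i.e. the raw sum of leaf responses is mapped onto (-1,1) for the classifier output.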
// Excerpt: UpdateTargets() -- recompute the gradient-boost residuals after each tree
   if (DoMulticlass()) {
      // ...
      std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;
      // ...
            fResiduals[e].at(cls) += fForest.back()->CheckEvent(e, kFALSE);
      // ...
   } else {
      std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;
      // ...
   }

// Excerpt: UpdateTargetsRegression()
      for (Int_t i = start; i < end; ++i) {
         // ...
      }
   // ...
   fRegressionLossFunctionBDTG->SetTargets(eventSample, fLossFunctionEventInfo);

// Excerpt: GradBoost() -- calculate the desired response value for each leaf region
   std::unordered_map<TMVA::DecisionTreeNode*, LeafInfo> leaves;
   // ...
      v.sumWeightTarget += target * weight;
      // ...
   const Double_t K = DataInfo().GetNClasses();
   // ...
      iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2);
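   // For reference (Friedman's multi-class gradient boosting, a sketch rather than a quote
   // from this file): the Newton-step leaf value is
   //    gamma = (K-1)/K * sum_i(r_i) / sum_i(|r_i|*(1-|r_i|))
   // for K classes and per-event residuals r_i; sumWeightTarget and sum2 above appear to
   // hold the (weighted) numerator and denominator of that expression.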
   // update the targets/residuals for the next boosting step
   DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);

// Excerpt: GradBoostRegression() -- fit the leaf responses via the loss function
   std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > > leaves;
   // ...
      (leaves[node]).push_back(fLossFunctionEventInfo[*e]);
   // ...
      (iLeave->first)->SetResponse(fShrinkage*fit);
   // ...
   UpdateTargetsRegression(*fTrainSample);

// Excerpt: InitGradBoost() -- initialise the targets/residuals before the first tree
   if (DoRegression()) {
      // ...
      fRegressionLossFunctionBDTG->Init(fLossFunctionEventInfo, fBoostWeights);
      UpdateTargetsRegression(*fTrainSample, kTRUE);
   }
   else if(DoMulticlass()){
      // ...
         fResiduals[*e].push_back(0);
      // ...
   }
   else {
      // ...
         Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5;
         // ...
         fResiduals[*e].push_back(0);
      // ...
   }
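   // Note (a sketch of the standard reasoning, not quoted from this file): for two-class
   // gradient boosting the initial per-event residual (IsSignal ? 1 : 0) - 0.5 is the
   // gradient of the binomial log-likelihood evaluated at a starting model response of
   // zero, where every event has p = 0.5.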
// Excerpt: TestTreeQuality() -- misclassified fraction on the validation sample
         nfalse += fValidationSample[ievt]->GetWeight();

// Excerpt: Boost() -- dispatch to the chosen boosting algorithm
   // ...
   else if (fBoostType=="Bagging")   returnVal = this->Bagging ( );
   // ...
   else if (fBoostType=="Grad"){
      // ...
      else if(DoMulticlass())
         // ...
   }
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
   }

   // ...
      GetBaggedSubSample(fEventSample);
// Excerpt: BoostMonitor() -- fill the ROC-integral-vs-tree control plots from the test sample
   TH1F *tmpS = new TH1F( "tmpS", "", 100 , -1., 1.00001 );
   TH1F *tmpB = new TH1F( "tmpB", "", 100 , -1., 1.00001 );
   // ...
      const Event* event = GetTestingEvent(iev);
      // ...
      tmp->Fill(PrivateGetMvaValue(event),event->GetWeight());
   // ...
   std::vector<TH1F*> hS;
   std::vector<TH1F*> hB;
   // ...
      results->Store(hS.back(),hS.back()->GetTitle());
   // ...
      if (fEventSample[iev]->GetBoostWeight() > max) max = 1.01*fEventSample[iev]->GetBoostWeight();
   // ...
   std::vector<TH1F*> *h;
   // ...
         (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar),fEventSample[iev]->GetWeight());
// Excerpt: AdaBoost() -- the standard adaptive boosting algorithm
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);  // sum of weights per class
   // ...
      UInt_t iclass=(*e)->GetClass();
      // ...
      if ( DoRegression() ) {
         // ...
      } else {
         // ...
         if (DataInfo().IsSignal(*e)) trueType = 1;
         // ...
      }

   if ( DoRegression() ) {
      // ...
      if      (fAdaBoostR2Loss=="linear"){
         // ...
      }
      else if (fAdaBoostR2Loss=="quadratic"){
         // ...
      }
      else if (fAdaBoostR2Loss=="exponential"){
         // ...
      }
      else {
         Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
               << " namely " << fAdaBoostR2Loss << "\n"
               << "and this is not implemented... a typo in the options ??" << Endl;
      }
   }

   if (err >= 0.5 && fUseYesNoLeaf) {
      if (dt->GetNNodes() == 1){
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               // ...
               << "please check why this happens, maybe too many events per node requested ?"
               << Endl;
      } else {
         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. ("<< err
               << ") That should not happen, please check your code (i.e... the BDT code), I "
               << " stop boosting here" << Endl;
         // ...
      }
   }
   else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
      // ...
   }

   // re-weight the events according to whether the tree classified them correctly
      if (fUseYesNoLeaf||DoRegression()){
         if ((!( (dt->CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {
            if ( (*e)->GetWeight() > 0 ){
               (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
               // ...
            } else {
               if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
               else                          (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
            }
         }
      } else {
         // Real-AdaBoost: boost according to the leaf purity
         // ...
         if (DataInfo().IsSignal(*e)) trueType = 1;
         // ...
         if ( (*e)->GetWeight() > 0 ){
            (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
            // ...
         } else {
            if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
            else                          (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         }
      }
      newSumw[(*e)->GetClass()] += (*e)->GetWeight();

   // renormalise the weights
   // ...
      if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
      // ...

   // ...
   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);
   // ...
   fErrorFraction = err;
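   // For reference on the re-weighting above (textbook AdaBoost, a sketch rather than a
   // quote from this file): with misclassification rate err of the current tree, the
   // misclassified events are up-weighted by a factor of the form
   //    boostfactor = ((1-err)/err)^AdaBoostBeta
   // and all weights are then renormalised (globalNormWeight) so that the sample sum of
   // weights is unchanged.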
// Excerpt: AdaCost() -- cost-sensitive variant of AdaBoost using the Css/Cts_sb/Ctb_ss/Cbb cost matrix
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);  // sum of weights per class
   // ...
      UInt_t iclass=(*e)->GetClass();
      // ...
      if ( DoRegression() ) {
         Log() << kFATAL << " AdaCost not implemented for regression"<<Endl;
      }
      // ...
      else Log() << kERROR << "something went wrong in AdaCost" << Endl;
   // ...
   if ( DoRegression() ) {
      Log() << kFATAL << " AdaCost not implemented for regression"<<Endl;
   }
   // ...
      else Log() << kERROR << "something went wrong in AdaCost" << Endl;
   // ...
      if (DoRegression())Log() << kFATAL << " AdaCost not implemented for regression"<<Endl;
      if ( (*e)->GetWeight() > 0 ){
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         // ...
         if (DoRegression())Log() << kFATAL << " AdaCost not implemented for regression"<<Endl;
      } else {
         if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
         // ...
      }
   // renormalise the weights
   // ...
      if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
      // ...
   // ...
   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);
   // ...
   fErrorFraction = err;
// Excerpt: GetBaggedSubSample() -- fill fSubSample with a resampled (bagged) copy of the event sample
   if (!fSubSample.empty()) fSubSample.clear();
   // ...
      n = trandom->PoissonD(fBaggedSampleFraction);
      for (Int_t i=0;i<n;i++) fSubSample.push_back(*e);
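   // Note on the resampling above: drawing the per-event multiplicity n from
   // PoissonD(fBaggedSampleFraction) approximates bootstrap resampling; each event enters
   // the bagged subsample on average fBaggedSampleFraction times (0, 1 or more copies),
   // so the subsample holds roughly fBaggedSampleFraction times the original entries.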
// Excerpt: AdaBoostR2() -- adaptation of AdaBoost to regression problems (H. Drucker 1997)
   if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;
   // ...
   if      (fAdaBoostR2Loss=="linear"){
      // ...
   }
   else if (fAdaBoostR2Loss=="quadratic"){
      // ...
   }
   else if (fAdaBoostR2Loss=="exponential"){
      // ...
   }
   else {
      Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
            << " namely " << fAdaBoostR2Loss << "\n"
            << "and this is not implemented... a typo in the options ??" << Endl;
   }
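   // For reference (AdaBoost.R2, Drucker 1997; a sketch rather than a quote from this file):
   // with per-event deviation dev = |y - F(x)| and maxDev the largest deviation in the sample,
   //    linear:      L = dev/maxDev
   //    quadratic:   L = (dev/maxDev)^2
   //    exponential: L = 1 - exp(-dev/maxDev)
   // and err is the loss weighted over the (normalised) event weights.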
   if (err >= 0.5) {
      if (dt->GetNNodes() == 1){
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               // ...
               << "please check why this happens, maybe too many events per node requested ?"
               << Endl;
      } else {
         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. ("<< err
               << ") That should not happen, but is possible for regression trees, and"
               << " should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
               << " stop boosting " << Endl;
         // ...
      }
   }
   else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
      // ...
   }

   // re-weight the events according to their loss
      if ( (*e)->GetWeight() > 0 ){
         // ... (diagnostic printout)
         Log() << kINFO << "Weight=      " << (*e)->GetWeight() << Endl;
         Log() << kINFO << "BoostWeight= " << (*e)->GetBoostWeight() << Endl;
         Log() << kINFO << "boostweight="<<boostWeight << "  err= " <<err << Endl;
         Log() << kINFO << "maxDev     = " << maxDev << Endl;
         Log() << kINFO << "target     = " << (*e)->GetTarget(0) << Endl;
         Log() << kINFO << "estimate   = " << dt->CheckEvent(*e,kFALSE) << Endl;
         // ...
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
      }

   // renormalise the weights
   // ...
      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
   // ...
   results->GetHist("BoostWeightsVsTree")->SetBinContent(fForest.size(),1./boostWeight);
   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);
   // ...
   fErrorFraction = err;
// Excerpt: AddWeightsXMLTo() -- write the forest and boost weights to XML
   if (fDoPreselection){
      // ... (store the preselection cuts)
   }
   // ...
   for (UInt_t i=0; i< fForest.size(); i++) {
      void* trxml = fForest[i]->AddXMLTo(wght);
      // ...
   }

// Excerpt: ReadWeightsFromXML() -- clear any existing forest, then prepare the preselection-cut containers
   for (i=0; i<fForest.size(); i++) delete fForest[i];
   // ...
   fBoostWeights.clear();
   // ...
   fIsLowBkgCut.resize(GetNvar());
   fLowBkgCut.resize(GetNvar());
   fIsLowSigCut.resize(GetNvar());
   fLowSigCut.resize(GetNvar());
   fIsHighBkgCut.resize(GetNvar());
   fHighBkgCut.resize(GetNvar());
   fIsHighSigCut.resize(GetNvar());
   fHighSigCut.resize(GetNvar());
   if(gTools().HasAttr(parent, "TreeType")) {
      // ...
   }
   // ...
      fForest.back()->SetTreeID(i++);
   // ...

// Excerpt: ReadWeightsFromStream() -- read legacy text weight files
   Int_t analysisType(0);
   // ...
   istr >> dummy >> fNTrees;
   Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;

   for (UInt_t i=0;i<fForest.size();i++) delete fForest[i];
   // ...
   fBoostWeights.clear();
   // ...
   for (int i=0;i<fNTrees;i++) {
      // ...
         fForest.back()->Print( std::cout );
         Log() << kFATAL << "Error while reading weight file; mismatch iTree="
               << iTree << " i=" << i
               << " dummy " << dummy
               // ...
      // ...
      fForest.back()->SetTreeID(i);
      fForest.back()->Read(istr, GetTrainingTMVAVersionCode());
      // ...
   }
// Excerpt: GetMvaValue() / PrivateGetMvaValue() -- (weighted) majority vote of the forest
   return this->GetMvaValue( err, errUpper, 0 );
   // ...
   if (fDoPreselection) {
      // ...
   }
   // ...
   if (fBoostType=="Grad") return GetGradBoostMVA(ev, nTrees);
   // ...
   return ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 ;

// Excerpt: GetMulticlassValues() -- multiclass MVA response of the forest
   if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();
   // ...
   std::vector<Double_t> temp(nClasses);
   // ...
   std::vector<TMVA::DecisionTree *> forest = fForest;
   // ...
   std::transform(temp.begin(), temp.end(), temp.begin(), [](Double_t d){return exp(d);});
   // ...
      (*fMulticlassReturnVal).push_back(p_cls);
   // ...
   return *fMulticlassReturnVal;
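   // Note on the multiclass response above: after the exp() transform, 1/(1+norm) with
   // norm = sum over j!=i of temp[j]/temp[i] equals exp(t_i) / sum_j exp(t_j),
   // i.e. the usual softmax probability for class i.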
// Excerpt: GetRegressionValues()
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();
   // ...
   const Event * ev = GetEvent();
   // ...
   if (fBoostType=="AdaBoostR2") {
      // weighted median of the per-tree responses (AdaBoost.R2 prescription)
      vector< Double_t > response(fForest.size());
      vector< Double_t > weight(fForest.size());
      // ...
      std::vector< std::vector<Double_t> > vtemp;
      vtemp.push_back( response );
      vtemp.push_back( weight );
      // ...
   }
   else if(fBoostType=="Grad"){
      // ...
      evT->SetTarget(0, myMVA+fBoostWeights[0] );
   }
   else {
      // ...
      evT->SetTarget(0, ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 );
   }
   // ...
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->push_back( evT2->GetTarget(0) );
   // ...
   return *fRegressionReturnVal;
// Excerpt: WriteMonitoringHistosToFile()
   Log() << kDEBUG << "\tWrite monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
   // ...
   fMonitorNtuple->Write();

// Excerpt: GetVariableImportance() -- relative importance of the input variables
   fVariableImportance.resize(GetNvar());
   // ...
      fVariableImportance[ivar]=0;
   // ...
      sum += fVariableImportance[ivar];
   // ...
   return fVariableImportance;
   // ...
   else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;

// Excerpt: CreateRanking()
   fRanking = new Ranking( GetName(), "Variable Importance" );
// Excerpt: GetHelpMessage() -- short description of the method printed to the user
   Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
   Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
   Log() << "vote of the individual trees. Consecutive decision trees are " << Endl;
   Log() << "trained using the original training data set with re-weighted " << Endl;
   Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following tree." << Endl;
   Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
   Log() << "using a single discriminant variable at a time. A test event " << Endl;
   Log() << "ending up after the sequence of left-right splits in a final " << Endl;
   Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
   Log() << "depending on the majority type of training events in that node." << Endl;
   Log() << "By the nature of the binary splits performed on the individual" << Endl;
   Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
   Log() << "between variables (they need to approximate the linear split in" << Endl;
   Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
   Log() << "variables individually). Hence decorrelation could be useful " << Endl;
   Log() << "to optimise the BDT performance." << Endl;
   Log() << "The two most important parameters in the configuration are the " << Endl;
   Log() << "minimal number of events requested by a leaf node as percentage of the " << Endl;
   Log() << " number of training events (option \"MinNodeSize\" replacing the actual number " << Endl;
   Log() << " of events \"nEventsMin\" as given in earlier versions)." << Endl;
   Log() << "If this number is too large, detailed features " << Endl;
   Log() << "in the parameter space are hard to be modelled. If it is too small, " << Endl;
   Log() << "the risk to overtrain rises and boosting seems to be less effective." << Endl;
   Log() << " typical values from our current experience for best performance " << Endl;
   Log() << " are between 0.5(%) and 10(%) " << Endl;
   Log() << "The default minimal number is currently set to " << Endl;
   Log() << " max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
   Log() << "and can be changed by the user." << Endl;
   Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
   Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
   Log() << "that is used when determining after the training which splits " << Endl;
   Log() << "are considered statistically insignificant and are removed. The" << Endl;
   Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
   Log() << "the comparison between efficiencies obtained on the training and" << Endl;
   Log() << "the independent test sample. They should be equal within statistical" << Endl;
   Log() << "errors, in order to minimize statistical fluctuations in different samples." << Endl;
// Excerpt: MakeClassSpecific() -- write the standalone C++ response class
   fout << "   std::vector<"<<nodeName<<"*> fForest;       // i.e. root nodes of decision trees" << std::endl;
   fout << "   std::vector<double>  fBoostWeights;  // the weights applied in the individual boosts" << std::endl;
   fout << "};" << std::endl << std::endl;

   if (DoMulticlass()) {
      fout << "std::vector<double> ReadBDTG::GetMulticlassValues__( const std::vector<double>& inputValues ) const" << std::endl;
      fout << "{" << std::endl;
      fout << "   uint nClasses = " << DataInfo().GetNClasses() << ";" << std::endl;
      fout << "   std::vector<double> fMulticlassReturnVal;" << std::endl;
      fout << "   fMulticlassReturnVal.reserve(nClasses);" << std::endl;
      fout << "   std::vector<double> temp(nClasses);" << std::endl;
      fout << "   auto forestSize = fForest.size();" << std::endl;
      fout << "   // trees 0, nClasses, 2*nClasses, ... belong to class 0" << std::endl;
      fout << "   // trees 1, nClasses+1, 2*nClasses+1, ... belong to class 1 and so forth" << std::endl;
      fout << "   uint classOfTree = 0;" << std::endl;
      fout << "   for (uint itree = 0; itree < forestSize; ++itree) {" << std::endl;
      fout << "      BDTGNode *current = fForest[itree];" << std::endl;
      fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
      fout << "         if (current->GoesRight(inputValues)) current=(BDTGNode*)current->GetRight();" << std::endl;
      fout << "         else current=(BDTGNode*)current->GetLeft();" << std::endl;
      fout << "      }" << std::endl;
      fout << "      temp[classOfTree] += current->GetResponse();" << std::endl;
      fout << "      if (++classOfTree == nClasses) classOfTree = 0; // cheap modulo" << std::endl;
      fout << "   }" << std::endl;
      fout << "   // we want to calculate sum of exp(temp[j] - temp[i]) for all i,j (i!=j)" << std::endl;
      fout << "   // first calculate exp(), then replace minus with division." << std::endl;
      fout << "   std::transform(temp.begin(), temp.end(), temp.begin(), [](double d){return exp(d);});" << std::endl;
      fout << "   for(uint iClass=0; iClass<nClasses; iClass++){" << std::endl;
      fout << "      double norm = 0.0;" << std::endl;
      fout << "      for(uint j=0;j<nClasses;j++){" << std::endl;
      fout << "         if(iClass!=j)" << std::endl;
      fout << "            norm += temp[j] / temp[iClass];" << std::endl;
      fout << "      }" << std::endl;
      fout << "      fMulticlassReturnVal.push_back(1.0/(1.0+norm));" << std::endl;
      fout << "   }" << std::endl;
      fout << "   return fMulticlassReturnVal;" << std::endl;
      fout << "}" << std::endl;
   }

   fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double myMVA = 0;" << std::endl;
   if (fDoPreselection){
      for (UInt_t ivar = 0; ivar < GetNvar(); ivar++){
         if (fIsLowBkgCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] < " << fLowBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
         }
         if (fIsLowSigCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] < "<< fLowSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;
         }
         if (fIsHighBkgCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] > "<<fHighBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
         }
         if (fIsHighSigCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] > "<<fHighSigCut[ivar]<<") return  1;  // is signal preselection cut" << std::endl;
         }
      }
   }

   if (fBoostType!="Grad"){
      fout << "   double norm  = 0;" << std::endl;
   }
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
   fout << "      "<<nodeName<<" *current = fForest[itree];" << std::endl;
   fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
   fout << "         if (current->GoesRight(inputValues)) current=("<<nodeName<<"*)current->GetRight();" << std::endl;
   fout << "         else current=("<<nodeName<<"*)current->GetLeft();" << std::endl;
   fout << "      }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "      myMVA += current->GetResponse();" << std::endl;
   }else{
      if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] *  current->GetNodeType();" << std::endl;
      else               fout << "      myMVA += fBoostWeights[itree] *  current->GetPurity();" << std::endl;
      fout << "      norm  += fBoostWeights[itree];" << std::endl;
   }
   fout << "   }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
   }
   else fout << "   return myMVA /= norm;" << std::endl;
   fout << "}" << std::endl << std::endl;

   fout << "void " << className << "::Initialize()" << std::endl;
   fout << "{" << std::endl;
   fout << "  double inf = std::numeric_limits<double>::infinity();" << std::endl;
   fout << "  double nan = std::numeric_limits<double>::quiet_NaN();" << std::endl;
   // ... (one block per tree)
      fout << "  // itree = " << itree << std::endl;
      fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << std::endl;
      fout << "  fForest.push_back( " << std::endl;
      // ...
      fout << "   );" << std::endl;
   // ...
   fout << "   return;" << std::endl;
   fout << "};" << std::endl;

   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
   fout << "      delete fForest[itree]; " << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;
// Excerpt: MakeClassSpecificHeader() -- write the node class used by the standalone response class
   fout << "#include <algorithm>" << std::endl;
   fout << "#include <limits>" << std::endl;
   // ...
   fout << "public:" << std::endl;
   fout << "   // constructor of an essentially \"empty\" node floating in space" << std::endl;
   // ...
   if (fUseFisherCuts){
      fout << "                          int nFisherCoeff," << std::endl;
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "                          double fisherCoeff"<<i<<"," << std::endl;
      }
   }
   fout << "                          int selector, double cutValue, bool cutType, " << std::endl;
   fout << "                          int nodeType, double purity, double response ) :" << std::endl;
   fout << "   fLeft         ( left         )," << std::endl;
   fout << "   fRight        ( right        )," << std::endl;
   if (fUseFisherCuts) fout << "   fNFisherCoeff ( nFisherCoeff )," << std::endl;
   fout << "   fSelector     ( selector     )," << std::endl;
   fout << "   fCutValue     ( cutValue     )," << std::endl;
   fout << "   fCutType      ( cutType      )," << std::endl;
   fout << "   fNodeType     ( nodeType     )," << std::endl;
   fout << "   fPurity       ( purity       )," << std::endl;
   fout << "   fResponse     ( response ){" << std::endl;
   if (fUseFisherCuts){
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "      fFisherCoeff.push_back(fisherCoeff"<<i<<");" << std::endl;
      }
   }
   fout << "   }" << std::endl << std::endl;
   fout << "   virtual ~"<<nodeName<<"();" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetRight( void )  {return fRight; };" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the left " << std::endl;
   fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetLeft( void ) { return fLeft; };   " << std::endl << std::endl;
   fout << "   // return  S/(S+B) (purity) at this node (from  training)" << std::endl << std::endl;
   fout << "   double GetPurity( void ) const { return fPurity; } " << std::endl;
   fout << "   // return the node type" << std::endl;
   fout << "   int    GetNodeType( void ) const { return fNodeType; }" << std::endl;
   fout << "   double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
   fout << "private:" << std::endl << std::endl;
   fout << "   "<<nodeName<<"* fLeft;     // pointer to the left daughter node" << std::endl;
   fout << "   "<<nodeName<<"* fRight;    // pointer to the right daughter node" << std::endl;
   if (fUseFisherCuts){
      fout << "   int                     fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
      fout << "   std::vector<double>     fFisherCoeff;  // the fisher coeff (offset at the last element)" << std::endl;
   }
   fout << "   int                     fSelector;     // index of variable used in node selection (decision tree)   " << std::endl;
   fout << "   double                  fCutValue;     // cut value applied on this node to discriminate bkg against sig" << std::endl;
   fout << "   bool                    fCutType;      // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
   fout << "   int                     fNodeType;     // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
   fout << "   double                  fPurity;       // Purity of node from training"<< std::endl;
   fout << "   double                  fResponse;     // Regression response value of node" << std::endl;
   fout << "}; " << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   // ... (destructor of the node class)
   fout << "{" << std::endl;
   fout << "   if (fLeft  != NULL) delete fLeft;" << std::endl;
   fout << "   if (fRight != NULL) delete fRight;" << std::endl;
   fout << "}; " << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   bool result;" << std::endl;
   if (fUseFisherCuts){
      fout << "   if (fNFisherCoeff == 0){" << std::endl;
      fout << "     result = (inputValues[fSelector] >= fCutValue );" << std::endl;
      fout << "   }else{" << std::endl;
      fout << "     double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
      fout << "     for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
      fout << "       fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
      fout << "     result = fisher > fCutValue;" << std::endl;
      fout << "   }" << std::endl;
   }else{
      fout << "   result = (inputValues[fSelector] >= fCutValue );" << std::endl;
   }
   fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
   fout << "   else return !result;" << std::endl;
   fout << "}" << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the left" << std::endl;
   fout << "   if (!this->GoesRight(inputValues)) return true;" << std::endl;
   fout << "   else return false;" << std::endl;
   fout << "}" << std::endl;
   // ...
   fout << "#endif" << std::endl;
// Excerpt: MakeClassInstantiateNode() -- recursively write the node-instantiation calls for the generated class
   if (n == NULL) {
      Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" <<Endl;
      return ;
   }
   fout << "NN("<<std::endl;
   if (n->GetLeft() != NULL){
      // ... (recurse into the left daughter)
   }
   // ...
   fout << ", " <<std::endl;
   if (n->GetRight() != NULL){
      // ... (recurse into the right daughter)
   }
   // ...
   fout << ", " << std::endl
        << std::setprecision(6);
   if (fUseFisherCuts){
      fout << n->GetNFisherCoeff() << ", ";
      for (UInt_t i=0; i< GetNVariables()+1; i++) {
         if (n->GetNFisherCoeff() == 0 ){
            // ...
         }else{
            fout << n->GetFisherCoeff(i) << ", ";
         }
      }
   }
   fout << n->GetSelector() << ", "
        << n->GetCutValue() << ", "
        << n->GetCutType() << ", "
        << n->GetNodeType() << ", "
        << n->GetPurity() << ","
        << n->GetResponse() << ") ";
// Excerpt: DeterminePreselectionCuts() -- find cuts that select 100% pure signal or background regions
   fIsLowSigCut.assign(GetNvar(),kFALSE);
   fIsLowBkgCut.assign(GetNvar(),kFALSE);
   fIsHighSigCut.assign(GetNvar(),kFALSE);
   fIsHighBkgCut.assign(GetNvar(),kFALSE);

   fLowSigCut.assign(GetNvar(),0.);
   fLowBkgCut.assign(GetNvar(),0.);
   fHighSigCut.assign(GetNvar(),0.);
   fHighBkgCut.assign(GetNvar(),0.);

   // ...
      if (DataInfo().IsSignal(*it)){
         nTotS += (*it)->GetWeight();
         // ...
      }
      else {
         nTotB += (*it)->GetWeight();
         // ...
      }
   // ...
      for( ; it != it_end; ++it ) {
         if (DataInfo().IsSignal(**it))
            // ...
      }
      // ...
      Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;
      // ...

   Log() << kDEBUG << " \tfound and suggest the following possible pre-selection cuts " << Endl;
   if (fDoPreselection) Log() << kDEBUG << "\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" << Endl;
   else Log() << kDEBUG << "\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample"<<Endl;
   for (UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
      if (fIsLowBkgCut[ivar]){
         Log() << kDEBUG << " \tfound cut: Bkg if var " << ivar << " < " << fLowBkgCut[ivar] << Endl;
      }
      if (fIsLowSigCut[ivar]){
         Log() << kDEBUG << " \tfound cut: Sig if var " << ivar << " < " << fLowSigCut[ivar] << Endl;
      }
      if (fIsHighBkgCut[ivar]){
         Log() << kDEBUG << " \tfound cut: Bkg if var " << ivar << " > " << fHighBkgCut[ivar] << Endl;
      }
      if (fIsHighSigCut[ivar]){
         Log() << kDEBUG << " \tfound cut: Sig if var " << ivar << " > " << fHighSigCut[ivar] << Endl;
      }
   }

// Excerpt: ApplyPreselectionCuts() -- return +1/-1 for events passing a pure signal/background cut
   for (UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
      if (fIsLowBkgCut[ivar]){
         // ...
      }
      if (fIsLowSigCut[ivar]){
         // ...
      }
      if (fIsHighBkgCut[ivar]){
         // ...
      }
      if (fIsHighSigCut[ivar]){
         // ...
      }
   }
#define REGISTER_METHOD(CLASS)
for example
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int)
float Float_t
Float 4 bytes (float)
double Double_t
Double 8 bytes.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
void Print(GNN_Data &d, std::string txt="")
const_iterator begin() const
const_iterator end() const
A TGraph is an object made of two arrays X and Y with npoints each.
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i.
virtual void Set(Int_t n)
Set number of points in the graph. Existing coordinates are preserved. New coordinates above fNpoints a...
1-D histogram with a float per channel (see TH1 documentation)
1-D histogram with an int per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
2-D histogram with a float per channel (see TH1 documentation)
Service class for 2-D histogram classes.
Absolute Deviation BDT Loss Function.
static void SetVarIndex(Int_t iVar)
static Config & Instance()
static function: returns TMVA instance
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
static void SetIsTraining(bool on)
Implementation of a Decision Tree.
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657)
re-create a new tree (decision tree or search tree) from XML
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Implementation of the GiniIndex With Laplace correction as separation criterion.
Implementation of the GiniIndex as separation criterion.
The TMVA::Interval Class.
Least Squares BDT Loss Function.
The TMVA::Interval Class.
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
const std::vector< Float_t > & GetMulticlassValues() override
Get the multiclass MVA response for the BDT classifier.
void Init(void) override
Common initialisation with defaults for the BDT-Method.
void MakeClassSpecificHeader(std::ostream &, const TString &) const override
Specific class header.
void AddWeightsXMLTo(void *parent) const override
Write weights to XML.
static const Int_t fgDebugLevel
debug level determining some printout/control plots etc.
MethodBDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
The standard constructor for the "boosted decision trees".
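In practice this constructor is not called directly: a BDT is booked through the TMVA Factory, whose option string is then parsed by DeclareOptions/ProcessOptions. A minimal, illustrative booking (the option values shown are examples, not recommendations):

   // Illustrative booking of a BDT via the TMVA Factory; option values are examples only.
   #include "TMVA/Factory.h"
   #include "TMVA/DataLoader.h"
   #include "TMVA/Types.h"

   void book_bdt(TMVA::Factory* factory, TMVA::DataLoader* loader)
   {
      factory->BookMethod(loader, TMVA::Types::kBDT, "BDT",
                          "NTrees=800:MaxDepth=3:MinNodeSize=5%:nCuts=20:"
                          "BoostType=AdaBoost:AdaBoostBeta=0.5:"
                          "UseBaggedBoost:BaggedSampleFraction=0.6:"
                          "SeparationType=GiniIndex");
   }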
void BoostMonitor(Int_t iTree)
Fills the ROCIntegral vs. iTree entry from the testSample for the monitoring plots during training.
Double_t AdaBoostR2(std::vector< const TMVA::Event * > &, DecisionTree *dt)
Adaptation of AdaBoost to regression problems (see H. Drucker 1997).
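For orientation, Drucker's AdaBoost.R2 defines a per-event loss that can be linear, quadratic, or exponential. A sketch, writing \(d_i = |F(x_i) - y_i|\) and \(D = \max_i d_i\) (the exact normalisation used in the code is not reproduced here):

\[
l_i^{\mathrm{lin}} = \frac{d_i}{D}, \qquad
l_i^{\mathrm{quad}} = \frac{d_i^2}{D^2}, \qquad
l_i^{\mathrm{exp}} = 1 - e^{-d_i/D},
\]

with the weighted loss \(\bar{l} = \sum_i w_i\, l_i\), \(\beta = \bar{l}/(1-\bar{l})\), and the event-weight update \(w_i \to w_i\, \beta^{\,1-l_i}\) before renormalisation.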
Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=nullptr, Double_t *errUpper=nullptr, UInt_t useNTrees=0)
Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the...
void SetTuneParameters(std::map< TString, Double_t > tuneParameters) override
Set the tuning parameters according to the argument.
std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA") override
Call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
BDT can handle classification with multiple classes and regression with one regression-target.
LossFunctionBDT * fRegressionLossFunctionBDTG
void DeterminePreselectionCuts(const std::vector< const TMVA::Event * > &eventSample)
Find useful preselection cuts that will be applied before the Decision Tree training.
Double_t GradBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
Calculate the desired response value for each region.
Double_t AdaCost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
The AdaCost boosting algorithm takes a simple cost matrix (currently fixed for all events....
void Train(void) override
BDT training.
Double_t Boost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
Apply the boosting algorithm (the algorithm is selected via the "option" given in the constructor).
Double_t TestTreeQuality(DecisionTree *dt)
Test the tree quality in terms of misclassification.
Double_t Bagging()
Call it boot-strapping, re-sampling or whatever you like, in the end it is nothing else but applying ...
void ReadWeightsFromStream(std::istream &istr) override
Read the weights (BDT coefficients).
void MakeClassSpecific(std::ostream &, const TString &) const override
Make ROOT-independent C++ class for classifier response (classifier-specific implementation).
void UpdateTargets(std::vector< const TMVA::Event * > &, UInt_t cls=0)
Calculate residual for all events.
void UpdateTargetsRegression(std::vector< const TMVA::Event * > &, Bool_t first=kFALSE)
Calculate residuals for all events and update targets for the next iteration.
Double_t GradBoostRegression(std::vector< const TMVA::Event * > &, DecisionTree *dt)
Implementation of M_TreeBoost using any loss function as described by Friedman 1999.
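For the simplest case of a least-squares loss, Friedman's TreeBoost procedure referenced above reduces to the following update (a sketch; \(\nu\) denotes the learning rate, i.e. the Shrinkage option):

\[
F_0(x) = \bar{y}, \qquad
r_i^{(m)} = y_i - F_{m-1}(x_i), \qquad
\gamma_{jm} = \operatorname{mean}\{\, r_i^{(m)} : x_i \in R_{jm} \,\}, \qquad
F_m(x) = F_{m-1}(x) + \nu\, \gamma_{jm} \ \text{for}\ x \in R_{jm},
\]

where the regions \(R_{jm}\) are the leaves of the m-th regression tree fitted to the residuals \(r^{(m)}\).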
virtual ~MethodBDT(void)
Destructor.
Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees)
Returns MVA value: -1 for background, 1 for signal.
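The [-1, 1] range quoted here is typically obtained by passing the accumulated tree output \(F(x)\) through a logistic transform; a sketch of the mapping assumed here (the code may use an equivalent form):

\[
\mathrm{MVA}(x) = \frac{2}{1 + e^{-2 F(x)}} - 1,
\]

which approaches -1 for strongly background-like events and +1 for strongly signal-like events.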
Double_t RegBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
A special boosting only for Regression (not implemented).
void InitEventSample()
Initialize the event sample (i.e. reset the boost-weights... etc).
void DeclareCompatibilityOptions() override
Options that are used ONLY for the READER to ensure backward compatibility.
void WriteMonitoringHistosToFile(void) const override
Here we could write some histograms created during the processing to the output file.
void DeclareOptions() override
Define the options (their key words).
Double_t ApplyPreselectionCuts(const Event *ev)
Apply the preselection cuts before even bothering about any Decision Trees in GetMVA.
void SetMinNodeSize(Double_t sizeInPercent)
void PreProcessNegativeEventWeights()
Pre-process events with negative event weights.
void ReadWeightsFromXML(void *parent) override
Reads the BDT from the xml file.
void Reset(void) override
Reset the method, as if it had just been instantiated (forget all training etc.).
void GetHelpMessage() const override
Get help message text.
void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const
Recursively descends a tree and writes the node instance to the output stream.
Double_t AdaBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
The AdaBoost implementation.
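As a reminder of what such an implementation does with the training weights, the standard discrete AdaBoost update can be sketched as follows, with err the weighted misclassification rate of the current tree and \(\beta\) the AdaBoostBeta exponent (the code may use an algebraically equivalent form):

\[
w_i \;\to\; w_i \left(\frac{1-\mathrm{err}}{\mathrm{err}}\right)^{\beta}
\quad\text{for misclassified events},
\]

followed by a renormalisation so that the sum of weights is unchanged; the weight of the tree in the final vote is proportional to \(\ln\frac{1-\mathrm{err}}{\mathrm{err}}\).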
TTree * fMonitorNtuple
monitoring ntuple
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
void InitGradBoost(std::vector< const TMVA::Event * > &)
Initialize targets for first tree.
const std::vector< Float_t > & GetRegressionValues() override
Get the regression value generated by the BDTs.
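On the application side, the classification response (GetMvaValue) and the regression response (GetRegressionValues) are normally obtained through TMVA::Reader rather than by calling the method directly. An illustrative helper, in which the variable names and the weight-file path are placeholders:

   // Illustrative application of a trained BDT with TMVA::Reader
   // (variable names and the weight-file path below are placeholders).
   #include "TMVA/Reader.h"
   #include <vector>

   float EvaluateBdt(float var1, float var2)
   {
      static TMVA::Reader reader("!Color:Silent");
      static bool booked = false;
      static float v1, v2;                       // the Reader keeps pointers to these
      if (!booked) {
         reader.AddVariable("var1", &v1);
         reader.AddVariable("var2", &v2);
         reader.BookMVA("BDT", "dataset/weights/TMVAClassification_BDT.weights.xml");
         booked = true;
      }
      v1 = var1;
      v2 = var2;
      return reader.EvaluateMVA("BDT");          // classification response
      // For regression: const std::vector<float>& t = reader.EvaluateRegression("BDT");
   }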
void GetBaggedSubSample(std::vector< const TMVA::Event * > &)
Fills fEventSample with fBaggedSampleFraction*NEvents random training events.
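A simple way to realise such a sub-sampling is to keep each training event independently with a probability equal to the requested fraction; the sketch below is illustrative and not the exact internal implementation (helper name and seed handling are assumptions):

   // Sketch: draw a bagged subsample containing roughly fraction*N of the training events.
   #include "TMVA/Event.h"
   #include "TRandom3.h"
   #include <vector>

   std::vector<const TMVA::Event*> DrawBaggedSample(const std::vector<const TMVA::Event*>& all,
                                                    Double_t fraction, UInt_t seed = 0)
   {
      TRandom3 rng(seed);
      std::vector<const TMVA::Event*> sub;
      for (const TMVA::Event* ev : all)
         if (rng.Rndm() < fraction) sub.push_back(ev);   // keep each event with probability `fraction`
      return sub;
   }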
const Ranking * CreateRanking() override
Compute ranking of input variables.
SeparationBase * fSepType
the separation used in node splitting
void ProcessOptions() override
The option string is decoded; for available options see "DeclareOptions".
Virtual base class for all MVA methods.
virtual void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Implementation of the MisClassificationError as separation criterion.
std::map< TString, Double_t > optimize()
PDF wrapper for histograms; uses user-defined spline interpolation.
Ranking for variables in method (implementation)
Class that is the base class for a vector of results.
Implementation of the SdivSqrtSplusB as separation criterion.
Timing information for training and evaluation of MVA methods.
Singleton class for Global types used by TMVA.
virtual Int_t Write(const char *name=nullptr, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
virtual Int_t Read(const char *name)
Read contents of object with specified name from the current directory.
Random number generator class based on M.
static TString Format(const char *fmt,...)
Static method which formats a string using a printf-style format descriptor and returns a TString.
A TTree represents a columnar dataset.
TSeq< unsigned int > TSeqU
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Int_t FloorNint(Double_t x)
Returns the nearest integer of TMath::Floor(x).
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Double_t Sqrt(Double_t x)
Returns the square root of x.
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Int_t CeilNint(Double_t x)
Returns the nearest integer of TMath::Ceil(x).
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
static uint64_t sum(uint64_t i)