#include <unordered_map>

   // member defaults set in the initialiser list of the standard constructor
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)

   // the same defaults in the initialiser list of the second constructor
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
   // DeclareOptions(): define the option keywords
   DeclareOptionRef(fNTrees, "NTrees", "Number of trees in the forest");
   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50, "MaxDepth", "Max depth of the decision tree allowed");
   } else {
      DeclareOptionRef(fMaxDepth=3, "MaxDepth", "Max depth of the decision tree allowed");
   }

   TString tmp="5%"; if (DoRegression()) tmp="0.2%";
   DeclareOptionRef(fMinNodeSizeS=tmp, "MinNodeSize", "Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");

   DeclareOptionRef(fNCuts, "nCuts", "Number of grid points in variable range used in finding optimal cut in node splitting");

   DeclareOptionRef(fBoostType, "BoostType", "Boosting type for the trees in the forest (note: AdaCost is still experimental)");
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaCost"));
   AddPreDefVal(TString("Bagging"));
   AddPreDefVal(TString("AdaBoostR2"));

   if (DoRegression()) {
      fBoostType = "AdaBoostR2";
   } else {
      fBoostType = "AdaBoost";
   }
   DeclareOptionRef(fAdaBoostR2Loss="Quadratic", "AdaBoostR2Loss", "Type of Loss function in AdaBoostR2");
   AddPreDefVal(TString("Linear"));
   AddPreDefVal(TString("Quadratic"));
   AddPreDefVal(TString("Exponential"));

   DeclareOptionRef(fBaggedBoost=kFALSE, "UseBaggedBoost", "Use only a random subsample of all events for growing the trees in each boost iteration.");
   DeclareOptionRef(fShrinkage = 1.0, "Shrinkage", "Learning rate for BoostType=Grad algorithm");
   DeclareOptionRef(fAdaBoostBeta=.5, "AdaBoostBeta", "Learning rate for AdaBoost algorithm");
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees", "Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
   DeclareOptionRef(fUseNvars, "UseNvars", "Size of the subset of variables used with RandomisedTree option");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars", "Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
   DeclareOptionRef(fBaggedSampleFraction=.6, "BaggedSampleFraction", "Relative size of bagged event sample to original size of the data sample (used whenever bagging is used (i.e. UseBaggedBoost, Bagging,)" );

   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
   if (DoRegression()) {
      // ...
   }

   DeclareOptionRef(fNegWeightTreatment="InverseBoostNegWeights", "NegWeightTreatment",
                    "How to treat events with negative weights in the BDT training (particular the boosting) : IgnoreInTraining; Boost With inverse boostweight; Pair events with negative and positive weights in training sample and *annihilate* them (experimental!)");
   AddPreDefVal(TString("InverseBoostNegWeights"));
   AddPreDefVal(TString("IgnoreNegWeightsInTraining"));
   AddPreDefVal(TString("NoNegWeightsInTraining"));
   AddPreDefVal(TString("PairNegWeightsGlobal"));

   // cost matrix for the (experimental) AdaCost boosting
   DeclareOptionRef(fCss=1.,    "Css",    "AdaCost: cost of true signal selected signal");
   DeclareOptionRef(fCts_sb=1., "Cts_sb", "AdaCost: cost of true signal selected bkg");
   DeclareOptionRef(fCtb_ss=1., "Ctb_ss", "AdaCost: cost of true bkg selected signal");
   DeclareOptionRef(fCbb=1.,    "Cbb",    "AdaCost: cost of true bkg selected bkg ");

   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");

   DeclareOptionRef(fSepTypeS, "SeparationType", "Separation criterion for node splitting");
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   AddPreDefVal(TString("RegressionVariance"));
   if (DoRegression()) {
      fSepTypeS = "RegressionVariance";
   } else {
      fSepTypeS = "GiniIndex";
   }

   DeclareOptionRef(fRegressionLossFunctionBDTGS = "Huber", "RegressionLossFunctionBDTG", "Loss function for BDTG regression.");
   AddPreDefVal(TString("Huber"));
   AddPreDefVal(TString("AbsoluteDeviation"));
   AddPreDefVal(TString("LeastSquares"));

   DeclareOptionRef(fHuberQuantile = 0.7, "HuberQuantile", "In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");

   DeclareOptionRef(fDoBoostMonitor=kFALSE, "DoBoostMonitor", "Create control plot with ROC integral vs tree number");

   DeclareOptionRef(fUseFisherCuts=kFALSE, "UseFisherCuts", "Use multivariate splits using the Fisher criterion");
   DeclareOptionRef(fMinLinCorrForFisher=.8, "MinLinCorrForFisher", "The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
   DeclareOptionRef(fUseExclusiveVars=kFALSE, "UseExclusiveVars", "Variables already used in fisher criterion are not anymore analysed individually for node splitting");

   DeclareOptionRef(fDoPreselection=kFALSE, "DoPreselection", "apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");

   DeclareOptionRef(fSigToBkgFraction=1, "SigToBkgFraction", "Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost");

   DeclareOptionRef(fPruneMethodS, "PruneMethod", "Note: for BDTs use small trees (e.g. MaxDepth=3) and NoPruning:  Pruning: Method used for pruning (removal) of statistically insignificant branches ");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));

   DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength");

   DeclareOptionRef(fFValidationEvents=0.5, "PruningValFraction", "Fraction of events to use for optimizing automatic pruning.");

   DeclareOptionRef(fSkipNormalization=kFALSE, "SkipNormalization", "Skip normalization at initialization, to keep expectation value of BDT output according to the fraction of events");

   // deprecated options, still accepted and translated to their replacements
   DeclareOptionRef(fMinNodeEvents=0, "nEventsMin", "deprecated: Use MinNodeSize (in % of training events) instead");
   DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad", "deprecated: Use *UseBaggedBoost* instead: Use only a random subsample of all events for growing the trees in each iteration.");
   DeclareOptionRef(fBaggedSampleFraction, "GradBaggingFraction", "deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE. ");
   DeclareOptionRef(fUseNTrainEvents, "UseNTrainEvents", "deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
   DeclareOptionRef(fNNodesMax, "NNodesMax", "deprecated: Use MaxDepth instead to limit the tree size" );

   // historic options, retained for backward compatibility (fHistoricBool)
   DeclareOptionRef(fHistoricBool=kTRUE, "UseWeightedTrees",
                    "Use weighted trees or simple average in classification from the forest");
   DeclareOptionRef(fHistoricBool=kFALSE, "PruneBeforeBoost", "Flag to prune the tree before applying boosting algorithm");
   DeclareOptionRef(fHistoricBool=kFALSE, "RenormByClass", "Individually re-normalize each event class to the original size after boosting");

   AddPreDefVal(TString("NegWeightTreatment"), TString("IgnoreNegWeights"));
   // ProcessOptions(): translate the option strings into the corresponding objects and flags
   else if (fSepTypeS == "giniindex")          fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")       fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")     fSepType = new SdivSqrtSplusB();
   else if (fSepTypeS == "regressionvariance") fSepType = NULL;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option " << fSepTypeS << " called" << Endl;
   }

   if (!(fHuberQuantile >= 0.0 && fHuberQuantile <= 1.0)) {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, "
            << fHuberQuantile << ", does not match this criterion" << Endl;
   }
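// (By its standard definition, the Huber loss is quadratic for residuals in the "core"
//  and linear in the "tails"; HuberQuantile chooses where that transition happens.)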
   fRegressionLossFunctionBDTGS.ToLower();
   if (fRegressionLossFunctionBDTGS == "huber") fRegressionLossFunctionBDTG = new HuberLossFunctionBDT(fHuberQuantile);
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Regression Loss Function BDT option " << fRegressionLossFunctionBDTGS << " called" << Endl;
   }

   fPruneMethodS.ToLower();
   // ...
   Log() << kINFO << GetOptions() << Endl;
   Log() << kFATAL << "<ProcessOptions> unknown PruneMethod " << fPruneMethodS << " option called" << Endl;

         << "Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;

   if (fMinNodeEvents > 0) {
      fMinNodeSize = Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set ** nEventsMin = " << fMinNodeEvents << " ** the min absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin=" << fMinNodeEvents << " --> MinNodeSize=" << fMinNodeSize << "%"
            << Endl;
      Log() << kWARNING << "Note also that explicitly setting *nEventsMin* so far OVERWRITES the recommended \n"
            << " *MinNodeSize* = " << fMinNodeSizeS << " option !!" << Endl;
   }
   else SetMinNodeSize(fMinNodeSizeS);
   fAdaBoostR2Loss.ToLower();

   if (fBoostType=="Grad") {
      if (fNegWeightTreatment=="InverseBoostNegWeights"){
         Log() << kINFO << "the option NegWeightTreatment=InverseBoostNegWeights does"
               << " not exist for BoostType=Grad" << Endl;
         Log() << kINFO << "--> change to new default NegWeightTreatment=Pray" << Endl;
         Log() << kDEBUG << "i.e. simply keep them as is, which should work fine for Grad Boost" << Endl;
         fNegWeightTreatment="Pray";
         fNoNegWeightsInTraining=kFALSE;
      }
   } else if (fBoostType=="RealAdaBoost"){
      fBoostType = "AdaBoost";
   } else if (fBoostType=="AdaCost"){
      // ...
   }

   if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
   if (fAutomatic && fFValidationEvents > 0.5) {
      Log() << kWARNING << "You have chosen to use more than half of your training sample "
            << "to optimize the automatic pruning algorithm. This is probably wasteful "
            << "and your overall results will be degraded. Are you sure you want this?"
            << Endl;
   }

   if (this->Data()->HasNegativeEventWeights()){
      Log() << kINFO << " You are using a Monte Carlo that has also negative weights. "
            << "That should in principle be fine as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS << " (" << fMinNodeSize << "%)"
            << ", (or the deprecated equivalent nEventsMin) you can set this via the "
            << "BDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging!!! "
            << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
   }

   if (DoRegression()) {
      if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
         Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;
      }
      if (fSepType != NULL){
         Log() << kWARNING << "Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" << Endl;
      }
      Log() << kWARNING << "Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" << Endl;
      Log() << kWARNING << "Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " << Endl;
      Log() << kWARNING << "is not implemented for regression analysis ! " << Endl;
      Log() << kWARNING << "--> I switch to default nCuts = 20 and use standard node splitting" << Endl;
   }

   if (fRandomisedTrees){
      Log() << kINFO << " Randomised trees use no pruning" << Endl;
   }

   if (fUseFisherCuts) {
      Log() << kWARNING << "When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" << Endl;
      Log() << " a more elaborate node splitting algorithm) is not implemented. " << Endl;
   }

   Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
         << " I set it to 1 .. just so that the program does not crash" << Endl;

   fNegWeightTreatment.ToLower();
   if      (fNegWeightTreatment == "ignorenegweightsintraining") fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "nonegweightsintraining")     fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "inverseboostnegweights")     fInverseBoostNegWeights = kTRUE;
   else if (fNegWeightTreatment == "pairnegweightsglobal")       fPairNegWeightsGlobal   = kTRUE;
   else if (fNegWeightTreatment == "pray")                       Log() << kDEBUG << "Yes, good luck with praying " << Endl;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment << " requested" << Endl;
   }

   if (fNegWeightTreatment == "pairnegweightsglobal")
      Log() << kWARNING << " you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " << Endl;

   while (tmp < fNNodesMax){
      // ...
   }
   Log() << kWARNING << "You have specified a deprecated option *NNodesMax=" << fNNodesMax
         << "* \n this has been translated to MaxDepth=" << fMaxDepth << Endl;
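// (A binary tree of depth d has at most 2^(d+1)-1 nodes, which is presumably how the
//  deprecated node-count limit NNodesMax is mapped to an equivalent MaxDepth here.)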
   if (fUseNTrainEvents>0){
      fBaggedSampleFraction = (Double_t) fUseNTrainEvents/Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have specified a deprecated option *UseNTrainEvents=" << fUseNTrainEvents
            << "* \n this has been translated to BaggedSampleFraction=" << fBaggedSampleFraction << "(%)" << Endl;
   }

   if (fBoostType=="Bagging") fBaggedBoost = kTRUE;
   if (fBaggedGradBoost){
      fBaggedBoost = kTRUE;
      Log() << kWARNING << "You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" << Endl;
   }

   Log() << kFATAL << "you have demanded a minimal node size of "
         << " that somehow does not make sense " << Endl;

   Log() << kFATAL << "I had problems reading the option MinNodeEvents, which ";

   // Init(): common initialisation with defaults for the BDT method
   fBoostType = "AdaBoost";
   if(DataInfo().GetNClasses()!=0)

   fBoostType = "AdaBoostR2";
   fAdaBoostR2Loss = "Quadratic";
   if(DataInfo().GetNClasses()!=0)

   fPruneMethodS = "NoPruning";
   fFValidationEvents = 0.5;
   fRandomisedTrees = kFALSE;
   fUsePoissonNvars = kTRUE;

   SetSignalReferenceCut( 0 );
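// Illustrative sketch (not part of this file): the option keywords declared above are
// what a user combines in the option string when booking this method through the TMVA
// Factory; the factory/dataloader objects and the concrete values below are assumptions
// for the example only.
//
//    factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT",
//                        "NTrees=400:MaxDepth=3:MinNodeSize=5%:nCuts=20:"
//                        "BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:"
//                        "BaggedSampleFraction=0.6:SeparationType=GiniIndex");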
   // Reset(): forget the trained forest so the method can be trained again
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
   fBoostWeights.clear();
   if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=NULL; }
   fVariableImportance.clear();
   fLossFunctionEventInfo.clear();
   if (Data()) Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   Log() << kDEBUG << " successfully(?) reset the method " << Endl;

   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];

   if (!HasTrainingTree()) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;

   if (fEventSample.size() > 0) {
      for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
   }

   else fDoPreselection = kFALSE;
   // InitEventSample(): build the training and pruning-validation samples
   if (fDoPreselection){
      if (TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {
         // ...
      }
   }

   if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
      Log() << kWARNING << " Note, you have events with negative event weight in the sample, but you've chosen to ignore them" << Endl;
   }
   else if (event->GetWeight()==0){
      Log() << "Events with weight == 0 are going to be simply ignored " << Endl;
   }

   if (event->GetWeight() < 0) {
      fTrainWithNegWeights=kTRUE;
      if (fPairNegWeightsGlobal){
         Log() << kWARNING << "Events with negative event weights are found and "
               << " will be removed prior to the actual BDT training by global "
               << " pairing (and subsequent annihilation) with positive weight events" << Endl;
      }
      else {
         Log() << kWARNING << "Events with negative event weights are USED during "
               << "the BDT training. This might cause problems with small node sizes "
               << "or with the boosting. Please remove negative events from training "
               << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
               << "observe problems with the boosting" << Endl;
      }
   }

   if (ievt % imodulo == 0) fValidationSample.push_back( event );
   else                     fEventSample.push_back( event );

   fEventSample.push_back(event);

   Log() << kINFO << "<InitEventSample> Internally I use " << fEventSample.size()
         << " for Training and " << fValidationSample.size()
         << " for Pruning Validation (" << ((Float_t)fValidationSample.size())/((Float_t)fEventSample.size()+fValidationSample.size())*100.0
         << "% of training used for validation)" << Endl;
   if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();

   if (DoRegression()) {
   }
   else if (DoMulticlass()) {
   }
   else if (!fSkipNormalization) {
      Log() << kDEBUG << "\t<InitEventSample> For classification trees, " << Endl;
      Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match " << Endl;
      Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!" << Endl;

      if ((DataInfo().IsSignal(fEventSample[ievt])) ) {

      Log() << kDEBUG << "\tre-normalise events such that Sig and Bkg have respective sum of weights = "
            << fSigToBkgFraction << Endl;
      Log() << kDEBUG << "  \tsig->sig*" << normSig << "ev. bkg->bkg*" << normBkg << "ev." << Endl;
      Log() << kINFO  << "#events: (unweighted) sig: " << sumSig << " bkg: " << sumBkg << Endl;

      if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);

      Log() << kINFO << "--> could not determine scaling factors as either there are " << Endl;
      Log() << kINFO << "    no signal events (sumSigW=" << sumSigW << ") or no bkg ev. (sumBkgW=" << sumBkgW << ")" << Endl;
   }

   fTrainSample = &fEventSample;
   GetBaggedSubSample(fEventSample);
   fTrainSample = &fSubSample;
   // PreProcessNegativeEventWeights(): pair negative-weight events with positive ones and annihilate them
   if (fEventSample[iev]->GetWeight() < 0) {

   Log() << kINFO << "no negative event weights found .. no preprocessing necessary" << Endl;

   Log() << kINFO << "found a total of " << totalNegWeights << " of negative event weights which I am going to try to pair with positive events to annihilate them" << Endl;
   Log() << kINFO << "found a total of " << totalPosWeights << " of events with positive weights" << Endl;

   for (Int_t i=0; i<2; i++){
      std::cout << "<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant="
                << " did you use the variables that are linear combinations or highly correlated?";
      std::cout << "<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
                << " did you use the variables that are linear combinations?";

   Log() << kINFO << "Found a total of " << totalNegWeights << " in negative weights out of " << fEventSample.size() << " training events " << Endl;

   if (iClassID==fEventSample[iev]->GetClass() && fEventSample[iev]->GetWeight() > 0){

   fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );
   fEventSample[iMin]->SetBoostWeight( 0 );
   }
   else Log() << kFATAL << "preprocessing didn't find event to pair with the negative weight ... probably a bug" << Endl;

   Log() << kINFO << "<Negative Event Pairing> took: " << timer.GetElapsedTime();

   if (fEventSample[iev]->GetWeight() < 0) {
   if (fEventSample[iev]->GetWeight() > 0) {
   if (fEventSample[iev]->GetClass() == fSignalClass){

   for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];

   Log() << kINFO << " after PreProcessing, the Event sample is left with " << fEventSample.size()
         << " events (unweighted), all with positive weights, adding up to " << totalWeights << Endl;
   // OptimizeTuningParameters(): define the grid of tunable parameters for each boost type
   if (fBoostType=="AdaBoost"){
   }
   else if (fBoostType=="Grad"){
   }
   else if (fBoostType=="Bagging" && fRandomisedTrees){
   }

   Log() << kINFO << " the following BDT parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   Log() << kWARNING << it->first << Endl;
   std::ostringstream oss;

   // SetTuneParameters(): apply the tuned values
   std::map<TString,Double_t>::iterator it;
   Log() << kWARNING << it->first << " = " << it->second << Endl;
   if      (it->first == "MaxDepth"       ) SetMaxDepth        ((Int_t)it->second);
   else if (it->first == "MinNodeSize"    ) SetMinNodeSize     (it->second);
   else if (it->first == "NTrees"         ) SetNTrees          ((Int_t)it->second);
   else if (it->first == "NodePurityLimit") SetNodePurityLimit (it->second);
   else if (it->first == "AdaBoostBeta"   ) SetAdaBoostBeta    (it->second);
   else if (it->first == "Shrinkage"      ) SetShrinkage       (it->second);
   else if (it->first == "UseNvars"       ) SetUseNvars        ((Int_t)it->second);
   else if (it->first == "BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
   else Log() << kFATAL << " SetParameter for " << it->first << " not yet implemented " << Endl;
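// Usage sketch (illustrative; assumes a trained MethodBDT* named "bdt"):
//   std::map<TString,Double_t> tuned = bdt->OptimizeTuningParameters("ROCIntegral","FitGA");
//   bdt->SetTuneParameters(tuned);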
   // Train(): grow and boost fNTrees decision trees
   Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
         << " I set it to 1 .. just so that the program does not crash" << Endl;

   if (fInteractive && fInteractive->NotInitialized()){
      std::vector<TString> titles = {"Boost weight", "Error Fraction"};
      fInteractive->Init(titles);
   }
   fIPyMaxIter = fNTrees;
   fExitFromTraining = false;

   if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with BDT; "
                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\"" << Endl;

   Log() << kINFO << "Regression Loss Function: " << fRegressionLossFunctionBDTG->Name() << Endl;

   Log() << kINFO << "Training " << fNTrees << " Decision Trees ... patience please" << Endl;

   Log() << kDEBUG << "Training with maximal depth = " << fMaxDepth
         << ", MinNodeEvents=" << fMinNodeEvents
         << ", NTrees=" << fNTrees
         << ", NodePurityLimit=" << fNodePurityLimit
         << ", AdaBoostBeta=" << fAdaBoostBeta;

   // control histograms and the monitoring ntuple
   if (DoRegression()) {
   }
   hname="Boost event weights distribution";

   if(!DoMulticlass()){
      h->SetXTitle("boost weight");
   }

   if (fDoBoostMonitor){
      TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
   }

   h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees);
   h->SetXTitle("#tree");
   h->SetYTitle("boost weight");
   results->Store(h, "BoostWeightsVsTree");

   h = new TH1F("ErrFractHist","error fraction vs tree number",fNTrees,0,fNTrees);
   h->SetXTitle("#tree");
   h->SetYTitle("error fraction");

   fMonitorNtuple = new TTree("MonitorNtuple","BDT variables");
   fMonitorNtuple->Branch("iTree",&fITree,"iTree/I");
   fMonitorNtuple->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorNtuple->Branch("errorFraction",&fErrorFraction,"errorFraction/D");
   if(fBoostType=="Grad"){
      InitGradBoost(fEventSample);
   }

   // main training loop over the trees
   if (fExitFromTraining) break;
   fIPyCurrentIter = itree;

   if (fBoostType!="Grad"){
      Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
            << "Please change boost option accordingly (BoostType=Grad)." << Endl;
   }

   // multiclass: one tree per class and boost iteration
   fForest.push_back( new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
                                        fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
   fForest.back()->SetNVars(GetNvar());
   if (fUseFisherCuts) {
      fForest.back()->SetUseFisherCuts();
      fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
      fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
   }

   Double_t bw = this->Boost(*fTrainSample, fForest.back(), i);
   fBoostWeights.push_back(bw);

   fBoostWeights.push_back(0);
   Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;

   // classification / regression: one tree per boost iteration
                          fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
   fForest.push_back(dt);
   fForest.back()->SetNVars(GetNvar());
   if (fUseFisherCuts) {
      fForest.back()->SetUseFisherCuts();
      fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
      fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
   }

   if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad") {
   }

   fForest.back()->SetPruneMethod(fPruneMethod);
   fForest.back()->SetPruneStrength(fPruneStrength);

   Double_t bw = this->Boost(*fTrainSample, fForest.back());
   fBoostWeights.push_back(bw);

   fBoostWeights.push_back(0);
   Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;

   if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad"){
      fForest.back()->CleanTree();
   }

   fInteractive->AddPoint(itree, fBoostWeight, fErrorFraction);
   fMonitorNtuple->Fill();
   if (fDoBoostMonitor){
      if (! DoRegression() ){
            ) BoostMonitor(itree);
      }
   }

   Log() << kDEBUG << "\t<Train> elapsed time: " << timer.GetElapsedTime();
   Log() << kDEBUG << "\t<Train> average number of nodes (w/o pruning) : ";
   Log() << kDEBUG << "\t<Train> average number of nodes before/after pruning : ";

   Log() << kDEBUG << "Now I delete the private data sample" << Endl;
   for (UInt_t i=0; i<fEventSample.size(); i++)      delete fEventSample[i];
   for (UInt_t i=0; i<fValidationSample.size(); i++) delete fValidationSample[i];
   fEventSample.clear();
   fValidationSample.clear();

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
   return 2.0/(1.0+exp(-2.0*sum))-1;
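// Note: 2/(1+exp(-2*sum)) - 1 is identically tanh(sum), so the additive gradient-boost
// score is squashed into (-1, 1) before being returned as the MVA value.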
   if (DoMulticlass()) {
      std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;

   fResiduals[e].at(cls) += fForest.back()->CheckEvent(e, kFALSE);

   std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;

   for (Int_t i = start; i < end; ++i) {

   fRegressionLossFunctionBDTG->SetTargets(eventSample, fLossFunctionEventInfo);

   std::unordered_map<TMVA::DecisionTreeNode*, LeafInfo> leaves;
   v.sumWeightTarget += target * weight;

   const Double_t K = DataInfo().GetNClasses();
   iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2);
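// Leaf response of the multiclass gradient boost: the shrinkage (learning rate) times
// (K-1)/K times the weighted residual sum, normalised by the accumulated sum2 term;
// this follows the usual multiclass gradient-boosting (LogitBoost-style) leaf update.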
   DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);

   std::map<TMVA::DecisionTreeNode*, vector<TMVA::LossFunctionEventInfo> > leaves;
   (leaves[node]).push_back(fLossFunctionEventInfo[*e]);
   (iLeave->first)->SetResponse(fShrinkage*fit);

   UpdateTargetsRegression(*fTrainSample);

   fRegressionLossFunctionBDTG->Init(fLossFunctionEventInfo, fBoostWeights);
   UpdateTargetsRegression(*fTrainSample, kTRUE);

   else if(DoMulticlass()){
      fResiduals[*e].push_back(0);

   Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5;
   fResiduals[*e].push_back(0);

   nfalse += fValidationSample[ievt]->GetWeight();

   else if (fBoostType=="Bagging") returnVal = this->Bagging ( );
   else if (fBoostType=="Grad"){
   else if(DoMulticlass())

   Log() << kINFO << GetOptions() << Endl;
   Log() << kFATAL << "<Boost> unknown boost option " << fBoostType << " called" << Endl;

   GetBaggedSubSample(fEventSample);
   // BoostMonitor(): fill the ROC-integral vs tree-number control plot from the test sample
   TH1F *tmpS = new TH1F( "tmpS", "", 100, -1., 1.00001 );
   TH1F *tmpB = new TH1F( "tmpB", "", 100, -1., 1.00001 );

   const Event* event = GetTestingEvent(iev);
   tmp->Fill(PrivateGetMvaValue(event), event->GetWeight());

   std::vector<TH1F*> hS;
   std::vector<TH1F*> hB;
   results->Store(hS.back(), hS.back()->GetTitle());

   if (fEventSample[iev]->GetBoostWeight() > max) max = 1.01*fEventSample[iev]->GetBoostWeight();

   std::vector<TH1F*> *h;
   (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar), fEventSample[iev]->GetWeight());
   // AdaBoost(): re-weight the training events according to the error of the last tree
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);

   UInt_t iclass=(*e)->GetClass();

   if ( DoRegression() ) {
   if (DataInfo().IsSignal(*e)) trueType = 1;

   if ( DoRegression() ) {
      if (fAdaBoostR2Loss=="linear"){
      }
      else if (fAdaBoostR2Loss=="quadratic"){
      }
      else if (fAdaBoostR2Loss=="exponential"){
      }
      else {
         Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
               << " namely " << fAdaBoostR2Loss << "\n"
               << "and this is not implemented... a typo in the options ??" << Endl;
      }
   }

   if (err >= 0.5 && fUseYesNoLeaf) {
      if (dt->GetNNodes() == 1){
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?";
      }
      else {
         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, please check your code (i.e... the BDT code), I "
               << " stop boosting here" << Endl;
      }
   }
   else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
   }

   if (fUseYesNoLeaf||DoRegression()){
      if ((!( (dt->CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {
         if ( (*e)->GetWeight() > 0 ){
            (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         }
         else {
            if ( fInverseBoostNegWeights ) (*e)->ScaleBoostWeight( 1. / boostfactor);
            else                           (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         }
      }
   }
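// Events that the tree classifies on the wrong side of NodePurityLimit are up-weighted
// by "boostfactor" before the next tree is grown; in standard AdaBoost that factor is
// derived from the error fraction err and the AdaBoostBeta learning rate, typically
// boostfactor = ((1-err)/err)^AdaBoostBeta.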
   // (Real)AdaBoost continued: positive-weight events are boosted, negative-weight
   // events are treated according to the NegWeightTreatment choice
   if (DataInfo().IsSignal(*e)) trueType = 1;

   if ( (*e)->GetWeight() > 0 ){
      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
   }
   else {
      if ( fInverseBoostNegWeights ) (*e)->ScaleBoostWeight( 1. / boostfactor);
      else                           (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
   }

   newSumw[(*e)->GetClass()] += (*e)->GetWeight();

   if (DataInfo().IsSignal(*e)) (*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );

   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(), err);
   fErrorFraction = err;

   // AdaCost(): boosting with an asymmetric cost matrix (Css, Cts_sb, Ctb_ss, Cbb)
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
   UInt_t iclass=(*e)->GetClass();

   if ( DoRegression() ) {
      Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
   }
   // ...
   else Log() << kERROR << "something went wrong in AdaCost" << Endl;

   if ( DoRegression() ) {
      Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
   }
   // ...
   else Log() << kERROR << "something went wrong in AdaCost" << Endl;

   if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
   if ( (*e)->GetWeight() > 0 ){
      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
   }
   if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
   if ( fInverseBoostNegWeights ) (*e)->ScaleBoostWeight( 1. / boostfactor);

   if (DataInfo().IsSignal(*e)) (*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );

   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(), err);
   fErrorFraction = err;
   if (!fSubSample.empty()) fSubSample.clear();

   n = trandom->PoissonD(fBaggedSampleFraction);
   for (Int_t i=0; i<n; i++) fSubSample.push_back(*e);
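// Poisson bootstrap: each event enters the bagged subsample k times, with k drawn from
// a Poisson distribution of mean BaggedSampleFraction, so the subsample carries on
// average BaggedSampleFraction times the original number of entries.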
   // AdaBoostR2(): adaptation of AdaBoost to regression problems (Drucker 1997)
   if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;

   if (fAdaBoostR2Loss=="linear"){
   }
   else if (fAdaBoostR2Loss=="quadratic"){
   }
   else if (fAdaBoostR2Loss=="exponential"){
   }
   else {
      Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
            << " namely " << fAdaBoostR2Loss << "\n"
            << "and this is not implemented... a typo in the options ??" << Endl;
   }
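// In Drucker's AdaBoost.R2 the per-event loss is the deviation |target - response|
// normalised by the largest deviation maxDev observed in the sample, used either
// linearly, quadratically, or as 1-exp(-dev/maxDev), so that every loss lies in [0,1].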
   if (dt->GetNNodes() == 1){
      Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
            << "boost such a thing... if after 1 step the error rate is == 0.5"
            << "please check why this happens, maybe too many events per node requested ?";
   }
   Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
         << ") That should not happen, but is possible for regression trees, and"
         << " should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
         << " stop boosting " << Endl;

   // ...
   else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
   }

   if ( (*e)->GetWeight() > 0 ){
      Log() << kINFO << "Weight=      " << (*e)->GetWeight() << Endl;
      Log() << kINFO << "BoostWeight= " << (*e)->GetBoostWeight() << Endl;
      Log() << kINFO << "boostweight=" << boostWeight << "  err= " << err << Endl;
      Log() << kINFO << "maxDev   = " << maxDev << Endl;
      Log() << kINFO << "target   = " << (*e)->GetTarget(0) << Endl;
      Log() << kINFO << "estimate = " << dt->CheckEvent(*e, kFALSE) << Endl;
   }

   (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
   (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );

   results->GetHist("BoostWeightsVsTree")->SetBinContent(fForest.size(), 1./boostWeight);
   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(), err);
   fErrorFraction = err;
   if (fDoPreselection){

   for (UInt_t i=0; i< fForest.size(); i++) {
      void* trxml = fForest[i]->AddXMLTo(wght);

   for (i=0; i<fForest.size(); i++) delete fForest[i];
   fBoostWeights.clear();

   fIsLowBkgCut.resize(GetNvar());
   fLowBkgCut.resize(GetNvar());
   fIsLowSigCut.resize(GetNvar());
   fLowSigCut.resize(GetNvar());
   fIsHighBkgCut.resize(GetNvar());
   fHighBkgCut.resize(GetNvar());
   fIsHighSigCut.resize(GetNvar());
   fHighSigCut.resize(GetNvar());
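// For every input variable the preselection bookkeeping holds four possible one-sided
// cuts (low/high boundary, signal-like or background-like) plus a flag recording
// whether such a 100%-efficient cut was actually found for that variable.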
   if (gTools().HasAttr(parent, "TreeType")) {

   fForest.back()->SetTreeID(i++);

   Int_t analysisType(0);

   istr >> dummy >> fNTrees;
   Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;

   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
   fBoostWeights.clear();

   for (int i=0; i<fNTrees; i++) {

      fForest.back()->Print( std::cout );
      Log() << kFATAL << "Error while reading weight file; mismatch iTree="
            << iTree << " i=" << i
            << " dummy " << dummy;

      fForest.back()->SetTreeID(i);
      fForest.back()->Read(istr, GetTrainingTMVAVersionCode());

   return this->GetMvaValue( err, errUpper, 0 );
   if (fDoPreselection) {

   // PrivateGetMvaValue(): weighted vote (or gradient-boost sum) over the forest
   if (fBoostType=="Grad") return GetGradBoostMVA(ev, nTrees);

   return ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 ;

   // GetMulticlassValues(): per-class response of the multiclass BDT
   if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();

   std::vector<Double_t> temp(nClasses);
   std::vector<TMVA::DecisionTree *> forest = fForest;

   std::transform(temp.begin(), temp.end(), temp.begin(), [](Double_t d){ return exp(d); });

   (*fMulticlassReturnVal).push_back(p_cls);
   return *fMulticlassReturnVal;
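// The per-class value is a softmax in disguise: after exponentiating the per-class sums,
// p_cls = 1/(1 + sum_{j != cls} temp[j]/temp[cls]) is algebraically identical to
// exp(score_cls) / sum_j exp(score_j).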
   // GetRegressionValues()
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   const Event * ev = GetEvent();

   if (fBoostType=="AdaBoostR2") {
      vector< Double_t > response(fForest.size());
      vector< Double_t > weight(fForest.size());

      std::vector< std::vector<Double_t> > vtemp;
      vtemp.push_back( response );
      vtemp.push_back( weight );
   }
   else if (fBoostType=="Grad"){
      evT->SetTarget(0, myMVA+fBoostWeights[0] );
   }
   else {
      evT->SetTarget(0, ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 );
   }

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->push_back( evT2->GetTarget(0) );

   return *fRegressionReturnVal;

   // WriteMonitoringHistosToFile()
   Log() << kDEBUG << "\tWrite monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
   fMonitorNtuple->Write();
   // GetVariableImportance(): relative importance of each input variable
   fVariableImportance.resize(GetNvar());
   fVariableImportance[ivar]=0;

   sum += fVariableImportance[ivar];

   return fVariableImportance;

   else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;

   // CreateRanking(): compute the ranking of the input variables
   fRanking = new Ranking( GetName(), "Variable Importance" );
   // GetHelpMessage(): online help text
   Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
   Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
   Log() << "vote of the individual trees. Consecutive decision trees are " << Endl;
   Log() << "trained using the original training data set with re-weighted " << Endl;
   Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following tree." << Endl;

   Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
   Log() << "using a single discriminant variable at a time. A test event " << Endl;
   Log() << "ending up after the sequence of left-right splits in a final " << Endl;
   Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
   Log() << "depending on the majority type of training events in that node." << Endl;

   Log() << "By the nature of the binary splits performed on the individual" << Endl;
   Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
   Log() << "between variables (they need to approximate the linear split in" << Endl;
   Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
   Log() << "variables individually). Hence decorrelation could be useful " << Endl;
   Log() << "to optimise the BDT performance." << Endl;

   Log() << "The two most important parameters in the configuration are the " << Endl;
   Log() << "minimal number of events requested by a leaf node as percentage of the " << Endl;
   Log() << "   number of training events (option \"MinNodeSize\", replacing the actual number " << Endl;
   Log() << "   of events \"nEventsMin\" as given in earlier versions)." << Endl;
   Log() << "If this number is too large, detailed features " << Endl;
   Log() << "in the parameter space cannot be modelled. If it is too small, " << Endl;
   Log() << "the risk of overtraining rises and boosting becomes less effective." << Endl;
   Log() << "   Typical values from our current experience for best performance " << Endl;
   Log() << "   are between 0.5(%) and 10(%) " << Endl;

   Log() << "The default minimal number is currently set to " << Endl;
   Log() << "   max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
   Log() << "and can be changed by the user." << Endl;

   Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
   Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
   Log() << "that is used when determining after the training which splits " << Endl;
   Log() << "are considered statistically insignificant and are removed. The" << Endl;
   Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
   Log() << "the comparison between efficiencies obtained on the training and" << Endl;
   Log() << "the independent test sample. They should be equal within statistical" << Endl;
   Log() << "errors, in order to minimize statistical fluctuations in different samples." << Endl;
   fout << "   std::vector<"<<nodeName<<"*> fForest;       // i.e. root nodes of decision trees" << std::endl;
   fout << "   std::vector<double>  fBoostWeights; // the weights applied in the individual boosts" << std::endl;
   fout << "};" << std::endl << std::endl;

   fout << "std::vector<double> ReadBDTG::GetMulticlassValues__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   uint nClasses = " << DataInfo().GetNClasses() << ";" << std::endl;
   fout << "   std::vector<double> fMulticlassReturnVal;" << std::endl;
   fout << "   fMulticlassReturnVal.reserve(nClasses);" << std::endl;
   fout << "   std::vector<double> temp(nClasses);" << std::endl;
   fout << "   auto forestSize = fForest.size();" << std::endl;
   fout << "   // trees 0, nClasses, 2*nClasses, ... belong to class 0" << std::endl;
   fout << "   // trees 1, nClasses+1, 2*nClasses+1, ... belong to class 1 and so forth" << std::endl;
   fout << "   uint classOfTree = 0;" << std::endl;
   fout << "   for (uint itree = 0; itree < forestSize; ++itree) {" << std::endl;
   fout << "      BDTGNode *current = fForest[itree];" << std::endl;
   fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
   fout << "         if (current->GoesRight(inputValues)) current=(BDTGNode*)current->GetRight();" << std::endl;
   fout << "         else current=(BDTGNode*)current->GetLeft();" << std::endl;
   fout << "      }" << std::endl;
   fout << "      temp[classOfTree] += current->GetResponse();" << std::endl;
   fout << "      if (++classOfTree == nClasses) classOfTree = 0; // cheap modulo" << std::endl;
   fout << "   }" << std::endl;
   fout << "   // we want to calculate sum of exp(temp[j] - temp[i]) for all i,j (i!=j)" << std::endl;
   fout << "   // first calculate exp(), then replace minus with division." << std::endl;
   fout << "   std::transform(temp.begin(), temp.end(), temp.begin(), [](double d){return exp(d);});" << std::endl;
   fout << "   for(uint iClass=0; iClass<nClasses; iClass++){" << std::endl;
   fout << "      double norm = 0.0;" << std::endl;
   fout << "      for(uint j=0;j<nClasses;j++){" << std::endl;
   fout << "         if(iClass!=j)" << std::endl;
   fout << "            norm += temp[j] / temp[iClass];" << std::endl;
   fout << "      }" << std::endl;
   fout << "      fMulticlassReturnVal.push_back(1.0/(1.0+norm));" << std::endl;
   fout << "   }" << std::endl;
   fout << "   return fMulticlassReturnVal;" << std::endl;
   fout << "}" << std::endl;

   fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double myMVA = 0;" << std::endl;
   if (fDoPreselection){
      if (fIsLowBkgCut[ivar]){
         fout << "   if (inputValues["<<ivar<<"] < " << fLowBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
      }
      if (fIsLowSigCut[ivar]){
         fout << "   if (inputValues["<<ivar<<"] < " << fLowSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;
      }
      if (fIsHighBkgCut[ivar]){
         fout << "   if (inputValues["<<ivar<<"] > " << fHighBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
      }
      if (fIsHighSigCut[ivar]){
         fout << "   if (inputValues["<<ivar<<"] > " << fHighSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;
      }
   }

   if (fBoostType!="Grad"){
      fout << "   double norm  = 0;" << std::endl;
   }
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
   fout << "      "<<nodeName<<" *current = fForest[itree];" << std::endl;
   fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
   fout << "         if (current->GoesRight(inputValues)) current=("<<nodeName<<"*)current->GetRight();" << std::endl;
   fout << "         else current=("<<nodeName<<"*)current->GetLeft();" << std::endl;
   fout << "      }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "      myMVA += current->GetResponse();" << std::endl;
   }
   if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] * current->GetNodeType();" << std::endl;
   else               fout << "      myMVA += fBoostWeights[itree] * current->GetPurity();" << std::endl;
   fout << "      norm  += fBoostWeights[itree];" << std::endl;
   fout << "   }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
   }
   else fout << "   return myMVA /= norm;" << std::endl;
   fout << "}" << std::endl << std::endl;

   fout << "void " << className << "::Initialize()" << std::endl;
   fout << "{" << std::endl;
   fout << "  double inf = std::numeric_limits<double>::infinity();" << std::endl;
   fout << "  double nan = std::numeric_limits<double>::quiet_NaN();" << std::endl;
   fout << "  // itree = " << itree << std::endl;
   fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << std::endl;
   fout << "  fForest.push_back( " << std::endl;
   fout << "   );" << std::endl;
   fout << "   return;" << std::endl;
   fout << "};" << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
   fout << "      delete fForest[itree]; " << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;

   fout << "#include <algorithm>" << std::endl;
   fout << "#include <limits>" << std::endl;

   fout << "public:" << std::endl;
   fout << "   // constructor of an essentially \"empty\" node floating in space" << std::endl;
   if (fUseFisherCuts){
      fout << "                          int nFisherCoeff," << std::endl;
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "                          double fisherCoeff"<<i<<"," << std::endl;
      }
   }
   fout << "                          int selector, double cutValue, bool cutType, " << std::endl;
   fout << "                          int nodeType, double purity, double response ) :" << std::endl;
   fout << "   fLeft         ( left         )," << std::endl;
   fout << "   fRight        ( right        )," << std::endl;
   if (fUseFisherCuts) fout << "   fNFisherCoeff ( nFisherCoeff )," << std::endl;
   fout << "   fSelector     ( selector     )," << std::endl;
   fout << "   fCutValue     ( cutValue     )," << std::endl;
   fout << "   fCutType      ( cutType      )," << std::endl;
   fout << "   fNodeType     ( nodeType     )," << std::endl;
   fout << "   fPurity       ( purity       )," << std::endl;
   fout << "   fResponse     ( response ){" << std::endl;
   if (fUseFisherCuts){
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "     fFisherCoeff.push_back(fisherCoeff"<<i<<");" << std::endl;
      }
   }
   fout << "   }" << std::endl << std::endl;
   fout << "   virtual ~"<<nodeName<<"();" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetRight( void ) {return fRight; };" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the left " << std::endl;
   fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetLeft( void ) { return fLeft; }; " << std::endl << std::endl;
   fout << "   // return S/(S+B) (purity) at this node (from training)" << std::endl << std::endl;
   fout << "   double GetPurity( void ) const { return fPurity; } " << std::endl;
   fout << "   // return the node type" << std::endl;
   fout << "   int    GetNodeType( void ) const { return fNodeType; }" << std::endl;
   fout << "   double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
   fout << "private:" << std::endl << std::endl;
   fout << "   "<<nodeName<<"*   fLeft;     // pointer to the left daughter node" << std::endl;
   fout << "   "<<nodeName<<"*   fRight;    // pointer to the right daughter node" << std::endl;
   if (fUseFisherCuts){
      fout << "   int                     fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
      fout << "   std::vector<double>     fFisherCoeff;  // the fisher coeff (offset at the last element)" << std::endl;
   }
   fout << "   int                     fSelector;     // index of variable used in node selection (decision tree) " << std::endl;
   fout << "   double                  fCutValue;     // cut value applied on this node to discriminate bkg against sig" << std::endl;
   fout << "   bool                    fCutType;      // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
   fout << "   int                     fNodeType;     // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
   fout << "   double                  fPurity;       // Purity of node from training" << std::endl;
   fout << "   double                  fResponse;     // Regression response value of node" << std::endl;
   fout << "}; " << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   fout << "{" << std::endl;
   fout << "   if (fLeft  != NULL) delete fLeft;" << std::endl;
   fout << "   if (fRight != NULL) delete fRight;" << std::endl;
   fout << "}; " << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   bool result;" << std::endl;
   if (fUseFisherCuts){
      fout << "   if (fNFisherCoeff == 0){" << std::endl;
      fout << "     result = (inputValues[fSelector] >= fCutValue );" << std::endl;
      fout << "   }else{" << std::endl;
      fout << "     double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
      fout << "     for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
      fout << "       fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
      fout << "     result = fisher > fCutValue;" << std::endl;
      fout << "   }" << std::endl;
   }
   fout << "   result = (inputValues[fSelector] >= fCutValue );" << std::endl;
   fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
   fout << "   else return !result;" << std::endl;
   fout << "}" << std::endl;

   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the left" << std::endl;
   fout << "   if (!this->GoesRight(inputValues)) return true;" << std::endl;
   fout << "   else return false;" << std::endl;
   fout << "}" << std::endl;

   fout << "#endif" << std::endl;
   Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" << Endl;

   fout << "NN(" << std::endl;
   if (n->GetLeft() != NULL){

   fout << ", " << std::endl;
   if (n->GetRight() != NULL){

   fout << ", " << std::endl
        << std::setprecision(6);
   if (fUseFisherCuts){
      fout << n->GetNFisherCoeff() << ", ";
      for (UInt_t i=0; i< GetNVariables()+1; i++) {
         if (n->GetNFisherCoeff() == 0 ){

         fout << n->GetFisherCoeff(i) << ", ";

   fout << n->GetSelector() << ", "
        << n->GetCutValue() << ", "
        << n->GetCutType()  << ", "
        << n->GetNodeType() << ", "
        << n->GetPurity()   << ","
        << n->GetResponse() << ") ";
   // DeterminePreselectionCuts(): look for cuts that are 100% efficient for signal or background
   fIsLowSigCut.assign(GetNvar(),kFALSE);
   fIsLowBkgCut.assign(GetNvar(),kFALSE);
   fIsHighSigCut.assign(GetNvar(),kFALSE);
   fIsHighBkgCut.assign(GetNvar(),kFALSE);

   fLowSigCut.assign(GetNvar(),0.);
   fLowBkgCut.assign(GetNvar(),0.);
   fHighSigCut.assign(GetNvar(),0.);
   fHighBkgCut.assign(GetNvar(),0.);

   if (DataInfo().IsSignal(*it)){
      nTotS += (*it)->GetWeight();
   }
   else {
      nTotB += (*it)->GetWeight();
   }

   for( ; it != it_end; ++it ) {
      if (DataInfo().IsSignal(**it))

   Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;

   Log() << kDEBUG << " \tfound and suggest the following possible pre-selection cuts " << Endl;
   if (fDoPreselection) Log() << kDEBUG << "\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" << Endl;
   else Log() << kDEBUG << "\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample" << Endl;

   if (fIsLowBkgCut[ivar]){
      Log() << kDEBUG << " \tfound cut: Bkg if var " << ivar << " < " << fLowBkgCut[ivar] << Endl;
   }
   if (fIsLowSigCut[ivar]){
      Log() << kDEBUG << " \tfound cut: Sig if var " << ivar << " < " << fLowSigCut[ivar] << Endl;
   }
   if (fIsHighBkgCut[ivar]){
      Log() << kDEBUG << " \tfound cut: Bkg if var " << ivar << " > " << fHighBkgCut[ivar] << Endl;
   }
   if (fIsHighSigCut[ivar]){
      Log() << kDEBUG << " \tfound cut: Sig if var " << ivar << " > " << fHighSigCut[ivar] << Endl;
   }

   // ApplyPreselectionCuts(): return +1 (-1) for events passing a 100%-efficient signal (background) cut
   if (fIsLowBkgCut[ivar]){
   if (fIsLowSigCut[ivar]){
   if (fIsHighBkgCut[ivar]){
   if (fIsHighSigCut[ivar]){
Symbols referenced in this listing, with their brief documentation:

Loss functions, separation criteria and supporting TMVA classes:
  Absolute Deviation BDT loss function.
  static void SetVarIndex(Int_t iVar)
  static Config & Instance() - static function: returns the TMVA Config instance.
  Implementation of the CrossEntropy as separation criterion (a textbook sketch of this and the Gini criterion follows this group).
  Class that contains all the data information.
  static void SetIsTraining(bool on)
  Implementation of a Decision Tree.
  static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=262657) - re-create a new tree (decision tree or search tree) from XML.
  void SetTarget(UInt_t itgt, Float_t value) - set the target value (dimension itgt) to value.
  Implementation of the GiniIndex with Laplace correction as separation criterion.
  Implementation of the GiniIndex as separation criterion.
  The TMVA::Interval class.
  Least Squares BDT loss function.
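The GiniIndex and CrossEntropy entries above are the node-splitting criteria most relevant to this file. As an illustration (not the verbatim TMVA code, which works on the signal and background sums S and B directly and may differ by constant factors that do not change which split is preferred), the textbook forms as a function of the node purity p = S/(S+B) are:

#include <cmath>

// Textbook separation indices as functions of the node purity p = S/(S+B).
// A split is chosen so that the index of the parent node minus the
// event-weighted sum of the daughter indices (the "separation gain") is maximal.
double GiniIndex(double p)    { return 2. * p * (1. - p); }    // equals 1 - p^2 - (1-p)^2
double CrossEntropy(double p)
{
   if (p <= 0. || p >= 1.) return 0.;                          // pure node: no impurity
   return -p * std::log(p) - (1. - p) * std::log(1. - p);
}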
MethodBDT (Analysis of Boosted Decision Trees) - members:
  void Init(void) - Common initialisation with defaults for the BDT method.
  static const Int_t fgDebugLevel - debug level determining some printout/control plots etc.
  MethodBDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="") - The standard constructor for the "boosted decision trees".
  void BoostMonitor(Int_t iTree) - Fills the ROCIntegral vs iTree from the testSample for the monitoring plots during the training.
  const std::vector< Float_t > & GetMulticlassValues() - Get the multiclass MVA response for the BDT classifier.
  Double_t AdaBoostR2(std::vector< const TMVA::Event * > &, DecisionTree *dt) - Adaptation of AdaBoost to regression problems (see H. Drucker 1997).
  Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=nullptr, Double_t *errUpper=nullptr, UInt_t useNTrees=0) - Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the...
  void MakeClassSpecific(std::ostream &, const TString &) const - Make a ROOT-independent C++ class for classifier response (classifier-specific implementation).
  void GetHelpMessage() const - Get help message text.
  LossFunctionBDT * fRegressionLossFunctionBDTG
  void DeterminePreselectionCuts(const std::vector< const TMVA::Event * > &eventSample) - Find useful preselection cuts that will be applied before any Decision Tree training.
  Double_t GradBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0) - Calculate the desired response value for each region.
  const Ranking * CreateRanking() - Compute ranking of input variables.
  virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters) - Set the tuning parameters according to the argument.
  Double_t AdaCost(std::vector< const TMVA::Event * > &, DecisionTree *dt) - The AdaCost boosting algorithm takes a simple cost matrix (currently fixed for all events...).
  void DeclareOptions() - Define the options (their key words).
  Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
  virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA") - Call the Optimizer with the set of parameters and ranges that are meant to be tuned.
  Double_t Boost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0) - Apply the boosting algorithm (the algorithm is selected via the "option" given in the constructor).
  Double_t TestTreeQuality(DecisionTree *dt) - Test the tree quality in terms of misclassification.
  Double_t Bagging() - Call it boot-strapping, re-sampling or whatever you like, in the end it is nothing else but applying...
  void UpdateTargets(std::vector< const TMVA::Event * > &, UInt_t cls=0) - Calculate residuals for all events.
  void UpdateTargetsRegression(std::vector< const TMVA::Event * > &, Bool_t first=kFALSE) - Calculate residuals for all events and update targets for the next iteration.
  Double_t GradBoostRegression(std::vector< const TMVA::Event * > &, DecisionTree *dt) - Implementation of M_TreeBoost using any loss function as described by Friedman 1999.
  void WriteMonitoringHistosToFile(void) const - Here we could write some histograms created during the processing to the output file.
  virtual ~MethodBDT(void) - Destructor.
  void AddWeightsXMLTo(void *parent) const - Write weights to XML.
  Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees) - Returns MVA value: -1 for background, 1 for signal (a sketch of both response conventions follows this group).
  virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) - BDT can handle classification with multiple classes and regression with one regression target.
  Double_t RegBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt) - A special boosting only for regression (not implemented).
  void InitEventSample() - Initialize the event sample (i.e. reset the boost weights etc.).
  Double_t ApplyPreselectionCuts(const Event *ev) - Apply the preselection cuts before even bothering about any Decision Trees in the GetMVA value.
  void SetMinNodeSize(Double_t sizeInPercent)
  void ProcessOptions() - The option string is decoded; for available options see "DeclareOptions".
  void PreProcessNegativeEventWeights() - Pre-process training events that carry negative weights.
  void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const - Recursively descends a tree and writes the node instance to the output stream.
  Double_t AdaBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt) - The AdaBoost implementation.
  TTree * fMonitorNtuple - monitoring ntuple.
  std::vector< Double_t > GetVariableImportance() - Return the relative variable importance, normalized to all variables together having the importance 1...
  void InitGradBoost(std::vector< const TMVA::Event * > &) - Initialize targets for the first tree.
  void Train(void) - BDT training.
  void GetBaggedSubSample(std::vector< const TMVA::Event * > &) - Fills fEventSample with fBaggedSampleFraction*NEvents random training events.
  const std::vector< Float_t > & GetRegressionValues() - Get the regression values generated by the BDTs.
  SeparationBase * fSepType - the separation criterion used in node splitting.
  void ReadWeightsFromXML(void *parent) - Reads the BDT from the XML file (a minimal application-side usage sketch follows this symbol index).
  void ReadWeightsFromStream(std::istream &istr) - Read the weights (BDT coefficients).
  void Reset(void) - Reset the method, as if it had just been instantiated (forget all training etc.).
  void MakeClassSpecificHeader(std::ostream &, const TString &) const - Specific class header.
  void DeclareCompatibilityOptions() - Options that are used ONLY for the READER to ensure backward compatibility.
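Two response conventions appear in the member documentation above: PrivateGetMvaValue describes a weighted majority vote in [-1;1], while GetGradBoostMVA maps the summed gradient-boost response to -1 (background) / +1 (signal). The sketch below illustrates both; it is not the TMVA implementation, and in particular the squashing function used for the gradient-boost response is only one common choice, shown here as an assumption.

#include <cmath>
#include <cstddef>
#include <vector>

// Sketch: forming a forest response from per-tree outputs.

// AdaBoost-style: trees vote +1/-1 and the votes are weighted by the boost
// weights; the normalized weighted sum lies in [-1, 1].
double MajorityVoteMva(const std::vector<double>& treeVote,     // each entry +1 or -1
                       const std::vector<double>& boostWeight)  // one positive weight per tree
{
   double sum = 0., norm = 0.;
   for (std::size_t i = 0; i < treeVote.size(); ++i) {
      sum  += boostWeight[i] * treeVote[i];
      norm += boostWeight[i];
   }
   return norm > 0. ? sum / norm : 0.;
}

// Gradient-boost style: real-valued leaf responses are summed over the trees and
// squashed to (-1, 1), so that -1 is background-like and +1 is signal-like.
// The particular sigmoid below is an assumption, used only for illustration.
double GradBoostMva(const std::vector<double>& leafResponse)
{
   double sum = 0.;
   for (double r : leafResponse) sum += r;
   return 2. / (1. + std::exp(-2. * sum)) - 1.;
}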
Other TMVA classes referenced:
  Virtual base class for all MVA methods.
  virtual void DeclareCompatibilityOptions() - options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
  Implementation of the MisClassificationError as separation criterion.
  std::map< TString, Double_t > optimize()
  PDF wrapper for histograms; uses user-defined spline interpolation.
  Ranking for variables in method (implementation).
  Class that is the base class for a vector of results.
  Implementation of the SdivSqrtSplusB as separation criterion.
  Timing information for training and evaluation of MVA methods.
  Singleton class for Global types used by TMVA.
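Since the index above mentions writing and reading the BDT weights as XML (AddWeightsXMLTo, ReadWeightsFromXML), here is a minimal application-side sketch using the standard TMVA::Reader interface. The variable names and the weight-file path are placeholders, not values taken from this listing.

#include "TMVA/Reader.h"

int main()
{
   // Variables must be registered with the same names (and order) as used for training.
   float var1 = 0.f, var2 = 0.f;

   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("var1", &var1);
   reader.AddVariable("var2", &var2);

   // Book the BDT from the XML weight file produced by the training step (placeholder path).
   reader.BookMVA("BDT", "dataset/weights/TMVAClassification_BDT.weights.xml");

   // Fill the variables with one event's values, then evaluate the response.
   var1 = 1.2f;
   var2 = -0.4f;
   double mva = reader.EvaluateMVA("BDT");   // BDT response, roughly in [-1, 1]

   return mva > 0. ? 0 : 1;
}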