#include <unordered_map>
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
 
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
 
 
   DeclareOptionRef(fNTrees, "NTrees", "Number of trees in the forest");
   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50, "MaxDepth", "Max depth of the decision tree allowed");
   } else {
      DeclareOptionRef(fMaxDepth=3, "MaxDepth", "Max depth of the decision tree allowed");
   }

   TString tmp="5%"; if (DoRegression()) tmp="0.2%";
   DeclareOptionRef(fMinNodeSizeS=tmp, "MinNodeSize", "Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");

   DeclareOptionRef(fNCuts, "nCuts", "Number of grid points in variable range used in finding optimal cut in node splitting");

   DeclareOptionRef(fBoostType, "BoostType", "Boosting type for the trees in the forest (note: AdaCost is still experimental)");

   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaCost"));
   AddPreDefVal(TString("Bagging"));
   AddPreDefVal(TString("AdaBoostR2"));

   if (DoRegression()) {
      fBoostType = "AdaBoostR2";
   } else {
      fBoostType = "AdaBoost";
   }
   DeclareOptionRef(fAdaBoostR2Loss="Quadratic", "AdaBoostR2Loss", "Type of Loss function in AdaBoostR2");
   AddPreDefVal(TString("Linear"));
   AddPreDefVal(TString("Quadratic"));
   AddPreDefVal(TString("Exponential"));

   DeclareOptionRef(fBaggedBoost=kFALSE, "UseBaggedBoost", "Use only a random subsample of all events for growing the trees in each boost iteration.");
   DeclareOptionRef(fShrinkage = 1.0, "Shrinkage", "Learning rate for BoostType=Grad algorithm");
   DeclareOptionRef(fAdaBoostBeta=.5, "AdaBoostBeta", "Learning rate for AdaBoost algorithm");
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees", "Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
   DeclareOptionRef(fUseNvars, "UseNvars", "Size of the subset of variables used with RandomisedTree option");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars", "Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
   DeclareOptionRef(fBaggedSampleFraction=.6, "BaggedSampleFraction", "Relative size of bagged event sample to original size of the data sample (used whenever bagging is used, i.e. UseBaggedBoost or Bagging)");

   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg categories, or the purity = S/(S+B) as classification of the leaf node -> Real-AdaBoost");
   if (DoRegression()) {

   DeclareOptionRef(fNegWeightTreatment="InverseBoostNegWeights", "NegWeightTreatment",
                    "How to treat events with negative weights in the BDT training (in particular the boosting): IgnoreInTraining; boost with inverse boost weight; pair events with negative and positive weights in the training sample and *annihilate* them (experimental!)");
   AddPreDefVal(TString("InverseBoostNegWeights"));
   AddPreDefVal(TString("IgnoreNegWeightsInTraining"));
   AddPreDefVal(TString("NoNegWeightsInTraining"));
   AddPreDefVal(TString("PairNegWeightsGlobal"));

   DeclareOptionRef(fCss=1.,    "Css",    "AdaCost: cost of true signal selected signal");
   DeclareOptionRef(fCts_sb=1., "Cts_sb", "AdaCost: cost of true signal selected bkg");
   DeclareOptionRef(fCtb_ss=1., "Ctb_ss", "AdaCost: cost of true bkg    selected signal");
   DeclareOptionRef(fCbb=1.,    "Cbb",    "AdaCost: cost of true bkg    selected bkg");

   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");

   DeclareOptionRef(fSepTypeS, "SeparationType", "Separation criterion for node splitting");
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   AddPreDefVal(TString("RegressionVariance"));
   if (DoRegression()) {
      fSepTypeS = "RegressionVariance";
   } else {
      fSepTypeS = "GiniIndex";
   }

   DeclareOptionRef(fRegressionLossFunctionBDTGS = "Huber", "RegressionLossFunctionBDTG", "Loss function for BDTG regression.");
   AddPreDefVal(TString("Huber"));
   AddPreDefVal(TString("AbsoluteDeviation"));
   AddPreDefVal(TString("LeastSquares"));

   DeclareOptionRef(fHuberQuantile = 0.7, "HuberQuantile", "In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");

   DeclareOptionRef(fDoBoostMonitor=kFALSE, "DoBoostMonitor", "Create control plot with ROC integral vs tree number");

   DeclareOptionRef(fUseFisherCuts=kFALSE, "UseFisherCuts", "Use multivariate splits using the Fisher criterion");
   DeclareOptionRef(fMinLinCorrForFisher=.8, "MinLinCorrForFisher", "The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
   DeclareOptionRef(fUseExclusiveVars=kFALSE, "UseExclusiveVars", "Variables already used in Fisher criterion are not anymore analysed individually for node splitting");

   DeclareOptionRef(fDoPreselection=kFALSE, "DoPreselection", "apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");

   DeclareOptionRef(fSigToBkgFraction=1, "SigToBkgFraction", "Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost)");

   DeclareOptionRef(fPruneMethodS, "PruneMethod", "Note: for BDTs use small trees (e.g. MaxDepth=3) and NoPruning.  Pruning: Method used for pruning (removal) of statistically insignificant branches");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));

   DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength");

   DeclareOptionRef(fFValidationEvents=0.5, "PruningValFraction", "Fraction of events to use for optimizing automatic pruning.");

   DeclareOptionRef(fSkipNormalization=kFALSE, "SkipNormalization", "Skip normalization at initialization, to keep expectation value of BDT output according to the fraction of events");

   DeclareOptionRef(fMinNodeEvents=0, "nEventsMin", "deprecated: Use MinNodeSize (in % of training events) instead");

   DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad", "deprecated: Use *UseBaggedBoost* instead: Use only a random subsample of all events for growing the trees in each iteration.");
   DeclareOptionRef(fBaggedSampleFraction, "GradBaggingFraction", "deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE.");
   DeclareOptionRef(fUseNTrainEvents, "UseNTrainEvents", "deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
   DeclareOptionRef(fNNodesMax, "NNodesMax", "deprecated: Use MaxDepth instead to limit the tree size");

   DeclareOptionRef(fHistoricBool=kTRUE, "UseWeightedTrees",
                    "Use weighted trees or simple average in classification from the forest");
   DeclareOptionRef(fHistoricBool=kFALSE, "PruneBeforeBoost", "Flag to prune the tree before applying boosting algorithm");
   DeclareOptionRef(fHistoricBool=kFALSE, "RenormByClass", "Individually re-normalize each event class to the original size after boosting");

   AddPreDefVal(TString("NegWeightTreatment"), TString("IgnoreNegWeights"));
 
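   // Illustrative usage sketch (not part of this file): the options declared above are
   // normally supplied as a configuration string when booking the method through the
   // TMVA Factory interface, for example
   //
   //    factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
   //       "NTrees=800:MaxDepth=3:MinNodeSize=5%:nCuts=20:"
   //       "BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex" );
   //
   // Option names and the values registered with AddPreDefVal() above are checked when
   // this string is parsed, and unknown settings are rejected.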
 
   else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
   else if (fSepTypeS == "regressionvariance")     fSepType = NULL;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option " << fSepTypeS << " called" << Endl;
   }
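   // For reference (standard definitions, with p = S/(S+B) the node signal purity):
   // the Gini index is p*(1-p), the cross entropy is -p*ln(p) - (1-p)*ln(1-p), and the
   // misclassification error is 1 - max(p, 1-p); the split that maximises the decrease
   // of the chosen index between parent and daughter nodes is selected.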
 
   if (!(fHuberQuantile >= 0.0 && fHuberQuantile <= 1.0)){
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, " << fHuberQuantile << ", does not meet this criterion" << Endl;
   }

   fRegressionLossFunctionBDTGS.ToLower();
   if      (fRegressionLossFunctionBDTGS == "huber")                  fRegressionLossFunctionBDTG = new HuberLossFunctionBDT(fHuberQuantile);

      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Regression Loss Function BDT option " << fRegressionLossFunctionBDTGS << " called" << Endl;

   fPruneMethodS.ToLower();

      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown PruneMethod " << fPruneMethodS << " option called" << Endl;

            << "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;

   if (fMinNodeEvents > 0){
      fMinNodeSize = Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set ** nEventsMin = " << fMinNodeEvents << " ** the min absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin=" << fMinNodeEvents << "--> MinNodeSize=" << fMinNodeSize << "%" << Endl;
      Log() << kWARNING << "Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended \n"
            << " *MinNodeSize* = " << fMinNodeSizeS << " option !!" << Endl;

      SetMinNodeSize(fMinNodeSizeS);
 
   fAdaBoostR2Loss.ToLower();

   if (fBoostType=="Grad") {
      if (fNegWeightTreatment=="InverseBoostNegWeights"){
         Log() << kINFO << "the option NegWeightTreatment=InverseBoostNegWeights does"
               << " not exist for BoostType=Grad" << Endl;
         Log() << kINFO << "--> change to new default NegWeightTreatment=Pray" << Endl;
         Log() << kDEBUG << "i.e. simply keep them as is, which should work fine for Grad Boost" << Endl;
         fNegWeightTreatment="Pray";
         fNoNegWeightsInTraining=kFALSE;
   } else if (fBoostType=="RealAdaBoost"){
      fBoostType    = "AdaBoost";
   } else if (fBoostType=="AdaCost"){

   if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
   if (fAutomatic && fFValidationEvents > 0.5) {
      Log() << kWARNING << "You have chosen to use more than half of your training sample "
            << "to optimize the automatic pruning algorithm. This is probably wasteful "
            << "and your overall results will be degraded. Are you sure you want this?"

   if (this->Data()->HasNegativeEventWeights()){
      Log() << kINFO << " You are using a Monte Carlo that has also negative weights. "
            << "That should in principle be fine as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS << "  (" << fMinNodeSize << "%)"
            << ", (or the deprecated equivalent nEventsMin) you can set this via the "
            << "BDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging!!! "
            << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining  "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;

   if (DoRegression()) {
      if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
         Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;

      if (fSepType != NULL){
         Log() << kWARNING << "Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" << Endl;

         Log() << kWARNING << "Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" << Endl;

         Log() << kWARNING << "Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " << Endl;
         Log() << kWARNING << "is not implemented for regression analysis ! " << Endl;
         Log() << kWARNING << "--> I switch to default nCuts = 20 and use standard node splitting" << Endl;

   if (fRandomisedTrees){
      Log() << kINFO << " Randomised trees use no pruning" << Endl;

   if (fUseFisherCuts) {
      Log() << kWARNING << "When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" << Endl;
      Log() << " a more elaborate node splitting algorithm) is not implemented. " << Endl;

      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"

   fNegWeightTreatment.ToLower();
   if      (fNegWeightTreatment == "ignorenegweightsintraining")   fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "nonegweightsintraining")       fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "inverseboostnegweights")       fInverseBoostNegWeights = kTRUE;
   else if (fNegWeightTreatment == "pairnegweightsglobal")         fPairNegWeightsGlobal   = kTRUE;
   else if (fNegWeightTreatment == "pray")   Log() << kDEBUG << "Yes, good luck with praying " << Endl;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment << " requested" << Endl;
   }

   if (fNegWeightTreatment == "pairnegweightsglobal")
      Log() << kWARNING << " you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " << Endl;
 
      while (tmp < fNNodesMax){
      Log() << kWARNING << "You have specified a deprecated option *NNodesMax=" << fNNodesMax
            << "* \n this has been translated to MaxDepth=" << fMaxDepth << Endl;

   if (fUseNTrainEvents>0){
      fBaggedSampleFraction  = (Double_t) fUseNTrainEvents/Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have specified a deprecated option *UseNTrainEvents=" << fUseNTrainEvents
            << "* \n this has been translated to BaggedSampleFraction=" << fBaggedSampleFraction << "(%)" << Endl;

   if (fBoostType=="Bagging") fBaggedBoost = kTRUE;
   if (fBaggedGradBoost){
      fBaggedBoost = kTRUE;
      Log() << kWARNING << "You have specified a deprecated option *UseBaggedGrad* --> please use  *UseBaggedBoost* instead" << Endl;

      Log() << kFATAL << "you have demanded a minimal node size of "
            << " that somehow does not make sense " << Endl;

      Log() << kFATAL << "I had problems reading the option MinNodeEvents, which "

      fBoostType      = "AdaBoost";
      if(DataInfo().GetNClasses()!=0)

      fBoostType      = "AdaBoostR2";
      fAdaBoostR2Loss = "Quadratic";
      if(DataInfo().GetNClasses()!=0)

   fPruneMethodS   = "NoPruning";

   fFValidationEvents = 0.5;
   fRandomisedTrees = kFALSE;

   fUsePoissonNvars = kTRUE;

   SetSignalReferenceCut( 0 );
 
 
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];

   fBoostWeights.clear();
   if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=NULL; }
   fVariableImportance.clear();

   fLossFunctionEventInfo.clear();

   if (Data()) Data()->DeleteResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   Log() << kDEBUG << " successfully(?) reset the method " << Endl;
 
 
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
 
 
   if (!HasTrainingTree()) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;

   if (fEventSample.size() > 0) {
      for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);

      else fDoPreselection = kFALSE;

         if (fDoPreselection){
            if (TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {

         if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
               Log() << kWARNING << " Note, you have events with negative event weight in the sample, but you've chosen to ignore them" << Endl;
         } else if (event->GetWeight()==0){
               Log() << "Events with weight == 0 are going to be simply ignored " << Endl;

            if (event->GetWeight() < 0) {
               fTrainWithNegWeights=kTRUE;
                  if (fPairNegWeightsGlobal){
                     Log() << kWARNING << "Events with negative event weights are found and "
                           << " will be removed prior to the actual BDT training by global "
                           << " pairing (and subsequent annihilation) with positive weight events"
                     Log() << kWARNING << "Events with negative event weights are USED during "
                           << "the BDT training. This might cause problems with small node sizes "
                           << "or with the boosting. Please remove negative events from training "
                           << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
                           << "observe problems with the boosting"

               if (ievt % imodulo == 0) fValidationSample.push_back( event );
               else                     fEventSample.push_back( event );

               fEventSample.push_back(event);

         Log() << kINFO << "<InitEventSample> Internally I use " << fEventSample.size()
               << " for Training  and " << fValidationSample.size()
               << " for Pruning Validation (" << ((Float_t)fValidationSample.size())/((Float_t)fEventSample.size()+fValidationSample.size())*100.0
               << "% of training used for validation)" << Endl;
 
      if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();

   if (DoRegression()) {
   } else if (DoMulticlass()) {
   } else if (!fSkipNormalization) {

      Log() << kDEBUG << "\t<InitEventSample> For classification trees, " << Endl;
      Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match " << Endl;
      Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!" << Endl;

         if ((DataInfo().IsSignal(fEventSample[ievt])) ) {

         Log() << kDEBUG << "\tre-normalise events such that Sig and Bkg have respective sum of weights = "
               << fSigToBkgFraction << Endl;
         Log() << kDEBUG << "  \tsig->sig*" << normSig << "ev. bkg->bkg*" << normBkg << "ev." << Endl;

         Log() << kINFO << "#events: (unweighted) sig: " << sumSig << " bkg: " << sumBkg << Endl;

            if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);

         Log() << kINFO << "--> could not determine scaling factors as either there are " << Endl;
         Log() << kINFO << " no signal events (sumSigW=" << sumSigW << ") or no bkg ev. (sumBkgW=" << sumBkgW << ")" << Endl;

   fTrainSample = &fEventSample;

      GetBaggedSubSample(fEventSample);
      fTrainSample = &fSubSample;
 
 
      if (fEventSample[iev]->GetWeight() < 0) {

      Log() << kINFO << "no negative event weights found .. no preprocessing necessary" << Endl;

      Log() << kINFO << "found a total of " << totalNegWeights << " of negative event weights which I am going to try to pair with positive events to annihilate them" << Endl;
      Log() << kINFO << "found a total of " << totalPosWeights << " of events with positive weights" << Endl;

   for (Int_t i=0; i<2; i++){

         std::cout << "<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant="
                   << " did you use variables that are linear combinations or highly correlated?"
         std::cout << "<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
                   << " did you use variables that are linear combinations?"

   Log() << kINFO << "Found a total of " << totalNegWeights << " in negative weights out of " << fEventSample.size() << " training events " << Endl;

            if (iClassID==fEventSample[iev]->GetClass() && fEventSample[iev]->GetWeight() > 0){

               fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );

               fEventSample[iMin]->SetBoostWeight( 0 );

         } else Log() << kFATAL << "preprocessing didn't find event to pair with the negative weight ... probably a bug" << Endl;

   Log() << kINFO << "<Negative Event Pairing> took: " << timer.GetElapsedTime()

      if (fEventSample[iev]->GetWeight() < 0) {

      if (fEventSample[iev]->GetWeight() > 0) {
         if (fEventSample[iev]->GetClass() == fSignalClass){

   for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];

   Log() << kINFO  << " after PreProcessing, the Event sample is left with " << fEventSample.size() << " events (unweighted), all with positive weights, adding up to " << totalWeights << Endl;
 
 
   if        (fBoostType=="AdaBoost"){
   } else if (fBoostType=="Grad"){
   } else if (fBoostType=="Bagging" && fRandomisedTrees){

   Log() << kINFO << " the following BDT parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
      Log() << kWARNING << it->first << Endl;
      std::ostringstream oss;

   std::map<TString,Double_t>::iterator it;
      Log() << kWARNING << it->first << " = " << it->second << Endl;
      if      (it->first ==  "MaxDepth"       ) SetMaxDepth        ((Int_t)it->second);
      else if (it->first ==  "MinNodeSize"    ) SetMinNodeSize     (it->second);
      else if (it->first ==  "NTrees"         ) SetNTrees          ((Int_t)it->second);
      else if (it->first ==  "NodePurityLimit") SetNodePurityLimit (it->second);
      else if (it->first ==  "AdaBoostBeta"   ) SetAdaBoostBeta    (it->second);
      else if (it->first ==  "Shrinkage"      ) SetShrinkage       (it->second);
      else if (it->first ==  "UseNvars"       ) SetUseNvars        ((Int_t)it->second);
      else if (it->first ==  "BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
      else Log() << kFATAL << " SetParameter for " << it->first << " not yet implemented " << Endl;
 
 
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"

   if (fInteractive && fInteractive->NotInitialized()){
     std::vector<TString> titles = {"Boost weight", "Error Fraction"};
     fInteractive->Init(titles);

   fIPyMaxIter = fNTrees;
   fExitFromTraining = false;

   if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with BDT; "
                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\""

      Log() << kINFO << "Regression Loss Function: " << fRegressionLossFunctionBDTG->Name() << Endl;

   Log() << kINFO << "Training " << fNTrees << " Decision Trees ... patience please" << Endl;

   Log() << kDEBUG << "Training with maximal depth = " << fMaxDepth
         << ", MinNodeEvents=" << fMinNodeEvents
         << ", NTrees=" << fNTrees
         << ", NodePurityLimit=" << fNodePurityLimit
         << ", AdaBoostBeta=" << fAdaBoostBeta
 
   if (DoRegression()) {
      hname="Boost event weights distribution";

   if(!DoMulticlass()){
      h->SetXTitle("boost weight");

      if (fDoBoostMonitor){
         TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);

      h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees);
      h->SetXTitle("#tree");
      h->SetYTitle("boost weight");
      results->Store(h, "BoostWeightsVsTree");

      h = new TH1F("ErrFractHist","error fraction vs tree number",fNTrees,0,fNTrees);
      h->SetXTitle("#tree");
      h->SetYTitle("error fraction");

   fMonitorNtuple= new TTree("MonitorNtuple","BDT variables");
   fMonitorNtuple->Branch("iTree",&fITree,"iTree/I");
   fMonitorNtuple->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorNtuple->Branch("errorFraction",&fErrorFraction,"errorFraction/D");
 
   if(fBoostType=="Grad"){
      InitGradBoost(fEventSample);

     if (fExitFromTraining) break;
     fIPyCurrentIter = itree;

         if (fBoostType!="Grad"){
            Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
                  << "Please change boost option accordingly (BoostType=Grad)." << Endl;

            fForest.push_back( new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
                                                 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
            fForest.back()->SetNVars(GetNvar());
            if (fUseFisherCuts) {
               fForest.back()->SetUseFisherCuts();
               fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
               fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);

            Double_t bw = this->Boost(*fTrainSample, fForest.back(),i);
               fBoostWeights.push_back(bw);
               fBoostWeights.push_back(0);
               Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;

                                              fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
         fForest.push_back(dt);
         fForest.back()->SetNVars(GetNvar());
         if (fUseFisherCuts) {
            fForest.back()->SetUseFisherCuts();
            fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
            fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);

         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad") {

         fForest.back()->SetPruneMethod(fPruneMethod);
         fForest.back()->SetPruneStrength(fPruneStrength);

         Double_t bw = this->Boost(*fTrainSample, fForest.back());
            fBoostWeights.push_back(bw);
            fBoostWeights.push_back(0);
            Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;

         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad"){
            fForest.back()->CleanTree();

           fInteractive->AddPoint(itree, fBoostWeight, fErrorFraction);

         fMonitorNtuple->Fill();
         if (fDoBoostMonitor){
            if (! DoRegression() ){
                     ) BoostMonitor(itree);

   Log() << kDEBUG << "\t<Train> elapsed time: " << timer.GetElapsedTime()

      Log() << kDEBUG << "\t<Train> average number of nodes (w/o pruning) : "
      Log() << kDEBUG << "\t<Train> average number of nodes before/after pruning : "

   Log() << kDEBUG << "Now I delete the private data sample" << Endl;
   for (UInt_t i=0; i<fEventSample.size();      i++) delete fEventSample[i];
   for (UInt_t i=0; i<fValidationSample.size(); i++) delete fValidationSample[i];
   fEventSample.clear();
   fValidationSample.clear();

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
 
 
   return 2.0/(1.0+exp(-2.0*sum))-1;
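   // Note: 2/(1+exp(-2*sum)) - 1 is algebraically identical to tanh(sum); the summed
   // leaf responses of the gradient-boosted trees are mapped onto (-1, 1).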
 
 
   if (DoMulticlass()) {
      std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;

         fResiduals[e].at(cls) += fForest.back()->CheckEvent(e, kFALSE);

      std::map<const TMVA::Event *, std::vector<double>> &residuals = this->fResiduals;

         for (Int_t i = start; i < end; ++i) {

   fRegressionLossFunctionBDTG->SetTargets(eventSample, fLossFunctionEventInfo);

   std::unordered_map<TMVA::DecisionTreeNode*, LeafInfo> leaves;
      v.sumWeightTarget += target * weight;

      const Double_t K = DataInfo().GetNClasses();
      iLeave.first->SetResponse(fShrinkage * (K - 1) / K * iLeave.second.sumWeightTarget / iLeave.second.sum2);

   DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);

   std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > > leaves;
      (leaves[node]).push_back(fLossFunctionEventInfo[*e]);

      (iLeave->first)->SetResponse(fShrinkage*fit);

   UpdateTargetsRegression(*fTrainSample);

      fRegressionLossFunctionBDTG->Init(fLossFunctionEventInfo, fBoostWeights);
      UpdateTargetsRegression(*fTrainSample,kTRUE);

   else if(DoMulticlass()){
            fResiduals[*e].push_back(0);

         Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5;
         fResiduals[*e].push_back(0);
 
 
         nfalse += fValidationSample[ievt]->GetWeight();

   else if (fBoostType=="Bagging")     returnVal = this->Bagging   ( );

   else if (fBoostType=="Grad"){
      else if(DoMulticlass())

      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType << " called" << Endl;

      GetBaggedSubSample(fEventSample);
 
 
   TH1F *tmpS = new TH1F( "tmpS", "",     100 , -1., 1.00001 );
   TH1F *tmpB = new TH1F( "tmpB", "",     100 , -1., 1.00001 );

      const Event* event = GetTestingEvent(iev);
      tmp->Fill(PrivateGetMvaValue(event),event->GetWeight());

   std::vector<TH1F*> hS;
   std::vector<TH1F*> hB;
      results->Store(hS.back(),hS.back()->GetTitle());

      if (fEventSample[iev]->GetBoostWeight() > max) max = 1.01*fEventSample[iev]->GetBoostWeight();

   std::vector<TH1F*> *h;
         (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar),fEventSample[iev]->GetWeight());
 
 
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);

      UInt_t iclass=(*e)->GetClass();

      if ( DoRegression() ) {

            if (DataInfo().IsSignal(*e)) trueType = 1;

   if ( DoRegression() ) {
      if (fAdaBoostR2Loss=="linear"){
      else if (fAdaBoostR2Loss=="quadratic"){
      else if (fAdaBoostR2Loss=="exponential"){

         Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
               << " namely " << fAdaBoostR2Loss << "\n"
               << "and this is not implemented... a typo in the options ??" << Endl;

   if (err >= 0.5 && fUseYesNoLeaf) {
      if (dt->GetNNodes() == 1){
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"

         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, please check your code (i.e... the BDT code), I "
               << " stop boosting here" <<  Endl;
   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." <<  Endl;

      if (fUseYesNoLeaf||DoRegression()){
         if ((!( (dt->CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {

            if ( (*e)->GetWeight() > 0 ){
               (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
               if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
               else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

         if (DataInfo().IsSignal(*e)) trueType = 1;

         if ( (*e)->GetWeight() > 0 ){
            (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
            if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);
            else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

      newSumw[(*e)->GetClass()] += (*e)->GetWeight();

      if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );

   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);

   fErrorFraction = err;
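   // For orientation (standard AdaBoost, which the re-weighting above follows): with
   // weighted misclassification rate err, misclassified events have their weights scaled
   // by roughly ((1-err)/err)^AdaBoostBeta before renormalisation, and each tree enters
   // the weighted vote with a boost weight proportional to log((1-err)/err).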
 
 
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);

      UInt_t iclass=(*e)->GetClass();

      if ( DoRegression() ) {
         Log() << kFATAL << " AdaCost not implemented for regression" << Endl;

         else Log() << kERROR << "something went wrong in AdaCost" << Endl;

   if ( DoRegression() ) {
      Log() << kFATAL << " AdaCost not implemented for regression" << Endl;

      else Log() << kERROR << "something went wrong in AdaCost" << Endl;

      if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
      if ( (*e)->GetWeight() > 0 ){
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
         if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor);

      if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );

   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);

   fErrorFraction = err;
 
 
   if (!fSubSample.empty()) fSubSample.clear();

      n = trandom->PoissonD(fBaggedSampleFraction);
      for (Int_t i=0;i<n;i++) fSubSample.push_back(*e);
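   // Each event is added n times with n drawn from Poisson(fBaggedSampleFraction), so the
   // bagged sub-sample contains on average BaggedSampleFraction * N events and a given
   // event can appear several times or not at all.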
 
 
   if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;

   if (fAdaBoostR2Loss=="linear"){
   else if (fAdaBoostR2Loss=="quadratic"){
   else if (fAdaBoostR2Loss=="exponential"){

      Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
            << " namely " << fAdaBoostR2Loss << "\n"
            << "and this is not implemented... a typo in the options ??" << Endl;

      if (dt->GetNNodes() == 1){
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"

         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, but is possible for regression trees, and"
               << " should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
               << " stop boosting " <<  Endl;
   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." <<  Endl;

      if ( (*e)->GetWeight() > 0 ){
            Log() << kINFO << "Weight=    "   <<   (*e)->GetWeight() << Endl;
            Log() << kINFO  << "BoostWeight= " <<   (*e)->GetBoostWeight() << Endl;
            Log() << kINFO  << "boostweight=" << boostWeight << "  err= " << err << Endl;
            Log() << kINFO  << "maxDev     = " <<  maxDev << Endl;
            Log() << kINFO  << "target     = " <<  (*e)->GetTarget(0)  << Endl;
            Log() << kINFO  << "estimate   = " <<  dt->CheckEvent(*e,kFALSE)  << Endl;

         (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);

      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );

   results->GetHist("BoostWeightsVsTree")->SetBinContent(fForest.size(),1./boostWeight);
   results->GetHist("ErrorFrac")->SetBinContent(fForest.size(),err);

   fErrorFraction = err;
 
 
   if (fDoPreselection){

   for (UInt_t i=0; i< fForest.size(); i++) {
      void* trxml = fForest[i]->AddXMLTo(wght);

   for (i=0; i<fForest.size(); i++) delete fForest[i];

   fBoostWeights.clear();

      fIsLowBkgCut.resize(GetNvar());
      fLowBkgCut.resize(GetNvar());
      fIsLowSigCut.resize(GetNvar());
      fLowSigCut.resize(GetNvar());
      fIsHighBkgCut.resize(GetNvar());
      fHighBkgCut.resize(GetNvar());
      fIsHighSigCut.resize(GetNvar());
      fHighSigCut.resize(GetNvar());

   if(gTools().HasAttr(parent, "TreeType")) {

      fForest.back()->SetTreeID(i++);
 
 
   Int_t analysisType(0);

   istr >> dummy >> fNTrees;
   Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;

   for (UInt_t i=0;i<fForest.size();i++) delete fForest[i];

   fBoostWeights.clear();

   for (int i=0;i<fNTrees;i++) {
         fForest.back()->Print( std::cout );
         Log() << kFATAL << "Error while reading weight file; mismatch iTree="
               << iTree << " i=" << i
               << " dummy " << dummy

      fForest.back()->SetTreeID(i);
      fForest.back()->Read(istr, GetTrainingTMVAVersionCode());
 
 
   return this->GetMvaValue( err, errUpper, 0 );

   if (fDoPreselection) {

   if (fBoostType=="Grad") return GetGradBoostMVA(ev,nTrees);

   return ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 ;
 
 
   if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();

   std::vector<Double_t> temp(nClasses);

   std::vector<TMVA::DecisionTree *> forest = fForest;

   std::transform(temp.begin(), temp.end(), temp.begin(), [](Double_t d){return exp(d);});

      (*fMulticlassReturnVal).push_back(p_cls);

   return *fMulticlassReturnVal;
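   // After the exp() transform, building 1/(1 + sum_{j != i} temp[j]/temp[i]) per class
   // (as done in the generated standalone code in MakeClassSpecific() further below) is
   // algebraically the softmax exp(F_i)/sum_j exp(F_j), with F_i the summed response of
   // the trees belonging to class i.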
 
 
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   const Event * ev = GetEvent();

   if (fBoostType=="AdaBoostR2") {
      vector< Double_t > response(fForest.size());
      vector< Double_t > weight(fForest.size());

      std::vector< std::vector<Double_t> > vtemp;
      vtemp.push_back( response );
      vtemp.push_back( weight );

   else if(fBoostType=="Grad"){
      evT->SetTarget(0, myMVA+fBoostWeights[0] );

      evT->SetTarget(0, ( norm > std::numeric_limits<double>::epsilon() ) ? myMVA /= norm : 0 );

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->push_back( evT2->GetTarget(0) );

   return *fRegressionReturnVal;
 
 
   Log() << kDEBUG << "\tWrite monitoring histograms to file: " << BaseDir()->GetPath() << Endl;

   fMonitorNtuple->Write();

   fVariableImportance.resize(GetNvar());
      fVariableImportance[ivar]=0;
      sum += fVariableImportance[ivar];
   return fVariableImportance;

   else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;

   fRanking = new Ranking( GetName(), "Variable Importance" );
 
 
   Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
   Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
   Log() << "vote of the individual trees. Consecutive decision trees are  " << Endl;
   Log() << "trained using the original training data set with re-weighted " << Endl;
   Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following tree." << Endl;

   Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
   Log() << "using a single discriminant variable at a time. A test event " << Endl;
   Log() << "ending up after the sequence of left-right splits in a final " << Endl;
   Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
   Log() << "depending on the majority type of training events in that node." << Endl;

   Log() << "By the nature of the binary splits performed on the individual" << Endl;
   Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
   Log() << "between variables (they need to approximate the linear split in" << Endl;
   Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
   Log() << "variables individually). Hence decorrelation could be useful " << Endl;
   Log() << "to optimise the BDT performance." << Endl;

   Log() << "The two most important parameters in the configuration are the " << Endl;
   Log() << "minimal number of events requested by a leaf node as percentage of the " << Endl;
   Log() << "   number of training events (option \"MinNodeSize\", replacing the actual number " << Endl;
   Log() << "   of events \"nEventsMin\" as given in earlier versions)." << Endl;
   Log() << "If this number is too large, detailed features " << Endl;
   Log() << "in the parameter space are hard to model. If it is too small, " << Endl;
   Log() << "the risk to overtrain rises and boosting seems to be less effective." << Endl;
   Log() << "  Typical values from our current experience for best performance " << Endl;
   Log() << "  are between 0.5(%) and 10(%). " << Endl;

   Log() << "The default minimal number is currently set to " << Endl;
   Log() << "   max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
   Log() << "and can be changed by the user." << Endl;
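   // Illustrative arithmetic for the default quoted above: with 100000 training events and
   // 10 input variables, max(20, 100000 / 10^2 / 10) = max(20, 100) = 100 events per leaf.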
 
   Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
   Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
   Log() << "that is used when determining after the training which splits " << Endl;
   Log() << "are considered statistically insignificant and are removed. The" << Endl;
   Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
   Log() << "the comparison between efficiencies obtained on the training and" << Endl;
   Log() << "the independent test sample. They should be equal within statistical" << Endl;
   Log() << "errors, in order to minimize statistical fluctuations in different samples." << Endl;
 
 
   fout << "   std::vector<"<<nodeName<<"*> fForest;       // i.e. root nodes of decision trees" << std::endl;
   fout << "   std::vector<double>                fBoostWeights; // the weights applied in the individual boosts" << std::endl;
   fout << "};" << std::endl << std::endl;

      fout << "std::vector<double> ReadBDTG::GetMulticlassValues__( const std::vector<double>& inputValues ) const" << std::endl;
      fout << "{" << std::endl;
      fout << "   uint nClasses = " << DataInfo().GetNClasses() << ";" << std::endl;
      fout << "   std::vector<double> fMulticlassReturnVal;" << std::endl;
      fout << "   fMulticlassReturnVal.reserve(nClasses);" << std::endl;

      fout << "   std::vector<double> temp(nClasses);" << std::endl;
      fout << "   auto forestSize = fForest.size();" << std::endl;
      fout << "   // trees 0, nClasses, 2*nClasses, ... belong to class 0" << std::endl;
      fout << "   // trees 1, nClasses+1, 2*nClasses+1, ... belong to class 1 and so forth" << std::endl;
      fout << "   uint classOfTree = 0;" << std::endl;
      fout << "   for (uint itree = 0; itree < forestSize; ++itree) {" << std::endl;
      fout << "      BDTGNode *current = fForest[itree];" << std::endl;
      fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
      fout << "         if (current->GoesRight(inputValues)) current=(BDTGNode*)current->GetRight();" << std::endl;
      fout << "         else current=(BDTGNode*)current->GetLeft();" << std::endl;
      fout << "      }" << std::endl;
      fout << "      temp[classOfTree] += current->GetResponse();" << std::endl;
      fout << "      if (++classOfTree == nClasses) classOfTree = 0; // cheap modulo" << std::endl;
      fout << "   }" << std::endl;

      fout << "   // we want to calculate sum of exp(temp[j] - temp[i]) for all i,j (i!=j)" << std::endl;
      fout << "   // first calculate exp(), then replace minus with division." << std::endl;
      fout << "   std::transform(temp.begin(), temp.end(), temp.begin(), [](double d){return exp(d);});" << std::endl;

      fout << "   for(uint iClass=0; iClass<nClasses; iClass++){" << std::endl;
      fout << "      double norm = 0.0;" << std::endl;
      fout << "      for(uint j=0;j<nClasses;j++){" << std::endl;
      fout << "         if(iClass!=j)" << std::endl;
      fout << "            norm += temp[j] / temp[iClass];" << std::endl;
      fout << "      }" << std::endl;
      fout << "      fMulticlassReturnVal.push_back(1.0/(1.0+norm));" << std::endl;
      fout << "   }" << std::endl;

      fout << "   return fMulticlassReturnVal;" << std::endl;
      fout << "}" << std::endl;
 
      fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
      fout << "{" << std::endl;
      fout << "   double myMVA = 0;" << std::endl;
      if (fDoPreselection){
            if (fIsLowBkgCut[ivar]){
               fout << "   if (inputValues["<<ivar<<"] < " << fLowBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
            if (fIsLowSigCut[ivar]){
               fout << "   if (inputValues["<<ivar<<"] < "<< fLowSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;
            if (fIsHighBkgCut[ivar]){
               fout << "   if (inputValues["<<ivar<<"] > "<<fHighBkgCut[ivar] <<")  return -1;  // is background preselection cut" << std::endl;
            if (fIsHighSigCut[ivar]){
               fout << "   if (inputValues["<<ivar<<"] > "<<fHighSigCut[ivar]<<")  return  1;  // is signal preselection cut" << std::endl;

      if (fBoostType!="Grad"){
         fout << "   double norm  = 0;" << std::endl;

      fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
      fout << "      "<<nodeName<<" *current = fForest[itree];" << std::endl;
      fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
      fout << "         if (current->GoesRight(inputValues)) current=("<<nodeName<<"*)current->GetRight();" << std::endl;
      fout << "         else current=("<<nodeName<<"*)current->GetLeft();" << std::endl;
      fout << "      }" << std::endl;
      if (fBoostType=="Grad"){
         fout << "      myMVA += current->GetResponse();" << std::endl;
         if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] *  current->GetNodeType();" << std::endl;
         else               fout << "      myMVA += fBoostWeights[itree] *  current->GetPurity();" << std::endl;
         fout << "      norm  += fBoostWeights[itree];" << std::endl;

      fout << "   }" << std::endl;
      if (fBoostType=="Grad"){
         fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
      else fout << "   return myMVA /= norm;" << std::endl;
      fout << "}" << std::endl << std::endl;

   fout << "void " << className << "::Initialize()" << std::endl;
   fout << "{" << std::endl;
   fout << "  double inf = std::numeric_limits<double>::infinity();" << std::endl;
   fout << "  double nan = std::numeric_limits<double>::quiet_NaN();" << std::endl;

      fout << "  // itree = " << itree << std::endl;
      fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << std::endl;
      fout << "  fForest.push_back( " << std::endl;
      fout << "   );" << std::endl;

   fout << "   return;" << std::endl;
   fout << "};" << std::endl;

   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
   fout << "      delete fForest[itree]; " << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;
 
 
 2881   fout << 
"#include <algorithm>" << std::endl;
 
 2882   fout << 
"#include <limits>" << std::endl;
 
 2893   fout << 
"public:" << std::endl;
 
 2895   fout << 
"   // constructor of an essentially \"empty\" node floating in space" << std::endl;
 
 2897   if (fUseFisherCuts){
 
 2898      fout << 
"                          int nFisherCoeff," << std::endl;
 
 2899      for (
UInt_t i=0;i<GetNVariables()+1;i++){
 
 2900         fout << 
"                          double fisherCoeff"<<i<<
"," << std::endl;
 
 2903   fout << 
"                          int selector, double cutValue, bool cutType, " << std::endl;
 
 2904   fout << 
"                          int nodeType, double purity, double response ) :" << std::endl;
 
 2905   fout << 
"   fLeft         ( left         )," << std::endl;
 
 2906   fout << 
"   fRight        ( right        )," << std::endl;
 
 2907   if (fUseFisherCuts) 
fout << 
"   fNFisherCoeff ( nFisherCoeff )," << std::endl;
 
 2908   fout << 
"   fSelector     ( selector     )," << std::endl;
 
 2909   fout << 
"   fCutValue     ( cutValue     )," << std::endl;
 
 2910   fout << 
"   fCutType      ( cutType      )," << std::endl;
 
 2911   fout << 
"   fNodeType     ( nodeType     )," << std::endl;
 
 2912   fout << 
"   fPurity       ( purity       )," << std::endl;
 
 2913   fout << 
"   fResponse     ( response     ){" << std::endl;
 
 2914   if (fUseFisherCuts){
 
 2915      for (
UInt_t i=0;i<GetNVariables()+1;i++){
 
 2916         fout << 
"     fFisherCoeff.push_back(fisherCoeff"<<i<<
");" << std::endl;
 
 2919   fout << 
"   }" << std::endl << std::endl;
 
   fout << "   virtual ~"<<nodeName<<"();" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetRight( void )  {return fRight; };" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the left " << std::endl;
   fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetLeft( void ) { return fLeft; };   " << std::endl << std::endl;
   fout << "   // return  S/(S+B) (purity) at this node (from  training)" << std::endl << std::endl;
   fout << "   double GetPurity( void ) const { return fPurity; } " << std::endl;
   fout << "   // return the node type" << std::endl;
   fout << "   int    GetNodeType( void ) const { return fNodeType; }" << std::endl;
   fout << "   double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
   fout << "private:" << std::endl << std::endl;
   fout << "   "<<nodeName<<"*   fLeft;     // pointer to the left daughter node" << std::endl;
   fout << "   "<<nodeName<<"*   fRight;    // pointer to the right daughter node" << std::endl;
   if (fUseFisherCuts){
      fout << "   int                     fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
      fout << "   std::vector<double>     fFisherCoeff;  // the fisher coeff (offset at the last element)" << std::endl;
   }
   fout << "   int                     fSelector; // index of variable used in node selection (decision tree)   " << std::endl;
   fout << "   double                  fCutValue; // cut value applied on this node to discriminate bkg against sig" << std::endl;
   fout << "   bool                    fCutType;  // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
   fout << "   int                     fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
   fout << "   double                  fPurity;   // Purity of node from training"<< std::endl;
   fout << "   double                  fResponse; // Regression response value of node" << std::endl;
   fout << "}; " << std::endl;
 
   fout << "//_______________________________________________________________________" << std::endl;
   // (the destructor definition header, ~nodeName(), is written on a line not shown)
   fout << "{" << std::endl;
   fout << "   if (fLeft  != NULL) delete fLeft;" << std::endl;
   fout << "   if (fRight != NULL) delete fRight;" << std::endl;
   fout << "}; " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   bool result;" << std::endl;
   if (fUseFisherCuts){
      fout << "   if (fNFisherCoeff == 0){" << std::endl;
      fout << "     result = (inputValues[fSelector] >= fCutValue );" << std::endl;
      fout << "   }else{" << std::endl;
      fout << "     double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
      fout << "     for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
      fout << "       fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
      fout << "     result = fisher > fCutValue;" << std::endl;
      fout << "   }" << std::endl;
   }else{
      fout << "     result = (inputValues[fSelector] >= fCutValue );" << std::endl;
   }
   fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
   fout << "   else return !result;" << std::endl;
   fout << "}" << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the left" << std::endl;
   fout << "   if (!this->GoesRight(inputValues)) return true;" << std::endl;
   fout << "   else return false;" << std::endl;
   fout << "}" << std::endl;
   fout << "#endif" << std::endl;
 
 
   // (MakeClassInstantiateNode: guard against being called with a null node pointer)
      Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" << Endl;

   fout << "NN("<<std::endl;
   if (n->GetLeft() != NULL){
      this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetLeft(), fout, className );
   }
   else {
      fout << "0";
   }
   fout << ", " <<std::endl;
   if (n->GetRight() != NULL){
      this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetRight(), fout, className );
   }
   else {
      fout << "0";
   }
   fout << ", " <<  std::endl
        << std::setprecision(6);
   if (fUseFisherCuts){
      fout << n->GetNFisherCoeff() << ", ";
      for (UInt_t i=0; i< GetNVariables()+1; i++) {
         if (n->GetNFisherCoeff() == 0 ){
            fout << "0, ";
         }else{
            fout << n->GetFisherCoeff(i) << ", ";
         }
      }
   }
   fout << n->GetSelector() << ", "
        << n->GetCutValue() << ", "
        << n->GetCutType() << ", "
        << n->GetNodeType() << ", "
        << n->GetPurity() << ","
        << n->GetResponse() << ") ";
 
 
   // (DeterminePreselectionCuts: initialise the per-variable cut flags and cut values)
   fIsLowSigCut.assign(GetNvar(),kFALSE);
   fIsLowBkgCut.assign(GetNvar(),kFALSE);
   fIsHighSigCut.assign(GetNvar(),kFALSE);
   fIsHighBkgCut.assign(GetNvar(),kFALSE);

   fLowSigCut.assign(GetNvar(),0.);
   fLowBkgCut.assign(GetNvar(),0.);
   fHighSigCut.assign(GetNvar(),0.);
   fHighBkgCut.assign(GetNvar(),0.);

   // (sum up the total signal and background weight of the event sample)
   if (DataInfo().IsSignal(*it)){
      nTotS += (*it)->GetWeight();
   }
   else {
      nTotB += (*it)->GetWeight();
   }

   // (per-variable loop over the events, looking for ranges at the low/high end of each
   //  variable that contain only signal or only background)
   for( ; it != it_end; ++it ) {
      if (DataInfo().IsSignal(**it))
         // ...
   }

   // dVal: 1% of the range of variable ivar
   Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;
   // ...

   Log() << kDEBUG << " \tfound and suggest the following possible pre-selection cuts " << Endl;
   if (fDoPreselection) Log() << kDEBUG << "\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" << Endl;
   else  Log() << kDEBUG << "\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample"<< Endl;

   // (loop over the input variables ivar, reporting the cuts that were found)
   if (fIsLowBkgCut[ivar]){
      Log() << kDEBUG  << " \tfound cut: Bkg if var " << ivar << " < "  << fLowBkgCut[ivar] << Endl;
   }
   if (fIsLowSigCut[ivar]){
      Log() << kDEBUG  << " \tfound cut: Sig if var " << ivar << " < "  << fLowSigCut[ivar] << Endl;
   }
   if (fIsHighBkgCut[ivar]){
      Log() << kDEBUG  << " \tfound cut: Bkg if var " << ivar << " > "  << fHighBkgCut[ivar] << Endl;
   }
   if (fIsHighSigCut[ivar]){
      Log() << kDEBUG  << " \tfound cut: Sig if var " << ivar << " > "  << fHighSigCut[ivar] << Endl;
   }
 
 
   // (these flags are checked again per variable when the stored cuts are applied to an
   //  event, cf. ApplyPreselectionCuts)
   if (fIsLowBkgCut[ivar]){
      // ...
   }
   if (fIsLowSigCut[ivar]){
      // ...
   }
   if (fIsHighBkgCut[ivar]){
      // ...
   }
   if (fIsHighSigCut[ivar]){
      // ...
   }
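// Sketch (assumption, mirroring the DoPreselection behaviour described in the debug
// messages above rather than copying ApplyPreselectionCuts verbatim): an event that
// falls into one of the stored one-sided pure regions is classified immediately as
// background (-1) or signal (+1); only events with result 0 are passed to the forest.
#include <vector>

struct PreselectionCuts {
   std::vector<bool>   isLowSig, isLowBkg, isHighSig, isHighBkg;   // per-variable flags
   std::vector<double> lowSig,   lowBkg,   highSig,   highBkg;     // per-variable cut values
};

int ApplyPreselectionSketch(const PreselectionCuts& c, const std::vector<double>& value)
{
   int result = 0;                                           // 0: no preselection decision
   for (size_t ivar = 0; ivar < value.size(); ivar++) {
      if (c.isLowBkg[ivar]  && value[ivar] < c.lowBkg[ivar])  result = -1;   // background-like
      if (c.isLowSig[ivar]  && value[ivar] < c.lowSig[ivar])  result =  1;   // signal-like
      if (c.isHighBkg[ivar] && value[ivar] > c.highBkg[ivar]) result = -1;
      if (c.isHighSig[ivar] && value[ivar] > c.highSig[ivar]) result =  1;
   }
   return result;   // the caller evaluates the BDT forest only when this returns 0
}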
 
 